dsp - CMSIS DSP Library from CMSIS 2.0. See http://www.onarm.com/cmsis/ for full details

Users » simon » Code » dsp

Simon Ford / dsp

CMSIS DSP Library from CMSIS 2.0. See http://www.onarm.com/cmsis/ for full details

Dependents: K22F_DSP_Matrix_least_square BNO055-ELEC3810 1BNO055 ECE4180Project--Slave2 ... more

Files at this revision

API Documentation at this revision

Revision 0:1014af42efd9, committed 2011-03-10

Committer:: simon
Date:: Thu Mar 10 15:07:50 2011 +0000
Child:: 1:2ec9aa7241dc
Commit message:

Changed in this revision

inc/arm_common_tables.h	Show annotated file Show diff for this revision Revisions of this file
inc/arm_math.h	Show annotated file Show diff for this revision Revisions of this file
src/Common/Include/math_helper.h	Show annotated file Show diff for this revision Revisions of this file
src/Common/Source/math_helper.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/BasicMathFunctions/arm_abs_f32.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/BasicMathFunctions/arm_abs_q15.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/BasicMathFunctions/arm_abs_q31.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/BasicMathFunctions/arm_abs_q7.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/BasicMathFunctions/arm_add_f32.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/BasicMathFunctions/arm_add_q15.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/BasicMathFunctions/arm_add_q31.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/BasicMathFunctions/arm_add_q7.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/BasicMathFunctions/arm_dot_prod_f32.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/BasicMathFunctions/arm_dot_prod_q15.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/BasicMathFunctions/arm_dot_prod_q31.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/BasicMathFunctions/arm_dot_prod_q7.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/BasicMathFunctions/arm_mult_f32.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/BasicMathFunctions/arm_mult_q15.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/BasicMathFunctions/arm_mult_q31.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/BasicMathFunctions/arm_mult_q7.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/BasicMathFunctions/arm_negate_f32.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/BasicMathFunctions/arm_negate_q15.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/BasicMathFunctions/arm_negate_q31.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/BasicMathFunctions/arm_negate_q7.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/BasicMathFunctions/arm_offset_f32.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/BasicMathFunctions/arm_offset_q15.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/BasicMathFunctions/arm_offset_q31.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/BasicMathFunctions/arm_offset_q7.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/BasicMathFunctions/arm_scale_f32.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/BasicMathFunctions/arm_scale_q15.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/BasicMathFunctions/arm_scale_q31.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/BasicMathFunctions/arm_scale_q7.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/BasicMathFunctions/arm_shift_q15.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/BasicMathFunctions/arm_shift_q31.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/BasicMathFunctions/arm_shift_q7.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/BasicMathFunctions/arm_sub_f32.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/BasicMathFunctions/arm_sub_q15.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/BasicMathFunctions/arm_sub_q31.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/BasicMathFunctions/arm_sub_q7.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/CommonTables/arm_common_tables.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/ComplexMathFunctions/arm_cmplx_conj_f32.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/ComplexMathFunctions/arm_cmplx_conj_q15.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/ComplexMathFunctions/arm_cmplx_conj_q31.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/ComplexMathFunctions/arm_cmplx_dot_prod_f32.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/ComplexMathFunctions/arm_cmplx_dot_prod_q15.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/ComplexMathFunctions/arm_cmplx_dot_prod_q31.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/ComplexMathFunctions/arm_cmplx_mag_f32.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/ComplexMathFunctions/arm_cmplx_mag_q15.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/ComplexMathFunctions/arm_cmplx_mag_q31.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/ComplexMathFunctions/arm_cmplx_mag_squared_f32.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/ComplexMathFunctions/arm_cmplx_mag_squared_q15.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/ComplexMathFunctions/arm_cmplx_mag_squared_q31.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/ComplexMathFunctions/arm_cmplx_mult_cmplx_f32.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/ComplexMathFunctions/arm_cmplx_mult_cmplx_q15.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/ComplexMathFunctions/arm_cmplx_mult_cmplx_q31.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/ComplexMathFunctions/arm_cmplx_mult_real_f32.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/ComplexMathFunctions/arm_cmplx_mult_real_q15.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/ComplexMathFunctions/arm_cmplx_mult_real_q31.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/ControllerFunctions/arm_pid_init_f32.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/ControllerFunctions/arm_pid_init_q15.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/ControllerFunctions/arm_pid_init_q31.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/ControllerFunctions/arm_pid_reset_f32.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/ControllerFunctions/arm_pid_reset_q15.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/ControllerFunctions/arm_pid_reset_q31.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/ControllerFunctions/arm_sin_cos_f32.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/ControllerFunctions/arm_sin_cos_q31.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FastMathFunctions/arm_cos_f32.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FastMathFunctions/arm_cos_q15.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FastMathFunctions/arm_cos_q31.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FastMathFunctions/arm_sin_f32.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FastMathFunctions/arm_sin_q15.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FastMathFunctions/arm_sin_q31.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FastMathFunctions/arm_sqrt_q15.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FastMathFunctions/arm_sqrt_q31.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FilteringFunctions/arm_biquad_cascade_df1_32x64_init_q31.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FilteringFunctions/arm_biquad_cascade_df1_32x64_q31.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FilteringFunctions/arm_biquad_cascade_df1_f32.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FilteringFunctions/arm_biquad_cascade_df1_fast_q15.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FilteringFunctions/arm_biquad_cascade_df1_fast_q31.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FilteringFunctions/arm_biquad_cascade_df1_init_f32.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FilteringFunctions/arm_biquad_cascade_df1_init_q15.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FilteringFunctions/arm_biquad_cascade_df1_init_q31.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FilteringFunctions/arm_biquad_cascade_df1_q15.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FilteringFunctions/arm_biquad_cascade_df1_q31.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FilteringFunctions/arm_biquad_cascade_df2T_f32.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FilteringFunctions/arm_biquad_cascade_df2T_init_f32.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FilteringFunctions/arm_conv_f32.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FilteringFunctions/arm_conv_fast_q15.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FilteringFunctions/arm_conv_fast_q31.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FilteringFunctions/arm_conv_partial_f32.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FilteringFunctions/arm_conv_partial_fast_q15.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FilteringFunctions/arm_conv_partial_fast_q31.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FilteringFunctions/arm_conv_partial_q15.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FilteringFunctions/arm_conv_partial_q31.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FilteringFunctions/arm_conv_partial_q7.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FilteringFunctions/arm_conv_q15.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FilteringFunctions/arm_conv_q31.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FilteringFunctions/arm_conv_q7.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FilteringFunctions/arm_correlate_f32.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FilteringFunctions/arm_correlate_fast_q15.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FilteringFunctions/arm_correlate_fast_q31.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FilteringFunctions/arm_correlate_q15.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FilteringFunctions/arm_correlate_q31.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FilteringFunctions/arm_correlate_q7.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FilteringFunctions/arm_fir_decimate_f32.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FilteringFunctions/arm_fir_decimate_fast_q15.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FilteringFunctions/arm_fir_decimate_fast_q31.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FilteringFunctions/arm_fir_decimate_init_f32.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FilteringFunctions/arm_fir_decimate_init_q15.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FilteringFunctions/arm_fir_decimate_init_q31.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FilteringFunctions/arm_fir_decimate_q15.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FilteringFunctions/arm_fir_decimate_q31.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FilteringFunctions/arm_fir_f32.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FilteringFunctions/arm_fir_fast_q15.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FilteringFunctions/arm_fir_fast_q31.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FilteringFunctions/arm_fir_init_f32.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FilteringFunctions/arm_fir_init_q15.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FilteringFunctions/arm_fir_init_q31.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FilteringFunctions/arm_fir_init_q7.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FilteringFunctions/arm_fir_interpolate_f32.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FilteringFunctions/arm_fir_interpolate_init_f32.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FilteringFunctions/arm_fir_interpolate_init_q15.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FilteringFunctions/arm_fir_interpolate_init_q31.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FilteringFunctions/arm_fir_interpolate_q15.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FilteringFunctions/arm_fir_interpolate_q31.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FilteringFunctions/arm_fir_lattice_f32.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FilteringFunctions/arm_fir_lattice_init_f32.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FilteringFunctions/arm_fir_lattice_init_q15.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FilteringFunctions/arm_fir_lattice_init_q31.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FilteringFunctions/arm_fir_lattice_q15.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FilteringFunctions/arm_fir_lattice_q31.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FilteringFunctions/arm_fir_q15.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FilteringFunctions/arm_fir_q31.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FilteringFunctions/arm_fir_q7.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FilteringFunctions/arm_fir_sparse_f32.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FilteringFunctions/arm_fir_sparse_init_f32.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FilteringFunctions/arm_fir_sparse_init_q15.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FilteringFunctions/arm_fir_sparse_init_q31.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FilteringFunctions/arm_fir_sparse_init_q7.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FilteringFunctions/arm_fir_sparse_q15.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FilteringFunctions/arm_fir_sparse_q31.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FilteringFunctions/arm_fir_sparse_q7.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FilteringFunctions/arm_iir_lattice_f32.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FilteringFunctions/arm_iir_lattice_init_f32.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FilteringFunctions/arm_iir_lattice_init_q15.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FilteringFunctions/arm_iir_lattice_init_q31.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FilteringFunctions/arm_iir_lattice_q15.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FilteringFunctions/arm_iir_lattice_q31.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FilteringFunctions/arm_lms_f32.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FilteringFunctions/arm_lms_init_f32.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FilteringFunctions/arm_lms_init_q15.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FilteringFunctions/arm_lms_init_q31.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FilteringFunctions/arm_lms_norm_f32.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FilteringFunctions/arm_lms_norm_init_f32.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FilteringFunctions/arm_lms_norm_init_q15.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FilteringFunctions/arm_lms_norm_init_q31.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FilteringFunctions/arm_lms_norm_q15.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FilteringFunctions/arm_lms_norm_q31.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FilteringFunctions/arm_lms_q15.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/FilteringFunctions/arm_lms_q31.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/MatrixFunctions/arm_mat_add_f32.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/MatrixFunctions/arm_mat_add_q15.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/MatrixFunctions/arm_mat_add_q31.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/MatrixFunctions/arm_mat_init_f32.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/MatrixFunctions/arm_mat_init_q15.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/MatrixFunctions/arm_mat_init_q31.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/MatrixFunctions/arm_mat_inverse_f32.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/MatrixFunctions/arm_mat_mult_f32.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/MatrixFunctions/arm_mat_mult_fast_q15.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/MatrixFunctions/arm_mat_mult_fast_q31.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/MatrixFunctions/arm_mat_mult_q15.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/MatrixFunctions/arm_mat_mult_q31.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/MatrixFunctions/arm_mat_scale_f32.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/MatrixFunctions/arm_mat_scale_q15.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/MatrixFunctions/arm_mat_scale_q31.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/MatrixFunctions/arm_mat_sub_f32.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/MatrixFunctions/arm_mat_sub_q15.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/MatrixFunctions/arm_mat_sub_q31.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/MatrixFunctions/arm_mat_trans_f32.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/MatrixFunctions/arm_mat_trans_q15.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/MatrixFunctions/arm_mat_trans_q31.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/StatisticsFunctions/arm_max_f32.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/StatisticsFunctions/arm_max_q15.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/StatisticsFunctions/arm_max_q31.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/StatisticsFunctions/arm_max_q7.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/StatisticsFunctions/arm_mean_f32.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/StatisticsFunctions/arm_mean_q15.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/StatisticsFunctions/arm_mean_q31.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/StatisticsFunctions/arm_mean_q7.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/StatisticsFunctions/arm_min_f32.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/StatisticsFunctions/arm_min_q15.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/StatisticsFunctions/arm_min_q31.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/StatisticsFunctions/arm_min_q7.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/StatisticsFunctions/arm_power_f32.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/StatisticsFunctions/arm_power_q15.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/StatisticsFunctions/arm_power_q31.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/StatisticsFunctions/arm_power_q7.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/StatisticsFunctions/arm_rms_f32.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/StatisticsFunctions/arm_rms_q15.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/StatisticsFunctions/arm_rms_q31.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/StatisticsFunctions/arm_std_f32.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/StatisticsFunctions/arm_std_q15.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/StatisticsFunctions/arm_std_q31.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/StatisticsFunctions/arm_var_f32.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/StatisticsFunctions/arm_var_q15.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/StatisticsFunctions/arm_var_q31.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/SupportFunctions/arm_copy_f32.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/SupportFunctions/arm_copy_q15.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/SupportFunctions/arm_copy_q31.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/SupportFunctions/arm_copy_q7.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/SupportFunctions/arm_fill_f32.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/SupportFunctions/arm_fill_q15.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/SupportFunctions/arm_fill_q31.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/SupportFunctions/arm_fill_q7.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/SupportFunctions/arm_float_to_q15.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/SupportFunctions/arm_float_to_q31.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/SupportFunctions/arm_float_to_q7.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/SupportFunctions/arm_q15_to_float.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/SupportFunctions/arm_q15_to_q31.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/SupportFunctions/arm_q15_to_q7.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/SupportFunctions/arm_q31_to_float.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/SupportFunctions/arm_q31_to_q15.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/SupportFunctions/arm_q31_to_q7.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/SupportFunctions/arm_q7_to_float.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/SupportFunctions/arm_q7_to_q15.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/SupportFunctions/arm_q7_to_q31.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/TransformFunctions/arm_cfft_radix4_f32.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/TransformFunctions/arm_cfft_radix4_init_f32.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/TransformFunctions/arm_cfft_radix4_init_q15.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/TransformFunctions/arm_cfft_radix4_init_q31.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/TransformFunctions/arm_cfft_radix4_q15.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/TransformFunctions/arm_cfft_radix4_q31.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/TransformFunctions/arm_dct4_f32.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/TransformFunctions/arm_dct4_init_f32.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/TransformFunctions/arm_dct4_init_q15.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/TransformFunctions/arm_dct4_init_q31.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/TransformFunctions/arm_dct4_q15.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/TransformFunctions/arm_dct4_q31.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/TransformFunctions/arm_rfft_f32.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/TransformFunctions/arm_rfft_init_f32.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/TransformFunctions/arm_rfft_init_q15.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/TransformFunctions/arm_rfft_init_q31.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/TransformFunctions/arm_rfft_q15.c	Show annotated file Show diff for this revision Revisions of this file
src/Cortex-M4-M3/TransformFunctions/arm_rfft_q31.c	Show annotated file Show diff for this revision Revisions of this file

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/inc/arm_common_tables.h	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,41 @@
+/* ---------------------------------------------------------------------- 
+* Copyright (C) 2010 ARM Limited. All rights reserved. 
+* 
+* $Date:        11. November 2010  
+* $Revision: 	V1.0.2  
+* 
+* Project: 	    CMSIS DSP Library 
+* Title:	    arm_common_tables.h 
+* 
+* Description:	This file has extern declaration for common tables like Bitreverse, reciprocal etc which are used across different functions 
+* 
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.2 2010/11/11 
+*    Documentation updated.  
+* 
+* Version 1.0.1 2010/10/05  
+*    Production release and review comments incorporated. 
+* 
+* Version 1.0.0 2010/09/20  
+*    Production release and review comments incorporated. 
+* -------------------------------------------------------------------- */ 
+ 
+#ifndef _ARM_COMMON_TABLES_H 
+#define _ARM_COMMON_TABLES_H 
+ 
+#include "arm_math.h" 
+ 
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+/* Shared constant lookup tables. Only declared here; the definitions live in another translation unit. */
+/* Bit-reversal table, 256 entries. */
+extern const uint16_t armBitRevTable[256]; 
+/* Reciprocal table for Q15 data, 64 entries. */
+extern const q15_t armRecipTableQ15[64]; 
+/* Reciprocal table for Q31 data, 64 entries. */
+extern const q31_t armRecipTableQ31[64]; 
+
+#ifdef	__cplusplus
+}
+#endif
+ 
+#endif /* _ARM_COMMON_TABLES_H */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/inc/arm_math.h	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,6983 @@
+/* ----------------------------------------------------------------------
+ * Copyright (C) 2010 ARM Limited. All rights reserved.
+ *
+ * $Date:        29. November 2010  
+ * $Revision: 	 V1.0.3  
+ *
+ * Project:      CMSIS DSP Library
+ * Title:	     arm_math.h
+ *
+ * Description:	 Public header file for CMSIS DSP Library
+ *
+ * Target Processor: Cortex-M4/Cortex-M3
+ * 
+ * Version 1.0.3 2010/11/29 
+ *    Re-organized the CMSIS folders and updated documentation. 
+ *
+ * Version 1.0.2 2010/11/11 
+ *    Documentation updated.  
+ * 
+ * Version 1.0.1 2010/10/05  
+ *    Production release and review comments incorporated. 
+ * 
+ * Version 1.0.0 2010/09/20  
+ *    Production release and review comments incorporated. 
+ *-------------------------------------------------------------------*/
+
+/**
+   \mainpage CMSIS DSP Software Library
+   *
+   * <b>Introduction</b>
+   *
+   * This user manual describes the CMSIS DSP software library, 
+   * a suite of common signal processing functions for use on Cortex-M processor based devices.
+   *
+   * The library is divided into a number of modules each covering a specific category:
+   * - Basic math functions
+   * - Fast math functions
+   * - Complex math functions
+   * - Filters
+   * - Matrix functions
+   * - Transforms
+   * - Motor control functions
+   * - Statistical functions
+   * - Support functions
+   * - Interpolation functions
+   *
+   * The library has separate functions for operating on 8-bit integers, 16-bit integers,
+   * 32-bit integer and 32-bit floating-point values. 
+   *
+   * <b>Processor Support</b>
+   *
+   * The library is completely written in C and is fully CMSIS compliant. 
+   * High performance is achieved through maximum use of Cortex-M4 intrinsics. 
+   *
+   * The supplied library source code also builds and runs on the Cortex-M3 processor,
+   * with the DSP intrinsics being emulated through software. 
+   *
+   * A Cortex-M0 version of the library is also being developed;
+   * updates on this activity will be made available shortly.
+
+   *
+   * <b>Toolchain Support</b>
+   *
+   * The library has been developed and tested with MDK-ARM version 4.12. 
+   * The library is being tested in GCC and IAR toolchains and updates on this activity will be made available shortly.
+   *
+   * <b>Using the Library</b>
+   *
+   * The library installer contains prebuilt versions of the libraries in the <code>Lib</code> folder.
+   * - arm_cortexM4_math.lib
+   * - arm_cortexM3_math.lib
+   *
+   * The library functions are declared in the public file <code>arm_math.h</code> which is placed in the <code>Include</code> folder.
+   * Simply include this file, link the appropriate library in the application and begin calling the library functions. The library provides a single 
+   * public header file, <code>arm_math.h</code>, for Cortex-M4, Cortex-M3 and Cortex-M0. Define the appropriate preprocessor macro (ARM_MATH_CM4, ARM_MATH_CM3 or 
+   * ARM_MATH_CM0), depending on the target processor, in the application. 
+   *
+   * <b>Examples</b>
+   *
+   * The library ships with a number of examples which demonstrate how to use the library functions.
+   *
+   * <b>Building the Library</b>
+   *
+   * The library installer also contains project files to re build the library on MDK Tool chain in the <code>Lib</code> folder.
+   * - arm_cortexM4_math.uvproj
+   * - arm_cortexM3_math.uvproj
+   *
+   * The library supports size checking on the input and output matrices. To enable this feature, 
+   * define the MACRO ARM_MATH_MATRIX_CHECK in pre processor settings in project options. 
+   * The library also supports rounding in the support functions; to enable it, define ARM_MATH_ROUNDING in the preprocessor settings.
+   *
+   * The project can be built by opening the appropriate project in MDK-ARM 4.12 chain and defining the optional pre processor MACROs detailed above.
+   *
+   * <b>Copyright Notice</b>
+   *
+   * Copyright (C) 2010 ARM Limited. All rights reserved.
+   */
+
+
+/**
+ * @defgroup groupMath Basic Math Functions
+ */
+
+/**
+ * @defgroup groupFastMath Fast Math Functions
+ * This set of functions provides a fast approximation to sine, cosine, and square root.
+ * As compared to most of the other functions in the CMSIS math library, the fast math functions
+ * operate on individual values and not arrays.
+ * There are separate functions for Q15, Q31, and floating-point data.
+ *
+ */
+
+/**
+ * @defgroup groupCmplxMath Complex Math Functions
+ * This set of functions operates on complex data vectors.
+ * The data in the complex arrays is stored in an interleaved fashion
+ * (real, imag, real, imag, ...).
+ * In the API functions, the number of samples in a complex array refers
+ * to the number of complex values; the array contains twice this number of
+ * real values.
+ */
+
+/**
+ * @defgroup groupFilters Filtering Functions
+ */
+
+/**
+ * @defgroup groupMatrix Matrix Functions
+ *
+ * This set of functions provides basic matrix math operations.
+ * The functions operate on matrix data structures.  For example,
+ * the type
+ * definition for the floating-point matrix structure is shown
+ * below:
+ * <pre>
+ *     typedef struct
+ *     {
+ *       uint16_t numRows;     // number of rows of the matrix.
+ *       uint16_t numCols;     // number of columns of the matrix.
+ *       float32_t *pData;     // points to the data of the matrix.
+ *     } arm_matrix_instance_f32;
+ * </pre>
+ * There are similar definitions for Q15 and Q31 data types.
+ *
+ * The structure specifies the size of the matrix and then points to
+ * an array of data.  The array is of size <code>numRows X numCols</code>
+ * and the values are arranged in row order.  That is, the
+ * matrix element (i, j) is stored at:
+ * <pre>
+ *     pData[i*numCols + j]
+ * </pre>
+ *
+ * \par Init Functions
+ * There is an associated initialization function for each type of matrix
+ * data structure.
+ * The initialization function sets the values of the internal structure fields.
+ * Refer to the function <code>arm_mat_init_f32()</code>, <code>arm_mat_init_q31()</code>
+ * and <code>arm_mat_init_q15()</code> for floating-point, Q31 and Q15 types,  respectively.
+ *
+ * \par
+ * Use of the initialization function is optional. However, if initialization function is used
+ * then the instance structure cannot be placed into a const data section.
+ * To place the instance structure in a const data
+ * section, manually initialize the data structure.  For example:
+ * <pre>
+ * <code>arm_matrix_instance_f32 S = {nRows, nColumns, pData};</code>
+ * <code>arm_matrix_instance_q31 S = {nRows, nColumns, pData};</code>
+ * <code>arm_matrix_instance_q15 S = {nRows, nColumns, pData};</code>
+ * </pre>
+ * where <code>nRows</code> specifies the number of rows, <code>nColumns</code>
+ * specifies the number of columns, and <code>pData</code> points to the
+ * data array.
+ *
+ * \par Size Checking
+ * By default all of the matrix functions perform size checking on the input and
+ * output matrices.  For example, the matrix addition function verifies that the
+ * two input matrices and the output matrix all have the same number of rows and
+ * columns.  If the size check fails the functions return:
+ * <pre>
+ *     ARM_MATH_SIZE_MISMATCH
+ * </pre>
+ * Otherwise the functions return
+ * <pre>
+ *     ARM_MATH_SUCCESS
+ * </pre>
+ * There is some overhead associated with this matrix size checking.
+ * The matrix size checking is enabled via the #define
+ * <pre>
+ *     ARM_MATH_MATRIX_CHECK
+ * </pre>
+ * within the library project settings.  By default this macro is defined
+ * and size checking is enabled.  By changing the project settings and
+ * undefining this macro size checking is eliminated and the functions
+ * run a bit faster.  With size checking disabled the functions always
+ * return <code>ARM_MATH_SUCCESS</code>.
+ */
+
+/**
+ * @defgroup groupTransforms Transform Functions
+ */
+
+/**
+ * @defgroup groupController Controller Functions
+ */
+
+/**
+ * @defgroup groupStats Statistics Functions
+ */
+/**
+ * @defgroup groupSupport Support Functions
+ */
+
+/**
+ * @defgroup groupInterpolation Interpolation Functions
+ * These functions perform 1- and 2-dimensional interpolation of data.
+ * Linear interpolation is used for 1-dimensional data and
+ * bilinear interpolation is used for 2-dimensional data.
+ */
+
+/**
+ * @defgroup groupExamples Examples
+ */
+#ifndef _ARM_MATH_H
+#define _ARM_MATH_H
+
+/* mbed: this port targets Cortex-M3 by default. ARM_MATH_CM3 is selected only
+ * when the build has not already chosen a core, so that a build-supplied
+ * ARM_MATH_CM4 / ARM_MATH_CM3 / ARM_MATH_CM0 is neither redefined nor combined
+ * with a second core macro. */
+#if !defined (ARM_MATH_CM4) && !defined (ARM_MATH_CM3) && !defined (ARM_MATH_CM0)
+#define ARM_MATH_CM3
+#endif
+#include "cmsis.h" /* mbed: target CMSIS header */
+
+// #define __CMSIS_GENERIC              /* disable NVIC and Systick functions */
+
+/*
+#if defined (ARM_MATH_CM4)
+  #include "core_cm4.h"
+#elif defined (ARM_MATH_CM3)
+  #include "core_cm3.h"
+#elif defined (ARM_MATH_CM0)
+  #include "core_cm0.h"
+#else
+#include "ARMCM4.h"
+#warning "Define either ARM_MATH_CM4 OR ARM_MATH_CM3...By Default building on ARM_MATH_CM4....."
+#endif
+*/
+
+//#undef  __CMSIS_GENERIC              /* enable NVIC and Systick functions */
+
+#include "string.h"
+
+#ifdef	__cplusplus
+extern "C"
+{
+#endif
+
+
+  /**
+   * @brief Macros required for reciprocal calculation in Normalized LMS
+   */
+
+#define DELTA_Q31 			(0x100)
+#define DELTA_Q15 			0x5
+  /* 6-bit mask; arm_recip_q31() and arm_recip_q15() in this header apply it to form the reciprocal table index */
+#define INDEX_MASK 			0x0000003F
+  /* single-precision (float) constant */
+#define PI					3.14159265358979f
+
+  /**
+   * @brief Macros required for SINE and COSINE Fast math approximations
+   */
+
+#define TABLE_SIZE			256
+  /* 0x800000 == 2^31 / 256 and 0x80 == 2^15 / 256 */
+#define TABLE_SPACING_Q31	0x800000
+#define TABLE_SPACING_Q15	0x80
+
+  /**
+   * @brief Macros required for SINE and COSINE Controller functions
+   */
+  /* 1.31(q31) Fixed value of 2/360 */
+  /* -1 to +1 is divided into 360 values so total spacing is (2/360) */
+#define INPUT_SPACING			0xB60B61
+
+
+  /**
+   * @brief Error status returned by some functions in the library.
+   *
+   * Success is zero; every error code is a distinct negative value.
+   */
+
+  typedef enum
+    {
+      ARM_MATH_SUCCESS = 0,              /**< No error */
+      ARM_MATH_ARGUMENT_ERROR = -1,      /**< One or more arguments are incorrect */
+      ARM_MATH_LENGTH_ERROR = -2,        /**< Length of data buffer is incorrect */
+      ARM_MATH_SIZE_MISMATCH = -3,       /**< Size of matrices is not compatible with the operation. */
+      ARM_MATH_NANINF = -4,              /**< Not-a-number (NaN) or infinity is generated */
+      ARM_MATH_SINGULAR = -5,            /**< Generated by matrix inversion if the input matrix is singular and cannot be inverted. */
+      ARM_MATH_TEST_FAILURE = -6         /**< Test Failed  */
+    } arm_status;
+
+  /**
+   * @brief 8-bit fractional data type in 1.7 format (stored in an int8_t).
+   */
+  typedef int8_t q7_t;
+
+  /**
+   * @brief 16-bit fractional data type in 1.15 format (stored in an int16_t).
+   */
+  typedef int16_t q15_t;
+
+  /**
+   * @brief 32-bit fractional data type in 1.31 format (stored in an int32_t).
+   */
+  typedef int32_t q31_t;
+
+  /**
+   * @brief 64-bit fractional data type in 1.63 format (stored in an int64_t).
+   */
+  typedef int64_t q63_t;
+
+  /**
+   * @brief 32-bit floating-point type definition (alias of float).
+   */
+  typedef float float32_t;
+
+  /**
+   * @brief 64-bit floating-point type definition (alias of double).
+   */
+  typedef double float64_t;
+
+  /**
+   * @brief definition to read/write two 16 bit values.
+   *
+   * Expands to an lvalue that views the pointer variable <code>addr</code> as an
+   * <code>int32_t *</code>, so dereferencing the result accesses one 32-bit word
+   * at the pointed-to location. <code>addr</code> must itself be an lvalue,
+   * because its address is taken.
+   * NOTE(review): the 32-bit access relies on the toolchain and target accepting
+   * the resulting type punning and alignment - confirm per toolchain.
+   */
+#define __SIMD32(addr)  (*(int32_t **) & (addr))
+
+#if defined (ARM_MATH_CM3) || defined (ARM_MATH_CM0)
+  /**
+   * @brief definition to pack two 16 bit values.
+   *
+   * The result takes bits 15:0 from ARG1 and bits 31:16 from (ARG2 << ARG3).
+   * Every argument is parenthesized in the expansion, so an expression such as
+   * <code>n + 1</code> may be passed as the shift count.
+   */
+#define __PKHBT(ARG1, ARG2, ARG3)      ( (((int32_t)(ARG1) <<  0) & (int32_t)0x0000FFFF) | \
+                                         (((int32_t)(ARG2) << (ARG3)) & (int32_t)0xFFFF0000)  )
+
+#endif
+
+  /**
+   * @brief definition to pack four 8 bit values.
+   *
+   * v0 lands in bits 7:0, v1 in bits 15:8, v2 in bits 23:16 and v3 in bits 31:24.
+   * Each argument is cast to int32_t, shifted into place and masked to its byte.
+   */
+#define __PACKq7(v0,v1,v2,v3) ( (((int32_t)(v0) <<  0) & (int32_t)0x000000FF) |	\
+                                (((int32_t)(v1) <<  8) & (int32_t)0x0000FF00) |	\
+							    (((int32_t)(v2) << 16) & (int32_t)0x00FF0000) |	\
+							    (((int32_t)(v3) << 24) & (int32_t)0xFF000000)  )
+
+
+  /**
+   * @brief Clips Q63 to Q31 values.
+   * @param[in] x 64-bit value to saturate.
+   * @return x when it fits in 32 bits; otherwise 0x7FFFFFFF for positive x
+   *         and 0x80000000 for negative x.
+   */
+  static __INLINE q31_t clip_q63_to_q31(
+					q63_t x)
+  {
+    /* x fits in 32 bits exactly when its upper word equals the sign extension of its lower word */
+    if((q31_t) (x >> 32) != ((q31_t) x >> 31))
+      {
+	/* (x >> 63) is 0 or -1, so the XOR gives 0x7FFFFFFF or 0x80000000.
+	 * A shift by 62 would leak bit 62 into the result when |x| >= 2^62. */
+	return (0x7FFFFFFF ^ ((q31_t) (x >> 63)));
+      }
+
+    return (q31_t) x;
+  }
+
+  /**
+   * @brief Clips Q63 to Q15 values.
+   * @param[in] x 64-bit input value.
+   * @return 0x7FFF (x positive) or 0x8000 (x negative) when x does not fit in
+   *         32 bits; otherwise (x >> 15) converted to q15_t.
+   */
+  static __INLINE q15_t clip_q63_to_q15(
+					q63_t x)
+  {
+    q31_t upperWord = (q31_t) (x >> 32);
+    q31_t lowerSign = ((q31_t) x) >> 31;
+
+    if(upperWord == lowerSign)
+      {
+	/* x fits in 32 bits: drop the 15 low bits and convert to 16 bits */
+	return (q15_t) (x >> 15);
+      }
+
+    /* (x >> 63) is 0 or -1, selecting 0x7FFF or 0x8000 */
+    return (q15_t) (0x7FFF ^ ((q15_t) (x >> 63)));
+  }
+
+  /**
+   * @brief Clips Q31 to Q7 values.
+   * @param[in] x 32-bit value to saturate.
+   * @return x when it lies in [-128, 127]; otherwise 0x7F for positive x and
+   *         0x80 (-128) for negative x.
+   */
+  static __INLINE q7_t clip_q31_to_q7(
+				      q31_t x)
+  {
+    /* (x >> 8) equals (x >> 7) only when bits 31:7 are all copies of the sign
+     * bit, i.e. when x fits in 8 bits. Comparing shifts of 24 and 23 would only
+     * detect overflow of a 24-bit range and let values such as 200 through. */
+    if((q31_t) (x >> 8) != ((q31_t) x >> 7))
+      {
+	/* (x >> 31) is 0 or -1, so the XOR gives 0x7F or -128 */
+	return (q7_t) (0x7F ^ ((q7_t) (x >> 31)));
+      }
+
+    return (q7_t) x;
+  }
+
+  /**
+   * @brief Clips Q31 to Q15 values.
+   * @param[in] x 32-bit value to saturate.
+   * @return x when it fits in 16 bits; otherwise 0x7FFF for positive x and
+   *         0x8000 for negative x.
+   */
+  static __INLINE q15_t clip_q31_to_q15(
+					q31_t x)
+  {
+    q31_t shifted16 = (q31_t) (x >> 16);
+    q31_t shifted15 = ((q31_t) x) >> 15;
+
+    if(shifted16 == shifted15)
+      {
+	/* bits 31:15 are all sign bits, so x already fits in 16 bits */
+	return (q15_t) x;
+      }
+
+    /* (x >> 31) is 0 or -1, selecting 0x7FFF or 0x8000 */
+    return (q15_t) (0x7FFF ^ ((q15_t) (x >> 31)));
+  }
+
+  /**
+   * @brief Multiplies a 64-bit value by a 32-bit value.
+   * @param[in] x 64-bit multiplicand.
+   * @param[in] y 32-bit multiplier.
+   * @return ((low 32 bits of x) * y >> 32) + ((x >> 32) * y), as a q63_t.
+   *         NOTE(review): the original comment describes the result as
+   *         "2.30 format" - confirm the intended scaling against the callers.
+   */
+
+  static __INLINE q63_t mult32x64(
+				  q63_t x,
+				  q31_t y)
+  {
+    /* partial product of the low word, with its low 32 bits discarded */
+    q63_t lowPart = ((q63_t) (x & 0x00000000FFFFFFFF) * y) >> 32;
+    /* partial product of the (signed) high word */
+    q63_t highPart = ((q63_t) (x >> 32) * y);
+
+    return (lowPart + highPart);
+  }
+
+
+  /**
+   * @brief Calculates the reciprocal (1/in) of a Q31 value.
+   * @param[in]  in           input value.
+   * @param[out] *dst         receives the reciprocal estimate.
+   * @param[in]  *pRecipTable table of initial estimates, indexed with a 6-bit index
+   *                          (presumably armRecipTableQ31 - confirm against callers).
+   * @return the normalization shift applied to <code>in</code>, plus one.
+   *
+   * The input is normalized by shifting out its redundant leading bits, a table
+   * lookup supplies a first estimate, and two refinement passes of the form
+   * out = out * (0x7FFFFFFF - in * out) improve it.
+   * NOTE(review): for in <= 0 the shift count comes from __CLZ(-in) while the
+   * original (non-negated) value is shifted; in == 0 and the most negative
+   * input look like unhandled edge cases - confirm.
+   */
+
+  static __INLINE uint32_t arm_recip_q31(
+					 q31_t in,
+					 q31_t * dst,
+					 q31_t * pRecipTable)
+  {
+
+    uint32_t out, tempVal;
+    uint32_t index, i;
+    uint32_t signBits;
+
+    /* count the redundant leading bits of the magnitude (leading zeros minus the sign bit) */
+    if(in > 0)
+      {
+	signBits = __CLZ(in) - 1;
+      }
+    else
+      {
+	signBits = __CLZ(-in) - 1;
+      }
+
+    /* Convert input sample to 1.31 format */
+    in = in << signBits;
+
+    /* calculation of index for initial approximated Val */
+    /* bits 29:24 of the normalized input select one of 64 table entries */
+    index = (uint32_t) (in >> 24u);
+    index = (index & INDEX_MASK);
+
+    /* 1.31 with exp 1 */
+    out = pRecipTable[index];
+
+    /* calculation of reciprocal value */
+    /* running approximation for two iterations */
+    for (i = 0u; i < 2u; i++)
+      {
+	/* tempVal = 0x7FFFFFFF - (in * out), product taken in 64 bits */
+	tempVal = (q31_t) (((q63_t) in * out) >> 31u);
+	tempVal = 0x7FFFFFFF - tempVal;
+	/*      1.31 with exp 1 */
+	out = (q31_t) (((q63_t) out * tempVal) >> 30u);
+      }
+
+    /* write output */
+    *dst = out;
+
+    /* return num of signbits of out = 1/in value */
+    return (signBits + 1u);
+
+  }
+
+  /**
+   * @brief Calculates the reciprocal (1/in) of a Q15 value.
+   * @param[in]  in           input value.
+   * @param[out] *dst         receives the reciprocal estimate.
+   * @param[in]  *pRecipTable table of initial estimates, indexed with a 6-bit index
+   *                          (presumably armRecipTableQ15 - confirm against callers).
+   * @return the normalization shift applied to <code>in</code>, plus one.
+   *
+   * Same scheme as arm_recip_q31(): normalize the input, look up a first
+   * estimate, then run two refinement passes of the form
+   * out = out * (0x7FFF - in * out).
+   * NOTE(review): for in <= 0 the shift count comes from __CLZ(-in) while the
+   * original (non-negated) value is shifted; in == 0 looks like an unhandled
+   * edge case - confirm.
+   */
+  static __INLINE uint32_t arm_recip_q15(
+					 q15_t in,
+					 q15_t * dst,
+					 q15_t * pRecipTable)
+  {
+
+    uint32_t out = 0, tempVal = 0;
+    uint32_t index = 0, i = 0;
+    uint32_t signBits = 0;
+
+    /* __CLZ works on 32 bits: subtract the 16 extension bits and the sign bit */
+    if(in > 0)
+      {
+	signBits = __CLZ(in) - 17;
+      }
+    else
+      {
+	signBits = __CLZ(-in) - 17;
+      }
+
+    /* Convert input sample to 1.15 format */
+    in = in << signBits;
+
+    /* calculation of index for initial approximated Val */
+    /* bits 13:8 of the normalized input select one of 64 table entries */
+    index = in >> 8;
+    index = (index & INDEX_MASK);
+
+    /*      1.15 with exp 1  */
+    out = pRecipTable[index];
+
+    /* calculation of reciprocal value */
+    /* running approximation for two iterations */
+    for (i = 0; i < 2; i++)
+      {
+	/* tempVal = 0x7FFF - (in * out) */
+	tempVal = (q15_t) (((q31_t) in * out) >> 15);
+	tempVal = 0x7FFF - tempVal;
+	/*      1.15 with exp 1 */
+	out = (q15_t) (((q31_t) out * tempVal) >> 14);
+      }
+
+    /* write output */
+    *dst = out;
+
+    /* return num of signbits of out = 1/in value */
+    return (signBits + 1);
+
+  }
+
+
+  /*
+   * @brief C custom defined intrinsic function for only M0 processors
+   */
+#if defined(ARM_MATH_CM0)
+
+  /*
+   * @brief C custom defined SSAT for M0 processors
+   * @param[in] x value to saturate.
+   * @param[in] y width in bits of the signed target range, 1..32.
+   * @return x limited to [-2^(y-1), 2^(y-1) - 1]; x is returned unchanged
+   *         when y is outside 1..32.
+   */
+  static __INLINE q31_t __SSAT(
+			       q31_t x,
+			       uint32_t y)
+  {
+    int32_t posMax, negMin;
+
+    /* widths outside 1..32 have no meaningful range; the shift below would also be undefined */
+    if((y < 1u) || (y > 32u))
+      {
+	return (x);
+      }
+
+    /* 2^(y-1) - 1, built by shifting so that y == 32 cannot overflow a signed int */
+    posMax = (int32_t) (0x7FFFFFFFu >> (32u - y));
+    negMin = -posMax - 1;
+
+    if(x > posMax)
+      {
+	x = posMax;
+      }
+    else if(x < negMin)
+      {
+	x = negMin;
+      }
+
+    return (x);
+  }
+
+#endif /* end of ARM_MATH_CM0 */
+
+
+
+  /*
+   * @brief C custom defined intrinsic function for M3 and M0 processors
+   */
+#if defined (ARM_MATH_CM3) || defined (ARM_MATH_CM0)
+
+  /*
+   * @brief C custom defined QADD8 for M3 and M0 processors
+   *
+   * Adds the four bytes of x to the corresponding bytes of y, saturating each
+   * sum to 8 bits with __SSAT, and returns the four results packed in one word.
+   */
+  static __INLINE q31_t __QADD8(
+				q31_t x,
+				q31_t y)
+  {
+    q7_t xByte0 = (char) x;
+    q7_t yByte0 = (char) y;
+    q7_t lane0, lane1, lane2, lane3;
+
+    /* one saturated sum per byte lane, lane0 being bits 7:0 */
+    lane0 = __SSAT((q31_t) (xByte0 + yByte0), 8);
+    lane1 = __SSAT(((q31_t) (((x << 16) >> 24) + ((y << 16) >> 24))), 8);
+    lane2 = __SSAT(((q31_t) (((x << 8) >> 24) + ((y << 8) >> 24))), 8);
+    lane3 = __SSAT(((q31_t) ((x >> 24) + (y >> 24))), 8);
+
+    /* reassemble the four bytes */
+    return (((q31_t) lane3 << 24) & 0xFF000000) |
+      (((q31_t) lane2 << 16) & 0x00FF0000) |
+      (((q31_t) lane1 << 8) & 0x0000FF00) | (lane0 & 0x000000FF);
+  }
+
+  /*
+   * @brief C custom defined QSUB8 for M3 and M0 processors
+   *
+   * Subtracts each byte of y from the corresponding byte of x, saturating each
+   * difference to 8 bits with __SSAT, and returns the four results packed in
+   * one word.
+   */
+  static __INLINE q31_t __QSUB8(
+				q31_t x,
+				q31_t y)
+  {
+
+    q31_t sum;
+    q31_t r, s, t, u;
+
+    /* Sign-extend the low bytes through q7_t. A cast to plain char would
+     * zero-extend where char is unsigned (the ARM default) and corrupt the
+     * lowest lane for negative bytes. */
+    r = (q7_t) x;
+    s = (q7_t) y;
+
+    r = __SSAT((r - s), 8);
+    s = __SSAT(((q31_t) (((x << 16) >> 24) - ((y << 16) >> 24))), 8) << 8;
+    t = __SSAT(((q31_t) (((x << 8) >> 24) - ((y << 8) >> 24))), 8) << 16;
+    u = __SSAT(((q31_t) ((x >> 24) - (y >> 24))), 8) << 24;
+
+    sum =
+      (u & 0xFF000000) | (t & 0x00FF0000) | (s & 0x0000FF00) | (r & 0x000000FF);
+
+    return sum;
+  }
+
+  /*
+   * @brief C custom defined QADD16 for M3 and M0 processors
+   *
+   * Adds the low and the high halfwords of x and y separately, saturating each
+   * sum to 16 bits with __SSAT, and returns both results packed in one word.
+   */
+  static __INLINE q31_t __QADD16(
+				 q31_t x,
+				 q31_t y)
+  {
+    q31_t xLow = (short) x;
+    q31_t yLow = (short) y;
+    q31_t low, high;
+
+    low = __SSAT(xLow + yLow, 16);
+    high = __SSAT(((q31_t) ((x >> 16) + (y >> 16))), 16) << 16;
+
+    /* high result in bits 31:16, low result in bits 15:0 */
+    return (high & 0xFFFF0000) | (low & 0x0000FFFF);
+  }
+
+  /*
+   * @brief C custom defined SHADD16 for M3 and M0 processors
+   *
+   * Halving add: for the low and the high halfwords separately, returns
+   * (x + y) >> 1, packed in one word.
+   */
+  static __INLINE q31_t __SHADD16(
+				  q31_t x,
+				  q31_t y)
+  {
+
+    q31_t sum;
+    q31_t r, s;
+
+    /* Form the full sum before halving, so the carry out of bit 0 is kept:
+     * halving each operand first would turn 1 + 1 into 0 instead of 1. */
+    r = ((q31_t) (short) x + (q31_t) (short) y) >> 1;
+    s = ((x >> 16) + (y >> 16)) >> 1;
+
+    sum = ((s << 16) & 0xFFFF0000) | (r & 0x0000FFFF);
+
+    return sum;
+
+  }
+
+  /*
+   * @brief C custom defined QSUB16 for M3 and M0 processors
+   *
+   * Subtracts the low and the high halfwords of y from those of x separately,
+   * saturating each difference to 16 bits with __SSAT, and returns both results
+   * packed in one word.
+   */
+  static __INLINE q31_t __QSUB16(
+				 q31_t x,
+				 q31_t y)
+  {
+    q31_t xLow = (short) x;
+    q31_t yLow = (short) y;
+    q31_t low, high;
+
+    low = __SSAT(xLow - yLow, 16);
+    high = __SSAT(((q31_t) ((x >> 16) - (y >> 16))), 16) << 16;
+
+    /* high result in bits 31:16, low result in bits 15:0 */
+    return (high & 0xFFFF0000) | (low & 0x0000FFFF);
+  }
+
+  /*
+   * @brief C custom defined SHSUB16 for M3 and M0 processors
+   *
+   * Halving subtract: for the low and the high halfwords separately, returns
+   * (x - y) >> 1, packed in one word.
+   */
+  static __INLINE q31_t __SHSUB16(
+				  q31_t x,
+				  q31_t y)
+  {
+
+    q31_t diff;
+    q31_t r, s;
+
+    /* Form the full difference before halving, so the borrow from bit 0 is
+     * kept: halving each operand first would turn 0 - 1 into 0 instead of -1. */
+    r = ((q31_t) (short) x - (q31_t) (short) y) >> 1;
+    s = ((x >> 16) - (y >> 16)) >> 1;
+
+    diff = ((s << 16) & 0xFFFF0000) | (r & 0x0000FFFF);
+
+    return diff;
+  }
+
+  /*
+   * @brief C custom defined QASX for M3 and M0 processors
+   *
+   * High halfword of the result: saturate16(x.high + y.low).
+   * Low halfword of the result:  saturate16(x.low - y.high).
+   */
+  static __INLINE q31_t __QASX(
+			       q31_t x,
+			       q31_t y)
+  {
+
+    q31_t hi, lo;
+
+    hi = clip_q31_to_q15((q31_t) ((short) (x >> 16) + (short) y));
+    lo = clip_q31_to_q15((q31_t) ((short) x - (short) (y >> 16)));
+
+    /* Mask the low halfword before combining: adding a negative low result
+     * unmasked would borrow one from the high halfword. */
+    return ((hi << 16) & 0xFFFF0000) | (lo & 0x0000FFFF);
+  }
+
+  /*
+   * @brief C custom defined SHASX for M3 and M0 processors
+   *
+   * High halfword of the result: (x.high + y.low) >> 1.
+   * Low halfword of the result:  (x.low - y.high) >> 1.
+   */
+  static __INLINE q31_t __SHASX(
+				q31_t x,
+				q31_t y)
+  {
+
+    q31_t sum;
+    q31_t r, s;
+
+    /* Halve after the add/subtract, not before, so the carry or borrow out of
+     * bit 0 is not lost. */
+    r = ((q31_t) (short) x - (y >> 16)) >> 1;
+    s = ((x >> 16) + (q31_t) (short) y) >> 1;
+
+    sum = ((s << 16) & 0xFFFF0000) | (r & 0x0000FFFF);
+
+    return sum;
+  }
+
+
+  /*
+   * @brief C custom defined QSAX for M3 and M0 processors
+   *
+   * High halfword of the result: saturate16(x.high - y.low).
+   * Low halfword of the result:  saturate16(x.low + y.high).
+   */
+  static __INLINE q31_t __QSAX(
+			       q31_t x,
+			       q31_t y)
+  {
+
+    q31_t hi, lo;
+
+    hi = clip_q31_to_q15((q31_t) ((short) (x >> 16) - (short) y));
+    lo = clip_q31_to_q15((q31_t) ((short) x + (short) (y >> 16)));
+
+    /* Mask the low halfword before combining: adding a negative low result
+     * unmasked would borrow one from the high halfword. */
+    return ((hi << 16) & 0xFFFF0000) | (lo & 0x0000FFFF);
+  }
+
+  /*
+   * @brief C custom defined SHSAX for M3 and M0 processors
+   *
+   * High halfword of the result: (x.high - y.low) >> 1.
+   * Low halfword of the result:  (x.low + y.high) >> 1.
+   */
+  static __INLINE q31_t __SHSAX(
+				q31_t x,
+				q31_t y)
+  {
+
+    q31_t sum;
+    q31_t r, s;
+
+    /* Halve after the add/subtract, not before, so the carry or borrow out of
+     * bit 0 is not lost. */
+    r = ((q31_t) (short) x + (y >> 16)) >> 1;
+    s = ((x >> 16) - (q31_t) (short) y) >> 1;
+
+    sum = ((s << 16) & 0xFFFF0000) | (r & 0x0000FFFF);
+
+    return sum;
+  }
+
+  /*
+   * @brief C custom defined SMUSDX for M3 and M0 processors
+   *
+   * Returns (x.low * y.high) - (x.high * y.low).
+   */
+  static __INLINE q31_t __SMUSDX(
+				 q31_t x,
+				 q31_t y)
+  {
+    short xLow = (short) x;
+    short xHigh = (short) (x >> 16);
+    short yLow = (short) y;
+    short yHigh = (short) (y >> 16);
+
+    return ((q31_t) ((xLow * yHigh) - (xHigh * yLow)));
+  }
+
+  /*
+   * @brief C custom defined SMUADX for M3 and M0 processors
+   *
+   * Returns (x.low * y.high) + (x.high * y.low).
+   */
+  static __INLINE q31_t __SMUADX(
+				 q31_t x,
+				 q31_t y)
+  {
+    short xLow = (short) x;
+    short xHigh = (short) (x >> 16);
+    short yLow = (short) y;
+    short yHigh = (short) (y >> 16);
+
+    return ((q31_t) ((xLow * yHigh) + (xHigh * yLow)));
+  }
+
+  /*
+   * @brief C custom defined QADD for M3 and M0 processors
+   *
+   * Returns x + y, computed in 64 bits and clipped to 32 bits by clip_q63_to_q31().
+   */
+  static __INLINE q31_t __QADD(
+			       q31_t x,
+			       q31_t y)
+  {
+    q63_t wideSum = (q63_t) x + y;
+
+    return clip_q63_to_q31(wideSum);
+  }
+
+  /*
+   * @brief C custom defined QSUB for M3 and M0 processors
+   *
+   * Returns x - y, computed in 64 bits and clipped to 32 bits by clip_q63_to_q31().
+   */
+  static __INLINE q31_t __QSUB(
+			       q31_t x,
+			       q31_t y)
+  {
+    q63_t wideDiff = (q63_t) x - y;
+
+    return clip_q63_to_q31(wideDiff);
+  }
+
+  /*
+   * @brief C custom defined SMLAD for M3 and M0 processors
+   *
+   * Returns sum + (x.high * y.high) + (x.low * y.low).
+   */
+  static __INLINE q31_t __SMLAD(
+				q31_t x,
+				q31_t y,
+				q31_t sum)
+  {
+    short xHigh = (short) (x >> 16);
+    short yHigh = (short) (y >> 16);
+    short xLow = (short) x;
+    short yLow = (short) y;
+
+    return (sum + (xHigh * yHigh) + (xLow * yLow));
+  }
+
+  /*
+   * @brief C custom defined SMLADX for M3 and M0 processors
+   *
+   * Returns sum + (x.high * y.low) + (x.low * y.high).
+   */
+  static __INLINE q31_t __SMLADX(
+				 q31_t x,
+				 q31_t y,
+				 q31_t sum)
+  {
+    short xHigh = (short) (x >> 16);
+    short yLow = (short) (y);
+    short xLow = (short) x;
+    short yHigh = (short) (y >> 16);
+
+    return (sum + (xHigh * yLow) + (xLow * yHigh));
+  }
+
+  /*
+   * @brief C custom defined SMLSDX for M3 and M0 processors
+   *
+   * Returns sum - (x.high * y.low) + (x.low * y.high).
+   */
+  static __INLINE q31_t __SMLSDX(
+				 q31_t x,
+				 q31_t y,
+				 q31_t sum)
+  {
+    short xHigh = (short) (x >> 16);
+    short yLow = (short) (y);
+    short xLow = (short) x;
+    short yHigh = (short) (y >> 16);
+
+    return (sum - (xHigh * yLow) + (xLow * yHigh));
+  }
+
+  /*
+   * @brief C custom defined SMLALD for M3 and M0 processors
+   *
+   * Returns the 64-bit sum + (x.high * y.high) + (x.low * y.low).
+   */
+  static __INLINE q63_t __SMLALD(
+				 q31_t x,
+				 q31_t y,
+				 q63_t sum)
+  {
+    short xHigh = (short) (x >> 16);
+    short yHigh = (short) (y >> 16);
+    short xLow = (short) x;
+    short yLow = (short) y;
+
+    return (sum + (xHigh * yHigh) + (xLow * yLow));
+  }
+
+  /*
+   * @brief C custom defined SMLALDX for M3 and M0 processors
+   *
+   * Returns the 64-bit sum + (x.high * y.low) + (x.low * y.high).
+   */
+  static __INLINE q63_t __SMLALDX(
+				  q31_t x,
+				  q31_t y,
+				  q63_t sum)
+  {
+    short xHigh = (short) (x >> 16);
+    short yLow = (short) y;
+    short xLow = (short) x;
+    short yHigh = (short) (y >> 16);
+
+    return (sum + (xHigh * yLow)) + (xLow * yHigh);
+  }
+
+  /*
+   * @brief C custom defined SMUAD for M3 and M0 processors
+   *
+   * Returns (x.high * y.high) + (x.low * y.low).
+   */
+  static __INLINE q31_t __SMUAD(
+				q31_t x,
+				q31_t y)
+  {
+    q31_t xHigh = (x >> 16);
+    q31_t yHigh = (y >> 16);
+    /* shift up then back down to sign-extend the low halfword */
+    q31_t xLow = ((x << 16) >> 16);
+    q31_t yLow = ((y << 16) >> 16);
+
+    return ((xHigh * yHigh) + (xLow * yLow));
+  }
+
+  /*
+   * @brief C custom defined SMUSD for M3 and M0 processors
+   *
+   * Returns (x.low * y.low) - (x.high * y.high).
+   */
+  static __INLINE q31_t __SMUSD(
+				q31_t x,
+				q31_t y)
+  {
+    q31_t xHigh = (x >> 16);
+    q31_t yHigh = (y >> 16);
+    /* shift up then back down to sign-extend the low halfword */
+    q31_t xLow = ((x << 16) >> 16);
+    q31_t yLow = ((y << 16) >> 16);
+
+    return (-(xHigh * yHigh) + (xLow * yLow));
+  }
+
+
+
+
+#endif /* (ARM_MATH_CM3) || defined (ARM_MATH_CM0) */
+
+
+  /**
+   * @brief Instance structure for the Q7 FIR filter.
+   *
+   * The fields carry the same names as the numTaps, pState and pCoeffs
+   * arguments of arm_fir_init_q7().
+   */
+  typedef struct
+  {
+    uint16_t numTaps;        /**< number of filter coefficients in the filter. */
+    q7_t *pState;            /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
+    q7_t *pCoeffs;           /**< points to the coefficient array. The array is of length numTaps.*/
+  } arm_fir_instance_q7;
+
+  /**
+   * @brief Instance structure for the Q15 FIR filter.
+   *
+   * The fields carry the same names as the numTaps, pState and pCoeffs
+   * arguments of arm_fir_init_q15().
+   */
+  typedef struct
+  {
+    uint16_t numTaps;         /**< number of filter coefficients in the filter. */
+    q15_t *pState;            /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
+    q15_t *pCoeffs;           /**< points to the coefficient array. The array is of length numTaps.*/
+  } arm_fir_instance_q15;
+
+  /**
+   * @brief Instance structure for the Q31 FIR filter.
+   *
+   * The fields carry the same names as the numTaps, pState and pCoeffs
+   * arguments of arm_fir_init_q31().
+   */
+  typedef struct
+  {
+    uint16_t numTaps;         /**< number of filter coefficients in the filter. */
+    q31_t *pState;            /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
+    q31_t *pCoeffs;           /**< points to the coefficient array. The array is of length numTaps. */
+  } arm_fir_instance_q31;
+
+  /**
+   * @brief Instance structure for the floating-point FIR filter.
+   *
+   * The fields carry the same names as the numTaps, pState and pCoeffs
+   * arguments of arm_fir_init_f32().
+   */
+  typedef struct
+  {
+    uint16_t numTaps;     /**< number of filter coefficients in the filter. */
+    float32_t *pState;    /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
+    float32_t *pCoeffs;   /**< points to the coefficient array. The array is of length numTaps. */
+  } arm_fir_instance_f32;
+
+
+  /**
+   * @brief Processing function for the Q7 FIR filter.
+   * @param[in] *S points to an instance of the Q7 FIR filter structure (const: the structure is not modified through this pointer).
+   * @param[in] *pSrc points to the block of input data.
+   * @param[out] *pDst points to the block of output data.
+   * @param[in] blockSize number of samples to process.
+   * @return none.
+   */
+  void arm_fir_q7(
+		  const arm_fir_instance_q7 * S,
+		   q7_t * pSrc,
+		  q7_t * pDst,
+		  uint32_t blockSize);
+
+
+  /**
+   * @brief  Initialization function for the Q7 FIR filter.
+   * @param[in,out] *S points to an instance of the Q7 FIR structure.
+   * @param[in] numTaps  Number of filter coefficients in the filter.
+   * @param[in] *pCoeffs points to the filter coefficients (array of length numTaps, per arm_fir_instance_q7).
+   * @param[in] *pState points to the state buffer (array of length numTaps+blockSize-1, per arm_fir_instance_q7).
+   * @param[in] blockSize number of samples that are processed.
+   * @return none
+   */
+  void arm_fir_init_q7(
+		       arm_fir_instance_q7 * S,
+		       uint16_t numTaps,
+		       q7_t * pCoeffs,
+		       q7_t * pState,
+		       uint32_t blockSize);
+
+
+  /**
+   * @brief Processing function for the Q15 FIR filter.
+   * @param[in] *S points to an instance of the Q15 FIR structure (const: the structure is not modified through this pointer).
+   * @param[in] *pSrc points to the block of input data.
+   * @param[out] *pDst points to the block of output data.
+   * @param[in] blockSize number of samples to process.
+   * @return none.
+   */
+  void arm_fir_q15(
+		   const arm_fir_instance_q15 * S,
+		    q15_t * pSrc,
+		   q15_t * pDst,
+		   uint32_t blockSize);
+
+  /**
+   * @brief Processing function for the fast Q15 FIR filter.
+   * @param[in] *S points to an instance of the Q15 FIR filter structure (const: the structure is not modified through this pointer).
+   * @param[in] *pSrc points to the block of input data.
+   * @param[out] *pDst points to the block of output data.
+   * @param[in] blockSize number of samples to process.
+   * @return none.
+   *
+   * Takes the same instance type and arguments as arm_fir_q15().
+   * NOTE(review): how the "fast" variant differs (presumably reduced precision
+   * or saturation in exchange for speed) is not visible in this header - confirm
+   * against the implementation.
+   */
+  void arm_fir_fast_q15(
+			const arm_fir_instance_q15 * S,
+			 q15_t * pSrc,
+			q15_t * pDst,
+			uint32_t blockSize);
+
+  /**
+   * @brief  Initialization function for the Q15 FIR filter.
+   * @param[in,out] *S points to an instance of the Q15 FIR filter structure.
+   * @param[in] numTaps  Number of filter coefficients in the filter. Must be even and greater than or equal to 4.
+   * @param[in] *pCoeffs points to the filter coefficients (array of length numTaps, per arm_fir_instance_q15).
+   * @param[in] *pState points to the state buffer (array of length numTaps+blockSize-1, per arm_fir_instance_q15).
+   * @param[in] blockSize number of samples that are processed at a time.
+   * @return The function returns ARM_MATH_SUCCESS if initialization was successful or ARM_MATH_ARGUMENT_ERROR if
+   * <code>numTaps</code> is not a supported value.
+   */
+  arm_status arm_fir_init_q15(
+			      arm_fir_instance_q15 * S,
+			      uint16_t numTaps,
+			      q15_t * pCoeffs,
+			      q15_t * pState,
+			      uint32_t blockSize);
+
+
+  /**
+   * @brief Processing function for the Q31 FIR filter.
+   * @param[in] *S points to an instance of the Q31 FIR filter structure (const: the structure is not modified through this pointer).
+   * @param[in] *pSrc points to the block of input data.
+   * @param[out] *pDst points to the block of output data.
+   * @param[in] blockSize number of samples to process.
+   * @return none.
+   */
+  void arm_fir_q31(
+		   const arm_fir_instance_q31 * S,
+		    q31_t * pSrc,
+		   q31_t * pDst,
+		   uint32_t blockSize);
+
+  /**
+   * @brief Processing function for the fast Q31 FIR filter.
+   * @param[in] *S points to an instance of the Q31 FIR structure (const: the structure is not modified through this pointer).
+   * @param[in] *pSrc points to the block of input data.
+   * @param[out] *pDst points to the block of output data.
+   * @param[in] blockSize number of samples to process.
+   * @return none.
+   *
+   * Takes the same instance type and arguments as arm_fir_q31().
+   * NOTE(review): how the "fast" variant differs (presumably reduced precision
+   * in exchange for speed) is not visible in this header - confirm against the
+   * implementation.
+   */
+  void arm_fir_fast_q31(
+			const arm_fir_instance_q31 * S,
+			 q31_t * pSrc,
+			q31_t * pDst,
+			uint32_t blockSize);
+
+  /**
+   * @brief  Initialization function for the Q31 FIR filter.
+   * @param[in,out] *S points to an instance of the Q31 FIR structure.
+   * @param[in] 	numTaps  Number of filter coefficients in the filter.
+   * @param[in] 	*pCoeffs points to the filter coefficients (array of length numTaps, per arm_fir_instance_q31).
+   * @param[in] 	*pState points to the state buffer (array of length numTaps+blockSize-1, per arm_fir_instance_q31).
+   * @param[in] 	blockSize number of samples that are processed at a time.
+   * @return 		none.
+   */
+  void arm_fir_init_q31(
+			arm_fir_instance_q31 * S,
+			uint16_t numTaps,
+			q31_t * pCoeffs,
+			q31_t * pState,
+			uint32_t blockSize);
+
+  /**
+   * @brief Processing function for the floating-point FIR filter.
+   * @param[in] *S points to an instance of the floating-point FIR structure (const: the structure is not modified through this pointer).
+   * @param[in] *pSrc points to the block of input data.
+   * @param[out] *pDst points to the block of output data.
+   * @param[in] blockSize number of samples to process.
+   * @return none.
+   */
+  void arm_fir_f32(
+		   const arm_fir_instance_f32 * S,
+		    float32_t * pSrc,
+		   float32_t * pDst,
+		   uint32_t blockSize);
+
+  /**
+   * @brief  Initialization function for the floating-point FIR filter.
+   * @param[in,out] *S points to an instance of the floating-point FIR filter structure.
+   * @param[in] 	numTaps  Number of filter coefficients in the filter.
+   * @param[in] 	*pCoeffs points to the filter coefficients (array of length numTaps, per arm_fir_instance_f32).
+   * @param[in] 	*pState points to the state buffer (array of length numTaps+blockSize-1, per arm_fir_instance_f32).
+   * @param[in] 	blockSize number of samples that are processed at a time.
+   * @return    	none.
+   */
+  void arm_fir_init_f32(
+			arm_fir_instance_f32 * S,
+			uint16_t numTaps,
+			float32_t * pCoeffs,
+			float32_t * pState,
+			uint32_t blockSize);
+
+
+  /**
+   * @brief Instance structure for the Q15 Biquad cascade filter.
+   *
+   * NOTE(review): numStages and postShift are signed 8-bit fields here, whereas
+   * the Q31 instance structure uses uint32_t and uint8_t for the same fields -
+   * confirm whether negative values are meaningful.
+   */
+  typedef struct
+  {
+    int8_t numStages;         /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
+    q15_t *pState;            /**< Points to the array of state variables.  The array is of length 4*numStages. */
+    q15_t *pCoeffs;           /**< Points to the array of coefficients.  The array is of length 5*numStages. */
+    int8_t postShift;         /**< Additional shift, in bits, applied to each output sample. */
+
+  } arm_biquad_casd_df1_inst_q15;
+
+
+  /**
+   * @brief Instance structure for the Q31 Biquad cascade filter.
+   * @note postShift is declared uint8_t here, whereas arm_biquad_cascade_df1_init_q31()
+   *       takes it as int8_t and the Q15 instance structure stores it as int8_t.
+   */
+  typedef struct
+  {
+    uint32_t numStages;      /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
+    q31_t *pState;           /**< Points to the array of state coefficients.  The array is of length 4*numStages. */
+    q31_t *pCoeffs;          /**< Points to the array of coefficients.  The array is of length 5*numStages. */
+    uint8_t postShift;       /**< Additional shift, in bits, applied to each output sample. */
+
+  } arm_biquad_casd_df1_inst_q31;
+
+  /**
+   * @brief Instance structure for the floating-point Biquad cascade filter.
+   * @note arm_biquad_cascade_df1_init_f32() takes numStages as uint8_t; it is stored here as uint32_t.
+   */
+  typedef struct
+  {
+    uint32_t numStages;         /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
+    float32_t *pState;          /**< Points to the array of state coefficients.  The array is of length 4*numStages. */
+    float32_t *pCoeffs;         /**< Points to the array of coefficients.  The array is of length 5*numStages. */
+
+
+  } arm_biquad_casd_df1_inst_f32;
+
+
+
+  /**
+   * @brief Processing function for the Q15 Biquad cascade filter.
+   * @param[in]  *S        points to an instance of the Q15 Biquad cascade structure.
+   * @param[in]  *pSrc     points to the block of input data.
+   * @param[out] *pDst     points to the block of output data.
+   * @param[in]  blockSize number of samples to process.
+   * @return     none.
+   * @note pSrc is documented as an input, but the pointer is not const-qualified in this prototype.
+   */
+
+  void arm_biquad_cascade_df1_q15(
+				  const arm_biquad_casd_df1_inst_q15 * S,
+				   q15_t * pSrc,
+				  q15_t * pDst,
+				  uint32_t blockSize);
+
+  /**
+   * @brief  Initialization function for the Q15 Biquad cascade filter.
+   * @param[in,out] *S           points to an instance of the Q15 Biquad cascade structure.
+   * @param[in]     numStages    number of 2nd order stages in the filter.
+   * @param[in]     *pCoeffs     points to the filter coefficients.
+   * @param[in]     *pState      points to the state buffer.
+   * @param[in]     postShift    shift to be applied to the output. Varies according to the coefficients format.
+   * @return        none.
+   */
+
+  void arm_biquad_cascade_df1_init_q15(
+				       arm_biquad_casd_df1_inst_q15 * S,
+				       uint8_t numStages,
+				       q15_t * pCoeffs,
+				       q15_t * pState,
+				       int8_t postShift);
+
+
+  /**
+   * @brief Fast but less precise processing function for the Q15 Biquad cascade filter.
+   * @param[in]  *S        points to an instance of the Q15 Biquad cascade structure.
+   * @param[in]  *pSrc     points to the block of input data.
+   * @param[out] *pDst     points to the block of output data.
+   * @param[in]  blockSize number of samples to process.
+   * @return     none.
+   * @note pSrc is documented as an input, but the pointer is not const-qualified in this prototype.
+   */
+
+  void arm_biquad_cascade_df1_fast_q15(
+				       const arm_biquad_casd_df1_inst_q15 * S,
+				        q15_t * pSrc,
+				       q15_t * pDst,
+				       uint32_t blockSize);
+
+
+  /**
+   * @brief Processing function for the Q31 Biquad cascade filter.
+   * @param[in]  *S         points to an instance of the Q31 Biquad cascade structure.
+   * @param[in]  *pSrc      points to the block of input data.
+   * @param[out] *pDst      points to the block of output data.
+   * @param[in]  blockSize  number of samples to process.
+   * @return     none.
+   * @note pSrc is documented as an input, but the pointer is not const-qualified in this prototype.
+   */
+
+  void arm_biquad_cascade_df1_q31(
+				  const arm_biquad_casd_df1_inst_q31 * S,
+				   q31_t * pSrc,
+				  q31_t * pDst,
+				  uint32_t blockSize);
+
+  /**
+   * @brief Fast but less precise processing function for the Q31 Biquad cascade filter.
+   * @param[in]  *S         points to an instance of the Q31 Biquad cascade structure.
+   * @param[in]  *pSrc      points to the block of input data.
+   * @param[out] *pDst      points to the block of output data.
+   * @param[in]  blockSize  number of samples to process.
+   * @return     none.
+   * @note pSrc is documented as an input, but the pointer is not const-qualified in this prototype.
+   */
+
+  void arm_biquad_cascade_df1_fast_q31(
+				       const arm_biquad_casd_df1_inst_q31 * S,
+				        q31_t * pSrc,
+				       q31_t * pDst,
+				       uint32_t blockSize);
+
+  /**
+   * @brief  Initialization function for the Q31 Biquad cascade filter.
+   * @param[in,out] *S           points to an instance of the Q31 Biquad cascade structure.
+   * @param[in]     numStages    number of 2nd order stages in the filter.
+   * @param[in]     *pCoeffs     points to the filter coefficients.
+   * @param[in]     *pState      points to the state buffer.
+   * @param[in]     postShift    shift to be applied to the output. Varies according to the coefficients format.
+   * @return        none.
+   */
+
+  void arm_biquad_cascade_df1_init_q31(
+				       arm_biquad_casd_df1_inst_q31 * S,
+				       uint8_t numStages,
+				       q31_t * pCoeffs,
+				       q31_t * pState,
+				       int8_t postShift);
+
+  /**
+   * @brief Processing function for the floating-point Biquad cascade filter.
+   * @param[in]  *S         points to an instance of the floating-point Biquad cascade structure.
+   * @param[in]  *pSrc      points to the block of input data.
+   * @param[out] *pDst      points to the block of output data.
+   * @param[in]  blockSize  number of samples to process.
+   * @return     none.
+   * @note pSrc is documented as an input, but the pointer is not const-qualified in this prototype.
+   */
+
+  void arm_biquad_cascade_df1_f32(
+				  const arm_biquad_casd_df1_inst_f32 * S,
+				   float32_t * pSrc,
+				  float32_t * pDst,
+				  uint32_t blockSize);
+
+  /**
+   * @brief  Initialization function for the floating-point Biquad cascade filter.
+   * @param[in,out] *S           points to an instance of the floating-point Biquad cascade structure.
+   * @param[in]     numStages    number of 2nd order stages in the filter.
+   * @param[in]     *pCoeffs     points to the filter coefficients.
+   * @param[in]     *pState      points to the state buffer.
+   * @return        none.
+   */
+
+  void arm_biquad_cascade_df1_init_f32(
+				       arm_biquad_casd_df1_inst_f32 * S,
+				       uint8_t numStages,
+				       float32_t * pCoeffs,
+				       float32_t * pState);
+
+
+  /**
+   * @brief Instance structure for a floating-point matrix.
+   */
+
+  typedef struct
+  {
+    uint16_t numRows;     /**< number of rows of the matrix.     */
+    uint16_t numCols;     /**< number of columns of the matrix.  */
+    float32_t *pData;     /**< points to the data of the matrix. */
+  } arm_matrix_instance_f32;
+
+  /**
+   * @brief Instance structure for a Q15 matrix.
+   */
+
+  typedef struct
+  {
+    uint16_t numRows;     /**< number of rows of the matrix.     */
+    uint16_t numCols;     /**< number of columns of the matrix.  */
+    q15_t *pData;         /**< points to the data of the matrix. */
+
+  } arm_matrix_instance_q15;
+
+  /**
+   * @brief Instance structure for a Q31 matrix.
+   */
+
+  typedef struct
+  {
+    uint16_t numRows;     /**< number of rows of the matrix.     */
+    uint16_t numCols;     /**< number of columns of the matrix.  */
+    q31_t *pData;         /**< points to the data of the matrix. */
+
+  } arm_matrix_instance_q31;
+
+
+
+  /**
+   * @brief Floating-point matrix addition.
+   * @param[in]       *pSrcA points to the first input matrix structure
+   * @param[in]       *pSrcB points to the second input matrix structure
+   * @param[out]      *pDst points to output matrix structure
+   * @return     The function returns either
+   * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
+   */
+
+  arm_status arm_mat_add_f32(
+			     const arm_matrix_instance_f32 * pSrcA,
+			     const arm_matrix_instance_f32 * pSrcB,
+			     arm_matrix_instance_f32 * pDst);
+
+  /**
+   * @brief Q15 matrix addition.
+   * @param[in]       *pSrcA points to the first input matrix structure
+   * @param[in]       *pSrcB points to the second input matrix structure
+   * @param[out]      *pDst points to output matrix structure
+   * @return     The function returns either
+   * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
+   */
+
+  arm_status arm_mat_add_q15(
+			     const arm_matrix_instance_q15 * pSrcA,
+			     const arm_matrix_instance_q15 * pSrcB,
+			     arm_matrix_instance_q15 * pDst);
+
+  /**
+   * @brief Q31 matrix addition.
+   * @param[in]       *pSrcA points to the first input matrix structure
+   * @param[in]       *pSrcB points to the second input matrix structure
+   * @param[out]      *pDst points to output matrix structure
+   * @return     The function returns either
+   * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
+   */
+
+  arm_status arm_mat_add_q31(
+			     const arm_matrix_instance_q31 * pSrcA,
+			     const arm_matrix_instance_q31 * pSrcB,
+			     arm_matrix_instance_q31 * pDst);
+
+
+  /**
+   * @brief Floating-point matrix transpose.
+   * @param[in]  *pSrc points to the input matrix
+   * @param[out] *pDst points to the output matrix
+   * @return     The function returns either <code>ARM_MATH_SIZE_MISMATCH</code>
+   * or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
+   */
+
+  arm_status arm_mat_trans_f32(
+			       const arm_matrix_instance_f32 * pSrc,
+			       arm_matrix_instance_f32 * pDst);
+
+
+  /**
+   * @brief Q15 matrix transpose.
+   * @param[in]  *pSrc points to the input matrix
+   * @param[out] *pDst points to the output matrix
+   * @return     The function returns either <code>ARM_MATH_SIZE_MISMATCH</code>
+   * or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
+   */
+
+  arm_status arm_mat_trans_q15(
+			       const arm_matrix_instance_q15 * pSrc,
+			       arm_matrix_instance_q15 * pDst);
+
+  /**
+   * @brief Q31 matrix transpose.
+   * @param[in]  *pSrc points to the input matrix
+   * @param[out] *pDst points to the output matrix
+   * @return     The function returns either <code>ARM_MATH_SIZE_MISMATCH</code>
+   * or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
+   */
+
+  arm_status arm_mat_trans_q31(
+			       const arm_matrix_instance_q31 * pSrc,
+			       arm_matrix_instance_q31 * pDst);
+
+
+  /**
+   * @brief Floating-point matrix multiplication.
+   * @param[in]       *pSrcA points to the first input matrix structure
+   * @param[in]       *pSrcB points to the second input matrix structure
+   * @param[out]      *pDst points to output matrix structure
+   * @return     The function returns either
+   * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
+   */
+
+  arm_status arm_mat_mult_f32(
+			      const arm_matrix_instance_f32 * pSrcA,
+			      const arm_matrix_instance_f32 * pSrcB,
+			      arm_matrix_instance_f32 * pDst);
+
+  /**
+   * @brief Q15 matrix multiplication.
+   * @param[in]       *pSrcA points to the first input matrix structure
+   * @param[in]       *pSrcB points to the second input matrix structure
+   * @param[out]      *pDst points to output matrix structure
+   * @param           *pState points to a caller-supplied q15_t buffer; its purpose and required
+   *                  size are not stated in this header - TODO confirm against the implementation
+   * @return     The function returns either
+   * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
+   */
+
+  arm_status arm_mat_mult_q15(
+			      const arm_matrix_instance_q15 * pSrcA,
+			      const arm_matrix_instance_q15 * pSrcB,
+			      arm_matrix_instance_q15 * pDst,
+			      q15_t * pState);
+
+  /**
+   * @brief Q15 matrix multiplication (fast variant).
+   * @param[in]       *pSrcA points to the first input matrix structure
+   * @param[in]       *pSrcB points to the second input matrix structure
+   * @param[out]      *pDst points to output matrix structure
+   * @param           *pState points to a caller-supplied q15_t buffer; its purpose and required
+   *                  size are not stated in this header - TODO confirm against the implementation
+   * @return     The function returns either
+   * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
+   */
+
+  arm_status arm_mat_mult_fast_q15(
+				   const arm_matrix_instance_q15 * pSrcA,
+				   const arm_matrix_instance_q15 * pSrcB,
+				   arm_matrix_instance_q15 * pDst,
+				   q15_t * pState);
+
+  /**
+   * @brief Q31 matrix multiplication.
+   * @param[in]       *pSrcA points to the first input matrix structure
+   * @param[in]       *pSrcB points to the second input matrix structure
+   * @param[out]      *pDst points to output matrix structure
+   * @return     The function returns either
+   * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
+   */
+
+  arm_status arm_mat_mult_q31(
+			      const arm_matrix_instance_q31 * pSrcA,
+			      const arm_matrix_instance_q31 * pSrcB,
+			      arm_matrix_instance_q31 * pDst);
+
+  /**
+   * @brief Q31 matrix multiplication (fast variant).
+   * @param[in]       *pSrcA points to the first input matrix structure
+   * @param[in]       *pSrcB points to the second input matrix structure
+   * @param[out]      *pDst points to output matrix structure
+   * @return     The function returns either
+   * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
+   */
+
+  arm_status arm_mat_mult_fast_q31(
+				   const arm_matrix_instance_q31 * pSrcA,
+				   const arm_matrix_instance_q31 * pSrcB,
+				   arm_matrix_instance_q31 * pDst);
+
+
+  /**
+   * @brief Floating-point matrix subtraction.
+   * @param[in]       *pSrcA points to the first input matrix structure
+   * @param[in]       *pSrcB points to the second input matrix structure
+   * @param[out]      *pDst points to output matrix structure
+   * @return     The function returns either
+   * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
+   */
+
+  arm_status arm_mat_sub_f32(
+			     const arm_matrix_instance_f32 * pSrcA,
+			     const arm_matrix_instance_f32 * pSrcB,
+			     arm_matrix_instance_f32 * pDst);
+
+  /**
+   * @brief Q15 matrix subtraction.
+   * @param[in]       *pSrcA points to the first input matrix structure
+   * @param[in]       *pSrcB points to the second input matrix structure
+   * @param[out]      *pDst points to output matrix structure
+   * @return     The function returns either
+   * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
+   */
+
+  arm_status arm_mat_sub_q15(
+			     const arm_matrix_instance_q15 * pSrcA,
+			     const arm_matrix_instance_q15 * pSrcB,
+			     arm_matrix_instance_q15 * pDst);
+
+  /**
+   * @brief Q31 matrix subtraction.
+   * @param[in]       *pSrcA points to the first input matrix structure
+   * @param[in]       *pSrcB points to the second input matrix structure
+   * @param[out]      *pDst points to output matrix structure
+   * @return     The function returns either
+   * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
+   */
+
+  arm_status arm_mat_sub_q31(
+			     const arm_matrix_instance_q31 * pSrcA,
+			     const arm_matrix_instance_q31 * pSrcB,
+			     arm_matrix_instance_q31 * pDst);
+
+  /**
+   * @brief Floating-point matrix scaling.
+   * @param[in]  *pSrc points to the input matrix
+   * @param[in]  scale scale factor
+   * @param[out] *pDst points to the output matrix
+   * @return     The function returns either
+   * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
+   */
+
+  arm_status arm_mat_scale_f32(
+			       const arm_matrix_instance_f32 * pSrc,
+			       float32_t scale,
+			       arm_matrix_instance_f32 * pDst);
+
+  /**
+   * @brief Q15 matrix scaling.
+   * @param[in]       *pSrc points to input matrix
+   * @param[in]       scaleFract fractional portion of the scale factor
+   * @param[in]       shift number of bits to shift the result by
+   * @param[out]      *pDst points to output matrix
+   * @return     The function returns either
+   * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
+   */
+
+  arm_status arm_mat_scale_q15(
+			       const arm_matrix_instance_q15 * pSrc,
+			       q15_t scaleFract,
+			       int32_t shift,
+			       arm_matrix_instance_q15 * pDst);
+
+  /**
+   * @brief Q31 matrix scaling.
+   * @param[in]       *pSrc points to input matrix
+   * @param[in]       scaleFract fractional portion of the scale factor
+   * @param[in]       shift number of bits to shift the result by
+   * @param[out]      *pDst points to output matrix structure
+   * @return     The function returns either
+   * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
+   */
+
+  arm_status arm_mat_scale_q31(
+			       const arm_matrix_instance_q31 * pSrc,
+			       q31_t scaleFract,
+			       int32_t shift,
+			       arm_matrix_instance_q31 * pDst);
+
+
+  /**
+   * @brief  Q31 matrix initialization.
+   * @param[in,out] *S             points to an instance of the Q31 matrix structure.
+   * @param[in]     nRows          number of rows in the matrix.
+   * @param[in]     nColumns       number of columns in the matrix.
+   * @param[in]     *pData         points to the matrix data array.
+   * @return        none.
+   */
+
+  void arm_mat_init_q31(
+			arm_matrix_instance_q31 * S,
+			uint16_t nRows,
+			uint16_t nColumns,
+			q31_t   *pData);
+
+  /**
+   * @brief  Q15 matrix initialization.
+   * @param[in,out] *S             points to an instance of the Q15 matrix structure.
+   * @param[in]     nRows          number of rows in the matrix.
+   * @param[in]     nColumns       number of columns in the matrix.
+   * @param[in]     *pData         points to the matrix data array.
+   * @return        none.
+   */
+
+  void arm_mat_init_q15(
+			arm_matrix_instance_q15 * S,
+			uint16_t nRows,
+			uint16_t nColumns,
+			q15_t    *pData);
+
+  /**
+   * @brief  Floating-point matrix initialization.
+   * @param[in,out] *S             points to an instance of the floating-point matrix structure.
+   * @param[in]     nRows          number of rows in the matrix.
+   * @param[in]     nColumns       number of columns in the matrix.
+   * @param[in]     *pData         points to the matrix data array.
+   * @return        none.
+   */
+
+  void arm_mat_init_f32(
+			arm_matrix_instance_f32 * S,
+			uint16_t nRows,
+			uint16_t nColumns,
+			float32_t   *pData);
+
+
+
+  /**
+   * @brief Instance structure for the Q15 PID Control.
+   * @note Unlike the Q31 and floating-point PID instances, this structure has no separate A2
+   *       member and A1 is a 32-bit q31_t rather than a q15_t.
+   */
+  typedef struct
+  {
+    q15_t A0;           /**< The derived gain, A0 = Kp + Ki + Kd. */
+    q31_t A1;           /**< The derived gains "-Kp - 2Kd | Kd" held in one 32-bit word; presumably (-Kp - 2Kd) and Kd
+                             packed as two Q15 halves - TODO confirm packing order against arm_pid_init_q15(). */
+    q15_t state[3];       /**< The state array of length 3. */
+    q15_t Kp;           /**< The proportional gain. */
+    q15_t Ki;           /**< The integral gain. */
+    q15_t Kd;           /**< The derivative gain. */
+  } arm_pid_instance_q15;
+
+  /**
+   * @brief Instance structure for the Q31 PID Control.
+   */
+  typedef struct
+  {
+    q31_t A0;            /**< The derived gain, A0 = Kp + Ki + Kd. */
+    q31_t A1;            /**< The derived gain, A1 = -Kp - 2Kd. */
+    q31_t A2;            /**< The derived gain, A2 = Kd. */
+    q31_t state[3];      /**< The state array of length 3. */
+    q31_t Kp;            /**< The proportional gain. */
+    q31_t Ki;            /**< The integral gain. */
+    q31_t Kd;            /**< The derivative gain. */
+
+  } arm_pid_instance_q31;
+
+  /**
+   * @brief Instance structure for the floating-point PID Control.
+   */
+  typedef struct
+  {
+    float32_t A0;          /**< The derived gain, A0 = Kp + Ki + Kd. */
+    float32_t A1;          /**< The derived gain, A1 = -Kp - 2Kd. */
+    float32_t A2;          /**< The derived gain, A2 = Kd. */
+    float32_t state[3];    /**< The state array of length 3. */
+    float32_t Kp;               /**< The proportional gain. */
+    float32_t Ki;               /**< The integral gain. */
+    float32_t Kd;               /**< The derivative gain. */
+  } arm_pid_instance_f32;
+
+
+
+  /**
+   * @brief  Initialization function for the floating-point PID Control.
+   * @param[in,out] *S              points to an instance of the floating-point PID structure.
+   * @param[in]     resetStateFlag  flag to reset the state: 0 = no change in state, 1 = reset the state.
+   * @return none.
+   */
+  void arm_pid_init_f32(
+			arm_pid_instance_f32 * S,
+			int32_t resetStateFlag);
+
+  /**
+   * @brief  Reset function for the floating-point PID Control.
+   * @param[in,out] *S points to an instance of the floating-point PID Control structure.
+   * @return none.
+   */
+  void arm_pid_reset_f32(
+			 arm_pid_instance_f32 * S);
+
+
+  /**
+   * @brief  Initialization function for the Q31 PID Control.
+   * @param[in,out] *S              points to an instance of the Q31 PID structure.
+   * @param[in]     resetStateFlag  flag to reset the state: 0 = no change in state, 1 = reset the state.
+   * @return none.
+   */
+  void arm_pid_init_q31(
+			arm_pid_instance_q31 * S,
+			int32_t resetStateFlag);
+
+ 
+  /**
+   * @brief  Reset function for the Q31 PID Control.
+   * @param[in,out] *S points to an instance of the Q31 PID Control structure.
+   * @return none.
+   */
+
+  void arm_pid_reset_q31(
+			 arm_pid_instance_q31 * S);
+
+  /**
+   * @brief  Initialization function for the Q15 PID Control.
+   * @param[in,out] *S              points to an instance of the Q15 PID structure.
+   * @param[in]     resetStateFlag  flag to reset the state: 0 = no change in state, 1 = reset the state.
+   * @return none.
+   */
+  void arm_pid_init_q15(
+			arm_pid_instance_q15 * S,
+			int32_t resetStateFlag);
+
+  /**
+   * @brief  Reset function for the Q15 PID Control.
+   * @param[in,out] *S points to an instance of the Q15 PID Control structure.
+   * @return none.
+   */
+  void arm_pid_reset_q15(
+			 arm_pid_instance_q15 * S);
+
+
+  /**
+   * @brief Instance structure for the floating-point Linear Interpolate function.
+   */
+  typedef struct
+  {
+    uint32_t nValues;           /**< presumably the number of entries in the Y table - TODO confirm */
+    float32_t x1;               /**< presumably the x value of the first table entry - TODO confirm */
+    float32_t xSpacing;         /**< presumably the x distance between consecutive table entries - TODO confirm */
+    float32_t *pYData;          /**< pointer to the table of Y values */
+  } arm_linear_interp_instance_f32;
+
+  /**
+   * @brief Instance structure for the floating-point bilinear interpolation function.
+   * @note The table presumably holds numRows*numCols entries; element ordering is not stated here - TODO confirm.
+   */
+
+  typedef struct
+  {
+    uint16_t numRows;	/**< number of rows in the data table. */
+    uint16_t numCols;	/**< number of columns in the data table. */
+    float32_t *pData;	/**< points to the data table. */
+  } arm_bilinear_interp_instance_f32;
+
+   /**
+   * @brief Instance structure for the Q31 bilinear interpolation function.
+   * @note The table presumably holds numRows*numCols entries; element ordering is not stated here - TODO confirm.
+   */
+
+  typedef struct
+  {
+    uint16_t numRows;	/**< number of rows in the data table. */
+    uint16_t numCols;	/**< number of columns in the data table. */
+    q31_t *pData;	/**< points to the data table. */
+  } arm_bilinear_interp_instance_q31;
+
+   /**
+   * @brief Instance structure for the Q15 bilinear interpolation function.
+   * @note The table presumably holds numRows*numCols entries; element ordering is not stated here - TODO confirm.
+   */
+
+  typedef struct
+  {
+    uint16_t numRows;	/**< number of rows in the data table. */
+    uint16_t numCols;	/**< number of columns in the data table. */
+    q15_t *pData;	/**< points to the data table. */
+  } arm_bilinear_interp_instance_q15;
+
+   /**
+   * @brief Instance structure for the Q7 bilinear interpolation function.
+   */
+
+  typedef struct
+  {
+    uint16_t numRows; 	/**< number of rows in the data table. */
+    uint16_t numCols;	/**< number of columns in the data table. */
+    q7_t *pData;		/**< points to the data table. */
+  } arm_bilinear_interp_instance_q7;
+
+
+  /**
+   * @brief Q7 vector multiplication.
+   * @param[in]       *pSrcA points to the first input vector
+   * @param[in]       *pSrcB points to the second input vector
+   * @param[out]      *pDst  points to the output vector
+   * @param[in]       blockSize number of samples in each vector
+   * @return none.
+   * @note pSrcA and pSrcB are documented as inputs, but the pointers are not const-qualified in this prototype.
+   */
+
+  void arm_mult_q7(
+		    q7_t * pSrcA,
+		    q7_t * pSrcB,
+		   q7_t * pDst,
+		   uint32_t blockSize);
+
+  /**
+   * @brief Q15 vector multiplication.
+   * @param[in]       *pSrcA points to the first input vector
+   * @param[in]       *pSrcB points to the second input vector
+   * @param[out]      *pDst  points to the output vector
+   * @param[in]       blockSize number of samples in each vector
+   * @return none.
+   * @note pSrcA and pSrcB are documented as inputs, but the pointers are not const-qualified in this prototype.
+   */
+
+  void arm_mult_q15(
+		     q15_t * pSrcA,
+		     q15_t * pSrcB,
+		    q15_t * pDst,
+		    uint32_t blockSize);
+
+  /**
+   * @brief Q31 vector multiplication.
+   * @param[in]       *pSrcA points to the first input vector
+   * @param[in]       *pSrcB points to the second input vector
+   * @param[out]      *pDst  points to the output vector
+   * @param[in]       blockSize number of samples in each vector
+   * @return none.
+   * @note pSrcA and pSrcB are documented as inputs, but the pointers are not const-qualified in this prototype.
+   */
+
+  void arm_mult_q31(
+		     q31_t * pSrcA,
+		     q31_t * pSrcB,
+		    q31_t * pDst,
+		    uint32_t blockSize);
+
+  /**
+   * @brief Floating-point vector multiplication.
+   * @param[in]       *pSrcA points to the first input vector
+   * @param[in]       *pSrcB points to the second input vector
+   * @param[out]      *pDst  points to the output vector
+   * @param[in]       blockSize number of samples in each vector
+   * @return none.
+   * @note pSrcA and pSrcB are documented as inputs, but the pointers are not const-qualified in this prototype.
+   */
+
+  void arm_mult_f32(
+		     float32_t * pSrcA,
+		     float32_t * pSrcB,
+		    float32_t * pDst,
+		    uint32_t blockSize);
+
+
+  /**
+   * @brief Instance structure for the Q15 radix-4 CFFT/CIFFT function.
+   */
+
+  typedef struct
+  {
+    uint16_t  fftLen;                /**< length of the FFT. */
+    uint8_t   ifftFlag;              /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
+    uint8_t   bitReverseFlag;        /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
+    q15_t     *pTwiddle;             /**< points to the twiddle factor table. */
+    uint16_t  *pBitRevTable;         /**< points to the bit reversal table. */
+    uint16_t  twidCoefModifier;      /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
+    uint16_t  bitRevFactor;          /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
+  } arm_cfft_radix4_instance_q15;
+
+  /**
+   * @brief Instance structure for the Q31 radix-4 CFFT/CIFFT function.
+   */
+
+  typedef struct
+  {
+    uint16_t    fftLen;              /**< length of the FFT. */
+    uint8_t     ifftFlag;            /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
+    uint8_t     bitReverseFlag;      /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
+    q31_t       *pTwiddle;           /**< points to the twiddle factor table. */
+    uint16_t    *pBitRevTable;       /**< points to the bit reversal table. */
+    uint16_t    twidCoefModifier;    /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
+    uint16_t    bitRevFactor;        /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
+  } arm_cfft_radix4_instance_q31;
+
+  /**
+   * @brief Instance structure for the floating-point radix-4 CFFT/CIFFT function.
+   */
+
+  typedef struct
+  {
+    uint16_t     fftLen;               /**< length of the FFT. */
+    uint8_t      ifftFlag;             /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
+    uint8_t      bitReverseFlag;       /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
+    float32_t    *pTwiddle;            /**< points to the twiddle factor table. */
+    uint16_t     *pBitRevTable;        /**< points to the bit reversal table. */
+    uint16_t     twidCoefModifier;     /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
+    uint16_t     bitRevFactor;         /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
+	float32_t    onebyfftLen;          /**< value of 1/fftLen. */
+  } arm_cfft_radix4_instance_f32;
+
+  /**
+   * @brief Processing function for the Q15 CFFT/CIFFT.
+   * @param[in]     *S    points to an instance of the Q15 CFFT/CIFFT structure.
+   * @param[in,out] *pSrc points to the complex data buffer. Processing occurs in-place.
+   * @return none.
+   */
+
+  void arm_cfft_radix4_q15(
+			   const arm_cfft_radix4_instance_q15 * S,
+			   q15_t * pSrc);
+
+  /**
+   * @brief Initialization function for the Q15 CFFT/CIFFT.
+   * @param[in,out] *S             points to an instance of the Q15 CFFT/CIFFT structure.
+   * @param[in]     fftLen         length of the FFT.
+   * @param[in]     ifftFlag       flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform.
+   * @param[in]     bitReverseFlag flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output.
+   * @return        The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>fftLen</code> is not a supported value.
+   */
+
+  arm_status arm_cfft_radix4_init_q15(
+				      arm_cfft_radix4_instance_q15 * S,
+				      uint16_t fftLen,
+				      uint8_t ifftFlag,
+				      uint8_t bitReverseFlag);
+
+  /**
+   * @brief Processing function for the Q31 CFFT/CIFFT.
+   * @param[in]     *S    points to an instance of the Q31 CFFT/CIFFT structure.
+   * @param[in,out] *pSrc points to the complex data buffer. Processing occurs in-place.
+   * @return none.
+   */
+
+  void arm_cfft_radix4_q31(
+			   const arm_cfft_radix4_instance_q31 * S,
+			   q31_t * pSrc);
+
+  /**
+   * @brief  Initialization function for the Q31 CFFT/CIFFT.
+   * @param[in,out] *S             points to an instance of the Q31 CFFT/CIFFT structure.
+   * @param[in]     fftLen         length of the FFT.
+   * @param[in]     ifftFlag       flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform.
+   * @param[in]     bitReverseFlag flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output.
+   * @return        The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>fftLen</code> is not a supported value.
+   */
+  
+  arm_status arm_cfft_radix4_init_q31(
+				      arm_cfft_radix4_instance_q31 * S,
+				      uint16_t fftLen,
+				      uint8_t ifftFlag,
+				      uint8_t bitReverseFlag);
+
+  /**
+   * @brief Processing function for the floating-point CFFT/CIFFT.
+   * @param[in]     *S    points to an instance of the floating-point CFFT/CIFFT structure.
+   * @param[in,out] *pSrc points to the complex data buffer. Processing occurs in-place.
+   * @return none.
+   */
+
+  void arm_cfft_radix4_f32(
+			   const arm_cfft_radix4_instance_f32 * S,
+			   float32_t * pSrc);
+
+  /**
+   * @brief  Initialization function for the floating-point CFFT/CIFFT.
+   * @param[in,out] *S             points to an instance of the floating-point CFFT/CIFFT structure.
+   * @param[in]     fftLen         length of the FFT.
+   * @param[in]     ifftFlag       flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform.
+   * @param[in]     bitReverseFlag flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output.
+   * @return        The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>fftLen</code> is not a supported value.
+   */
+  
+  arm_status arm_cfft_radix4_init_f32(
+				      arm_cfft_radix4_instance_f32 * S,
+				      uint16_t fftLen,
+				      uint8_t ifftFlag,
+				      uint8_t bitReverseFlag);
+
+
+
+  /*----------------------------------------------------------------------
+   *		Internal function prototypes for the FFT functions
+   ----------------------------------------------------------------------*/
+
+  /**
+   * @brief  Core function for the floating-point CFFT butterfly process.
+   * @param[in,out] *pSrc            points to the in-place buffer of floating-point data type.
+   * @param[in]     fftLen           length of the FFT.
+   * @param[in]     *pCoef           points to the twiddle coefficient buffer.
+   * @param[in]     twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
+   * @return none.
+   */
+  
+  void arm_radix4_butterfly_f32(
+				float32_t * pSrc,
+				uint16_t fftLen,
+				float32_t * pCoef,
+				uint16_t twidCoefModifier);
+
+  /**
+   * @brief  Core function for the floating-point CIFFT butterfly process.
+   * @param[in, out] *pSrc            points to the in-place buffer of floating-point data type.
+   * @param[in]      fftLen           length of the FFT.
+   * @param[in]      *pCoef           points to twiddle coefficient buffer.
+   * @param[in]      twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
+   * @param[in]      onebyfftLen      value of 1/fftLen.
+   * @return none.
+   */
+  
+  void arm_radix4_butterfly_inverse_f32(
+					float32_t * pSrc,
+					uint16_t fftLen,
+					float32_t * pCoef,
+					uint16_t twidCoefModifier,
+					float32_t onebyfftLen);
+
+  /**
+   * @brief  In-place bit reversal function.
+   * @param[in, out] *pSrc        points to the in-place buffer of floating-point data type.
+   * @param[in]      fftSize      length of the FFT.
+   * @param[in]      bitRevFactor bit reversal modifier that supports different size FFTs with the same bit reversal table.
+   * @param[in]      *pBitRevTab  points to the bit reversal table.
+   * @return none.
+   */
+
+  void arm_bitreversal_f32(
+			   float32_t *pSrc,
+			   uint16_t fftSize,
+			   uint16_t bitRevFactor,
+			   uint16_t *pBitRevTab);
+
+  /**
+   * @brief  Core function for the Q31 CFFT butterfly process.
+   * @param[in, out] *pSrc            points to the in-place buffer of Q31 data type.
+   * @param[in]      fftLen           length of the FFT.
+   * @param[in]      *pCoef           points to twiddle coefficient buffer.
+   * @param[in]      twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
+   * @return none.
+   */
+  
+  void arm_radix4_butterfly_q31(
+				q31_t *pSrc,
+				uint32_t fftLen,
+				q31_t *pCoef,
+				uint32_t twidCoefModifier);
+
+  /**
+   * @brief  Core function for the Q31 CIFFT butterfly process.
+   * @param[in, out] *pSrc            points to the in-place buffer of Q31 data type.
+   * @param[in]      fftLen           length of the FFT.
+   * @param[in]      *pCoef           points to twiddle coefficient buffer.
+   * @param[in]      twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
+   * @return none.
+   */
+  
+  void arm_radix4_butterfly_inverse_q31(
+					q31_t * pSrc,
+					uint32_t fftLen,
+					q31_t * pCoef,
+					uint32_t twidCoefModifier);
+  
+  /**
+   * @brief  In-place bit reversal function.
+   * @param[in, out] *pSrc        points to the in-place buffer of Q31 data type.
+   * @param[in]      fftLen       length of the FFT.
+   * @param[in]      bitRevFactor bit reversal modifier that supports different size FFTs with the same bit reversal table.
+   * @param[in]      *pBitRevTab  points to bit reversal table.
+   * @return none.
+   */
+
+  void arm_bitreversal_q31(
+			   q31_t * pSrc,
+			   uint32_t fftLen,
+			   uint16_t bitRevFactor,
+			   uint16_t *pBitRevTab);
+
+  /**
+   * @brief  Core function for the Q15 CFFT butterfly process.
+   * @param[in, out] *pSrc16          points to the in-place buffer of Q15 data type.
+   * @param[in]      fftLen           length of the FFT.
+   * @param[in]      *pCoef16         points to twiddle coefficient buffer.
+   * @param[in]      twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
+   * @return none.
+   */
+
+  void arm_radix4_butterfly_q15(
+				q15_t *pSrc16,
+				uint32_t fftLen,
+				q15_t *pCoef16,
+				uint32_t twidCoefModifier);
+
+  /**
+   * @brief  Core function for the Q15 CIFFT butterfly process.
+   * @param[in, out] *pSrc16          points to the in-place buffer of Q15 data type.
+   * @param[in]      fftLen           length of the FFT.
+   * @param[in]      *pCoef16         points to twiddle coefficient buffer.
+   * @param[in]      twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
+   * @return none.
+   */
+
+  void arm_radix4_butterfly_inverse_q15(
+					q15_t *pSrc16,
+					uint32_t fftLen,
+					q15_t *pCoef16,
+					uint32_t twidCoefModifier);
+
+  /**
+   * @brief  In-place bit reversal function.
+   * @param[in, out] *pSrc        points to the in-place buffer of Q15 data type.
+   * @param[in]      fftLen       length of the FFT.
+   * @param[in]      bitRevFactor bit reversal modifier that supports different size FFTs with the same bit reversal table.
+   * @param[in]      *pBitRevTab  points to bit reversal table.
+   * @return none.
+   */
+
+  void arm_bitreversal_q15(
+			   q15_t * pSrc,
+			   uint32_t fftLen,
+			   uint16_t bitRevFactor,
+			   uint16_t *pBitRevTab);
+
+  /**
+   * @brief Instance structure for the Q15 RFFT/RIFFT function.
+   *
+   * An instance is handed to arm_rfft_init_q15() for initialization and is
+   * then passed, as a pointer to const, to arm_rfft_q15() for processing.
+   * The complex FFT instance referenced by pCfft is a separate object of
+   * type arm_cfft_radix4_instance_q15.
+   */
+
+  typedef struct
+  {
+    uint32_t fftLenReal;                      /**< length of the real FFT. */
+    uint32_t fftLenBy2;                       /**< length of the complex FFT (presumably fftLenReal / 2, going by the name; TODO confirm). */
+    uint8_t  ifftFlagR;                       /**< flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform. */
+	uint8_t  bitReverseFlagR;                 /**< flag that enables (bitReverseFlagR=1) or disables (bitReverseFlagR=0) bit reversal of output. */
+    uint32_t twidCoefRModifier;               /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */  
+    q15_t    *pTwiddleAReal;                  /**< points to the real twiddle factor table. */
+    q15_t    *pTwiddleBReal;                  /**< points to the imaginary twiddle factor table. */
+    arm_cfft_radix4_instance_q15 *pCfft;	  /**< points to the complex FFT instance. */
+  } arm_rfft_instance_q15;
+
+  /**
+   * @brief Instance structure for the Q31 RFFT/RIFFT function.
+   *
+   * An instance is handed to arm_rfft_init_q31() for initialization and is
+   * then passed, as a pointer to const, to arm_rfft_q31() for processing.
+   * The complex FFT instance referenced by pCfft is a separate object of
+   * type arm_cfft_radix4_instance_q31.
+   */
+
+  typedef struct
+  {
+    uint32_t fftLenReal;                        /**< length of the real FFT. */
+    uint32_t fftLenBy2;                         /**< length of the complex FFT (presumably fftLenReal / 2, going by the name; TODO confirm). */
+    uint8_t  ifftFlagR;                         /**< flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform. */
+	uint8_t  bitReverseFlagR;                   /**< flag that enables (bitReverseFlagR=1) or disables (bitReverseFlagR=0) bit reversal of output. */
+    uint32_t twidCoefRModifier;                 /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
+    q31_t    *pTwiddleAReal;                    /**< points to the real twiddle factor table. */
+    q31_t    *pTwiddleBReal;                    /**< points to the imaginary twiddle factor table. */
+    arm_cfft_radix4_instance_q31 *pCfft;        /**< points to the complex FFT instance. */
+  } arm_rfft_instance_q31;
+
+  /**
+   * @brief Instance structure for the floating-point RFFT/RIFFT function.
+   *
+   * An instance is handed to arm_rfft_init_f32() for initialization and is
+   * then passed, as a pointer to const, to arm_rfft_f32() for processing.
+   * The complex FFT instance referenced by pCfft is a separate object of
+   * type arm_cfft_radix4_instance_f32.
+   */
+
+  typedef struct
+  {
+    uint32_t  fftLenReal;                       /**< length of the real FFT. */
+    /* NOTE(review): this field is 16 bits wide here, whereas the Q15 and Q31
+     * RFFT instance structures declare fftLenBy2 as uint32_t. */
+    uint16_t  fftLenBy2;                        /**< length of the complex FFT (presumably fftLenReal / 2, going by the name; TODO confirm). */
+    uint8_t   ifftFlagR;                        /**< flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform. */
+    uint8_t   bitReverseFlagR;                  /**< flag that enables (bitReverseFlagR=1) or disables (bitReverseFlagR=0) bit reversal of output. */
+	uint32_t  twidCoefRModifier;                /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
+    float32_t *pTwiddleAReal;                   /**< points to the real twiddle factor table. */
+    float32_t *pTwiddleBReal;                   /**< points to the imaginary twiddle factor table. */
+    arm_cfft_radix4_instance_f32 *pCfft;        /**< points to the complex FFT instance. */
+  } arm_rfft_instance_f32;
+
+  /**
+   * @brief Processing function for the Q15 RFFT/RIFFT.
+   * @param[in]  *S    points to an instance of the Q15 RFFT/RIFFT structure.
+   * @param[in]  *pSrc points to the input buffer.
+   * @param[out] *pDst points to the output buffer.
+   * @return none.
+   */
+
+  void arm_rfft_q15(
+		    const arm_rfft_instance_q15 * S,
+		    q15_t * pSrc,
+		    q15_t * pDst);
+
+  /**
+   * @brief  Initialization function for the Q15 RFFT/RIFFT.
+   * @param[in, out] *S             points to an instance of the Q15 RFFT/RIFFT structure.
+   * @param[in, out] *S_CFFT        points to an instance of the Q15 CFFT/CIFFT structure.
+   * @param[in]      fftLenReal     length of the FFT.
+   * @param[in]      ifftFlagR      flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform.
+   * @param[in]      bitReverseFlag flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output.
+   * @return		The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>fftLenReal</code> is not a supported value.
+   */
+
+  arm_status arm_rfft_init_q15(
+			       arm_rfft_instance_q15 * S,
+			       arm_cfft_radix4_instance_q15 * S_CFFT,
+			       uint32_t fftLenReal,
+			       uint32_t ifftFlagR,
+			       uint32_t bitReverseFlag);
+
+  /**
+   * @brief Processing function for the Q31 RFFT/RIFFT.
+   * @param[in]  *S    points to an instance of the Q31 RFFT/RIFFT structure.
+   * @param[in]  *pSrc points to the input buffer.
+   * @param[out] *pDst points to the output buffer.
+   * @return none.
+   */
+
+  void arm_rfft_q31(
+		    const arm_rfft_instance_q31 * S,
+		    q31_t * pSrc,
+		    q31_t * pDst);
+
+  /**
+   * @brief  Initialization function for the Q31 RFFT/RIFFT.
+   * @param[in, out] *S             points to an instance of the Q31 RFFT/RIFFT structure.
+   * @param[in, out] *S_CFFT        points to an instance of the Q31 CFFT/CIFFT structure.
+   * @param[in]      fftLenReal     length of the FFT.
+   * @param[in]      ifftFlagR      flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform.
+   * @param[in]      bitReverseFlag flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output.
+   * @return		The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>fftLenReal</code> is not a supported value.
+   */
+
+  arm_status arm_rfft_init_q31(
+			       arm_rfft_instance_q31 * S,
+			       arm_cfft_radix4_instance_q31 * S_CFFT,
+			       uint32_t fftLenReal,
+			       uint32_t ifftFlagR,
+			       uint32_t bitReverseFlag);
+
+  /**
+   * @brief  Initialization function for the floating-point RFFT/RIFFT.
+   * @param[in,out] *S             points to an instance of the floating-point RFFT/RIFFT structure.
+   * @param[in,out] *S_CFFT        points to an instance of the floating-point CFFT/CIFFT structure.
+   * @param[in]     fftLenReal     length of the FFT.
+   * @param[in]     ifftFlagR      flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform.
+   * @param[in]     bitReverseFlag flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output.
+   * @return		The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>fftLenReal</code> is not a supported value.
+   */
+
+  arm_status arm_rfft_init_f32(
+			       arm_rfft_instance_f32 * S,
+			       arm_cfft_radix4_instance_f32 * S_CFFT,
+			       uint32_t fftLenReal,
+			       uint32_t ifftFlagR,
+			       uint32_t bitReverseFlag);
+
+  /**
+   * @brief Processing function for the floating-point RFFT/RIFFT.
+   * @param[in]  *S    points to an instance of the floating-point RFFT/RIFFT structure.
+   * @param[in]  *pSrc points to the input buffer.
+   * @param[out] *pDst points to the output buffer.
+   * @return none.
+   */
+
+  void arm_rfft_f32(
+		    const arm_rfft_instance_f32 * S,
+		    float32_t * pSrc,
+		    float32_t * pDst);
+
+  /**
+   * @brief Instance structure for the floating-point DCT4/IDCT4 function.
+   *
+   * An instance is handed to arm_dct4_init_f32() for initialization and is
+   * then passed, as a pointer to const, to arm_dct4_f32() for processing.
+   * The real and complex FFT instances referenced by pRfft and pCfft are
+   * separate objects.
+   */
+
+  typedef struct
+  {
+    uint16_t N;                         /**< length of the DCT4. */
+    uint16_t Nby2;                      /**< half of the length of the DCT4. */
+    float32_t normalize;                /**< normalizing factor. */
+    float32_t *pTwiddle;                /**< points to the twiddle factor table. */
+    float32_t *pCosFactor;              /**< points to the cosFactor table. */
+    arm_rfft_instance_f32 *pRfft;        /**< points to the real FFT instance. */
+    arm_cfft_radix4_instance_f32 *pCfft; /**< points to the complex FFT instance. */
+  } arm_dct4_instance_f32;
+
+  /**
+   * @brief  Initialization function for the floating-point DCT4/IDCT4.
+   * @param[in,out] *S         points to an instance of floating-point DCT4/IDCT4 structure.
+   * @param[in]     *S_RFFT    points to an instance of floating-point RFFT/RIFFT structure.
+   * @param[in]     *S_CFFT    points to an instance of floating-point CFFT/CIFFT structure.
+   * @param[in]     N          length of the DCT4.
+   * @param[in]     Nby2       half of the length of the DCT4.
+   * @param[in]     normalize  normalizing factor.
+   * @return		arm_status function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>N</code> is not a supported transform length.
+   */
+
+  arm_status arm_dct4_init_f32(
+			       arm_dct4_instance_f32 * S,
+			       arm_rfft_instance_f32 * S_RFFT,
+			       arm_cfft_radix4_instance_f32 * S_CFFT,
+			       uint16_t N,
+			       uint16_t Nby2,
+			       float32_t normalize);
+
+  /**
+   * @brief Processing function for the floating-point DCT4/IDCT4.
+   * @param[in]       *S             points to an instance of the floating-point DCT4/IDCT4 structure.
+   * @param[in]       *pState        points to state buffer.
+   * @param[in,out]   *pInlineBuffer points to the in-place input and output buffer.
+   * @return none.
+   */
+
+  void arm_dct4_f32(
+		    const arm_dct4_instance_f32 * S,
+		    float32_t * pState,
+		    float32_t * pInlineBuffer);
+
+  /**
+   * @brief Instance structure for the Q31 DCT4/IDCT4 function.
+   *
+   * An instance is handed to arm_dct4_init_q31() for initialization and is
+   * then passed, as a pointer to const, to arm_dct4_q31() for processing.
+   * The real and complex FFT instances referenced by pRfft and pCfft are
+   * separate objects.
+   */
+
+  typedef struct
+  {
+    uint16_t N;                         /**< length of the DCT4. */
+    uint16_t Nby2;                      /**< half of the length of the DCT4. */
+    q31_t normalize;                    /**< normalizing factor. */
+    q31_t *pTwiddle;                    /**< points to the twiddle factor table. */
+    q31_t *pCosFactor;                  /**< points to the cosFactor table. */
+    arm_rfft_instance_q31 *pRfft;        /**< points to the real FFT instance. */
+    arm_cfft_radix4_instance_q31 *pCfft; /**< points to the complex FFT instance. */
+  } arm_dct4_instance_q31;
+
+  /**
+   * @brief  Initialization function for the Q31 DCT4/IDCT4.
+   * @param[in,out] *S         points to an instance of Q31 DCT4/IDCT4 structure.
+   * @param[in]     *S_RFFT    points to an instance of Q31 RFFT/RIFFT structure.
+   * @param[in]     *S_CFFT    points to an instance of Q31 CFFT/CIFFT structure.
+   * @param[in]     N          length of the DCT4.
+   * @param[in]     Nby2       half of the length of the DCT4.
+   * @param[in]     normalize  normalizing factor.
+   * @return		arm_status function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>N</code> is not a supported transform length.
+   */
+
+  arm_status arm_dct4_init_q31(
+			       arm_dct4_instance_q31 * S,
+			       arm_rfft_instance_q31 * S_RFFT,
+			       arm_cfft_radix4_instance_q31 * S_CFFT,
+			       uint16_t N,
+			       uint16_t Nby2,
+			       q31_t normalize);
+
+  /**
+   * @brief Processing function for the Q31 DCT4/IDCT4.
+   * @param[in]       *S             points to an instance of the Q31 DCT4/IDCT4 structure.
+   * @param[in]       *pState        points to state buffer.
+   * @param[in,out]   *pInlineBuffer points to the in-place input and output buffer.
+   * @return none.
+   */
+
+  void arm_dct4_q31(
+		    const arm_dct4_instance_q31 * S,
+		    q31_t * pState,
+		    q31_t * pInlineBuffer);
+
+  /**
+   * @brief Instance structure for the Q15 DCT4/IDCT4 function.
+   *
+   * An instance is handed to arm_dct4_init_q15() for initialization and is
+   * then passed, as a pointer to const, to arm_dct4_q15() for processing.
+   * The real and complex FFT instances referenced by pRfft and pCfft are
+   * separate objects.
+   */
+
+  typedef struct
+  {
+    uint16_t N;                         /**< length of the DCT4. */
+    uint16_t Nby2;                      /**< half of the length of the DCT4. */
+    q15_t normalize;                    /**< normalizing factor. */
+    q15_t *pTwiddle;                    /**< points to the twiddle factor table. */
+    q15_t *pCosFactor;                  /**< points to the cosFactor table. */
+    arm_rfft_instance_q15 *pRfft;        /**< points to the real FFT instance. */
+    arm_cfft_radix4_instance_q15 *pCfft; /**< points to the complex FFT instance. */
+  } arm_dct4_instance_q15;
+
+  /**
+   * @brief  Initialization function for the Q15 DCT4/IDCT4.
+   * @param[in,out] *S         points to an instance of Q15 DCT4/IDCT4 structure.
+   * @param[in]     *S_RFFT    points to an instance of Q15 RFFT/RIFFT structure.
+   * @param[in]     *S_CFFT    points to an instance of Q15 CFFT/CIFFT structure.
+   * @param[in]     N          length of the DCT4.
+   * @param[in]     Nby2       half of the length of the DCT4.
+   * @param[in]     normalize  normalizing factor.
+   * @return		arm_status function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>N</code> is not a supported transform length.
+   */
+
+  arm_status arm_dct4_init_q15(
+			       arm_dct4_instance_q15 * S,
+			       arm_rfft_instance_q15 * S_RFFT,
+			       arm_cfft_radix4_instance_q15 * S_CFFT,
+			       uint16_t N,
+			       uint16_t Nby2,
+			       q15_t normalize);
+
+  /**
+   * @brief Processing function for the Q15 DCT4/IDCT4.
+   * @param[in]       *S             points to an instance of the Q15 DCT4/IDCT4 structure.
+   * @param[in]       *pState        points to state buffer.
+   * @param[in,out]   *pInlineBuffer points to the in-place input and output buffer.
+   * @return none.
+   */
+
+  void arm_dct4_q15(
+		    const arm_dct4_instance_q15 * S,
+		    q15_t * pState,
+		    q15_t * pInlineBuffer);
+
+  /**
+   * @brief Floating-point vector addition.
+   * @param[in]       *pSrcA points to the first input vector
+   * @param[in]       *pSrcB points to the second input vector
+   * @param[out]      *pDst points to the output vector
+   * @param[in]       blockSize number of samples in each vector
+   * @return none.
+   */
+
+  void arm_add_f32(
+		   float32_t * pSrcA,
+		   float32_t * pSrcB,
+		   float32_t * pDst,
+		   uint32_t blockSize);
+
+  /**
+   * @brief Q7 vector addition.
+   * @param[in]       *pSrcA points to the first input vector
+   * @param[in]       *pSrcB points to the second input vector
+   * @param[out]      *pDst points to the output vector
+   * @param[in]       blockSize number of samples in each vector
+   * @return none.
+   */
+
+  void arm_add_q7(
+		  q7_t * pSrcA,
+		  q7_t * pSrcB,
+		  q7_t * pDst,
+		  uint32_t blockSize);
+
+  /**
+   * @brief Q15 vector addition.
+   * @param[in]       *pSrcA points to the first input vector
+   * @param[in]       *pSrcB points to the second input vector
+   * @param[out]      *pDst points to the output vector
+   * @param[in]       blockSize number of samples in each vector
+   * @return none.
+   */
+
+  void arm_add_q15(
+		    q15_t * pSrcA,
+		    q15_t * pSrcB,
+		   q15_t * pDst,
+		   uint32_t blockSize);
+
+  /**
+   * @brief Q31 vector addition.
+   * @param[in]       *pSrcA points to the first input vector
+   * @param[in]       *pSrcB points to the second input vector
+   * @param[out]      *pDst points to the output vector
+   * @param[in]       blockSize number of samples in each vector
+   * @return none.
+   */
+
+  void arm_add_q31(
+		    q31_t * pSrcA,
+		    q31_t * pSrcB,
+		   q31_t * pDst,
+		   uint32_t blockSize);
+
+  /**
+   * @brief Floating-point vector subtraction.
+   * @param[in]       *pSrcA points to the first input vector
+   * @param[in]       *pSrcB points to the second input vector
+   * @param[out]      *pDst points to the output vector
+   * @param[in]       blockSize number of samples in each vector
+   * @return none.
+   */
+
+  void arm_sub_f32(
+		    float32_t * pSrcA,
+		    float32_t * pSrcB,
+		   float32_t * pDst,
+		   uint32_t blockSize);
+
+  /**
+   * @brief Q7 vector subtraction.
+   * @param[in]       *pSrcA points to the first input vector
+   * @param[in]       *pSrcB points to the second input vector
+   * @param[out]      *pDst points to the output vector
+   * @param[in]       blockSize number of samples in each vector
+   * @return none.
+   */
+
+  void arm_sub_q7(
+		   q7_t * pSrcA,
+		   q7_t * pSrcB,
+		  q7_t * pDst,
+		  uint32_t blockSize);
+
+  /**
+   * @brief Q15 vector subtraction.
+   * @param[in]       *pSrcA points to the first input vector
+   * @param[in]       *pSrcB points to the second input vector
+   * @param[out]      *pDst points to the output vector
+   * @param[in]       blockSize number of samples in each vector
+   * @return none.
+   */
+
+  void arm_sub_q15(
+		    q15_t * pSrcA,
+		    q15_t * pSrcB,
+		   q15_t * pDst,
+		   uint32_t blockSize);
+
+  /**
+   * @brief Q31 vector subtraction.
+   * @param[in]       *pSrcA points to the first input vector
+   * @param[in]       *pSrcB points to the second input vector
+   * @param[out]      *pDst points to the output vector
+   * @param[in]       blockSize number of samples in each vector
+   * @return none.
+   */
+
+  void arm_sub_q31(
+		    q31_t * pSrcA,
+		    q31_t * pSrcB,
+		   q31_t * pDst,
+		   uint32_t blockSize);
+
+  /**
+   * @brief Multiplies a floating-point vector by a scalar.
+   * @param[in]       *pSrc points to the input vector
+   * @param[in]       scale scale factor to be applied
+   * @param[out]      *pDst points to the output vector
+   * @param[in]       blockSize number of samples in the vector
+   * @return none.
+   */
+
+  void arm_scale_f32(
+		      float32_t * pSrc,
+		     float32_t scale,
+		     float32_t * pDst,
+		     uint32_t blockSize);
+
+  /**
+   * @brief Multiplies a Q7 vector by a scalar.
+   * @param[in]       *pSrc points to the input vector
+   * @param[in]       scaleFract fractional portion of the scale value
+   * @param[in]       shift number of bits to shift the result by
+   * @param[out]      *pDst points to the output vector
+   * @param[in]       blockSize number of samples in the vector
+   * @return none.
+   */
+
+  void arm_scale_q7(
+		     q7_t * pSrc,
+		    q7_t scaleFract,
+		    int8_t shift,
+		    q7_t * pDst,
+		    uint32_t blockSize);
+
+  /**
+   * @brief Multiplies a Q15 vector by a scalar.
+   * @param[in]       *pSrc points to the input vector
+   * @param[in]       scaleFract fractional portion of the scale value
+   * @param[in]       shift number of bits to shift the result by
+   * @param[out]      *pDst points to the output vector
+   * @param[in]       blockSize number of samples in the vector
+   * @return none.
+   */
+
+  void arm_scale_q15(
+		      q15_t * pSrc,
+		     q15_t scaleFract,
+		     int8_t shift,
+		     q15_t * pDst,
+		     uint32_t blockSize);
+
+  /**
+   * @brief Multiplies a Q31 vector by a scalar.
+   * @param[in]       *pSrc points to the input vector
+   * @param[in]       scaleFract fractional portion of the scale value
+   * @param[in]       shift number of bits to shift the result by
+   * @param[out]      *pDst points to the output vector
+   * @param[in]       blockSize number of samples in the vector
+   * @return none.
+   */
+
+  void arm_scale_q31(
+		      q31_t * pSrc,
+		     q31_t scaleFract,
+		     int8_t shift,
+		     q31_t * pDst,
+		     uint32_t blockSize);
+
+  /**
+   * @brief Q7 vector absolute value.
+   * @param[in]       *pSrc points to the input buffer
+   * @param[out]      *pDst points to the output buffer
+   * @param[in]       blockSize number of samples in each vector
+   * @return none.
+   */
+
+  void arm_abs_q7(
+		   q7_t * pSrc,
+		  q7_t * pDst,
+		  uint32_t blockSize);
+
+  /**
+   * @brief Floating-point vector absolute value.
+   * @param[in]       *pSrc points to the input buffer
+   * @param[out]      *pDst points to the output buffer
+   * @param[in]       blockSize number of samples in each vector
+   * @return none.
+   */
+
+  void arm_abs_f32(
+		    float32_t * pSrc,
+		   float32_t * pDst,
+		   uint32_t blockSize);
+
+  /**
+   * @brief Q15 vector absolute value.
+   * @param[in]       *pSrc points to the input buffer
+   * @param[out]      *pDst points to the output buffer
+   * @param[in]       blockSize number of samples in each vector
+   * @return none.
+   */
+
+  void arm_abs_q15(
+		    q15_t * pSrc,
+		   q15_t * pDst,
+		   uint32_t blockSize);
+
+  /**
+   * @brief Q31 vector absolute value.
+   * @param[in]       *pSrc points to the input buffer
+   * @param[out]      *pDst points to the output buffer
+   * @param[in]       blockSize number of samples in each vector
+   * @return none.
+   */
+
+  void arm_abs_q31(
+		    q31_t * pSrc,
+		   q31_t * pDst,
+		   uint32_t blockSize);
+
+  /**
+   * @brief Dot product of floating-point vectors.
+   * @param[in]       *pSrcA points to the first input vector
+   * @param[in]       *pSrcB points to the second input vector
+   * @param[in]       blockSize number of samples in each vector
+   * @param[out]      *result output result returned here
+   * @return none.
+   */
+
+  void arm_dot_prod_f32(
+			 float32_t * pSrcA,
+			 float32_t * pSrcB,
+			uint32_t blockSize,
+			float32_t * result);
+
+  /**
+   * @brief Dot product of Q7 vectors.
+   * @param[in]       *pSrcA points to the first input vector
+   * @param[in]       *pSrcB points to the second input vector
+   * @param[in]       blockSize number of samples in each vector
+   * @param[out]      *result output result returned here
+   * @return none.
+   */
+
+  void arm_dot_prod_q7(
+		        q7_t * pSrcA,
+		        q7_t * pSrcB,
+		       uint32_t blockSize,
+		       q31_t * result);
+
+  /**
+   * @brief Dot product of Q15 vectors.
+   * @param[in]       *pSrcA points to the first input vector
+   * @param[in]       *pSrcB points to the second input vector
+   * @param[in]       blockSize number of samples in each vector
+   * @param[out]      *result output result returned here
+   * @return none.
+   */
+
+  void arm_dot_prod_q15(
+			 q15_t * pSrcA,
+			 q15_t * pSrcB,
+			uint32_t blockSize,
+			q63_t * result);
+
+  /**
+   * @brief Dot product of Q31 vectors.
+   * @param[in]       *pSrcA points to the first input vector
+   * @param[in]       *pSrcB points to the second input vector
+   * @param[in]       blockSize number of samples in each vector
+   * @param[out]      *result output result returned here
+   * @return none.
+   */
+
+  void arm_dot_prod_q31(
+			 q31_t * pSrcA,
+			 q31_t * pSrcB,
+			uint32_t blockSize,
+			q63_t * result);
+
+  /**
+   * @brief  Shifts the elements of a Q7 vector a specified number of bits.
+   * @param[in]  *pSrc points to the input vector
+   * @param[in]  shiftBits number of bits to shift.  A positive value shifts left; a negative value shifts right.
+   * @param[out]  *pDst points to the output vector
+   * @param[in]  blockSize number of samples in the vector
+   * @return none.
+   */
+
+  void arm_shift_q7(
+		     q7_t * pSrc,
+		    int8_t shiftBits,
+		    q7_t * pDst,
+		    uint32_t blockSize);
+
+  /**
+   * @brief  Shifts the elements of a Q15 vector a specified number of bits.
+   * @param[in]  *pSrc points to the input vector
+   * @param[in]  shiftBits number of bits to shift.  A positive value shifts left; a negative value shifts right.
+   * @param[out]  *pDst points to the output vector
+   * @param[in]  blockSize number of samples in the vector
+   * @return none.
+   */
+
+  void arm_shift_q15(
+		      q15_t * pSrc,
+		     int8_t shiftBits,
+		     q15_t * pDst,
+		     uint32_t blockSize);
+
+  /**
+   * @brief  Shifts the elements of a Q31 vector a specified number of bits.
+   * @param[in]  *pSrc points to the input vector
+   * @param[in]  shiftBits number of bits to shift.  A positive value shifts left; a negative value shifts right.
+   * @param[out]  *pDst points to the output vector
+   * @param[in]  blockSize number of samples in the vector
+   * @return none.
+   */
+
+  void arm_shift_q31(
+		      q31_t * pSrc,
+		     int8_t shiftBits,
+		     q31_t * pDst,
+		     uint32_t blockSize);
+
+  /**
+   * @brief  Adds a constant offset to a floating-point vector.
+   * @param[in]  *pSrc points to the input vector
+   * @param[in]  offset is the offset to be added
+   * @param[out]  *pDst points to the output vector
+   * @param[in]  blockSize number of samples in the vector
+   * @return none.
+   */
+
+  void arm_offset_f32(
+		       float32_t * pSrc,
+		      float32_t offset,
+		      float32_t * pDst,
+		      uint32_t blockSize);
+
+  /**
+   * @brief  Adds a constant offset to a Q7 vector.
+   * @param[in]  *pSrc points to the input vector
+   * @param[in]  offset is the offset to be added
+   * @param[out]  *pDst points to the output vector
+   * @param[in]  blockSize number of samples in the vector
+   * @return none.
+   */
+
+  void arm_offset_q7(
+		      q7_t * pSrc,
+		     q7_t offset,
+		     q7_t * pDst,
+		     uint32_t blockSize);
+
+  /**
+   * @brief  Adds a constant offset to a Q15 vector.
+   * @param[in]  *pSrc points to the input vector
+   * @param[in]  offset is the offset to be added
+   * @param[out]  *pDst points to the output vector
+   * @param[in]  blockSize number of samples in the vector
+   * @return none.
+   */
+
+  void arm_offset_q15(
+		       q15_t * pSrc,
+		      q15_t offset,
+		      q15_t * pDst,
+		      uint32_t blockSize);
+
+  /**
+   * @brief  Adds a constant offset to a Q31 vector.
+   * @param[in]  *pSrc points to the input vector
+   * @param[in]  offset is the offset to be added
+   * @param[out]  *pDst points to the output vector
+   * @param[in]  blockSize number of samples in the vector
+   * @return none.
+   */
+
+  void arm_offset_q31(
+		       q31_t * pSrc,
+		      q31_t offset,
+		      q31_t * pDst,
+		      uint32_t blockSize);
+
+  /**
+   * @brief  Negates the elements of a floating-point vector.
+   * @param[in]  *pSrc points to the input vector
+   * @param[out]  *pDst points to the output vector
+   * @param[in]  blockSize number of samples in the vector
+   * @return none.
+   */
+
+  void arm_negate_f32(
+		       float32_t * pSrc,
+		      float32_t * pDst,
+		      uint32_t blockSize);
+
+  /**
+   * @brief  Negates the elements of a Q7 vector.
+   * @param[in]  *pSrc points to the input vector
+   * @param[out]  *pDst points to the output vector
+   * @param[in]  blockSize number of samples in the vector
+   * @return none.
+   */
+
+  void arm_negate_q7(
+		      q7_t * pSrc,
+		     q7_t * pDst,
+		     uint32_t blockSize);
+
+  /**
+   * @brief  Negates the elements of a Q15 vector.
+   * @param[in]  *pSrc points to the input vector
+   * @param[out]  *pDst points to the output vector
+   * @param[in]  blockSize number of samples in the vector
+   * @return none.
+   */
+
+  void arm_negate_q15(
+		       q15_t * pSrc,
+		      q15_t * pDst,
+		      uint32_t blockSize);
+
+  /**
+   * @brief  Negates the elements of a Q31 vector.
+   * @param[in]  *pSrc points to the input vector.
+   * @param[out]  *pDst points to the output vector that receives the negated samples.
+   * @param[in]  blockSize number of samples in the vector.
+   * @return none.
+   */
+
+  void arm_negate_q31(
+		       q31_t * pSrc,
+		      q31_t * pDst,
+		      uint32_t blockSize);
+  /**
+   * @brief  Copies the elements of a floating-point vector.
+   * @param[in]  *pSrc points to the input (source) vector.
+   * @param[out]  *pDst points to the output (destination) vector.
+   * @param[in]  blockSize number of samples to process.
+   * @return none.
+   */
+  void arm_copy_f32(
+		     float32_t * pSrc,
+		    float32_t * pDst,
+		    uint32_t blockSize);
+
+  /**
+   * @brief  Copies the elements of a Q7 vector.
+   * @param[in]  *pSrc points to the input (source) vector.
+   * @param[out]  *pDst points to the output (destination) vector.
+   * @param[in]  blockSize number of samples to process.
+   * @return none.
+   */
+  void arm_copy_q7(
+		    q7_t * pSrc,
+		   q7_t * pDst,
+		   uint32_t blockSize);
+
+  /**
+   * @brief  Copies the elements of a Q15 vector.
+   * @param[in]  *pSrc points to the input (source) vector.
+   * @param[out]  *pDst points to the output (destination) vector.
+   * @param[in]  blockSize number of samples to process.
+   * @return none.
+   */
+  void arm_copy_q15(
+		     q15_t * pSrc,
+		    q15_t * pDst,
+		    uint32_t blockSize);
+
+  /**
+   * @brief  Copies the elements of a Q31 vector.
+   * @param[in]  *pSrc points to the input (source) vector.
+   * @param[out]  *pDst points to the output (destination) vector.
+   * @param[in]  blockSize number of samples to process.
+   * @return none.
+   */
+  void arm_copy_q31(
+		     q31_t * pSrc,
+		    q31_t * pDst,
+		    uint32_t blockSize);
+  /**
+   * @brief  Fills a constant value into a floating-point vector.
+   * @param[in]  value constant value to be filled into the vector.
+   * @param[out]  *pDst points to the output vector.
+   * @param[in]  blockSize number of samples to process.
+   * @return none.
+   */
+  void arm_fill_f32(
+		     float32_t value,
+		    float32_t * pDst,
+		    uint32_t blockSize);
+
+  /**
+   * @brief  Fills a constant value into a Q7 vector.
+   * @param[in]  value constant value to be filled into the vector.
+   * @param[out]  *pDst points to the output vector.
+   * @param[in]  blockSize number of samples to process.
+   * @return none.
+   */
+  void arm_fill_q7(
+		    q7_t value,
+		   q7_t * pDst,
+		   uint32_t blockSize);
+
+  /**
+   * @brief  Fills a constant value into a Q15 vector.
+   * @param[in]  value constant value to be filled into the vector.
+   * @param[out]  *pDst points to the output vector.
+   * @param[in]  blockSize number of samples to process.
+   * @return none.
+   */
+  void arm_fill_q15(
+		     q15_t value,
+		    q15_t * pDst,
+		    uint32_t blockSize);
+
+  /**
+   * @brief  Fills a constant value into a Q31 vector.
+   * @param[in]  value constant value to be filled into the vector.
+   * @param[out]  *pDst points to the output vector.
+   * @param[in]  blockSize number of samples to process.
+   * @return none.
+   */
+  void arm_fill_q31(
+		     q31_t value,
+		    q31_t * pDst,
+		    uint32_t blockSize);
+
+  /**
+   * @brief Convolution of floating-point sequences.
+   * @param[in] *pSrcA points to the first input sequence.
+   * @param[in] srcALen length of the first input sequence.
+   * @param[in] *pSrcB points to the second input sequence.
+   * @param[in] srcBLen length of the second input sequence.
+   * @param[out] *pDst points to the block of output data, of length srcALen+srcBLen-1.
+   * @return none.
+   */
+
+  void arm_conv_f32(
+		     float32_t * pSrcA,
+		    uint32_t srcALen,
+		     float32_t * pSrcB,
+		    uint32_t srcBLen,
+		    float32_t * pDst);
+
+  /**
+   * @brief Convolution of Q15 sequences.
+   * @param[in] *pSrcA points to the first input sequence.
+   * @param[in] srcALen length of the first input sequence.
+   * @param[in] *pSrcB points to the second input sequence.
+   * @param[in] srcBLen length of the second input sequence.
+   * @param[out] *pDst points to the block of output data, of length srcALen+srcBLen-1.
+   * @return none.
+   */
+
+  void arm_conv_q15(
+		     q15_t * pSrcA,
+		    uint32_t srcALen,
+		     q15_t * pSrcB,
+		    uint32_t srcBLen,
+		    q15_t * pDst);
+
+  /**
+   * @brief Convolution of Q15 sequences (fast version).
+   * @param[in] *pSrcA points to the first input sequence.
+   * @param[in] srcALen length of the first input sequence.
+   * @param[in] *pSrcB points to the second input sequence.
+   * @param[in] srcBLen length of the second input sequence.
+   * @param[out] *pDst points to the block of output data, of length srcALen+srcBLen-1.
+   * @return none.
+   */
+
+  void arm_conv_fast_q15(
+			  q15_t * pSrcA,
+			 uint32_t srcALen,
+			  q15_t * pSrcB,
+			 uint32_t srcBLen,
+			 q15_t * pDst);
+
+  /**
+   * @brief Convolution of Q31 sequences.
+   * @param[in] *pSrcA points to the first input sequence.
+   * @param[in] srcALen length of the first input sequence.
+   * @param[in] *pSrcB points to the second input sequence.
+   * @param[in] srcBLen length of the second input sequence.
+   * @param[out] *pDst points to the block of output data, of length srcALen+srcBLen-1.
+   * @return none.
+   */
+
+  void arm_conv_q31(
+		     q31_t * pSrcA,
+		    uint32_t srcALen,
+		     q31_t * pSrcB,
+		    uint32_t srcBLen,
+		    q31_t * pDst);
+
+  /**
+   * @brief Convolution of Q31 sequences (fast version).
+   * @param[in] *pSrcA points to the first input sequence.
+   * @param[in] srcALen length of the first input sequence.
+   * @param[in] *pSrcB points to the second input sequence.
+   * @param[in] srcBLen length of the second input sequence.
+   * @param[out] *pDst points to the block of output data, of length srcALen+srcBLen-1.
+   * @return none.
+   */
+
+  void arm_conv_fast_q31(
+			  q31_t * pSrcA,
+			 uint32_t srcALen,
+			  q31_t * pSrcB,
+			 uint32_t srcBLen,
+			 q31_t * pDst);
+
+  /**
+   * @brief Convolution of Q7 sequences.
+   * @param[in] *pSrcA points to the first input sequence.
+   * @param[in] srcALen length of the first input sequence.
+   * @param[in] *pSrcB points to the second input sequence.
+   * @param[in] srcBLen length of the second input sequence.
+   * @param[out] *pDst points to the block of output data, of length srcALen+srcBLen-1.
+   * @return none.
+   */
+
+  void arm_conv_q7(
+		    q7_t * pSrcA,
+		   uint32_t srcALen,
+		    q7_t * pSrcB,
+		   uint32_t srcBLen,
+		   q7_t * pDst);
+
+  /**
+   * @brief Partial convolution of floating-point sequences.
+   * @param[in]       *pSrcA points to the first input sequence.
+   * @param[in]       srcALen length of the first input sequence.
+   * @param[in]       *pSrcB points to the second input sequence.
+   * @param[in]       srcBLen length of the second input sequence.
+   * @param[out]      *pDst points to the block of output data.
+   * @param[in]       firstIndex is the first output sample to start with.
+   * @param[in]       numPoints is the number of output points to be computed.
+   * @return  ARM_MATH_SUCCESS if the function completed correctly, or ARM_MATH_ARGUMENT_ERROR
+   *          if the requested subset is not in the range [0, srcALen+srcBLen-2].
+   */
+
+  arm_status arm_conv_partial_f32(
+				   float32_t * pSrcA,
+				  uint32_t srcALen,
+				   float32_t * pSrcB,
+				  uint32_t srcBLen,
+				  float32_t * pDst,
+				  uint32_t firstIndex,
+				  uint32_t numPoints);
+
+  /**
+   * @brief Partial convolution of Q15 sequences.
+   * @param[in]       *pSrcA points to the first input sequence.
+   * @param[in]       srcALen length of the first input sequence.
+   * @param[in]       *pSrcB points to the second input sequence.
+   * @param[in]       srcBLen length of the second input sequence.
+   * @param[out]      *pDst points to the block of output data.
+   * @param[in]       firstIndex is the first output sample to start with.
+   * @param[in]       numPoints is the number of output points to be computed.
+   * @return  ARM_MATH_SUCCESS if the function completed correctly, or ARM_MATH_ARGUMENT_ERROR
+   *          if the requested subset is not in the range [0, srcALen+srcBLen-2].
+   */
+
+  arm_status arm_conv_partial_q15(
+				   q15_t * pSrcA,
+				  uint32_t srcALen,
+				   q15_t * pSrcB,
+				  uint32_t srcBLen,
+				  q15_t * pDst,
+				  uint32_t firstIndex,
+				  uint32_t numPoints);
+
+  /**
+   * @brief Partial convolution of Q15 sequences (fast version).
+   * @param[in]       *pSrcA points to the first input sequence.
+   * @param[in]       srcALen length of the first input sequence.
+   * @param[in]       *pSrcB points to the second input sequence.
+   * @param[in]       srcBLen length of the second input sequence.
+   * @param[out]      *pDst points to the block of output data.
+   * @param[in]       firstIndex is the first output sample to start with.
+   * @param[in]       numPoints is the number of output points to be computed.
+   * @return  ARM_MATH_SUCCESS if the function completed correctly, or ARM_MATH_ARGUMENT_ERROR
+   *          if the requested subset is not in the range [0, srcALen+srcBLen-2].
+   */
+
+  arm_status arm_conv_partial_fast_q15(
+				        q15_t * pSrcA,
+				       uint32_t srcALen,
+				        q15_t * pSrcB,
+				       uint32_t srcBLen,
+				       q15_t * pDst,
+				       uint32_t firstIndex,
+				       uint32_t numPoints);
+
+  /**
+   * @brief Partial convolution of Q31 sequences.
+   * @param[in]       *pSrcA points to the first input sequence.
+   * @param[in]       srcALen length of the first input sequence.
+   * @param[in]       *pSrcB points to the second input sequence.
+   * @param[in]       srcBLen length of the second input sequence.
+   * @param[out]      *pDst points to the block of output data.
+   * @param[in]       firstIndex is the first output sample to start with.
+   * @param[in]       numPoints is the number of output points to be computed.
+   * @return  ARM_MATH_SUCCESS if the function completed correctly, or ARM_MATH_ARGUMENT_ERROR
+   *          if the requested subset is not in the range [0, srcALen+srcBLen-2].
+   */
+
+  arm_status arm_conv_partial_q31(
+				   q31_t * pSrcA,
+				  uint32_t srcALen,
+				   q31_t * pSrcB,
+				  uint32_t srcBLen,
+				  q31_t * pDst,
+				  uint32_t firstIndex,
+				  uint32_t numPoints);
+
+
+  /**
+   * @brief Partial convolution of Q31 sequences (fast version).
+   * @param[in]       *pSrcA points to the first input sequence.
+   * @param[in]       srcALen length of the first input sequence.
+   * @param[in]       *pSrcB points to the second input sequence.
+   * @param[in]       srcBLen length of the second input sequence.
+   * @param[out]      *pDst points to the block of output data.
+   * @param[in]       firstIndex is the first output sample to start with.
+   * @param[in]       numPoints is the number of output points to be computed.
+   * @return  ARM_MATH_SUCCESS if the function completed correctly, or ARM_MATH_ARGUMENT_ERROR
+   *          if the requested subset is not in the range [0, srcALen+srcBLen-2].
+   */
+
+  arm_status arm_conv_partial_fast_q31(
+				        q31_t * pSrcA,
+				       uint32_t srcALen,
+				        q31_t * pSrcB,
+				       uint32_t srcBLen,
+				       q31_t * pDst,
+				       uint32_t firstIndex,
+				       uint32_t numPoints);
+
+  /**
+   * @brief Partial convolution of Q7 sequences.
+   * @param[in]       *pSrcA points to the first input sequence.
+   * @param[in]       srcALen length of the first input sequence.
+   * @param[in]       *pSrcB points to the second input sequence.
+   * @param[in]       srcBLen length of the second input sequence.
+   * @param[out]      *pDst points to the block of output data.
+   * @param[in]       firstIndex is the first output sample to start with.
+   * @param[in]       numPoints is the number of output points to be computed.
+   * @return  ARM_MATH_SUCCESS if the function completed correctly, or ARM_MATH_ARGUMENT_ERROR
+   *          if the requested subset is not in the range [0, srcALen+srcBLen-2].
+   */
+
+  arm_status arm_conv_partial_q7(
+				  q7_t * pSrcA,
+				 uint32_t srcALen,
+				  q7_t * pSrcB,
+				 uint32_t srcBLen,
+				 q7_t * pDst,
+				 uint32_t firstIndex,
+				 uint32_t numPoints);
+
+
+  /**
+   * @brief Instance structure for the Q15 FIR decimator.
+   *
+   * Passed to arm_fir_decimate_init_q15(), arm_fir_decimate_q15() and arm_fir_decimate_fast_q15().
+   */
+
+  typedef struct
+  {
+    uint8_t M;                      /**< decimation factor. */
+    uint16_t numTaps;               /**< number of coefficients in the filter. */
+    q15_t *pCoeffs;                  /**< points to the coefficient array. The array is of length numTaps. */
+    q15_t *pState;                   /**< points to the state variable array. The array is of length numTaps+blockSize-1, where blockSize is the number of input samples processed per call. */
+  } arm_fir_decimate_instance_q15;
+
+  /**
+   * @brief Instance structure for the Q31 FIR decimator.
+   *
+   * Passed to arm_fir_decimate_init_q31(), arm_fir_decimate_q31() and arm_fir_decimate_fast_q31().
+   */
+
+  typedef struct
+  {
+    uint8_t M;                  /**< decimation factor. */
+    uint16_t numTaps;           /**< number of coefficients in the filter. */
+    q31_t *pCoeffs;              /**< points to the coefficient array. The array is of length numTaps. */
+    q31_t *pState;               /**< points to the state variable array. The array is of length numTaps+blockSize-1, where blockSize is the number of input samples processed per call. */
+
+  } arm_fir_decimate_instance_q31;
+
+  /**
+   * @brief Instance structure for the floating-point FIR decimator.
+   *
+   * Passed to arm_fir_decimate_init_f32() and arm_fir_decimate_f32().
+   */
+
+  typedef struct
+  {
+    uint8_t M;                          /**< decimation factor. */
+    uint16_t numTaps;                   /**< number of coefficients in the filter. */
+    float32_t *pCoeffs;                  /**< points to the coefficient array. The array is of length numTaps. */
+    float32_t *pState;                   /**< points to the state variable array. The array is of length numTaps+blockSize-1, where blockSize is the number of input samples processed per call. */
+
+  } arm_fir_decimate_instance_f32;
+
+
+
+  /**
+   * @brief Processing function for the floating-point FIR decimator.
+   * @param[in] *S points to an instance of the floating-point FIR decimator structure.
+   * @param[in] *pSrc points to the block of input data.
+   * @param[out] *pDst points to the block of output data.
+   * @param[in] blockSize number of input samples to process per call.
+   * @return none.
+   */
+
+  void arm_fir_decimate_f32(
+			    const arm_fir_decimate_instance_f32 * S,
+			     float32_t * pSrc,
+			    float32_t * pDst,
+			    uint32_t blockSize);
+
+
+  /**
+   * @brief  Initialization function for the floating-point FIR decimator.
+   * @param[in,out] *S points to an instance of the floating-point FIR decimator structure.
+   * @param[in] numTaps  number of coefficients in the filter.
+   * @param[in] M  decimation factor.
+   * @param[in] *pCoeffs points to the filter coefficients. The array is of length numTaps.
+   * @param[in] *pState points to the state buffer. The array is of length numTaps+blockSize-1.
+   * @param[in] blockSize number of input samples to process per call.
+   * @return    The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_LENGTH_ERROR if
+   * <code>blockSize</code> is not a multiple of <code>M</code>.
+   */
+
+  arm_status arm_fir_decimate_init_f32(
+				       arm_fir_decimate_instance_f32 * S,
+				       uint16_t numTaps,
+				       uint8_t M,
+				       float32_t * pCoeffs,
+				       float32_t * pState,
+				       uint32_t blockSize);
+
+  /**
+   * @brief Processing function for the Q15 FIR decimator.
+   * @param[in] *S points to an instance of the Q15 FIR decimator structure.
+   * @param[in] *pSrc points to the block of input data.
+   * @param[out] *pDst points to the block of output data.
+   * @param[in] blockSize number of input samples to process per call.
+   * @return none.
+   */
+
+  void arm_fir_decimate_q15(
+			    const arm_fir_decimate_instance_q15 * S,
+			     q15_t * pSrc,
+			    q15_t * pDst,
+			    uint32_t blockSize);
+
+  /**
+   * @brief Processing function for the Q15 FIR decimator (fast variant).
+   * @param[in] *S points to an instance of the Q15 FIR decimator structure.
+   * @param[in] *pSrc points to the block of input data.
+   * @param[out] *pDst points to the block of output data.
+   * @param[in] blockSize number of input samples to process per call.
+   * @return none.
+   */
+
+  void arm_fir_decimate_fast_q15(
+				 const arm_fir_decimate_instance_q15 * S,
+				  q15_t * pSrc,
+				 q15_t * pDst,
+				 uint32_t blockSize);
+
+
+
+  /**
+   * @brief  Initialization function for the Q15 FIR decimator.
+   * @param[in,out] *S points to an instance of the Q15 FIR decimator structure.
+   * @param[in] numTaps  number of coefficients in the filter.
+   * @param[in] M  decimation factor.
+   * @param[in] *pCoeffs points to the filter coefficients. The array is of length numTaps.
+   * @param[in] *pState points to the state buffer. The array is of length numTaps+blockSize-1.
+   * @param[in] blockSize number of input samples to process per call.
+   * @return    The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_LENGTH_ERROR if
+   * <code>blockSize</code> is not a multiple of <code>M</code>.
+   */
+
+  arm_status arm_fir_decimate_init_q15(
+				       arm_fir_decimate_instance_q15 * S,
+				       uint16_t numTaps,
+				       uint8_t M,
+				       q15_t * pCoeffs,
+				       q15_t * pState,
+				       uint32_t blockSize);
+
+  /**
+   * @brief Processing function for the Q31 FIR decimator.
+   * @param[in] *S points to an instance of the Q31 FIR decimator structure.
+   * @param[in] *pSrc points to the block of input data.
+   * @param[out] *pDst points to the block of output data.
+   * @param[in] blockSize number of input samples to process per call.
+   * @return none.
+   */
+
+  void arm_fir_decimate_q31(
+			    const arm_fir_decimate_instance_q31 * S,
+			     q31_t * pSrc,
+			    q31_t * pDst,
+			    uint32_t blockSize);
+
+  /**
+   * @brief Processing function for the Q31 FIR decimator (fast variant).
+   * @param[in] *S points to an instance of the Q31 FIR decimator structure.
+   * @param[in] *pSrc points to the block of input data.
+   * @param[out] *pDst points to the block of output data.
+   * @param[in] blockSize number of input samples to process per call.
+   * @return none.
+   *
+   * @note NOTE(review): unlike arm_fir_decimate_q31() and arm_fir_decimate_fast_q15(), this
+   * prototype declares <code>S</code> without <code>const</code>. The declaration must match the
+   * definition, so confirm against the source file before adding the qualifier.
+   */
+
+  void arm_fir_decimate_fast_q31(
+				 arm_fir_decimate_instance_q31 * S,
+				  q31_t * pSrc,
+				 q31_t * pDst,
+				 uint32_t blockSize);
+
+
+  /**
+   * @brief  Initialization function for the Q31 FIR decimator.
+   * @param[in,out] *S points to an instance of the Q31 FIR decimator structure.
+   * @param[in] numTaps  number of coefficients in the filter.
+   * @param[in] M  decimation factor.
+   * @param[in] *pCoeffs points to the filter coefficients. The array is of length numTaps.
+   * @param[in] *pState points to the state buffer. The array is of length numTaps+blockSize-1.
+   * @param[in] blockSize number of input samples to process per call.
+   * @return    The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_LENGTH_ERROR if
+   * <code>blockSize</code> is not a multiple of <code>M</code>.
+   */
+
+  arm_status arm_fir_decimate_init_q31(
+				       arm_fir_decimate_instance_q31 * S,
+				       uint16_t numTaps,
+				       uint8_t M,
+				       q31_t * pCoeffs,
+				       q31_t * pState,
+				       uint32_t blockSize);
+
+
+
+  /**
+   * @brief Instance structure for the Q15 FIR interpolator.
+   *
+   * Passed to arm_fir_interpolate_init_q15() and arm_fir_interpolate_q15().
+   */
+
+  typedef struct
+  {
+    uint8_t L;                      /**< upsample factor. */
+    uint16_t phaseLength;           /**< length of each polyphase filter component. */
+    q15_t *pCoeffs;                 /**< points to the coefficient array. The array is of length L*phaseLength. */
+    q15_t *pState;                  /**< points to the state variable array. The array is of length blockSize+phaseLength-1, where blockSize is the number of input samples processed per call. */
+  } arm_fir_interpolate_instance_q15;
+
+  /**
+   * @brief Instance structure for the Q31 FIR interpolator.
+   *
+   * Passed to arm_fir_interpolate_init_q31() and arm_fir_interpolate_q31().
+   */
+
+  typedef struct
+  {
+    uint8_t L;                      /**< upsample factor. */
+    uint16_t phaseLength;           /**< length of each polyphase filter component. */
+    q31_t *pCoeffs;                  /**< points to the coefficient array. The array is of length L*phaseLength. */
+    q31_t *pState;                   /**< points to the state variable array. The array is of length blockSize+phaseLength-1, where blockSize is the number of input samples processed per call. */
+  } arm_fir_interpolate_instance_q31;
+
+  /**
+   * @brief Instance structure for the floating-point FIR interpolator.
+   *
+   * Passed to arm_fir_interpolate_init_f32() and arm_fir_interpolate_f32().
+   */
+
+  typedef struct
+  {
+    uint8_t L;                     /**< upsample factor. */
+    uint16_t phaseLength;          /**< length of each polyphase filter component. */
+    float32_t *pCoeffs;             /**< points to the coefficient array. The array is of length L*phaseLength. */
+    float32_t *pState;              /**< points to the state variable array. The array is of length blockSize+phaseLength-1 (same sizing as the Q15 and Q31 interpolator instances). */
+  } arm_fir_interpolate_instance_f32;
+
+
+  /**
+   * @brief Processing function for the Q15 FIR interpolator.
+   * @param[in] *S        points to an instance of the Q15 FIR interpolator structure
+   *                      (arm_fir_interpolate_instance_q15).
+   * @param[in] *pSrc     points to the block of input data.
+   * @param[out] *pDst    points to the block of output data.
+   * @param[in] blockSize number of input samples to process per call.
+   * @return none.
+   */
+
+  void arm_fir_interpolate_q15(
+			       const arm_fir_interpolate_instance_q15 * S,
+			        q15_t * pSrc,
+			       q15_t * pDst,
+			       uint32_t blockSize);
+
+
+  /**
+   * @brief  Initialization function for the Q15 FIR interpolator.
+   * @param[in,out] *S        points to an instance of the Q15 FIR interpolator structure.
+   * @param[in]     L         upsample factor.
+   * @param[in]     numTaps   number of filter coefficients in the filter; must be a multiple of <code>L</code>.
+   * @param[in]     *pCoeffs  points to the filter coefficient buffer.
+   * @param[in]     *pState   points to the state buffer.
+   * @param[in]     blockSize number of input samples to process per call.
+   * @return        The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_LENGTH_ERROR if
+   * the filter length <code>numTaps</code> is not a multiple of the interpolation factor <code>L</code>.
+   */
+
+  arm_status arm_fir_interpolate_init_q15(
+					  arm_fir_interpolate_instance_q15 * S,
+					  uint8_t L,
+					  uint16_t numTaps,
+					  q15_t * pCoeffs,
+					  q15_t * pState,
+					  uint32_t blockSize);
+
+  /**
+   * @brief Processing function for the Q31 FIR interpolator.
+   * @param[in] *S        points to an instance of the Q31 FIR interpolator structure.
+   * @param[in] *pSrc     points to the block of input data.
+   * @param[out] *pDst    points to the block of output data.
+   * @param[in] blockSize number of input samples to process per call.
+   * @return none.
+   */
+
+  void arm_fir_interpolate_q31(
+			       const arm_fir_interpolate_instance_q31 * S,
+			        q31_t * pSrc,
+			       q31_t * pDst,
+			       uint32_t blockSize);
+
+  /**
+   * @brief  Initialization function for the Q31 FIR interpolator.
+   * @param[in,out] *S        points to an instance of the Q31 FIR interpolator structure.
+   * @param[in]     L         upsample factor.
+   * @param[in]     numTaps   number of filter coefficients in the filter; must be a multiple of <code>L</code>.
+   * @param[in]     *pCoeffs  points to the filter coefficient buffer.
+   * @param[in]     *pState   points to the state buffer.
+   * @param[in]     blockSize number of input samples to process per call.
+   * @return        The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_LENGTH_ERROR if
+   * the filter length <code>numTaps</code> is not a multiple of the interpolation factor <code>L</code>.
+   */
+
+  arm_status arm_fir_interpolate_init_q31(
+					  arm_fir_interpolate_instance_q31 * S,
+					  uint8_t L,
+					  uint16_t numTaps,
+					  q31_t * pCoeffs,
+					  q31_t * pState,
+					  uint32_t blockSize);
+
+
+  /**
+   * @brief Processing function for the floating-point FIR interpolator.
+   * @param[in] *S        points to an instance of the floating-point FIR interpolator structure
+   *                      (arm_fir_interpolate_instance_f32).
+   * @param[in] *pSrc     points to the block of input data.
+   * @param[out] *pDst    points to the block of output data.
+   * @param[in] blockSize number of input samples to process per call.
+   * @return none.
+   */
+
+  void arm_fir_interpolate_f32(
+			       const arm_fir_interpolate_instance_f32 * S,
+			        float32_t * pSrc,
+			       float32_t * pDst,
+			       uint32_t blockSize);
+
+  /**
+   * @brief  Initialization function for the floating-point FIR interpolator.
+   * @param[in,out] *S        points to an instance of the floating-point FIR interpolator structure.
+   * @param[in]     L         upsample factor.
+   * @param[in]     numTaps   number of filter coefficients in the filter; must be a multiple of <code>L</code>.
+   * @param[in]     *pCoeffs  points to the filter coefficient buffer.
+   * @param[in]     *pState   points to the state buffer.
+   * @param[in]     blockSize number of input samples to process per call.
+   * @return        The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_LENGTH_ERROR if
+   * the filter length <code>numTaps</code> is not a multiple of the interpolation factor <code>L</code>.
+   */
+
+  arm_status arm_fir_interpolate_init_f32(
+					  arm_fir_interpolate_instance_f32 * S,
+					  uint8_t L,
+					  uint16_t numTaps,
+					  float32_t * pCoeffs,
+					  float32_t * pState,
+					  uint32_t blockSize);
+
+  /**
+   * @brief Instance structure for the high precision Q31 Biquad cascade filter.
+   *
+   * Passed to arm_biquad_cas_df1_32x64_init_q31() and arm_biquad_cas_df1_32x64_q31().
+   */
+
+  typedef struct
+  {
+    uint8_t numStages;       /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
+    q63_t *pState;           /**< points to the state buffer.  The array is of length 4*numStages. */
+    q31_t *pCoeffs;          /**< points to the array of coefficients.  The array is of length 5*numStages. */
+    uint8_t postShift;       /**< additional shift, in bits, applied to each output sample. */
+
+  } arm_biquad_cas_df1_32x64_ins_q31;
+
+
+  /**
+   * @brief Processing function for the high precision Q31 Biquad cascade filter.
+   * @param[in]  *S        points to an instance of the high precision Q31 Biquad cascade filter structure.
+   * @param[in]  *pSrc     points to the block of input data.
+   * @param[out] *pDst     points to the block of output data.
+   * @param[in]  blockSize number of samples to process.
+   * @return none.
+   */
+
+  void arm_biquad_cas_df1_32x64_q31(
+				    const arm_biquad_cas_df1_32x64_ins_q31 * S,
+				     q31_t * pSrc,
+				    q31_t * pDst,
+				    uint32_t blockSize);
+
+
+  /**
+   * @brief  Initialization function for the high precision Q31 Biquad cascade filter.
+   * @param[in,out] *S           points to an instance of the high precision Q31 Biquad cascade filter structure.
+   * @param[in]     numStages    number of 2nd order stages in the filter.
+   * @param[in]     *pCoeffs     points to the filter coefficients.  The array is of length 5*numStages.
+   * @param[in]     *pState      points to the state buffer.  The array is of length 4*numStages.
+   * @param[in]     postShift    shift to be applied to the output. Varies according to the coefficients format.
+   * @return        none
+   */
+
+  void arm_biquad_cas_df1_32x64_init_q31(
+					 arm_biquad_cas_df1_32x64_ins_q31 * S,
+					 uint8_t numStages,
+					 q31_t * pCoeffs,
+					 q63_t * pState,
+					 uint8_t postShift);
+
+
+
+  /**
+   * @brief Instance structure for the floating-point transposed direct form II Biquad cascade filter.
+   *
+   * Passed to arm_biquad_cascade_df2T_init_f32() and arm_biquad_cascade_df2T_f32().
+   */
+
+  typedef struct
+  {
+    uint8_t   numStages;       /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
+    float32_t *pState;         /**< points to the state buffer.  The array is of length 2*numStages. */
+    float32_t *pCoeffs;        /**< points to the array of coefficients.  The array is of length 5*numStages. */
+  } arm_biquad_cascade_df2T_instance_f32;
+
+
+  /**
+   * @brief Processing function for the floating-point transposed direct form II Biquad cascade filter.
+   * @param[in]  *S        points to an instance of the filter data structure
+   *                       (arm_biquad_cascade_df2T_instance_f32).
+   * @param[in]  *pSrc     points to the block of input data.
+   * @param[out] *pDst     points to the block of output data.
+   * @param[in]  blockSize number of samples to process.
+   * @return none.
+   */
+
+  void arm_biquad_cascade_df2T_f32(
+				   const arm_biquad_cascade_df2T_instance_f32 * S,
+				    float32_t * pSrc,
+				   float32_t * pDst,
+				   uint32_t blockSize);
+
+
+  /**
+   * @brief  Initialization function for the floating-point transposed direct form II Biquad cascade filter.
+   * @param[in,out] *S           points to an instance of the filter data structure.
+   * @param[in]     numStages    number of 2nd order stages in the filter.
+   * @param[in]     *pCoeffs     points to the filter coefficients.  The array is of length 5*numStages.
+   * @param[in]     *pState      points to the state buffer.  The array is of length 2*numStages.
+   * @return        none
+   */
+
+  void arm_biquad_cascade_df2T_init_f32(
+					arm_biquad_cascade_df2T_instance_f32 * S,
+					uint8_t numStages,
+					float32_t * pCoeffs,
+					float32_t * pState);
+
+
+
+  /**
+   * @brief Instance structure for the Q15 FIR lattice filter.
+   *
+   * Passed to arm_fir_lattice_init_q15() and arm_fir_lattice_q15().
+   */
+
+  typedef struct
+  {
+    uint16_t numStages;                          /**< number of filter stages. */
+    q15_t *pState;                               /**< points to the state variable array. The array is of length numStages. */
+    q15_t *pCoeffs;                              /**< points to the coefficient array. The array is of length numStages. */
+  } arm_fir_lattice_instance_q15;
+
+  /**
+   * @brief Instance structure for the Q31 FIR lattice filter.
+   *
+   * Passed to arm_fir_lattice_init_q31() and arm_fir_lattice_q31().
+   */
+
+  typedef struct
+  {
+    uint16_t numStages;                          /**< number of filter stages. */
+    q31_t *pState;                               /**< points to the state variable array. The array is of length numStages. */
+    q31_t *pCoeffs;                              /**< points to the coefficient array. The array is of length numStages. */
+  } arm_fir_lattice_instance_q31;
+
+  /**
+   * @brief Instance structure for the floating-point FIR lattice filter.
+   *
+   * Passed to arm_fir_lattice_init_f32() and arm_fir_lattice_f32().
+   */
+
+  typedef struct
+  {
+    uint16_t numStages;                  /**< number of filter stages. */
+    float32_t *pState;                   /**< points to the state variable array. The array is of length numStages. */
+    float32_t *pCoeffs;                  /**< points to the coefficient array. The array is of length numStages. */
+  } arm_fir_lattice_instance_f32;
+
+  /**
+   * @brief Initialization function for the Q15 FIR lattice filter.
+   * @param[in,out] *S points to an instance of the Q15 FIR lattice structure.
+   * @param[in] numStages  number of filter stages.
+   * @param[in] *pCoeffs points to the coefficient buffer.  The array is of length numStages.
+   * @param[in] *pState points to the state buffer.  The array is of length numStages.
+   * @return none.
+   */
+
+  void arm_fir_lattice_init_q15(
+				arm_fir_lattice_instance_q15 * S,
+				uint16_t numStages,
+				q15_t * pCoeffs,
+				q15_t * pState);
+
+
+  /**
+   * @brief Processing function for the Q15 FIR lattice filter.
+   * @param[in]  *S        points to an instance of the Q15 FIR lattice structure.
+   * @param[in]  *pSrc     points to the block of input data.
+   * @param[out] *pDst     points to the block of output data.
+   * @param[in]  blockSize number of samples to process.
+   * @return none.
+   */
+  void arm_fir_lattice_q15(
+			   const arm_fir_lattice_instance_q15 * S,
+			    q15_t * pSrc,
+			   q15_t * pDst,
+			   uint32_t blockSize);
+
+  /**
+   * @brief Initialization function for the Q31 FIR lattice filter.
+   * @param[in,out] *S points to an instance of the Q31 FIR lattice structure.
+   * @param[in] numStages  number of filter stages.
+   * @param[in] *pCoeffs points to the coefficient buffer.  The array is of length numStages.
+   * @param[in] *pState points to the state buffer.  The array is of length numStages.
+   * @return none.
+   */
+
+  void arm_fir_lattice_init_q31(
+				arm_fir_lattice_instance_q31 * S,
+				uint16_t numStages,
+				q31_t * pCoeffs,
+				q31_t * pState);
+
+
+  /**
+   * @brief Processing function for the Q31 FIR lattice filter.
+   * @param[in]  *S        points to an instance of the Q31 FIR lattice structure.
+   * @param[in]  *pSrc     points to the block of input data.
+   * @param[out] *pDst     points to the block of output data.
+   * @param[in]  blockSize number of samples to process.
+   * @return none.
+   */
+
+  void arm_fir_lattice_q31(
+			   const arm_fir_lattice_instance_q31 * S,
+			    q31_t * pSrc,
+			   q31_t * pDst,
+			   uint32_t blockSize);
+
+/**
+ * @brief Initialization function for the floating-point FIR lattice filter.
+ * @param[in] *S points to an instance of the floating-point FIR lattice structure.
+ * @param[in] numStages  number of filter stages.
+ * @param[in] *pCoeffs points to the coefficient buffer.  The array is of length numStages.
+ * @param[in] *pState points to the state buffer.  The array is of length numStages.
+ * @return none.
+ */
+
+  void arm_fir_lattice_init_f32(
+				arm_fir_lattice_instance_f32 * S,
+				uint16_t numStages,
+				float32_t * pCoeffs,
+				float32_t * pState);
+
+  /**
+   * @brief Processing function for the floating-point FIR lattice filter.
+   * @param[in]  *S        points to an instance of the floating-point FIR lattice structure.
+   * @param[in]  *pSrc     points to the block of input data.
+   * @param[out] *pDst     points to the block of output data.
+   * @param[in]  blockSize number of samples to process.
+   * @return none.
+   */
+
+  void arm_fir_lattice_f32(
+			   const arm_fir_lattice_instance_f32 * S,
+			    float32_t * pSrc,
+			   float32_t * pDst,
+			   uint32_t blockSize);
+
+  /**
+   * @brief Instance structure for the Q15 IIR lattice filter.
+   */
+  typedef struct
+  {
+    uint16_t numStages;                         /**< number of stages in the filter. */
+    q15_t *pState;                              /**< points to the state variable array. The array is of length numStages+blockSize. */
+    q15_t *pkCoeffs;                            /**< points to the reflection coefficient array. The array is of length numStages. */
+    q15_t *pvCoeffs;                            /**< points to the ladder coefficient array. The array is of length numStages+1. */
+  } arm_iir_lattice_instance_q15;
+
+  /**
+   * @brief Instance structure for the Q31 IIR lattice filter.
+   */
+  typedef struct
+  {
+    uint16_t numStages;                         /**< number of stages in the filter. */
+    q31_t *pState;                              /**< points to the state variable array. The array is of length numStages+blockSize. */
+    q31_t *pkCoeffs;                            /**< points to the reflection coefficient array. The array is of length numStages. */
+    q31_t *pvCoeffs;                            /**< points to the ladder coefficient array. The array is of length numStages+1. */
+  } arm_iir_lattice_instance_q31;
+
+  /**
+   * @brief Instance structure for the floating-point IIR lattice filter.
+   */
+  typedef struct
+  {
+    uint16_t numStages;                         /**< number of stages in the filter. */
+    float32_t *pState;                          /**< points to the state variable array. The array is of length numStages+blockSize. */
+    float32_t *pkCoeffs;                        /**< points to the reflection coefficient array. The array is of length numStages. */
+    float32_t *pvCoeffs;                        /**< points to the ladder coefficient array. The array is of length numStages+1. */
+  } arm_iir_lattice_instance_f32;
+
+  /**
+   * @brief Processing function for the floating-point IIR lattice filter.
+   * @param[in] *S points to an instance of the floating-point IIR lattice structure.
+   * @param[in] *pSrc points to the block of input data.
+   * @param[out] *pDst points to the block of output data.
+   * @param[in] blockSize number of samples to process.
+   * @return none.
+   */
+
+  void arm_iir_lattice_f32(
+			   const arm_iir_lattice_instance_f32 * S,
+			    float32_t * pSrc,
+			   float32_t * pDst,
+			   uint32_t blockSize);
+
+  /**
+   * @brief Initialization function for the floating-point IIR lattice filter.
+   * @param[in] *S points to an instance of the floating-point IIR lattice structure.
+   * @param[in] numStages number of stages in the filter.
+   * @param[in] *pkCoeffs points to the reflection coefficient buffer.  The array is of length numStages.
+   * @param[in] *pvCoeffs points to the ladder coefficient buffer.  The array is of length numStages+1.
+   * @param[in] *pState points to the state buffer.  The array is of length numStages+blockSize.
+   * @param[in] blockSize number of samples to process.
+   * @return none.
+   */
+
+  void arm_iir_lattice_init_f32(
+				arm_iir_lattice_instance_f32 * S,
+				uint16_t numStages,
+				float32_t *pkCoeffs,
+				float32_t *pvCoeffs,
+				float32_t *pState,
+				uint32_t blockSize);
+
+
+  /**
+   * @brief Processing function for the Q31 IIR lattice filter.
+   * @param[in] *S points to an instance of the Q31 IIR lattice structure.
+   * @param[in] *pSrc points to the block of input data.
+   * @param[out] *pDst points to the block of output data.
+   * @param[in] blockSize number of samples to process.
+   * @return none.
+   */
+
+  void arm_iir_lattice_q31(
+			   const arm_iir_lattice_instance_q31 * S,
+			    q31_t * pSrc,
+			   q31_t * pDst,
+			   uint32_t blockSize);
+
+
+  /**
+   * @brief Initialization function for the Q31 IIR lattice filter.
+   * @param[in] *S points to an instance of the Q31 IIR lattice structure.
+   * @param[in] numStages number of stages in the filter.
+   * @param[in] *pkCoeffs points to the reflection coefficient buffer.  The array is of length numStages.
+   * @param[in] *pvCoeffs points to the ladder coefficient buffer.  The array is of length numStages+1.
+   * @param[in] *pState points to the state buffer.  The array is of length numStages+blockSize.
+   * @param[in] blockSize number of samples to process.
+   * @return none.
+   */
+
+  void arm_iir_lattice_init_q31(
+				arm_iir_lattice_instance_q31 * S,
+				uint16_t numStages,
+				q31_t *pkCoeffs,
+				q31_t *pvCoeffs,
+				q31_t *pState,
+				uint32_t blockSize);
+
+
+  /**
+   * @brief Processing function for the Q15 IIR lattice filter.
+   * @param[in] *S points to an instance of the Q15 IIR lattice structure.
+   * @param[in] *pSrc points to the block of input data.
+   * @param[out] *pDst points to the block of output data.
+   * @param[in] blockSize number of samples to process.
+   * @return none.
+   */
+
+  void arm_iir_lattice_q15(
+			   const arm_iir_lattice_instance_q15 * S,
+			    q15_t * pSrc,
+			   q15_t * pDst,
+			   uint32_t blockSize);
+
+
+/**
+ * @brief Initialization function for the Q15 IIR lattice filter.
+ * @param[in] *S points to an instance of the fixed-point Q15 IIR lattice structure.
+ * @param[in] numStages  number of stages in the filter.
+ * @param[in] *pkCoeffs points to reflection coefficient buffer.  The array is of length numStages.
+ * @param[in] *pvCoeffs points to ladder coefficient buffer.  The array is of length numStages+1.
+ * @param[in] *pState points to state buffer.  The array is of length numStages+blockSize.
+ * @param[in] blockSize number of samples to process per call.
+ * @return none.
+ */
+
+  void arm_iir_lattice_init_q15(
+				arm_iir_lattice_instance_q15 * S,
+				uint16_t numStages,
+				q15_t *pkCoeffs,
+				q15_t *pvCoeffs,
+				q15_t *pState,
+				uint32_t blockSize);
+
+  /**
+   * @brief Instance structure for the floating-point LMS filter.
+   */
+
+  typedef struct
+  {
+    uint16_t numTaps;    /**< number of coefficients in the filter. */
+    float32_t *pState;   /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
+    float32_t *pCoeffs;  /**< points to the coefficient array. The array is of length numTaps. */
+    float32_t mu;        /**< step size that controls filter coefficient updates. */
+  } arm_lms_instance_f32;
+
+  /**
+   * @brief Processing function for floating-point LMS filter.
+   * @param[in]  *S points to an instance of the floating-point LMS filter structure.
+   * @param[in]  *pSrc points to the block of input data.
+   * @param[in]  *pRef points to the block of reference data.
+   * @param[out] *pOut points to the block of output data.
+   * @param[out] *pErr points to the block of error data.
+   * @param[in]  blockSize number of samples to process.
+   * @return     none.
+   */
+
+  void arm_lms_f32(
+		   const arm_lms_instance_f32 * S,
+		    float32_t * pSrc,
+		    float32_t * pRef,
+		   float32_t * pOut,
+		   float32_t * pErr,
+		   uint32_t blockSize);
+
+  /**
+   * @brief Initialization function for floating-point LMS filter.
+   * @param[in] *S points to an instance of the floating-point LMS filter structure.
+   * @param[in] numTaps  number of filter coefficients.
+   * @param[in] *pCoeffs points to the coefficient buffer.  The array is of length numTaps.
+   * @param[in] *pState points to the state buffer.  The array is of length numTaps+blockSize-1.
+   * @param[in] mu step size that controls filter coefficient updates.
+   * @param[in] blockSize number of samples to process.
+   * @return none.
+   */
+
+  void arm_lms_init_f32(
+			arm_lms_instance_f32 * S,
+			uint16_t numTaps,
+			float32_t * pCoeffs,
+			float32_t * pState,
+			float32_t mu,
+			uint32_t blockSize);
+
+  /**
+   * @brief Instance structure for the Q15 LMS filter.
+   */
+
+  typedef struct
+  {
+    uint16_t numTaps;    /**< number of coefficients in the filter. */
+    q15_t *pState;       /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
+    q15_t *pCoeffs;      /**< points to the coefficient array. The array is of length numTaps. */
+    q15_t mu;            /**< step size that controls filter coefficient updates. */
+    uint32_t postShift;  /**< bit shift applied to coefficients. */
+  } arm_lms_instance_q15;
+
+
+  /**
+   * @brief Initialization function for the Q15 LMS filter.
+   * @param[in] *S points to an instance of the Q15 LMS filter structure.
+   * @param[in] numTaps  number of filter coefficients.
+   * @param[in] *pCoeffs points to the coefficient buffer.  The array is of length numTaps.
+   * @param[in] *pState points to the state buffer.  The array is of length numTaps+blockSize-1.
+   * @param[in] mu step size that controls filter coefficient updates.
+   * @param[in] blockSize number of samples to process.
+   * @param[in] postShift bit shift applied to coefficients.
+   * @return    none.
+   */
+
+  void arm_lms_init_q15(
+			arm_lms_instance_q15 * S,
+			uint16_t numTaps,
+			q15_t * pCoeffs,
+			q15_t * pState,
+			q15_t mu,
+			uint32_t blockSize,
+			uint32_t postShift);
+
+  /**
+   * @brief Processing function for Q15 LMS filter.
+   * @param[in] *S points to an instance of the Q15 LMS filter structure.
+   * @param[in] *pSrc points to the block of input data.
+   * @param[in] *pRef points to the block of reference data.
+   * @param[out] *pOut points to the block of output data.
+   * @param[out] *pErr points to the block of error data.
+   * @param[in] blockSize number of samples to process.
+   * @return none.
+   */
+
+  void arm_lms_q15(
+		   const arm_lms_instance_q15 * S,
+		    q15_t * pSrc,
+		    q15_t * pRef,
+		   q15_t * pOut,
+		   q15_t * pErr,
+		   uint32_t blockSize);
+
+
+  /**
+   * @brief Instance structure for the Q31 LMS filter.
+   */
+
+  typedef struct
+  {
+    uint16_t numTaps;    /**< number of coefficients in the filter. */
+    q31_t *pState;       /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
+    q31_t *pCoeffs;      /**< points to the coefficient array. The array is of length numTaps. */
+    q31_t mu;            /**< step size that controls filter coefficient updates. */
+    uint32_t postShift;  /**< bit shift applied to coefficients. */
+
+  } arm_lms_instance_q31;
+
+  /**
+   * @brief Processing function for Q31 LMS filter.
+   * @param[in]  *S points to an instance of the Q31 LMS filter structure.
+   * @param[in]  *pSrc points to the block of input data.
+   * @param[in]  *pRef points to the block of reference data.
+   * @param[out] *pOut points to the block of output data.
+   * @param[out] *pErr points to the block of error data.
+   * @param[in]  blockSize number of samples to process.
+   * @return     none.
+   */
+
+  void arm_lms_q31(
+		   const arm_lms_instance_q31 * S,
+		    q31_t * pSrc,
+		    q31_t * pRef,
+		   q31_t * pOut,
+		   q31_t * pErr,
+		   uint32_t blockSize);
+
+  /**
+   * @brief Initialization function for Q31 LMS filter.
+   * @param[in] *S points to an instance of the Q31 LMS filter structure.
+   * @param[in] numTaps  number of filter coefficients.
+   * @param[in] *pCoeffs points to the coefficient buffer.  The array is of length numTaps.
+   * @param[in] *pState points to the state buffer.  The array is of length numTaps+blockSize-1.
+   * @param[in] mu step size that controls filter coefficient updates.
+   * @param[in] blockSize number of samples to process.
+   * @param[in] postShift bit shift applied to coefficients.
+   * @return none.
+   */
+
+  void arm_lms_init_q31(
+			arm_lms_instance_q31 * S,
+			uint16_t numTaps,
+			q31_t *pCoeffs,
+			q31_t *pState,
+			q31_t mu,
+			uint32_t blockSize,
+			uint32_t postShift);
+
+  /**
+   * @brief Instance structure for the floating-point normalized LMS filter.
+   */
+
+  typedef struct
+  {
+    uint16_t  numTaps;    /**< number of coefficients in the filter. */
+    float32_t *pState;    /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
+    float32_t *pCoeffs;   /**< points to the coefficient array. The array is of length numTaps. */
+    float32_t mu;        /**< step size that controls filter coefficient updates. */
+    float32_t energy;    /**< saves previous frame energy. */
+    float32_t x0;        /**< saves previous input sample. */
+  } arm_lms_norm_instance_f32;
+
+  /**
+   * @brief Processing function for floating-point normalized LMS filter.
+   * @param[in] *S points to an instance of the floating-point normalized LMS filter structure.
+   * @param[in] *pSrc points to the block of input data.
+   * @param[in] *pRef points to the block of reference data.
+   * @param[out] *pOut points to the block of output data.
+   * @param[out] *pErr points to the block of error data.
+   * @param[in] blockSize number of samples to process.
+   * @return none.
+   */
+
+  void arm_lms_norm_f32(
+			arm_lms_norm_instance_f32 * S,
+			 float32_t * pSrc,
+			 float32_t * pRef,
+			float32_t * pOut,
+			float32_t * pErr,
+			uint32_t blockSize);
+
+  /**
+   * @brief Initialization function for floating-point normalized LMS filter.
+   * @param[in] *S points to an instance of the floating-point normalized LMS filter structure.
+   * @param[in] numTaps  number of filter coefficients.
+   * @param[in] *pCoeffs points to the coefficient buffer.  The array is of length numTaps.
+   * @param[in] *pState points to the state buffer.  The array is of length numTaps+blockSize-1.
+   * @param[in] mu step size that controls filter coefficient updates.
+   * @param[in] blockSize number of samples to process.
+   * @return none.
+   */
+
+  void arm_lms_norm_init_f32(
+			     arm_lms_norm_instance_f32 * S,
+			     uint16_t numTaps,
+			     float32_t * pCoeffs,
+			     float32_t * pState,
+			     float32_t mu,
+			     uint32_t blockSize);
+
+
+  /**
+   * @brief Instance structure for the Q31 normalized LMS filter.
+   */
+  typedef struct
+  {
+    uint16_t numTaps;     /**< number of coefficients in the filter. */
+    q31_t *pState;        /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
+    q31_t *pCoeffs;       /**< points to the coefficient array. The array is of length numTaps. */
+    q31_t mu;             /**< step size that controls filter coefficient updates. */
+    uint8_t postShift;    /**< bit shift applied to coefficients. */
+    q31_t *recipTable;    /**< points to the reciprocal initial value table. */
+    q31_t energy;         /**< saves previous frame energy. */
+    q31_t x0;             /**< saves previous input sample. */
+  } arm_lms_norm_instance_q31;
+
+  /**
+   * @brief Processing function for Q31 normalized LMS filter.
+   * @param[in] *S points to an instance of the Q31 normalized LMS filter structure.
+   * @param[in] *pSrc points to the block of input data.
+   * @param[in] *pRef points to the block of reference data.
+   * @param[out] *pOut points to the block of output data.
+   * @param[out] *pErr points to the block of error data.
+   * @param[in] blockSize number of samples to process.
+   * @return none.
+   */
+
+  void arm_lms_norm_q31(
+			arm_lms_norm_instance_q31 * S,
+			 q31_t * pSrc,
+			 q31_t * pRef,
+			q31_t * pOut,
+			q31_t * pErr,
+			uint32_t blockSize);
+
+  /**
+   * @brief Initialization function for Q31 normalized LMS filter.
+   * @param[in] *S points to an instance of the Q31 normalized LMS filter structure.
+   * @param[in] numTaps  number of filter coefficients.
+   * @param[in] *pCoeffs points to the coefficient buffer.  The array is of length numTaps.
+   * @param[in] *pState points to the state buffer.  The array is of length numTaps+blockSize-1.
+   * @param[in] mu step size that controls filter coefficient updates.
+   * @param[in] blockSize number of samples to process.
+   * @param[in] postShift bit shift applied to coefficients.
+   * @return none.
+   */
+
+  void arm_lms_norm_init_q31(
+			     arm_lms_norm_instance_q31 * S,
+			     uint16_t numTaps,
+			     q31_t * pCoeffs,
+			     q31_t * pState,
+			     q31_t mu,
+			     uint32_t blockSize,
+			     uint8_t postShift);
+
+  /**
+   * @brief Instance structure for the Q15 normalized LMS filter.
+   */
+
+  typedef struct
+  {
+    uint16_t numTaps;    /**< number of coefficients in the filter. */
+    q15_t *pState;        /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
+    q15_t *pCoeffs;       /**< points to the coefficient array. The array is of length numTaps. */
+    q15_t mu;            /**< step size that controls filter coefficient updates. */
+    uint8_t postShift;   /**< bit shift applied to coefficients. */
+    q15_t *recipTable;   /**< points to the reciprocal initial value table. */
+    q15_t energy;        /**< saves previous frame energy. */
+    q15_t x0;            /**< saves previous input sample. */
+  } arm_lms_norm_instance_q15;
+
+  /**
+   * @brief Processing function for Q15 normalized LMS filter.
+   * @param[in] *S points to an instance of the Q15 normalized LMS filter structure.
+   * @param[in] *pSrc points to the block of input data.
+   * @param[in] *pRef points to the block of reference data.
+   * @param[out] *pOut points to the block of output data.
+   * @param[out] *pErr points to the block of error data.
+   * @param[in] blockSize number of samples to process.
+   * @return none.
+   */
+
+  void arm_lms_norm_q15(
+			arm_lms_norm_instance_q15 * S,
+			 q15_t * pSrc,
+			 q15_t * pRef,
+			q15_t * pOut,
+			q15_t * pErr,
+			uint32_t blockSize);
+
+
+  /**
+   * @brief Initialization function for Q15 normalized LMS filter.
+   * @param[in] *S points to an instance of the Q15 normalized LMS filter structure.
+   * @param[in] numTaps  number of filter coefficients.
+   * @param[in] *pCoeffs points to the coefficient buffer.  The array is of length numTaps.
+   * @param[in] *pState points to the state buffer.  The array is of length numTaps+blockSize-1.
+   * @param[in] mu step size that controls filter coefficient updates.
+   * @param[in] blockSize number of samples to process.
+   * @param[in] postShift bit shift applied to coefficients.
+   * @return none.
+   */
+
+  void arm_lms_norm_init_q15(
+			     arm_lms_norm_instance_q15 * S,
+			     uint16_t numTaps,
+			     q15_t * pCoeffs,
+			     q15_t * pState,
+			     q15_t mu,
+			     uint32_t blockSize,
+			     uint8_t postShift);
+
+  /**
+   * @brief Correlation of floating-point sequences.
+   * @param[in] *pSrcA points to the first input sequence.
+   * @param[in] srcALen length of the first input sequence.
+   * @param[in] *pSrcB points to the second input sequence.
+   * @param[in] srcBLen length of the second input sequence.
+   * @param[out] *pDst points to the block of output data.  The array is of length 2 * max(srcALen, srcBLen) - 1.
+   * @return none.
+   */
+
+  void arm_correlate_f32(
+			  float32_t * pSrcA,
+			 uint32_t srcALen,
+			  float32_t * pSrcB,
+			 uint32_t srcBLen,
+			 float32_t * pDst);
+
+  /**
+   * @brief Correlation of Q15 sequences.
+   * @param[in] *pSrcA points to the first input sequence.
+   * @param[in] srcALen length of the first input sequence.
+   * @param[in] *pSrcB points to the second input sequence.
+   * @param[in] srcBLen length of the second input sequence.
+   * @param[out] *pDst points to the block of output data.  The array is of length 2 * max(srcALen, srcBLen) - 1.
+   * @return none.
+   */
+
+  void arm_correlate_q15(
+			  q15_t * pSrcA,
+			 uint32_t srcALen,
+			  q15_t * pSrcB,
+			 uint32_t srcBLen,
+			 q15_t * pDst);
+
+  /**
+   * @brief Correlation of Q15 sequences (fast version).
+   * @param[in] *pSrcA points to the first input sequence.
+   * @param[in] srcALen length of the first input sequence.
+   * @param[in] *pSrcB points to the second input sequence.
+   * @param[in] srcBLen length of the second input sequence.
+   * @param[out] *pDst points to the block of output data.  The array is of length 2 * max(srcALen, srcBLen) - 1.
+   * @return none.
+   */
+
+  void arm_correlate_fast_q15(
+			       q15_t * pSrcA,
+			      uint32_t srcALen,
+			       q15_t * pSrcB,
+			      uint32_t srcBLen,
+			      q15_t * pDst);
+
+  /**
+   * @brief Correlation of Q31 sequences.
+   * @param[in] *pSrcA points to the first input sequence.
+   * @param[in] srcALen length of the first input sequence.
+   * @param[in] *pSrcB points to the second input sequence.
+   * @param[in] srcBLen length of the second input sequence.
+   * @param[out] *pDst points to the block of output data.  The array is of length 2 * max(srcALen, srcBLen) - 1.
+   * @return none.
+   */
+
+  void arm_correlate_q31(
+			  q31_t * pSrcA,
+			 uint32_t srcALen,
+			  q31_t * pSrcB,
+			 uint32_t srcBLen,
+			 q31_t * pDst);
+
+  /**
+   * @brief Correlation of Q31 sequences (fast version).
+   * @param[in] *pSrcA points to the first input sequence.
+   * @param[in] srcALen length of the first input sequence.
+   * @param[in] *pSrcB points to the second input sequence.
+   * @param[in] srcBLen length of the second input sequence.
+   * @param[out] *pDst points to the block of output data.  The array is of length 2 * max(srcALen, srcBLen) - 1.
+   * @return none.
+   */
+
+  void arm_correlate_fast_q31(
+			       q31_t * pSrcA,
+			      uint32_t srcALen,
+			       q31_t * pSrcB,
+			      uint32_t srcBLen,
+			      q31_t * pDst);
+
+  /**
+   * @brief Correlation of Q7 sequences.
+   * @param[in] *pSrcA points to the first input sequence.
+   * @param[in] srcALen length of the first input sequence.
+   * @param[in] *pSrcB points to the second input sequence.
+   * @param[in] srcBLen length of the second input sequence.
+   * @param[out] *pDst points to the block of output data.  The array is of length 2 * max(srcALen, srcBLen) - 1.
+   * @return none.
+   */
+
+  void arm_correlate_q7(
+			 q7_t * pSrcA,
+			uint32_t srcALen,
+			 q7_t * pSrcB,
+			uint32_t srcBLen,
+			q7_t * pDst);
+
+  /**
+   * @brief Instance structure for the floating-point sparse FIR filter.
+   */
+  typedef struct
+  {
+    uint16_t numTaps;             /**< number of coefficients in the filter. */
+    uint16_t stateIndex;          /**< state buffer index.  Points to the oldest sample in the state buffer. */
+    float32_t *pState;            /**< points to the state buffer array. The array is of length maxDelay+blockSize-1. */
+    float32_t *pCoeffs;           /**< points to the coefficient array. The array is of length numTaps.*/
+    uint16_t maxDelay;            /**< maximum offset specified by the pTapDelay array. */
+    int32_t *pTapDelay;           /**< points to the array of delay values.  The array is of length numTaps. */
+  } arm_fir_sparse_instance_f32;
+
+  /**
+   * @brief Instance structure for the Q31 sparse FIR filter.
+   */
+
+  typedef struct
+  {
+    uint16_t numTaps;             /**< number of coefficients in the filter. */
+    uint16_t stateIndex;          /**< state buffer index.  Points to the oldest sample in the state buffer. */
+    q31_t *pState;                /**< points to the state buffer array. The array is of length maxDelay+blockSize-1. */
+    q31_t *pCoeffs;               /**< points to the coefficient array. The array is of length numTaps.*/
+    uint16_t maxDelay;            /**< maximum offset specified by the pTapDelay array. */
+    int32_t *pTapDelay;           /**< points to the array of delay values.  The array is of length numTaps. */
+  } arm_fir_sparse_instance_q31;
+
+  /**
+   * @brief Instance structure for the Q15 sparse FIR filter.
+   */
+
+  typedef struct
+  {
+    uint16_t numTaps;             /**< number of coefficients in the filter. */
+    uint16_t stateIndex;          /**< state buffer index.  Points to the oldest sample in the state buffer. */
+    q15_t *pState;                /**< points to the state buffer array. The array is of length maxDelay+blockSize-1. */
+    q15_t *pCoeffs;               /**< points to the coefficient array. The array is of length numTaps.*/
+    uint16_t maxDelay;            /**< maximum offset specified by the pTapDelay array. */
+    int32_t *pTapDelay;           /**< points to the array of delay values.  The array is of length numTaps. */
+  } arm_fir_sparse_instance_q15;
+
+  /**
+   * @brief Instance structure for the Q7 sparse FIR filter.
+   */
+
+  typedef struct
+  {
+    uint16_t numTaps;             /**< number of coefficients in the filter. */
+    uint16_t stateIndex;          /**< state buffer index.  Points to the oldest sample in the state buffer. */
+    q7_t *pState;                 /**< points to the state buffer array. The array is of length maxDelay+blockSize-1. */
+    q7_t *pCoeffs;                /**< points to the coefficient array. The array is of length numTaps.*/
+    uint16_t maxDelay;            /**< maximum offset specified by the pTapDelay array. */
+    int32_t *pTapDelay;           /**< points to the array of delay values.  The array is of length numTaps. */
+  } arm_fir_sparse_instance_q7;
+
+  /**
+   * @brief Processing function for the floating-point sparse FIR filter.
+   * @param[in]  *S          points to an instance of the floating-point sparse FIR structure.
+   * @param[in]  *pSrc       points to the block of input data.
+   * @param[out] *pDst       points to the block of output data.
+   * @param[in]  *pScratchIn points to a temporary buffer of size blockSize.
+   * @param[in]  blockSize   number of input samples to process per call.
+   * @return none.
+   */
+
+  void arm_fir_sparse_f32(
+			  arm_fir_sparse_instance_f32 * S,
+			   float32_t * pSrc,
+			  float32_t * pDst,
+			  float32_t * pScratchIn,
+			  uint32_t blockSize);
+
+  /**
+   * @brief  Initialization function for the floating-point sparse FIR filter.
+   * @param[in,out] *S         points to an instance of the floating-point sparse FIR structure.
+   * @param[in]     numTaps    number of nonzero coefficients in the filter.
+   * @param[in]     *pCoeffs   points to the array of filter coefficients.
+   * @param[in]     *pState    points to the state buffer.
+   * @param[in]     *pTapDelay points to the array of offset times.
+   * @param[in]     maxDelay   maximum offset time supported.
+   * @param[in]     blockSize  number of samples that will be processed per block.
+   * @return none
+   */
+
+  void arm_fir_sparse_init_f32(
+			       arm_fir_sparse_instance_f32 * S,
+			       uint16_t numTaps,
+			       float32_t * pCoeffs,
+			       float32_t * pState,
+			       int32_t * pTapDelay,
+			       uint16_t maxDelay,
+			       uint32_t blockSize);
+
+  /**
+   * @brief Processing function for the Q31 sparse FIR filter.
+   * @param[in]  *S          points to an instance of the Q31 sparse FIR structure.
+   * @param[in]  *pSrc       points to the block of input data.
+   * @param[out] *pDst       points to the block of output data.
+   * @param[in]  *pScratchIn points to a temporary buffer of size blockSize.
+   * @param[in]  blockSize   number of input samples to process per call.
+   * @return none.
+   */
+
+  void arm_fir_sparse_q31(
+			  arm_fir_sparse_instance_q31 * S,
+			   q31_t * pSrc,
+			  q31_t * pDst,
+			  q31_t * pScratchIn,
+			  uint32_t blockSize);
+
+  /**
+   * @brief  Initialization function for the Q31 sparse FIR filter.
+   * @param[in,out] *S         points to an instance of the Q31 sparse FIR structure.
+   * @param[in]     numTaps    number of nonzero coefficients in the filter.
+   * @param[in]     *pCoeffs   points to the array of filter coefficients.
+   * @param[in]     *pState    points to the state buffer.
+   * @param[in]     *pTapDelay points to the array of offset times.
+   * @param[in]     maxDelay   maximum offset time supported.
+   * @param[in]     blockSize  number of samples that will be processed per block.
+   * @return none
+   */
+
+  void arm_fir_sparse_init_q31(
+			       arm_fir_sparse_instance_q31 * S,
+			       uint16_t numTaps,
+			       q31_t * pCoeffs,
+			       q31_t * pState,
+			       int32_t * pTapDelay,
+			       uint16_t maxDelay,
+			       uint32_t blockSize);
+
+  /**
+   * @brief Processing function for the Q15 sparse FIR filter.
+   * @param[in]  *S           points to an instance of the Q15 sparse FIR structure.
+   * @param[in]  *pSrc        points to the block of input data.
+   * @param[out] *pDst        points to the block of output data.
+   * @param[in]  *pScratchIn  points to a temporary buffer of size blockSize.
+   * @param[in]  *pScratchOut points to a temporary buffer of size blockSize.
+   * @param[in]  blockSize    number of input samples to process per call.
+   * @return none.
+   */
+
+  void arm_fir_sparse_q15(
+			  arm_fir_sparse_instance_q15 * S,
+			   q15_t * pSrc,
+			  q15_t * pDst,
+			  q15_t * pScratchIn,
+			  q31_t * pScratchOut,
+			  uint32_t blockSize);
+
+
+  /**
+   * @brief  Initialization function for the Q15 sparse FIR filter.
+   * @param[in,out] *S         points to an instance of the Q15 sparse FIR structure.
+   * @param[in]     numTaps    number of nonzero coefficients in the filter.
+   * @param[in]     *pCoeffs   points to the array of filter coefficients.
+   * @param[in]     *pState    points to the state buffer.
+   * @param[in]     *pTapDelay points to the array of offset times.
+   * @param[in]     maxDelay   maximum offset time supported.
+   * @param[in]     blockSize  number of samples that will be processed per block.
+   * @return none
+   */
+
+  void arm_fir_sparse_init_q15(
+			       arm_fir_sparse_instance_q15 * S,
+			       uint16_t numTaps,
+			       q15_t * pCoeffs,
+			       q15_t * pState,
+			       int32_t * pTapDelay,
+			       uint16_t maxDelay,
+			       uint32_t blockSize);
+
+  /**
+   * @brief Processing function for the Q7 sparse FIR filter.
+   * @param[in]  *S           points to an instance of the Q7 sparse FIR structure.
+   * @param[in]  *pSrc        points to the block of input data.
+   * @param[out] *pDst        points to the block of output data.
+   * @param[in]  *pScratchIn  points to a temporary buffer of size blockSize.
+   * @param[in]  *pScratchOut points to a temporary buffer of size blockSize.
+   * @param[in]  blockSize    number of input samples to process per call.
+   * @return none.
+   */
+
+  void arm_fir_sparse_q7(
+			 arm_fir_sparse_instance_q7 * S,
+			  q7_t * pSrc,
+			 q7_t * pDst,
+			 q7_t * pScratchIn,
+			 q31_t * pScratchOut,
+			 uint32_t blockSize);
+
+  /**
+   * @brief  Initialization function for the Q7 sparse FIR filter.
+   * @param[in,out] *S         points to an instance of the Q7 sparse FIR structure.
+   * @param[in]     numTaps    number of nonzero coefficients in the filter.
+   * @param[in]     *pCoeffs   points to the array of filter coefficients.
+   * @param[in]     *pState    points to the state buffer.
+   * @param[in]     *pTapDelay points to the array of offset times.
+   * @param[in]     maxDelay   maximum offset time supported.
+   * @param[in]     blockSize  number of samples that will be processed per block.
+   * @return none
+   */
+
+  void arm_fir_sparse_init_q7(
+			      arm_fir_sparse_instance_q7 * S,
+			      uint16_t numTaps,
+			      q7_t * pCoeffs,
+			      q7_t * pState,
+			      int32_t *pTapDelay,
+			      uint16_t maxDelay,
+			      uint32_t blockSize);
+
+
+  /**
+   * @brief  Floating-point sin_cos function.
+   * @param[in]  theta    input value in degrees.
+   * @param[out] *pSinVal points to the processed sine output.
+   * @param[out] *pCosVal points to the processed cosine output.
+   * @return none.
+   */
+
+  void arm_sin_cos_f32(
+		       float32_t theta,
+		       float32_t *pSinVal,
+		       float32_t *pCosVal);
+
+  /**
+   * @brief  Q31 sin_cos function.
+   * @param[in]  theta    scaled input value in degrees.
+   * @param[out] *pSinVal points to the processed sine output.
+   * @param[out] *pCosVal points to the processed cosine output.
+   * @return none.
+   */
+
+  void arm_sin_cos_q31(
+		       q31_t theta,
+		       q31_t *pSinVal,
+		       q31_t *pCosVal);
+
+
+  /**
+   * @brief  Floating-point complex conjugate.
+   * @param[in]  *pSrc      points to the input vector.
+   * @param[out] *pDst      points to the output vector.
+   * @param[in]  numSamples number of complex samples in each vector.
+   * @return none.
+   */
+
+  void arm_cmplx_conj_f32(
+			   float32_t * pSrc,
+			  float32_t * pDst,
+			  uint32_t numSamples);
+
+  /**
+   * @brief  Q31 complex conjugate.
+   * @param[in]  *pSrc      points to the input vector.
+   * @param[out] *pDst      points to the output vector.
+   * @param[in]  numSamples number of complex samples in each vector.
+   * @return none.
+   */
+
+  void arm_cmplx_conj_q31(
+			   q31_t * pSrc,
+			  q31_t * pDst,
+			  uint32_t numSamples);
+
+  /**
+   * @brief  Q15 complex conjugate.
+   * @param[in]  *pSrc      points to the input vector.
+   * @param[out] *pDst      points to the output vector.
+   * @param[in]  numSamples number of complex samples in each vector.
+   * @return none.
+   */
+
+  void arm_cmplx_conj_q15(
+			   q15_t * pSrc,
+			  q15_t * pDst,
+			  uint32_t numSamples);
+
+
+
+  /**
+   * @brief  Floating-point complex magnitude squared.
+   * @param[in]  *pSrc      points to the complex input vector.
+   * @param[out] *pDst      points to the real output vector.
+   * @param[in]  numSamples number of complex samples in the input vector.
+   * @return none.
+   */
+
+  void arm_cmplx_mag_squared_f32(
+				  float32_t * pSrc,
+				 float32_t * pDst,
+				 uint32_t numSamples);
+
+  /**
+   * @brief  Q31 complex magnitude squared.
+   * @param[in]  *pSrc      points to the complex input vector.
+   * @param[out] *pDst      points to the real output vector.
+   * @param[in]  numSamples number of complex samples in the input vector.
+   * @return none.
+   */
+
+  void arm_cmplx_mag_squared_q31(
+				  q31_t * pSrc,
+				 q31_t * pDst,
+				 uint32_t numSamples);
+
+  /**
+   * @brief  Q15 complex magnitude squared.
+   * @param[in]  *pSrc      points to the complex input vector.
+   * @param[out] *pDst      points to the real output vector.
+   * @param[in]  numSamples number of complex samples in the input vector.
+   * @return none.
+   */
+
+  void arm_cmplx_mag_squared_q15(
+				  q15_t * pSrc,
+				 q15_t * pDst,
+				 uint32_t numSamples);
+
+
+ /**
+   * @ingroup groupController
+   */
+
+  /**
+   * @defgroup PID PID Motor Control
+   *
+   * A Proportional Integral Derivative (PID) controller is a generic feedback control 
+   * loop mechanism widely used in industrial control systems.
+   * A PID controller is the most commonly used type of feedback controller.
+   *
+   * This set of functions implements (PID) controllers
+   * for Q15, Q31, and floating-point data types.  The functions operate on a single sample
+   * of data and each call to the function returns a single processed value.
+   * <code>S</code> points to an instance of the PID control data structure.  <code>in</code>
+   * is the input sample value. The functions return the output value.
+   *
+   * \par Algorithm:
+   * <pre>
+   *    y[n] = y[n-1] + A0 * x[n] + A1 * x[n-1] + A2 * x[n-2]
+   *    A0 = Kp + Ki + Kd
+   *    A1 = (-Kp ) - (2 * Kd )
+   *    A2 = Kd  </pre>
+   *
+   * \par
+   * where \c Kp is proportional constant, \c Ki is Integral constant and \c Kd is Derivative constant
+   * 
+   * \par 
+   * \image html PID.gif "Proportional Integral Derivative Controller" 
+   *
+   * \par
+   * The PID controller calculates an "error" value as the difference between
+   * the measured output and the reference input.
+   * The controller attempts to minimize the error by adjusting the process control inputs.  
+   * The proportional value determines the reaction to the current error, 
+   * the integral value determines the reaction based on the sum of recent errors, 
+   * and the derivative value determines the reaction based on the rate at which the error has been changing.
+   *
+   * \par Instance Structure 
+   * The Gains A0, A1, A2 and state variables for a PID controller are stored together in an instance data structure. 
+   * A separate instance structure must be defined for each PID Controller. 
+   * There are separate instance structure declarations for each of the 3 supported data types. 
+   * 
+   * \par Reset Functions 
+   * There is also an associated reset function for each data type which clears the state array. 
+   *
+   * \par Initialization Functions 
+   * There is also an associated initialization function for each data type. 
+   * The initialization function performs the following operations: 
+   * - Initializes the Gains A0, A1, A2 from Kp,Ki, Kd gains.
+   * - Zeros out the values in the state buffer.   
+   * 
+   * \par 
+   * Instance structure cannot be placed into a const data section and it is recommended to use the initialization function. 
+   *
+   * \par Fixed-Point Behavior 
+   * Care must be taken when using the fixed-point versions of the PID Controller functions. 
+   * In particular, the overflow and saturation behavior of the accumulator used in each function must be considered. 
+   * Refer to the function specific documentation below for usage guidelines. 
+   */
+
+  /**
+   * @addtogroup PID
+   * @{
+   */
+
+  /**
+   * @brief  Runs one step of the floating-point PID controller.
+   * @param[in,out] *S points to the floating-point PID Control instance; its state entries are updated
+   * @param[in] in current input sample x[n]
+   * @return the newly computed output sample y[n].
+   *
+   * \par
+   * On entry state[0] and state[1] hold x[n-1] and x[n-2], and state[2] holds y[n-1].
+   * On exit they hold x[n], x[n-1] and y[n] respectively.
+   */
+
+
+  static __INLINE float32_t arm_pid_f32(
+    arm_pid_instance_f32 * S,
+    float32_t in)
+  {
+    float32_t *hist = S->state;   /* [0] = x[n-1], [1] = x[n-2], [2] = y[n-1] */
+    float32_t y;
+
+    /* y[n] = A0 * x[n] + A1 * x[n-1] + A2 * x[n-2] + y[n-1] */
+    y = (S->A0 * in) +
+      (S->A1 * hist[0]) + (S->A2 * hist[1]) + (hist[2]);
+
+    /* Age the input history and remember the new output */
+    hist[1] = hist[0];
+    hist[0] = in;
+    hist[2] = y;
+
+    return (y);
+  }
+
+  /**
+   * @brief  Runs one step of the Q31 PID controller.
+   * @param[in,out] *S points to an instance of the Q31 PID Control structure; its state entries are updated
+   * @param[in] in current input sample x[n]
+   * @return the newly computed output sample y[n].
+   *
+   * <b>Scaling and Overflow Behavior:</b>
+   * \par
+   * The three gain/sample products are summed in a 64-bit accumulator.
+   * The accumulator is shifted right by 31 bits and narrowed to a 32-bit value.
+   * The previous output y[n-1] is then added with an ordinary 32-bit addition;
+   * no saturating intrinsic is applied in this function, so the input should be scaled
+   * so that neither the accumulator nor the final sum can overflow.
+   */
+
+  static __INLINE q31_t arm_pid_q31(
+    arm_pid_instance_q31 * S,
+    q31_t in)
+  {
+    q31_t *hist = S->state;   /* [0] = x[n-1], [1] = x[n-2], [2] = y[n-1] */
+    q63_t sum;
+    q31_t y;
+
+    /* sum = A0 * x[n] + A1 * x[n-1] + A2 * x[n-2], accumulated in 64 bits */
+    sum = (q63_t) S->A0 * in;
+    sum += (q63_t) S->A1 * hist[0];
+    sum += (q63_t) S->A2 * hist[1];
+
+    /* Bring the sum back to 1.31 and add the previous output y[n-1] */
+    y = (q31_t) (sum >> 31u);
+    y += hist[2];
+
+    /* Age the input history and remember the new output */
+    hist[1] = hist[0];
+    hist[0] = in;
+    hist[2] = y;
+
+    return (y);
+  }
+
+  /**
+   * @brief  Process function for the Q15 PID Control.
+   * @param[in,out] *S points to an instance of the Q15 PID Control structure; its state entries are updated
+   * @param[in] in input sample to process
+   * @return out processed output sample.
+   *
+   * <b>Scaling and Overflow Behavior:</b> 
+   * \par 
+   * The function is implemented using a 64-bit internal accumulator. 
+   * Both Gains and state variables are represented in 1.15 format and multiplications yield a 2.30 result. 
+   * The 2.30 intermediate results are accumulated in a 64-bit accumulator in 34.30 format. 
+   * There is no risk of internal overflow with this approach and the full precision of intermediate multiplications is preserved. 
+   * After all additions have been performed, the accumulator is truncated to 34.15 format by discarding low 15 bits. 
+   * Lastly, the accumulator is saturated to yield a result in 1.15 format.
+   */
+
+  static __INLINE q15_t arm_pid_q15(
+				    arm_pid_instance_q15 * S,
+				    q15_t in)
+  {
+    q63_t acc;
+    q15_t out;
+
+    /* Implementation of PID controller */
+
+    /* acc = A0 * x[n]
+       NOTE(review): __SMUAD is a dual 16-bit multiply-add; confirm how the upper
+       halfwords of the promoted operands contribute here. */
+    acc = (q31_t) __SMUAD(S->A0, in);
+
+    /* acc += A1 * x[n-1] + A2 * x[n-2]
+       state[0] and state[1] are fetched together as one 32-bit word through __SIMD32;
+       presumably S->A1 packs the A1 and A2 gains the same way - confirm against the
+       instance structure. */
+    acc = __SMLALD(S->A1, (q31_t)__SIMD32(S->state), acc);
+
+    /* acc += y[n-1], shifted left by 15 bits to line up with the products */
+    acc += (q31_t) S->state[2] << 15;
+
+    /* drop the low 15 bits and saturate the output to 16 bits */
+    out = (q15_t) (__SSAT((acc >> 15), 16));
+
+    /* Update state: state[0] = x[n], state[1] = x[n-1], state[2] = y[n] */
+    S->state[1] = S->state[0];
+    S->state[0] = in;
+    S->state[2] = out;
+
+    /* return to application */
+    return (out);
+
+  }
+  
+  /**
+   * @} end of PID group
+   */
+
+
+  /**
+   * @brief Floating-point matrix inverse.
+   * @param[in]  *src points to the instance of the input floating-point matrix structure.
+   * @param[out] *dst points to the instance of the output floating-point matrix structure.
+   * @return ARM_MATH_SIZE_MISMATCH if the dimensions do not match.
+   * If the input matrix is singular (does not have an inverse), the algorithm terminates
+   * and returns the error status ARM_MATH_SINGULAR.
+   */
+
+  arm_status arm_mat_inverse_f32(
+				 const arm_matrix_instance_f32 * src,
+				 arm_matrix_instance_f32 * dst);
+
+  
+ 
+  /**
+   * @ingroup groupController
+   */
+
+
+  /**
+   * @defgroup clarke Vector Clarke Transform
+   * Forward Clarke transform converts the instantaneous stator phases into a two-coordinate time invariant vector.
+   * Generally the Clarke transform uses three-phase currents <code>Ia, Ib and Ic</code> to calculate currents
+   * in the two-phase orthogonal stator axis <code>Ialpha</code> and <code>Ibeta</code>.
+   * When <code>Ialpha</code> is superposed with <code>Ia</code> as shown in the figure below
+   * \image html clarke.gif Stator current space vector and its components in (a,b).
+   * and <code>Ia + Ib + Ic = 0</code>, in this condition <code>Ialpha</code> and <code>Ibeta</code>
+   * can be calculated using only <code>Ia</code> and <code>Ib</code>.
+   *
+   * The function operates on a single sample of data and each call to the function returns the processed output. 
+   * The library provides separate functions for Q31 and floating-point data types.
+   * \par Algorithm
+   * \image html clarkeFormula.gif
+   * where <code>Ia</code> and <code>Ib</code> are the instantaneous stator phases and
+   * <code>pIalpha</code> and <code>pIbeta</code> are the two coordinates of time invariant vector.
+   * \par Fixed-Point Behavior
+   * Care must be taken when using the Q31 version of the Clarke transform.
+   * In particular, the overflow and saturation behavior of the accumulator used must be considered.
+   * Refer to the function specific documentation below for usage guidelines.
+   */
+
+  /**
+   * @addtogroup clarke
+   * @{
+   */
+
+  /**
+   *
+   * @brief  Floating-point Clarke transform
+   * @param[in]       Ia       input three-phase coordinate <code>a</code>
+   * @param[in]       Ib       input three-phase coordinate <code>b</code>
+   * @param[out]      *pIalpha receives the alpha component, which equals <code>Ia</code>
+   * @param[out]      *pIbeta  receives the beta component, (1/sqrt(3)) * Ia + (2/sqrt(3)) * Ib
+   * @return none.
+   */
+
+  static __INLINE void arm_clarke_f32(
+    float32_t Ia,
+    float32_t Ib,
+    float32_t * pIalpha,
+    float32_t * pIbeta)
+  {
+    const float32_t invSqrt3 = (float32_t) 0.57735026919;      /* 1/sqrt(3) */
+    const float32_t twoInvSqrt3 = (float32_t) 1.15470053838;   /* 2/sqrt(3) */
+
+    /* alpha axis coincides with phase a */
+    *pIalpha = Ia;
+
+    /* beta axis is a weighted sum of phases a and b */
+    *pIbeta = (invSqrt3 * Ia + twoInvSqrt3 * Ib);
+  }
+
+  /**
+   * @brief  Clarke transform for Q31 version
+   * @param[in]       Ia       input three-phase coordinate <code>a</code>
+   * @param[in]       Ib       input three-phase coordinate <code>b</code>
+   * @param[out]      *pIalpha receives the alpha component, which equals <code>Ia</code>
+   * @param[out]      *pIbeta  receives the beta component
+   * @return none.
+   *
+   * <b>Scaling and Overflow Behavior:</b>
+   * \par
+   * Each input is multiplied by its constant in 64-bit arithmetic, shifted right by 30 bits
+   * and narrowed to 32 bits. The two partial results are combined with the saturating add
+   * <code>__QADD</code>.
+   */
+
+  static __INLINE void arm_clarke_q31(
+    q31_t Ia,
+    q31_t Ib,
+    q31_t * pIalpha,
+    q31_t * pIbeta)
+  {
+    q31_t scaledA;   /* (1/sqrt(3)) * Ia */
+    q31_t scaledB;   /* (2/sqrt(3)) * Ib */
+
+    /* alpha axis coincides with phase a */
+    *pIalpha = Ia;
+
+    /* Constants are the two factors scaled by 2^30, hence the shift by 30 */
+    scaledA = (q31_t) (((q63_t) Ia * 0x24F34E8B) >> 30);
+    scaledB = (q31_t) (((q63_t) Ib * 0x49E69D16) >> 30);
+
+    /* beta = scaledA + scaledB */
+    *pIbeta = __QADD(scaledA, scaledB);
+  }
+
+  /**
+   * @} end of clarke group
+   */
+
+  /**
+   * @brief  Converts the elements of the Q7 vector to Q31 vector.
+   * @param[in]  *pSrc     points to the Q7 input vector
+   * @param[out] *pDst     points to the Q31 output vector
+   * @param[in]  blockSize number of samples to process
+   * @return none.
+   */
+  void arm_q7_to_q31(
+		     q7_t * pSrc,
+		     q31_t * pDst,
+		     uint32_t blockSize);
+
+
+ 
+
+  /**
+   * @ingroup groupController
+   */
+
+  /**
+   * @defgroup inv_clarke Vector Inverse Clarke Transform
+   * Inverse Clarke transform converts the two-coordinate time invariant vector into instantaneous stator phases.
+   * 
+   * The function operates on a single sample of data and each call to the function returns the processed output. 
+   * The library provides separate functions for Q31 and floating-point data types.
+   * \par Algorithm
+   * \image html clarkeInvFormula.gif
+   * where <code>pIa</code> and <code>pIb</code> are the instantaneous stator phases and
+   * <code>Ialpha</code> and <code>Ibeta</code> are the two coordinates of time invariant vector.
+   * \par Fixed-Point Behavior
+   * Care must be taken when using the Q31 version of the Clarke transform.
+   * In particular, the overflow and saturation behavior of the accumulator used must be considered.
+   * Refer to the function specific documentation below for usage guidelines.
+   */
+
+  /**
+   * @addtogroup inv_clarke
+   * @{
+   */
+
+   /**
+   * @brief  Floating-point Inverse Clarke transform
+   * @param[in]       Ialpha  input two-phase orthogonal vector axis alpha
+   * @param[in]       Ibeta   input two-phase orthogonal vector axis beta
+   * @param[out]      *pIa    points to output three-phase coordinate <code>a</code>
+   * @param[out]      *pIb    points to output three-phase coordinate <code>b</code>
+   * @return none.
+   *
+   * \par
+   * Both constants are single-precision so that the whole expression is evaluated
+   * in <code>float32_t</code> rather than being promoted to double.
+   */
+
+
+  static __INLINE void arm_inv_clarke_f32(
+					  float32_t Ialpha,
+					  float32_t Ibeta,
+					  float32_t * pIa,
+					  float32_t * pIb)
+  {
+    /* Calculating pIa from Ialpha by equation pIa = Ialpha */
+    *pIa = Ialpha;
+
+    /* Calculating pIb from Ialpha and Ibeta by equation pIb = -(1/2) * Ialpha + (sqrt(3)/2) * Ibeta */
+    *pIb = -0.5f * Ialpha + (float32_t) 0.8660254039 * Ibeta;
+
+  }
+
+  /**
+   * @brief  Inverse Clarke transform for Q31 version 
+   * @param[in]       Ialpha  input two-phase orthogonal vector axis alpha
+   * @param[in]       Ibeta   input two-phase orthogonal vector axis beta
+   * @param[out]      *pIa    receives three-phase coordinate <code>a</code>, which equals <code>Ialpha</code>
+   * @param[out]      *pIb    receives three-phase coordinate <code>b</code>
+   * @return none.
+   *
+   * <b>Scaling and Overflow Behavior:</b>
+   * \par
+   * Each input is multiplied by its 1.31 constant in 64-bit arithmetic and shifted right by 31 bits
+   * to return to 1.31 format. The two partial results are combined with the saturating subtract
+   * <code>__QSUB</code>.
+   */
+
+  static __INLINE void arm_inv_clarke_q31(
+    q31_t Ialpha,
+    q31_t Ibeta,
+    q31_t * pIa,
+    q31_t * pIb)
+  {
+    q31_t halfAlpha;    /* (1/2) * Ialpha */
+    q31_t scaledBeta;   /* (sqrt(3)/2) * Ibeta */
+
+    /* phase a coincides with the alpha axis */
+    *pIa = Ialpha;
+
+    /* 0x40000000 is 0.5 in 1.31 format */
+    halfAlpha = (q31_t) (((q63_t) (Ialpha) * (0x40000000)) >> 31);
+
+    /* 0x6ED9EBA1 is sqrt(3)/2 in 1.31 format */
+    scaledBeta = (q31_t) (((q63_t) (Ibeta) * (0x6ED9EBA1)) >> 31);
+
+    /* pIb = (sqrt(3)/2) * Ibeta - (1/2) * Ialpha */
+    *pIb = __QSUB(scaledBeta, halfAlpha);
+  }
+
+  /**
+   * @} end of inv_clarke group
+   */
+
+  /**
+   * @brief  Converts the elements of the Q7 vector to Q15 vector.
+   * @param[in]  *pSrc     points to the Q7 input vector
+   * @param[out] *pDst     points to the Q15 output vector
+   * @param[in]  blockSize number of samples to process
+   * @return none.
+   */
+  void arm_q7_to_q15(
+		      q7_t * pSrc,
+		     q15_t * pDst,
+		     uint32_t blockSize);
+
+  
+
+  /**
+   * @ingroup groupController
+   */
+
+  /**
+   * @defgroup park Vector Park Transform
+   *
+   * Forward Park transform converts the input two-coordinate vector to flux and torque components.
+   * The Park transform can be used to realize the transformation of the <code>Ialpha</code> and the <code>Ibeta</code> currents 
+   * from the stationary to the moving reference frame and control the spatial relationship between 
+   * the stator vector current and rotor flux vector.
+   * If we consider the d axis aligned with the rotor flux, the diagram below shows the 
+   * current vector and the relationship from the two reference frames:
+   * \image html park.gif "Stator current space vector and its component in (a,b) and in the d,q rotating reference frame"
+   *
+   * The function operates on a single sample of data and each call to the function returns the processed output. 
+   * The library provides separate functions for Q31 and floating-point data types.
+   * \par Algorithm
+   * \image html parkFormula.gif
+   * where <code>Ialpha</code> and <code>Ibeta</code> are the stator vector components,  
+   * <code>pId</code> and <code>pIq</code> are rotor vector components and <code>cosVal</code> and <code>sinVal</code> are the 
+   * cosine and sine values of theta (rotor flux position).
+   * \par Fixed-Point Behavior
+   * Care must be taken when using the Q31 version of the Park transform.
+   * In particular, the overflow and saturation behavior of the accumulator used must be considered.
+   * Refer to the function specific documentation below for usage guidelines.
+   */
+
+  /**
+   * @addtogroup park
+   * @{
+   */
+
+  /**
+   * @brief Floating-point Park transform
+   * @param[in]       Ialpha input two-phase vector coordinate alpha
+   * @param[in]       Ibeta  input two-phase vector coordinate beta
+   * @param[out]      *pId   receives rotor reference frame component d
+   * @param[out]      *pIq   receives rotor reference frame component q
+   * @param[in]       sinVal sine value of rotation angle theta
+   * @param[in]       cosVal cosine value of rotation angle theta
+   * @return none.
+   *
+   * Implements the forward Park transform:
+   * Id = Ialpha * cosVal + Ibeta * sinVal and Iq = -Ialpha * sinVal + Ibeta * cosVal.
+   *
+   */
+
+  static __INLINE void arm_park_f32(
+    float32_t Ialpha,
+    float32_t Ibeta,
+    float32_t * pId,
+    float32_t * pIq,
+    float32_t sinVal,
+    float32_t cosVal)
+  {
+    /* d axis: projection of (Ialpha, Ibeta) onto the rotated frame */
+    float32_t d = Ialpha * cosVal + Ibeta * sinVal;
+
+    /* q axis: component perpendicular to d */
+    float32_t q = -Ialpha * sinVal + Ibeta * cosVal;
+
+    *pId = d;
+    *pIq = q;
+  }
+
+  /**
+   * @brief  Park transform for Q31 version 
+   * @param[in]       Ialpha input two-phase vector coordinate alpha
+   * @param[in]       Ibeta  input two-phase vector coordinate beta
+   * @param[out]      *pId   receives rotor reference frame component d
+   * @param[out]      *pIq   receives rotor reference frame component q
+   * @param[in]       sinVal sine value of rotation angle theta
+   * @param[in]       cosVal cosine value of rotation angle theta
+   * @return none.
+   *
+   * <b>Scaling and Overflow Behavior:</b>
+   * \par
+   * Each of the four products is formed in 64-bit arithmetic and shifted right by 31 bits
+   * to return to 1.31 format. The products are combined with the saturating intrinsics
+   * <code>__QADD</code> and <code>__QSUB</code>.
+   */
+
+
+  static __INLINE void arm_park_q31(
+    q31_t Ialpha,
+    q31_t Ibeta,
+    q31_t * pId,
+    q31_t * pIq,
+    q31_t sinVal,
+    q31_t cosVal)
+  {
+    /* The four cross products, each reduced from 2.62 to 1.31 */
+    q31_t alphaCos = (q31_t) (((q63_t) (Ialpha) * (cosVal)) >> 31);
+    q31_t betaSin = (q31_t) (((q63_t) (Ibeta) * (sinVal)) >> 31);
+    q31_t alphaSin = (q31_t) (((q63_t) (Ialpha) * (sinVal)) >> 31);
+    q31_t betaCos = (q31_t) (((q63_t) (Ibeta) * (cosVal)) >> 31);
+
+    /* Id = Ialpha * cosVal + Ibeta * sinVal */
+    *pId = __QADD(alphaCos, betaSin);
+
+    /* Iq = Ibeta * cosVal - Ialpha * sinVal */
+    *pIq = __QSUB(betaCos, alphaSin);
+  }
+
+  /**
+   * @} end of park group
+   */
+
+  /**
+   * @brief  Converts the elements of the Q7 vector to floating-point vector.
+   * @param[in]  *pSrc     points to the Q7 input vector
+   * @param[out] *pDst     points to the floating-point output vector
+   * @param[in]  blockSize number of samples to process
+   * @return none.
+   */
+  void arm_q7_to_float(
+		        q7_t * pSrc,
+		       float32_t * pDst,
+		       uint32_t blockSize);
+
+ 
+  /**
+   * @ingroup groupController
+   */
+
+  /**
+   * @defgroup inv_park Vector Inverse Park transform
+   * Inverse Park transform converts the input flux and torque components to two-coordinate vector.
+   *
+   * The function operates on a single sample of data and each call to the function returns the processed output. 
+   * The library provides separate functions for Q31 and floating-point data types.
+   * \par Algorithm
+   * \image html parkInvFormula.gif
+   * where <code>pIalpha</code> and <code>pIbeta</code> are the stator vector components,  
+   * <code>Id</code> and <code>Iq</code> are rotor vector components and <code>cosVal</code> and <code>sinVal</code> are the 
+   * cosine and sine values of theta (rotor flux position).
+   * \par Fixed-Point Behavior
+   * Care must be taken when using the Q31 version of the Park transform.
+   * In particular, the overflow and saturation behavior of the accumulator used must be considered.
+   * Refer to the function specific documentation below for usage guidelines.
+   */
+
+  /**
+   * @addtogroup inv_park
+   * @{
+   */
+
+   /**
+   * @brief  Floating-point Inverse Park transform
+   * @param[in]       Id        input coordinate of rotor reference frame d
+   * @param[in]       Iq        input coordinate of rotor reference frame q
+   * @param[out]      *pIalpha  receives two-phase orthogonal vector axis alpha, Id * cosVal - Iq * sinVal
+   * @param[out]      *pIbeta   receives two-phase orthogonal vector axis beta, Id * sinVal + Iq * cosVal
+   * @param[in]       sinVal    sine value of rotation angle theta
+   * @param[in]       cosVal    cosine value of rotation angle theta
+   * @return none.
+   */
+
+  static __INLINE void arm_inv_park_f32(
+    float32_t Id,
+    float32_t Iq,
+    float32_t * pIalpha,
+    float32_t * pIbeta,
+    float32_t sinVal,
+    float32_t cosVal)
+  {
+    /* Rotate (Id, Iq) back into the stationary frame */
+    float32_t alpha = Id * cosVal - Iq * sinVal;
+    float32_t beta = Id * sinVal + Iq * cosVal;
+
+    *pIalpha = alpha;
+    *pIbeta = beta;
+  }
+
+
+  /**
+   * @brief  Inverse Park transform for Q31 version 
+   * @param[in]       Id        input coordinate of rotor reference frame d
+   * @param[in]       Iq        input coordinate of rotor reference frame q
+   * @param[out]      *pIalpha  receives two-phase orthogonal vector axis alpha
+   * @param[out]      *pIbeta   receives two-phase orthogonal vector axis beta
+   * @param[in]       sinVal    sine value of rotation angle theta
+   * @param[in]       cosVal    cosine value of rotation angle theta
+   * @return none.
+   *
+   * <b>Scaling and Overflow Behavior:</b>
+   * \par
+   * Each of the four products is formed in 64-bit arithmetic and shifted right by 31 bits
+   * to return to 1.31 format. The products are combined with the saturating intrinsics
+   * <code>__QSUB</code> and <code>__QADD</code>.
+   */
+
+
+  static __INLINE void arm_inv_park_q31(
+    q31_t Id,
+    q31_t Iq,
+    q31_t * pIalpha,
+    q31_t * pIbeta,
+    q31_t sinVal,
+    q31_t cosVal)
+  {
+    /* The four cross products, each reduced from 2.62 to 1.31 */
+    q31_t dCos = (q31_t) (((q63_t) (Id) * (cosVal)) >> 31);
+    q31_t qSin = (q31_t) (((q63_t) (Iq) * (sinVal)) >> 31);
+    q31_t dSin = (q31_t) (((q63_t) (Id) * (sinVal)) >> 31);
+    q31_t qCos = (q31_t) (((q63_t) (Iq) * (cosVal)) >> 31);
+
+    /* Ialpha = Id * cosVal - Iq * sinVal */
+    *pIalpha = __QSUB(dCos, qSin);
+
+    /* Ibeta = Iq * cosVal + Id * sinVal */
+    *pIbeta = __QADD(qCos, dSin);
+  }
+
+  /**
+   * @} end of Inverse park group
+   */
+
+   
+  /**
+   * @brief  Converts the elements of the Q31 vector to floating-point vector.
+   * @param[in]  *pSrc     points to the Q31 input vector
+   * @param[out] *pDst     points to the floating-point output vector
+   * @param[in]  blockSize number of samples to process
+   * @return none.
+   */
+  void arm_q31_to_float(
+			 q31_t * pSrc,
+			float32_t * pDst,
+			uint32_t blockSize);
+
+  /**
+   * @ingroup groupInterpolation
+   */
+
+  /**
+   * @defgroup LinearInterpolate Linear Interpolation
+   *
+   * Linear interpolation is a method of curve fitting using linear polynomials.
+   * Linear interpolation works by effectively drawing a straight line between two neighboring samples and returning the appropriate point along that line
+   *
+   * \par 
+   * \image html LinearInterp.gif "Linear interpolation"
+   *
+   * \par
+   * A  Linear Interpolate function calculates an output value(y), for the input(x)
+   * using linear interpolation of the input values x0, x1( nearest input values) and the output values y0 and y1(nearest output values)
+   *
+   * \par Algorithm:
+   * <pre>
+   *       y = y0 + (x - x0) * ((y1 - y0)/(x1-x0))
+   *       where x0, x1 are nearest values of input x
+   *             y0, y1 are nearest values to output y
+   * </pre>
+   *
+   * \par
+   * This set of functions implements Linear interpolation process
+   * for Q7, Q15, Q31, and floating-point data types.  The functions operate on a single
+   * sample of data and each call to the function returns a single processed value.
+   * <code>S</code> points to an instance of the Linear Interpolate function data structure.
+   * <code>x</code> is the input sample value. The functions return the output value.
+   * 
+   * \par
+   * If x is outside of the table boundary, linear interpolation returns the first value of the table
+   * if x is below the input range and the last value of the table if x is above the range.
+   */
+
+  /**
+   * @addtogroup LinearInterpolate
+   * @{
+   */
+
+  /**
+   * @brief  Process function for the floating-point Linear Interpolation Function.
+   * @param[in,out] *S is an instance of the floating-point Linear Interpolation structure
+   * @param[in] x input sample to process
+   * @return y processed output sample.
+   *
+   * \par
+   * The table index is computed as <code>(x - S->x1) / S->xSpacing</code>, converted to an integer.
+   * A negative index returns the first table entry. An index at or beyond the last entry
+   * (<code>S->nValues - 1</code>) returns the last table entry, so the interpolation never
+   * reads past the end of <code>S->pYData</code>.
+   */
+
+  static __INLINE float32_t arm_linear_interp_f32(
+						  arm_linear_interp_instance_f32 * S,
+						  float32_t x)
+  {
+
+    float32_t y;
+    float32_t x0, x1;                     /* Nearest input values */
+    float32_t y0, y1;                     /* Nearest output values */
+    float32_t xSpacing = S->xSpacing;     /* spacing between input values */
+    int32_t i;                            /* Index variable */
+    float32_t *pYData = S->pYData;        /* pointer to output table */
+
+    /* Calculation of index */
+    i = (int32_t) ((x - S->x1) / xSpacing);
+
+    if(i < 0)
+    {
+      /* Below the specified range: use the first output value of the table */
+      y = pYData[0];
+    }
+    else if(i >= (S->nValues - 1))
+    {
+      /* At or above the last table entry: use the last output value of the table.
+         The comparison is against nValues - 1 because the interpolation below
+         reads pYData[i + 1], which must stay inside the table. */
+      y = pYData[S->nValues - 1];
+    }
+    else
+    {
+      /* Calculation of nearest input values */
+      x0 = S->x1 + i * xSpacing;
+      x1 = S->x1 + (i + 1) * xSpacing;
+
+      /* Read of nearest output values */
+      y0 = pYData[i];
+      y1 = pYData[i + 1];
+
+      /* Calculation of output */
+      y = y0 + (x - x0) * ((y1 - y0) / (x1 - x0));
+
+    }
+
+    /* returns output value */
+    return (y);
+  }
+
+   /**
+   *
+   * @brief  Process function for the Q31 Linear Interpolation Function.
+   * @param[in] *pYData  pointer to Q31 Linear Interpolation table
+   * @param[in] x input sample to process
+   * @param[in] nValues number of table values
+   * @return y processed output sample.
+   *
+   * \par
+   * Input sample <code>x</code> is in 12.20 format which contains 12 bits for table index and 20 bits for fractional part.
+   * <code>x</code> is a signed value: a negative <code>x</code> is below the table and returns the first entry,
+   * so the usable table index range is 0 to 2047.
+   * An index at or beyond <code>nValues - 1</code> returns the last entry.
+   *
+   */
+
+
+  static __INLINE q31_t arm_linear_interp_q31(q31_t *pYData,
+					      q31_t x, uint32_t nValues)
+  {
+    q31_t y;                                     /* output */
+    q31_t y0, y1;                                /* Nearest output values */
+    q31_t fract;                                 /* fractional part */
+    int32_t index;                               /* Index to read nearest output values */
+
+    /* A negative input lies below the table. It is tested on x itself because
+       masking with the unsigned constant 0xFFF00000 can never yield a negative index. */
+    if(x < 0)
+    {
+      return (pYData[0]);
+    }
+
+    /* Input is in 12.20 format */
+    /* 12 bits for the table index; x is non-negative here so the shift is well defined */
+    index = x >> 20;
+
+    if((uint32_t) index >= (nValues - 1))
+    {
+      return (pYData[nValues - 1]);
+    }
+
+    /* 20 bits for the fractional part */
+    /* shift left by 11 to keep fract in 1.31 format */
+    fract = (x & 0x000FFFFF) << 11;
+
+    /* Read two nearest output values from the index in 1.31(q31) format */
+    y0 = pYData[index];
+    y1 = pYData[index + 1];
+
+    /* Calculation of y0 * (1-fract) and y is in 2.30 format */
+    y = ((q31_t) ((q63_t) y0 * (0x7FFFFFFF - fract) >> 32));
+
+    /* Calculation of y0 * (1-fract) + y1 * fract and y is in 2.30 format */
+    y += ((q31_t) (((q63_t) y1 * fract) >> 32));
+
+    /* Convert y to 1.31 format */
+    return (y << 1u);
+
+  }
+
+  /**
+   *
+   * @brief  Process function for the Q15 Linear Interpolation Function.
+   * @param[in] *pYData  pointer to Q15 Linear Interpolation table
+   * @param[in] x input sample to process
+   * @param[in] nValues number of table values
+   * @return y processed output sample.
+   *
+   * \par
+   * Input sample <code>x</code> is in 12.20 format which contains 12 bits for table index and 20 bits for fractional part.
+   * This function can support maximum of table size 2^12.
+   *
+   * \par
+   * A negative <code>x</code> returns the first table entry. An index at or beyond
+   * <code>nValues - 1</code> returns the last table entry.
+   * <code>nValues</code> is expected to be at least 1.
+   *
+   */
+
+
+  static __INLINE q15_t arm_linear_interp_q15(q15_t *pYData, q31_t x, uint32_t nValues)
+  {
+    q63_t y;                                   /* output */
+    q15_t y0, y1;                              /* Nearest output values */
+    q31_t fract;                               /* fractional part */
+    uint32_t index;                            /* Index to read nearest output values */
+
+    /* Input below the table range: clamp to the first entry.
+     * This is tested on x itself because masking x with 0xFFF00000 gives an
+     * unsigned value, so an index derived from it can never be negative. */
+    if(x < 0)
+    {
+      return (pYData[0]);
+    }
+
+    /* Input is in 12.20 format */
+    /* x is non-negative here, so the upper 12 bits are the table index */
+    index = ((uint32_t) x) >> 20u;
+
+    /* Input at or above the last interval: clamp to the last entry */
+    if(index >= (nValues - 1))
+    {
+      return (pYData[nValues - 1]);
+    }
+
+    /* 20 bits for the fractional part */
+    /* fract is in 12.20 format */
+    fract = (x & 0x000FFFFF);
+
+    /* Read two nearest output values from the index */
+    y0 = pYData[index];
+    y1 = pYData[index + 1u];
+
+    /* Calculation of y0 * (1-fract) and y is in 13.35 format */
+    y = ((q63_t) y0 * (0xFFFFF - fract));
+
+    /* Calculation of (y0 * (1-fract) + y1 * fract) and y is in 13.35 format */
+    y += ((q63_t) y1 * (fract));
+
+    /* convert y to 1.15 format; the narrowing from q63_t is made explicit */
+    return ((q15_t) (y >> 20));
+
+  }
+
+  /**
+   *
+   * @brief  Process function for the Q7 Linear Interpolation Function.
+   * @param[in] *pYData  pointer to Q7 Linear Interpolation table
+   * @param[in] x input sample to process
+   * @param[in] nValues number of table values
+   * @return y processed output sample.
+   *
+   * \par
+   * Input sample <code>x</code> is in 12.20 format which contains 12 bits for table index and 20 bits for fractional part.
+   * This function can support maximum of table size 2^12.
+   *
+   * \par
+   * A negative <code>x</code> returns the first table entry. An index at or beyond
+   * <code>nValues - 1</code> returns the last table entry.
+   * <code>nValues</code> is expected to be at least 1.
+   */
+
+
+  static __INLINE q7_t arm_linear_interp_q7(q7_t *pYData, q31_t x,  uint32_t nValues)
+  {
+    q31_t y;                                     /* output */
+    q7_t y0, y1;                                 /* Nearest output values */
+    q31_t fract;                                 /* fractional part */
+    uint32_t index;                              /* Index to read nearest output values */
+
+    /* Input below the table range: clamp to the first entry.
+     * This is tested on x itself because masking x with 0xFFF00000 gives an
+     * unsigned value, so an index derived from it can never be negative. */
+    if(x < 0)
+    {
+      return (pYData[0]);
+    }
+
+    /* Input is in 12.20 format */
+    /* x is non-negative here, so the upper 12 bits are the table index */
+    index = ((uint32_t) x) >> 20u;
+
+    /* Input at or above the last interval: clamp to the last entry */
+    if(index >= (nValues - 1))
+    {
+      return (pYData[nValues - 1]);
+    }
+
+    /* 20 bits for the fractional part */
+    /* fract is in 12.20 format */
+    fract = (x & 0x000FFFFF);
+
+    /* Read two nearest output values from the index and are in 1.7(q7) format */
+    y0 = pYData[index];
+    y1 = pYData[index + 1u];
+
+    /* Calculation of y0 * (1-fract) and y is in 13.27(q27) format */
+    y = ((y0 * (0xFFFFF - fract)));
+
+    /* Calculation of y1 * fract + y0 * (1-fract) and y is in 13.27(q27) format */
+    y += (y1 * fract);
+
+    /* convert y to 1.7(q7) format; the narrowing from q31_t is made explicit */
+    return ((q7_t) (y >> 20u));
+
+  }
+  /**
+   * @} end of LinearInterpolate group
+   */
+
+  /**
+   * @brief  Fast approximation to the trigonometric sine function for floating-point data.
+   * @param[in] x input value in radians.
+   * @return  sin(x).
+   */
+
+  float32_t arm_sin_f32(
+			 float32_t x);
+
+  /**
+   * @brief  Fast approximation to the trigonometric sine function for Q31 data.
+   * @param[in] x Scaled input value in radians.
+   * @return  sin(x).
+   */
+
+  q31_t arm_sin_q31(
+		     q31_t x);
+
+  /**
+   * @brief  Fast approximation to the trigonometric sine function for Q15 data.
+   * @param[in] x Scaled input value in radians.
+   * @return  sin(x).
+   */
+
+  q15_t arm_sin_q15(
+		     q15_t x);
+
+  /**
+   * @brief  Fast approximation to the trigonometric cosine function for floating-point data.
+   * @param[in] x input value in radians.
+   * @return  cos(x).
+   */
+
+  float32_t arm_cos_f32(
+			 float32_t x);
+
+  /**
+   * @brief Fast approximation to the trigonometric cosine function for Q31 data.
+   * @param[in] x Scaled input value in radians.
+   * @return  cos(x).
+   */
+
+  q31_t arm_cos_q31(
+		     q31_t x);
+
+  /**
+   * @brief  Fast approximation to the trigonometric cosine function for Q15 data.
+   * @param[in] x Scaled input value in radians.
+   * @return  cos(x).
+   */
+
+  q15_t arm_cos_q15(
+		     q15_t x);
+
+
+  /**
+   * @ingroup groupFastMath
+   */
+
+
+  /**
+   * @defgroup SQRT Square Root
+   *
+   * Computes the square root of a number.
+   * There are separate functions for Q15, Q31, and floating-point data types.  
+   * The square root function is computed using the Newton-Raphson algorithm.
+   * This is an iterative algorithm of the form:
+   * <pre>
+   *      x1 = x0 - f(x0)/f'(x0)
+   * </pre>
+   * where <code>x1</code> is the current estimate,
+   * <code>x0</code> is the previous estimate and
+   * <code>f'(x0)</code> is the derivative of <code>f()</code> evaluated at <code>x0</code>.
+   * For the square root function, the algorithm reduces to:
+   * <pre>
+   *     x0 = in/2                         [initial guess]
+   *     x1 = 1/2 * ( x0 + in / x0)        [each iteration]
+   * </pre>
+   */
+
+
+  /**
+   * @addtogroup SQRT
+   * @{
+   */
+
+  /**
+   * @brief  Floating-point square root function.
+   * @param[in]  in     input value.
+   * @param[out] *pOut  square root of input value.
+   * @return The function returns ARM_MATH_SUCCESS if the input value is zero or positive, or
+   * ARM_MATH_ARGUMENT_ERROR if <code>in</code> is negative (or not a number); in the error case
+   * <code>*pOut</code> is set to zero.
+   *
+   * \par
+   * The result is produced by a fixed count of ten Newton-Raphson iterations
+   * starting from <code>in / 2</code>; no convergence test is performed.
+   */
+
+  static __INLINE arm_status  arm_sqrt_f32(
+                                          float32_t in, float32_t *pOut)
+  {
+    float32_t estimate;                          /* running Newton-Raphson estimate */
+    uint32_t iter;                               /* iteration counter */
+
+    if(in > 0)
+    {
+      /* Take initial guess as half the input */
+      estimate = in / 2;
+
+      /* Ten iterations of x1 = 1/2 * (x0 + in / x0) */
+      for(iter = 0u; iter < 10u; iter++)
+      {
+        estimate = 0.5f * (estimate + (in / estimate));
+      }
+
+      *pOut = estimate;
+      return (ARM_MATH_SUCCESS);
+    }
+
+    if(in == 0)
+    {
+      /* Zero is handled separately: the iteration would divide by zero,
+       * yet zero is a valid input whose root is zero. */
+      *pOut = 0.0f;
+      return (ARM_MATH_SUCCESS);
+    }
+
+    /* Negative input (or NaN, which fails both comparisons above) */
+    *pOut = 0.0f;
+    return (ARM_MATH_ARGUMENT_ERROR);
+
+  }
+
+
+  /**
+   * @brief Q31 square root function.
+   * @param[in]   in    input value.  The range of the input value is [0 +1) or 0x00000000 to 0x7FFFFFFF.
+   * @param[out]  *pOut square root of input value.
+   * @return The function returns ARM_MATH_SUCCESS if input value is positive value or ARM_MATH_ARGUMENT_ERROR if
+   * <code>in</code> is negative value and returns zero output for negative values.
+   */
+  arm_status arm_sqrt_q31(
+		      q31_t in, q31_t *pOut);
+
+  /**
+   * @brief  Q15 square root function.
+   * @param[in]   in     input value.  The range of the input value is [0 +1) or 0x0000 to 0x7FFF.
+   * @param[out]  *pOut  square root of input value.
+   * @return The function returns ARM_MATH_SUCCESS if input value is positive value or ARM_MATH_ARGUMENT_ERROR if
+   * <code>in</code> is negative value and returns zero output for negative values.
+   */
+  arm_status arm_sqrt_q15(
+		      q15_t in, q15_t *pOut);
+
+  /**
+   * @} end of SQRT group
+   */
+
+
+
+
+
+
+  /**
+   * @brief floating-point Circular write function.
+   *
+   * Copies <code>blockSize</code> 32-bit words from <code>src</code> into
+   * <code>circBuffer</code>, starting at <code>*writeOffset</code> and advancing the
+   * buffer position by <code>bufferInc</code> and the source by <code>srcInc</code>
+   * after every word. The position is reduced by <code>L</code> whenever it reaches
+   * or passes <code>L</code>; the final position is stored back in <code>*writeOffset</code>.
+   */
+
+  static __INLINE void arm_circularWrite_f32(
+                                             int32_t * circBuffer,
+                                             int32_t L,
+                                             uint16_t * writeOffset,
+                                             int32_t bufferInc,
+                                             const int32_t * src,
+                                             int32_t srcInc,
+                                             uint32_t blockSize)
+  {
+    /* Start from the position recorded by the previous call */
+    int32_t pos = *writeOffset;
+    uint32_t remaining;
+
+    for(remaining = blockSize; remaining != 0u; remaining--)
+    {
+      /* Store one input word, then step the source */
+      circBuffer[pos] = *src;
+      src += srcInc;
+
+      /* Step the buffer position; only running past the end is wrapped here */
+      pos += bufferInc;
+      if(pos >= L)
+      {
+        pos -= L;
+      }
+    }
+
+    /* Hand the updated position back to the caller */
+    *writeOffset = (uint16_t) pos;
+  }
+
+
+
+  /**
+   * @brief floating-point Circular Read function.
+   *
+   * Copies <code>blockSize</code> 32-bit words out of <code>circBuffer</code>, starting at
+   * <code>*readOffset</code>, into the destination at <code>dst</code>. The buffer position
+   * advances by <code>bufferInc</code> and is reduced by <code>L</code> whenever it reaches or
+   * passes <code>L</code>. The destination advances by <code>dstInc</code> and restarts at
+   * <code>dst_base</code> when it lands exactly on <code>dst_base + dst_length</code>.
+   * The final buffer position is stored back in <code>*readOffset</code>.
+   */
+  static __INLINE void arm_circularRead_f32(
+                                            int32_t * circBuffer,
+                                            int32_t L,
+                                            int32_t * readOffset,
+                                            int32_t bufferInc,
+                                            int32_t * dst,
+                                            int32_t * dst_base,
+                                            int32_t dst_length,
+                                            int32_t dstInc,
+                                            uint32_t blockSize)
+  {
+    uint32_t i = 0u;
+    int32_t rOffset;
+    int32_t *dst_end;      /* one past the last destination element */
+
+    /* Copy the value of Index pointer that points
+     * to the current location from where the input samples to be read */
+    rOffset = *readOffset;
+
+    /* Kept as a pointer: storing the address in an int32_t would truncate it
+     * wherever pointers are wider than 32 bits */
+    dst_end = dst_base + dst_length;
+
+    /* Loop over the blockSize */
+    i = blockSize;
+
+    while(i > 0u)
+      {
+        /* copy the sample from the circular buffer to the destination buffer */
+        *dst = circBuffer[rOffset];
+
+        /* Update the output pointer */
+        dst += dstInc;
+
+        if(dst == dst_end)
+          {
+            dst = dst_base;
+          }
+
+        /* Circularly update rOffset; only running past the end is wrapped here */
+        rOffset += bufferInc;
+
+        if(rOffset >= L)
+          {
+            rOffset -= L;
+          }
+
+        /* Decrement the loop counter */
+        i--;
+      }
+
+    /* Update the index pointer */
+    *readOffset = rOffset;
+  }
+
+  /**
+   * @brief Q15 Circular write function.
+   *
+   * Copies <code>blockSize</code> Q15 samples from <code>src</code> into
+   * <code>circBuffer</code>, starting at <code>*writeOffset</code> and advancing the
+   * buffer position by <code>bufferInc</code> and the source by <code>srcInc</code>
+   * after every sample. The position is reduced by <code>L</code> whenever it reaches
+   * or passes <code>L</code>; the final position is stored back in <code>*writeOffset</code>.
+   */
+
+  static __INLINE void arm_circularWrite_q15(
+                                             q15_t * circBuffer,
+                                             int32_t L,
+                                             uint16_t * writeOffset,
+                                             int32_t bufferInc,
+                                             const q15_t * src,
+                                             int32_t srcInc,
+                                             uint32_t blockSize)
+  {
+    /* Start from the position recorded by the previous call */
+    int32_t pos = *writeOffset;
+    uint32_t remaining;
+
+    for(remaining = blockSize; remaining != 0u; remaining--)
+    {
+      /* Store one input sample, then step the source */
+      circBuffer[pos] = *src;
+      src += srcInc;
+
+      /* Step the buffer position; only running past the end is wrapped here */
+      pos += bufferInc;
+      if(pos >= L)
+      {
+        pos -= L;
+      }
+    }
+
+    /* Hand the updated position back to the caller */
+    *writeOffset = (uint16_t) pos;
+  }
+
+
+
+  /**
+   * @brief Q15 Circular Read function.
+   *
+   * Copies <code>blockSize</code> Q15 samples out of <code>circBuffer</code>, starting at
+   * <code>*readOffset</code>, into the destination at <code>dst</code>. The buffer position
+   * advances by <code>bufferInc</code> and is reduced by <code>L</code> whenever it reaches or
+   * passes <code>L</code>. The destination advances by <code>dstInc</code> and restarts at
+   * <code>dst_base</code> when it lands exactly on <code>dst_base + dst_length</code>.
+   * The final buffer position is stored back in <code>*readOffset</code>.
+   */
+  static __INLINE void arm_circularRead_q15(
+                                            q15_t * circBuffer,
+                                            int32_t L,
+                                            int32_t * readOffset,
+                                            int32_t bufferInc,
+                                            q15_t * dst,
+                                            q15_t * dst_base,
+                                            int32_t dst_length,
+                                            int32_t dstInc,
+                                            uint32_t blockSize)
+  {
+    uint32_t i = 0;
+    int32_t rOffset;
+    q15_t *dst_end;        /* one past the last destination element */
+
+    /* Copy the value of Index pointer that points
+     * to the current location from where the input samples to be read */
+    rOffset = *readOffset;
+
+    /* Kept as a pointer: storing the address in an int32_t would truncate it
+     * wherever pointers are wider than 32 bits */
+    dst_end = dst_base + dst_length;
+
+    /* Loop over the blockSize */
+    i = blockSize;
+
+    while(i > 0u)
+      {
+        /* copy the sample from the circular buffer to the destination buffer */
+        *dst = circBuffer[rOffset];
+
+        /* Update the output pointer */
+        dst += dstInc;
+
+        if(dst == dst_end)
+          {
+            dst = dst_base;
+          }
+
+        /* Circularly update rOffset; only running past the end is wrapped here */
+        rOffset += bufferInc;
+
+        if(rOffset >= L)
+          {
+            rOffset -= L;
+          }
+
+        /* Decrement the loop counter */
+        i--;
+      }
+
+    /* Update the index pointer */
+    *readOffset = rOffset;
+  }
+
+
+  /**
+   * @brief Q7 Circular write function.
+   *
+   * Copies <code>blockSize</code> Q7 samples from <code>src</code> into
+   * <code>circBuffer</code>, starting at <code>*writeOffset</code> and advancing the
+   * buffer position by <code>bufferInc</code> and the source by <code>srcInc</code>
+   * after every sample. The position is reduced by <code>L</code> whenever it reaches
+   * or passes <code>L</code>; the final position is stored back in <code>*writeOffset</code>.
+   */
+
+  static __INLINE void arm_circularWrite_q7(
+                                            q7_t * circBuffer,
+                                            int32_t L,
+                                            uint16_t * writeOffset,
+                                            int32_t bufferInc,
+                                            const q7_t * src,
+                                            int32_t srcInc,
+                                            uint32_t blockSize)
+  {
+    /* Start from the position recorded by the previous call */
+    int32_t pos = *writeOffset;
+    uint32_t remaining;
+
+    for(remaining = blockSize; remaining != 0u; remaining--)
+    {
+      /* Store one input sample, then step the source */
+      circBuffer[pos] = *src;
+      src += srcInc;
+
+      /* Step the buffer position; only running past the end is wrapped here */
+      pos += bufferInc;
+      if(pos >= L)
+      {
+        pos -= L;
+      }
+    }
+
+    /* Hand the updated position back to the caller */
+    *writeOffset = (uint16_t) pos;
+  }
+
+
+
+  /**
+   * @brief Q7 Circular Read function.
+   *
+   * Copies <code>blockSize</code> Q7 samples out of <code>circBuffer</code>, starting at
+   * <code>*readOffset</code>, into the destination at <code>dst</code>. The buffer position
+   * advances by <code>bufferInc</code> and is reduced by <code>L</code> whenever it reaches or
+   * passes <code>L</code>. The destination advances by <code>dstInc</code> and restarts at
+   * <code>dst_base</code> when it lands exactly on <code>dst_base + dst_length</code>.
+   * The final buffer position is stored back in <code>*readOffset</code>.
+   */
+  static __INLINE void arm_circularRead_q7(
+                                           q7_t * circBuffer,
+                                           int32_t L,
+                                           int32_t * readOffset,
+                                           int32_t bufferInc,
+                                           q7_t * dst,
+                                           q7_t * dst_base,
+                                           int32_t dst_length,
+                                           int32_t dstInc,
+                                           uint32_t blockSize)
+  {
+    uint32_t i = 0;
+    int32_t rOffset;
+    q7_t *dst_end;         /* one past the last destination element */
+
+    /* Copy the value of Index pointer that points
+     * to the current location from where the input samples to be read */
+    rOffset = *readOffset;
+
+    /* Kept as a pointer: storing the address in an int32_t would truncate it
+     * wherever pointers are wider than 32 bits */
+    dst_end = dst_base + dst_length;
+
+    /* Loop over the blockSize */
+    i = blockSize;
+
+    while(i > 0u)
+      {
+        /* copy the sample from the circular buffer to the destination buffer */
+        *dst = circBuffer[rOffset];
+
+        /* Update the output pointer */
+        dst += dstInc;
+
+        if(dst == dst_end)
+          {
+            dst = dst_base;
+          }
+
+        /* Circularly update rOffset; only running past the end is wrapped here */
+        rOffset += bufferInc;
+
+        if(rOffset >= L)
+          {
+            rOffset -= L;
+          }
+
+        /* Decrement the loop counter */
+        i--;
+      }
+
+    /* Update the index pointer */
+    *readOffset = rOffset;
+  }
+
+
+  /**
+   * @brief  Sum of the squares of the elements of a Q31 vector.
+   * @param[in]  *pSrc is input pointer
+   * @param[in]  blockSize is the number of samples to process
+   * @param[out]  *pResult is output value.
+   * @return none.
+   */
+
+  void arm_power_q31(
+		      q31_t * pSrc,
+		     uint32_t blockSize,
+		     q63_t * pResult);
+
+  /**
+   * @brief  Sum of the squares of the elements of a floating-point vector.
+   * @param[in]  *pSrc is input pointer
+   * @param[in]  blockSize is the number of samples to process
+   * @param[out]  *pResult is output value.
+   * @return none.
+   */
+
+  void arm_power_f32(
+		      float32_t * pSrc,
+		     uint32_t blockSize,
+		     float32_t * pResult);
+
+  /**
+   * @brief  Sum of the squares of the elements of a Q15 vector.
+   * @param[in]  *pSrc is input pointer
+   * @param[in]  blockSize is the number of samples to process
+   * @param[out]  *pResult is output value.
+   * @return none.
+   */
+
+  void arm_power_q15(
+		      q15_t * pSrc,
+		     uint32_t blockSize,
+		     q63_t * pResult);
+
+  /**
+   * @brief  Sum of the squares of the elements of a Q7 vector.
+   * @param[in]  *pSrc is input pointer
+   * @param[in]  blockSize is the number of samples to process
+   * @param[out]  *pResult is output value.
+   * @return none.
+   */
+
+  void arm_power_q7(
+		     q7_t * pSrc,
+		    uint32_t blockSize,
+		    q31_t * pResult);
+
+  /**
+   * @brief  Mean value of a Q7 vector.
+   * @param[in]  *pSrc is input pointer
+   * @param[in]  blockSize is the number of samples to process
+   * @param[out]  *pResult is output value.
+   * @return none.
+   */
+
+  void arm_mean_q7(
+		    q7_t * pSrc,
+		   uint32_t blockSize,
+		   q7_t * pResult);
+
+  /**
+   * @brief  Mean value of a Q15 vector.
+   * @param[in]  *pSrc is input pointer
+   * @param[in]  blockSize is the number of samples to process
+   * @param[out]  *pResult is output value.
+   * @return none.
+   */
+  void arm_mean_q15(
+		     q15_t * pSrc,
+		    uint32_t blockSize,
+		    q15_t * pResult);
+
+  /**
+   * @brief  Mean value of a Q31 vector.
+   * @param[in]  *pSrc is input pointer
+   * @param[in]  blockSize is the number of samples to process
+   * @param[out]  *pResult is output value.
+   * @return none.
+   */
+  void arm_mean_q31(
+		     q31_t * pSrc,
+		    uint32_t blockSize,
+		    q31_t * pResult);
+
+  /**
+   * @brief  Mean value of a floating-point vector.
+   * @param[in]  *pSrc is input pointer
+   * @param[in]  blockSize is the number of samples to process
+   * @param[out]  *pResult is output value.
+   * @return none.
+   */
+  void arm_mean_f32(
+		     float32_t * pSrc,
+		    uint32_t blockSize,
+		    float32_t * pResult);
+
+  /**
+   * @brief  Variance of the elements of a floating-point vector.
+   * @param[in]  *pSrc is input pointer
+   * @param[in]  blockSize is the number of samples to process
+   * @param[out]  *pResult is output value.
+   * @return none.
+   */
+
+  void arm_var_f32(
+		    float32_t * pSrc,
+		   uint32_t blockSize,
+		   float32_t * pResult);
+
+  /**
+   * @brief  Variance of the elements of a Q31 vector.
+   * @param[in]  *pSrc is input pointer
+   * @param[in]  blockSize is the number of samples to process
+   * @param[out]  *pResult is output value.
+   * @return none.
+   */
+
+  void arm_var_q31(
+		    q31_t * pSrc,
+		   uint32_t blockSize,
+		   q63_t * pResult);
+
+  /**
+   * @brief  Variance of the elements of a Q15 vector.
+   * @param[in]  *pSrc is input pointer
+   * @param[in]  blockSize is the number of samples to process
+   * @param[out]  *pResult is output value.
+   * @return none.
+   */
+
+  void arm_var_q15(
+		    q15_t * pSrc,
+		   uint32_t blockSize,
+		   q31_t * pResult);
+
+  /**
+   * @brief  Root Mean Square of the elements of a floating-point vector.
+   * @param[in]  *pSrc is input pointer
+   * @param[in]  blockSize is the number of samples to process
+   * @param[out]  *pResult is output value.
+   * @return none.
+   */
+
+  void arm_rms_f32(
+		    float32_t * pSrc,
+		   uint32_t blockSize,
+		   float32_t * pResult);
+
+  /**
+   * @brief  Root Mean Square of the elements of a Q31 vector.
+   * @param[in]  *pSrc is input pointer
+   * @param[in]  blockSize is the number of samples to process
+   * @param[out]  *pResult is output value.
+   * @return none.
+   */
+
+  void arm_rms_q31(
+		    q31_t * pSrc,
+		   uint32_t blockSize,
+		   q31_t * pResult);
+
+  /**
+   * @brief  Root Mean Square of the elements of a Q15 vector.
+   * @param[in]  *pSrc is input pointer
+   * @param[in]  blockSize is the number of samples to process
+   * @param[out]  *pResult is output value.
+   * @return none.
+   */
+
+  void arm_rms_q15(
+		    q15_t * pSrc,
+		   uint32_t blockSize,
+		   q15_t * pResult);
+
+  /**
+   * @brief  Standard deviation of the elements of a floating-point vector.
+   * @param[in]  *pSrc is input pointer
+   * @param[in]  blockSize is the number of samples to process
+   * @param[out]  *pResult is output value.
+   * @return none.
+   */
+
+  void arm_std_f32(
+		    float32_t * pSrc,
+		   uint32_t blockSize,
+		   float32_t * pResult);
+
+  /**
+   * @brief  Standard deviation of the elements of a Q31 vector.
+   * @param[in]  *pSrc is input pointer
+   * @param[in]  blockSize is the number of samples to process
+   * @param[out]  *pResult is output value.
+   * @return none.
+   */
+
+  void arm_std_q31(
+		    q31_t * pSrc,
+		   uint32_t blockSize,
+		   q31_t * pResult);
+
+  /**
+   * @brief  Standard deviation of the elements of a Q15 vector.
+   * @param[in]  *pSrc is input pointer
+   * @param[in]  blockSize is the number of samples to process
+   * @param[out]  *pResult is output value.
+   * @return none.
+   */
+
+  void arm_std_q15(
+		    q15_t * pSrc,
+		   uint32_t blockSize,
+		   q15_t * pResult);
+
+  /**
+   * @brief  Floating-point complex magnitude
+   * @param[in]  *pSrc points to the complex input vector
+   * @param[out]  *pDst points to the real output vector
+   * @param[in]  numSamples number of complex samples in the input vector
+   * @return none.
+   */
+
+  void arm_cmplx_mag_f32(
+			  float32_t * pSrc,
+			 float32_t * pDst,
+			 uint32_t numSamples);
+
+  /**
+   * @brief  Q31 complex magnitude
+   * @param[in]  *pSrc points to the complex input vector
+   * @param[out]  *pDst points to the real output vector
+   * @param[in]  numSamples number of complex samples in the input vector
+   * @return none.
+   */
+
+  void arm_cmplx_mag_q31(
+			  q31_t * pSrc,
+			 q31_t * pDst,
+			 uint32_t numSamples);
+
+  /**
+   * @brief  Q15 complex magnitude
+   * @param[in]  *pSrc points to the complex input vector
+   * @param[out]  *pDst points to the real output vector
+   * @param[in]  numSamples number of complex samples in the input vector
+   * @return none.
+   */
+
+  void arm_cmplx_mag_q15(
+			  q15_t * pSrc,
+			 q15_t * pDst,
+			 uint32_t numSamples);
+
+  /**
+   * @brief  Q15 complex dot product
+   * @param[in]  *pSrcA points to the first input vector
+   * @param[in]  *pSrcB points to the second input vector
+   * @param[in]  numSamples number of complex samples in each vector
+   * @param[out]  *realResult real part of the result returned here
+   * @param[out]  *imagResult imaginary part of the result returned here
+   * @return none.
+   */
+
+  void arm_cmplx_dot_prod_q15(
+			       q15_t * pSrcA,
+			       q15_t * pSrcB,
+			      uint32_t numSamples,
+			      q31_t * realResult,
+			      q31_t * imagResult);
+
+  /**
+   * @brief  Q31 complex dot product
+   * @param[in]  *pSrcA points to the first input vector
+   * @param[in]  *pSrcB points to the second input vector
+   * @param[in]  numSamples number of complex samples in each vector
+   * @param[out]  *realResult real part of the result returned here
+   * @param[out]  *imagResult imaginary part of the result returned here
+   * @return none.
+   */
+
+  void arm_cmplx_dot_prod_q31(
+			       q31_t * pSrcA,
+			       q31_t * pSrcB,
+			      uint32_t numSamples,
+			      q63_t * realResult,
+			      q63_t * imagResult);
+
+  /**
+   * @brief  Floating-point complex dot product
+   * @param[in]  *pSrcA points to the first input vector
+   * @param[in]  *pSrcB points to the second input vector
+   * @param[in]  numSamples number of complex samples in each vector
+   * @param[out]  *realResult real part of the result returned here
+   * @param[out]  *imagResult imaginary part of the result returned here
+   * @return none.
+   */
+
+  void arm_cmplx_dot_prod_f32(
+			       float32_t * pSrcA,
+			       float32_t * pSrcB,
+			      uint32_t numSamples,
+			      float32_t * realResult,
+			      float32_t * imagResult);
+
+  /**
+   * @brief  Q15 complex-by-real multiplication
+   * @param[in]  *pSrcCmplx points to the complex input vector
+   * @param[in]  *pSrcReal points to the real input vector
+   * @param[out]  *pCmplxDst points to the complex output vector
+   * @param[in]  numSamples number of samples in each vector
+   * @return none.
+   */
+
+  void arm_cmplx_mult_real_q15(
+			        q15_t * pSrcCmplx,
+			        q15_t * pSrcReal,
+			       q15_t * pCmplxDst,
+			       uint32_t numSamples);
+
+  /**
+   * @brief  Q31 complex-by-real multiplication
+   * @param[in]  *pSrcCmplx points to the complex input vector
+   * @param[in]  *pSrcReal points to the real input vector
+   * @param[out]  *pCmplxDst points to the complex output vector
+   * @param[in]  numSamples number of samples in each vector
+   * @return none.
+   */
+
+  void arm_cmplx_mult_real_q31(
+			        q31_t * pSrcCmplx,
+			        q31_t * pSrcReal,
+			       q31_t * pCmplxDst,
+			       uint32_t numSamples);
+
+  /**
+   * @brief  Floating-point complex-by-real multiplication
+   * @param[in]  *pSrcCmplx points to the complex input vector
+   * @param[in]  *pSrcReal points to the real input vector
+   * @param[out]  *pCmplxDst points to the complex output vector
+   * @param[in]  numSamples number of samples in each vector
+   * @return none.
+   */
+
+  void arm_cmplx_mult_real_f32(
+			        float32_t * pSrcCmplx,
+			        float32_t * pSrcReal,
+			       float32_t * pCmplxDst,
+			       uint32_t numSamples);
+
+  /**
+   * @brief  Minimum value of a Q7 vector.
+   * @param[in]  *pSrc is input pointer
+   * @param[in]  blockSize is the number of samples to process
+   * @param[out]  *result is output pointer
+   * @param[out]  *index is the array index of the minimum value in the input buffer.
+   * @return none.
+   */
+
+  void arm_min_q7(
+		   q7_t * pSrc,
+		  uint32_t blockSize,
+		  q7_t * result,
+		  uint32_t * index);
+
+  /**
+   * @brief  Minimum value of a Q15 vector.
+   * @param[in]  *pSrc is input pointer
+   * @param[in]  blockSize is the number of samples to process
+   * @param[out]  *pResult is output pointer
+   * @param[out]  *pIndex is the array index of the minimum value in the input buffer.
+   * @return none.
+   */
+
+  void arm_min_q15(
+		    q15_t * pSrc,
+		   uint32_t blockSize,
+		   q15_t * pResult,
+		   uint32_t * pIndex);
+
+  /**
+   * @brief  Minimum value of a Q31 vector.
+   * @param[in]  *pSrc is input pointer
+   * @param[in]  blockSize is the number of samples to process
+   * @param[out]  *pResult is output pointer
+   * @param[out]  *pIndex is the array index of the minimum value in the input buffer.
+   * @return none.
+   */
+  void arm_min_q31(
+		    q31_t * pSrc,
+		   uint32_t blockSize,
+		   q31_t * pResult,
+		   uint32_t * pIndex);
+
+  /**
+   * @brief  Minimum value of a floating-point vector.
+   * @param[in]  *pSrc is input pointer
+   * @param[in]  blockSize is the number of samples to process
+   * @param[out]  *pResult is output pointer
+   * @param[out]  *pIndex is the array index of the minimum value in the input buffer.
+   * @return none.
+   */
+
+  void arm_min_f32(
+		    float32_t * pSrc,
+		   uint32_t blockSize,
+		   float32_t * pResult,
+		   uint32_t * pIndex);
+
+/**
+ * @brief Maximum value of a Q7 vector.
+ * @param[in]       *pSrc points to the input buffer
+ * @param[in]       blockSize length of the input vector
+ * @param[out]      *pResult maximum value returned here
+ * @param[out]      *pIndex index of maximum value returned here
+ * @return none.
+ */
+
+  void arm_max_q7(
+		   q7_t * pSrc,
+		  uint32_t blockSize,
+		  q7_t * pResult,
+		  uint32_t * pIndex);
+
+/**
+ * @brief Maximum value of a Q15 vector.
+ * @param[in]       *pSrc points to the input buffer
+ * @param[in]       blockSize length of the input vector
+ * @param[out]      *pResult maximum value returned here
+ * @param[out]      *pIndex index of maximum value returned here
+ * @return none.
+ */
+
+  void arm_max_q15(
+		    q15_t * pSrc,
+		   uint32_t blockSize,
+		   q15_t * pResult,
+		   uint32_t * pIndex);
+
+/**
+ * @brief Maximum value of a Q31 vector.
+ * @param[in]       *pSrc points to the input buffer
+ * @param[in]       blockSize length of the input vector
+ * @param[out]      *pResult maximum value returned here
+ * @param[out]      *pIndex index of maximum value returned here
+ * @return none.
+ */
+
+  void arm_max_q31(
+		    q31_t * pSrc,
+		   uint32_t blockSize,
+		   q31_t * pResult,
+		   uint32_t * pIndex);
+
+/**
+ * @brief Maximum value of a floating-point vector.
+ * @param[in]       *pSrc points to the input buffer
+ * @param[in]       blockSize length of the input vector
+ * @param[out]      *pResult maximum value returned here
+ * @param[out]      *pIndex index of maximum value returned here
+ * @return none.
+ */
+
+  void arm_max_f32(
+		    float32_t * pSrc,
+		   uint32_t blockSize,
+		   float32_t * pResult,
+		   uint32_t * pIndex);
+
+  /**
+   * @brief  Q15 complex-by-complex multiplication
+   * @param[in]  *pSrcA points to the first input vector
+   * @param[in]  *pSrcB points to the second input vector
+   * @param[out]  *pDst  points to the output vector
+   * @param[in]  numSamples number of complex samples in each vector
+   * @return none.
+   */
+
+  void arm_cmplx_mult_cmplx_q15(
+			        q15_t * pSrcA,
+			        q15_t * pSrcB,
+			       q15_t * pDst,
+			       uint32_t numSamples);
+
+  /**
+   * @brief  Q31 complex-by-complex multiplication
+   * @param[in]  *pSrcA points to the first input vector
+   * @param[in]  *pSrcB points to the second input vector
+   * @param[out]  *pDst  points to the output vector
+   * @param[in]  numSamples number of complex samples in each vector
+   * @return none.
+   */
+
+  void arm_cmplx_mult_cmplx_q31(
+			        q31_t * pSrcA,
+			        q31_t * pSrcB,
+			       q31_t * pDst,
+			       uint32_t numSamples);
+
+  /**
+   * @brief  Floating-point complex-by-complex multiplication
+   * @param[in]  *pSrcA points to the first input vector
+   * @param[in]  *pSrcB points to the second input vector
+   * @param[out]  *pDst  points to the output vector
+   * @param[in]  numSamples number of complex samples in each vector
+   * @return none.
+   */
+
+  void arm_cmplx_mult_cmplx_f32(
+			        float32_t * pSrcA,
+			        float32_t * pSrcB,
+			       float32_t * pDst,
+			       uint32_t numSamples);
+
+  /**
+   * @brief Converts the elements of the floating-point vector to Q31 vector. 
+   * @param[in]       *pSrc points to the floating-point input vector 
+   * @param[out]      *pDst points to the Q31 output vector
+   * @param[in]       blockSize length of the input vector 
+   * @return none. 
+   */
+  void arm_float_to_q31(
+			       float32_t * pSrc,
+			      q31_t * pDst,
+			      uint32_t blockSize);
+
+  /**
+   * @brief Converts the elements of the floating-point vector to Q15 vector. 
+   * @param[in]       *pSrc points to the floating-point input vector 
+   * @param[out]      *pDst points to the Q15 output vector
+   * @param[in]       blockSize length of the input vector 
+   * @return          none
+   */
+  void arm_float_to_q15(
+			       float32_t * pSrc,
+			      q15_t * pDst,
+			      uint32_t blockSize);
+
+  /**
+   * @brief Converts the elements of the floating-point vector to Q7 vector. 
+   * @param[in]       *pSrc points to the floating-point input vector 
+   * @param[out]      *pDst points to the Q7 output vector
+   * @param[in]       blockSize length of the input vector 
+   * @return          none
+   */
+  void arm_float_to_q7(
+			      float32_t * pSrc,
+			     q7_t * pDst,
+			     uint32_t blockSize);
+
+
+  /**
+   * @brief  Converts the elements of the Q31 vector to Q15 vector.
+   * @param[in]   *pSrc      points to the Q31 input vector
+   * @param[out]  *pDst      points to the Q15 output vector
+   * @param[in]   blockSize  number of samples to process
+   * @return none.
+   */
+  void arm_q31_to_q15(
+    q31_t * pSrc,
+    q15_t * pDst,
+    uint32_t blockSize);
+
+  /**
+   * @brief  Converts the elements of the Q31 vector to Q7 vector.
+   * @param[in]   *pSrc      points to the Q31 input vector
+   * @param[out]  *pDst      points to the Q7 output vector
+   * @param[in]   blockSize  number of samples to process
+   * @return none.
+   */
+  void arm_q31_to_q7(
+    q31_t * pSrc,
+    q7_t * pDst,
+    uint32_t blockSize);
+
+  /**
+   * @brief  Converts the elements of the Q15 vector to floating-point vector.
+   * @param[in]   *pSrc      points to the Q15 input vector
+   * @param[out]  *pDst      points to the floating-point output vector
+   * @param[in]   blockSize  number of samples to process
+   * @return none.
+   */
+  void arm_q15_to_float(
+    q15_t * pSrc,
+    float32_t * pDst,
+    uint32_t blockSize);
+
+
+  /**
+   * @brief  Converts the elements of the Q15 vector to Q31 vector.
+   * @param[in]   *pSrc      points to the Q15 input vector
+   * @param[out]  *pDst      points to the Q31 output vector
+   * @param[in]   blockSize  number of samples to process
+   * @return none.
+   */
+  void arm_q15_to_q31(
+    q15_t * pSrc,
+    q31_t * pDst,
+    uint32_t blockSize);
+
+
+  /**
+   * @brief  Converts the elements of the Q15 vector to Q7 vector.
+   * @param[in]   *pSrc      points to the Q15 input vector
+   * @param[out]  *pDst      points to the Q7 output vector
+   * @param[in]   blockSize  number of samples to process
+   * @return none.
+   */
+  void arm_q15_to_q7(
+    q15_t * pSrc,
+    q7_t * pDst,
+    uint32_t blockSize);
+
+
+  /**
+   * @ingroup groupInterpolation
+   */
+
+  /**
+   * @defgroup BilinearInterpolate Bilinear Interpolation
+   *
+   * Bilinear interpolation is an extension of linear interpolation applied to a two dimensional grid.
+   * The underlying function <code>f(x, y)</code> is sampled on a regular grid and the interpolation process
+   * determines values between the grid points.
+   * Bilinear interpolation is equivalent to two step linear interpolation, first in the x-dimension and then in the y-dimension.
+   * Bilinear interpolation is often used in image processing to rescale images.
+   * The CMSIS DSP library provides bilinear interpolation functions for Q7, Q15, Q31, and floating-point data types.
+   *
+   * <b>Algorithm</b>
+   * \par
+   * The instance structure used by the bilinear interpolation functions describes a two dimensional data table.
+   * For floating-point, the instance structure is defined as:
+   * <pre>
+   *   typedef struct
+   *   {
+   *     uint16_t numRows;
+   *     uint16_t numCols;
+   *     float32_t *pData;
+   * } arm_bilinear_interp_instance_f32;
+   * </pre>
+   *
+   * \par
+   * where <code>numRows</code> specifies the number of rows in the table;
+   * <code>numCols</code> specifies the number of columns in the table;
+   * and <code>pData</code> points to an array of size <code>numRows*numCols</code> values.
+   * The data table <code>pData</code> is organized in row order and the supplied data values fall on integer indexes.
+   * That is, table element (x,y) is located at <code>pData[x + y*numCols]</code> where x and y are integers.
+   *
+   * \par
+   * Let <code>(x, y)</code> specify the desired interpolation point.  Then define:
+   * <pre>
+   *     XF = floor(x)
+   *     YF = floor(y)
+   * </pre>
+   * \par
+   * The interpolated output point is computed as:
+   * <pre>
+   *  f(x, y) = f(XF, YF) * (1-(x-XF)) * (1-(y-YF))
+   *           + f(XF+1, YF) * (x-XF)*(1-(y-YF))
+   *           + f(XF, YF+1) * (1-(x-XF))*(y-YF)
+   *           + f(XF+1, YF+1) * (x-XF)*(y-YF)
+   * </pre>
+   * Note that the coordinates (x, y) contain integer and fractional components.  
+   * The integer components specify which portion of the table to use while the
+   * fractional components control the interpolation process.
+   *
+   * \par
+   * If (x,y) is outside of the table boundary, bilinear interpolation returns zero output. 
+   */
+
+  /**
+   * @addtogroup BilinearInterpolate
+   * @{
+   */
+
+  /**
+  *
+  * @brief  Floating-point bilinear interpolation.
+  * @param[in,out] *S points to an instance of the interpolation structure.
+  * @param[in] X interpolation coordinate (selects the column, the fastest-varying table index).
+  * @param[in] Y interpolation coordinate (selects the row).
+  * @return out interpolated value, or 0 when the four surrounding table
+  *         points are not all inside the table.
+  *
+  * The table is addressed as <code>pData[x + y*numCols]</code>.  The four points
+  * (XF,YF), (XF+1,YF), (XF,YF+1) and (XF+1,YF+1) are read, so XF must not exceed
+  * <code>numCols-2</code> and YF must not exceed <code>numRows-2</code>.
+  */
+
+
+  static __INLINE float32_t arm_bilinear_interp_f32(
+    const arm_bilinear_interp_instance_f32 * S,
+    float32_t X,
+    float32_t Y)
+  {
+    float32_t out;
+    float32_t f00, f01, f10, f11;
+    float32_t *pData = S->pData;
+    int32_t xIndex, yIndex, index;
+    float32_t xdiff, ydiff;
+    float32_t b1, b2, b3, b4;
+
+    /* Integer parts of the coordinates (conversion truncates toward zero) */
+    xIndex = (int32_t) X;
+    yIndex = (int32_t) Y;
+
+    /* Care taken for table outside boundary */
+    /* Returns zero output when values are outside table boundary. */
+    /* The upper limits are numCols-2 / numRows-2 because the neighbours at */
+    /* xIndex+1 and yIndex+1 are read below. */
+    if(xIndex < 0 || xIndex > (S->numCols - 2) || yIndex < 0 || yIndex > (S->numRows - 2))
+    {
+      return (0);
+    }
+
+    /* Offset of f(XF, YF); one table row is numCols elements long */
+    index = xIndex + yIndex * S->numCols;
+
+    /* Read the two nearest points in row YF */
+    f00 = pData[index];
+    f01 = pData[index + 1];
+
+    /* Offset of f(XF, YF+1) */
+    index = xIndex + (yIndex + 1) * S->numCols;
+
+    /* Read the two nearest points in row YF+1 */
+    f10 = pData[index];
+    f11 = pData[index + 1];
+
+    /* Calculation of intermediate values */
+    b1 = f00;
+    b2 = f01 - f00;
+    b3 = f10 - f00;
+    b4 = f00 - f01 - f10 + f11;
+
+    /* Calculation of fractional part in X */
+    xdiff = X - xIndex;
+
+    /* Calculation of fractional part in Y */
+    ydiff = Y - yIndex;
+
+    /* Calculation of bi-linear interpolated output */
+    out = b1 + b2 * xdiff + b3 * ydiff + b4 * xdiff * ydiff;
+
+    /* return to application */
+    return (out);
+
+  }
+
+  /**
+  *
+  * @brief  Q31 bilinear interpolation.
+  * @param[in,out] *S points to an instance of the interpolation structure.
+  * @param[in] X interpolation coordinate in 12.20 format (selects the column).
+  * @param[in] Y interpolation coordinate in 12.20 format (selects the row).
+  * @return out interpolated value, or 0 when a coordinate is negative or the
+  *         four surrounding table points are not all inside the table.
+  *
+  * The table is addressed as <code>pData[x + y*numCols]</code>.
+  */
+
+  static __INLINE q31_t arm_bilinear_interp_q31(
+    arm_bilinear_interp_instance_q31 * S,
+    q31_t X,
+    q31_t Y)
+  {
+    q31_t out;                                   /* Temporary output */
+    q31_t acc = 0;                               /* output */
+    q31_t xfract, yfract;                        /* X, Y fractional parts */
+    q31_t x1, x2, y1, y2;                        /* Nearest output values */
+    int32_t xI, yI;                              /* Integer parts of X (column) and Y (row) */
+    q31_t *pYData = S->pData;                    /* pointer to output table values */
+    uint32_t nCols = S->numCols;                 /* length of one table row */
+    uint32_t base;                               /* offset of table element (xI, yI) */
+
+    /* Negative coordinates lie outside the table.  Rejecting them first also */
+    /* keeps the shifts below on non-negative values only. */
+    if(X < 0 || Y < 0)
+    {
+      return (0);
+    }
+
+    /* Input is in 12.20 format: the upper 12 bits are the table index */
+    xI = X >> 20;
+    yI = Y >> 20;
+
+    /* Care taken for table outside boundary */
+    /* Returns zero output when values are outside table boundary. */
+    /* The limits are numCols-2 / numRows-2 because the neighbours at xI+1 */
+    /* and yI+1 are read below. */
+    if(xI > (S->numCols - 2) || yI > (S->numRows - 2))
+    {
+      return (0);
+    }
+
+    /* 20 bits for the fractional part */
+    /* shift left by 11 to keep 1.31 format */
+    xfract = (X & 0x000FFFFF) << 11u;
+    yfract = (Y & 0x000FFFFF) << 11u;
+
+    /* Read the four nearest table values */
+    base = (uint32_t) xI + nCols * (uint32_t) yI;
+    x1 = pYData[base];                           /* f(XF,   YF)   */
+    x2 = pYData[base + 1u];                      /* f(XF+1, YF)   */
+    y1 = pYData[base + nCols];                   /* f(XF,   YF+1) */
+    y2 = pYData[base + nCols + 1u];              /* f(XF+1, YF+1) */
+
+    /* Calculation of x1 * (1-xfract ) * (1-yfract) and acc is in 3.29(q29) format */
+    out = ((q31_t) (((q63_t) x1 * (0x7FFFFFFF - xfract)) >> 32));
+    acc = ((q31_t) (((q63_t) out * (0x7FFFFFFF - yfract)) >> 32));
+
+    /* x2 * (xfract) * (1-yfract)  in 3.29(q29) and adding to acc */
+    out = ((q31_t) ((q63_t) x2 * (0x7FFFFFFF - yfract) >> 32));
+    acc += ((q31_t) ((q63_t) out * (xfract) >> 32));
+
+    /* y1 * (1 - xfract) * (yfract)  in 3.29(q29) and adding to acc */
+    out = ((q31_t) ((q63_t) y1 * (0x7FFFFFFF - xfract) >> 32));
+    acc += ((q31_t) ((q63_t) out * (yfract) >> 32));
+
+    /* y2 * (xfract) * (yfract)  in 3.29(q29) and adding to acc */
+    out = ((q31_t) ((q63_t) y2 * (xfract) >> 32));
+    acc += ((q31_t) ((q63_t) out * (yfract) >> 32));
+
+    /* Convert acc to 1.31(q31) format.  The shift is done on the unsigned */
+    /* representation because left-shifting a negative signed value is undefined. */
+    return ((q31_t) ((uint32_t) acc << 2u));
+
+  }
+
+  /**
+  * @brief  Q15 bilinear interpolation.
+  * @param[in,out] *S points to an instance of the interpolation structure.
+  * @param[in] X interpolation coordinate in 12.20 format (selects the column).
+  * @param[in] Y interpolation coordinate in 12.20 format (selects the row).
+  * @return out interpolated value, or 0 when a coordinate is negative or the
+  *         four surrounding table points are not all inside the table.
+  *
+  * The table is addressed as <code>pData[x + y*numCols]</code>.
+  */
+
+  static __INLINE q15_t arm_bilinear_interp_q15(
+    arm_bilinear_interp_instance_q15 * S,
+    q31_t X,
+    q31_t Y)
+  {
+    q63_t acc = 0;                               /* output */
+    q31_t out;                                   /* Temporary output */
+    q15_t x1, x2, y1, y2;                        /* Nearest output values */
+    q31_t xfract, yfract;                        /* X, Y fractional parts */
+    int32_t xI, yI;                              /* Integer parts of X (column) and Y (row) */
+    q15_t *pYData = S->pData;                    /* pointer to output table values */
+    uint32_t nCols = S->numCols;                 /* length of one table row */
+    uint32_t base;                               /* offset of table element (xI, yI) */
+
+    /* Negative coordinates lie outside the table.  Rejecting them first also */
+    /* keeps the shifts below on non-negative values only. */
+    if(X < 0 || Y < 0)
+    {
+      return (0);
+    }
+
+    /* Input is in 12.20 format: the upper 12 bits are the table index */
+    xI = X >> 20;
+    yI = Y >> 20;
+
+    /* Care taken for table outside boundary */
+    /* Returns zero output when values are outside table boundary. */
+    /* The limits are numCols-2 / numRows-2 because the neighbours at xI+1 */
+    /* and yI+1 are read below. */
+    if(xI > (S->numCols - 2) || yI > (S->numRows - 2))
+    {
+      return (0);
+    }
+
+    /* 20 bits for the fractional part, kept with 20 fractional bits */
+    xfract = (X & 0x000FFFFF);
+    yfract = (Y & 0x000FFFFF);
+
+    /* Read the four nearest table values */
+    base = (uint32_t) xI + nCols * (uint32_t) yI;
+    x1 = pYData[base];                           /* f(XF,   YF)   */
+    x2 = pYData[base + 1u];                      /* f(XF+1, YF)   */
+    y1 = pYData[base + nCols];                   /* f(XF,   YF+1) */
+    y2 = pYData[base + nCols + 1u];              /* f(XF+1, YF+1) */
+
+    /* Each term: a Q15 sample times a 20-fractional-bit weight gives 35 */
+    /* fractional bits; >> 4 leaves 31; the second weight brings acc to 51. */
+
+    /* x1 * (1-xfract) * (1-yfract) */
+    out = (q31_t) (((q63_t) x1 * (0xFFFFF - xfract)) >> 4u);
+    acc = ((q63_t) out * (0xFFFFF - yfract));
+
+    /* x2 * (xfract) * (1-yfract) and adding to acc */
+    out = (q31_t) (((q63_t) x2 * (0xFFFFF - yfract)) >> 4u);
+    acc += ((q63_t) out * (xfract));
+
+    /* y1 * (1 - xfract) * (yfract) and adding to acc */
+    out = (q31_t) (((q63_t) y1 * (0xFFFFF - xfract)) >> 4u);
+    acc += ((q63_t) out * (yfract));
+
+    /* y2 * (xfract) * (yfract) and adding to acc */
+    out = (q31_t) (((q63_t) y2 * (xfract)) >> 4u);
+    acc += ((q63_t) out * (yfract));
+
+    /* acc has 51 fractional bits; shift down by 36 to return to 1.15 format */
+    return ((q15_t) (acc >> 36));
+
+  }
+
+  /**
+  * @brief  Q7 bilinear interpolation.
+  * @param[in,out] *S points to an instance of the interpolation structure.
+  * @param[in] X interpolation coordinate in 12.20 format (selects the column).
+  * @param[in] Y interpolation coordinate in 12.20 format (selects the row).
+  * @return out interpolated value, or 0 when a coordinate is negative or the
+  *         four surrounding table points are not all inside the table.
+  *
+  * The table is addressed as <code>pData[x + y*numCols]</code>.
+  */
+
+  static __INLINE q7_t arm_bilinear_interp_q7(
+    arm_bilinear_interp_instance_q7 * S,
+    q31_t X,
+    q31_t Y)
+  {
+    q63_t acc = 0;                               /* output */
+    q31_t out;                                   /* Temporary output */
+    q31_t xfract, yfract;                        /* X, Y fractional parts */
+    q7_t x1, x2, y1, y2;                         /* Nearest output values */
+    int32_t xI, yI;                              /* Integer parts of X (column) and Y (row) */
+    q7_t *pYData = S->pData;                     /* pointer to output table values */
+    uint32_t nCols = S->numCols;                 /* length of one table row */
+    uint32_t base;                               /* offset of table element (xI, yI) */
+
+    /* Negative coordinates lie outside the table.  Rejecting them first also */
+    /* keeps the shifts below on non-negative values only. */
+    if(X < 0 || Y < 0)
+    {
+      return (0);
+    }
+
+    /* Input is in 12.20 format: the upper 12 bits are the table index */
+    xI = X >> 20;
+    yI = Y >> 20;
+
+    /* Care taken for table outside boundary */
+    /* Returns zero output when values are outside table boundary. */
+    /* The limits are numCols-2 / numRows-2 because the neighbours at xI+1 */
+    /* and yI+1 are read below. */
+    if(xI > (S->numCols - 2) || yI > (S->numRows - 2))
+    {
+      return (0);
+    }
+
+    /* 20 bits for the fractional part, kept with 20 fractional bits */
+    xfract = (X & 0x000FFFFF);
+    yfract = (Y & 0x000FFFFF);
+
+    /* Read the four nearest table values */
+    base = (uint32_t) xI + nCols * (uint32_t) yI;
+    x1 = pYData[base];                           /* f(XF,   YF)   */
+    x2 = pYData[base + 1u];                      /* f(XF+1, YF)   */
+    y1 = pYData[base + nCols];                   /* f(XF,   YF+1) */
+    y2 = pYData[base + nCols + 1u];              /* f(XF+1, YF+1) */
+
+    /* Each term: a Q7 sample times two 20-fractional-bit weights, so acc */
+    /* carries 47 fractional bits. */
+
+    /* x1 * (1-xfract) * (1-yfract) */
+    out = ((x1 * (0xFFFFF - xfract)));
+    acc = (((q63_t) out * (0xFFFFF - yfract)));
+
+    /* x2 * (xfract) * (1-yfract) and adding to acc */
+    out = ((x2 * (0xFFFFF - yfract)));
+    acc += (((q63_t) out * (xfract)));
+
+    /* y1 * (1 - xfract) * (yfract) and adding to acc */
+    out = ((y1 * (0xFFFFF - xfract)));
+    acc += (((q63_t) out * (yfract)));
+
+    /* y2 * (xfract) * (yfract) and adding to acc */
+    out = ((y2 * (yfract)));
+    acc += (((q63_t) out * (xfract)));
+
+    /* acc has 47 fractional bits; shift down by 40 to return to 1.7 format */
+    return ((q7_t) (acc >> 40));
+
+  }
+
+  /**
+   * @} end of BilinearInterpolate group
+   */
+
+
+
+
+
+
+#ifdef	__cplusplus
+}
+#endif
+
+
+#endif /* _ARM_MATH_H */
+
+
+/**
+ *
+ * End of file.
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Common/Include/math_helper.h	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,53 @@
+/* ----------------------------------------------------------------------
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library 
+*
+* Title:	    math_helper.h
+* 
+*
+* Description:	Prototypes of all helper functions required.  
+*
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done 
+* -------------------------------------------------------------------- */
+
+
+#include "arm_math.h"
+
+#ifndef MATH_HELPER_H
+#define MATH_HELPER_H
+
+/* Signal-to-noise ratio (dB) of a test buffer against a reference buffer */
+float arm_snr_f32(float *pRef, float *pTest,  uint32_t buffSize);
+
+/* Float to fixed-point conversions for the non-standard Q formats */
+void arm_float_to_q12_20(float *pIn, q31_t * pOut, uint32_t numSamples);
+
+/* Right-shift every sample by guard_bits to leave headroom */
+void arm_provide_guard_bits_q7(q7_t *input_buf, uint32_t blockSize, uint32_t guard_bits);
+void arm_provide_guard_bits_q15(q15_t *input_buf, uint32_t blockSize, uint32_t guard_bits);
+void arm_provide_guard_bits_q31(q31_t *input_buf, uint32_t blockSize, uint32_t guard_bits);
+
+void arm_float_to_q14(float *pIn, q15_t *pOut, uint32_t numSamples);
+void arm_float_to_q29(float *pIn, q31_t *pOut, uint32_t numSamples);
+void arm_float_to_q28(float *pIn, q31_t *pOut, uint32_t numSamples);
+void arm_float_to_q30(float *pIn, q31_t *pOut, uint32_t numSamples);
+
+/* Clip every sample to the range [-1, +1] */
+void arm_clip_f32(float *pIn, uint32_t numSamples);
+
+/* Guard-bit bookkeeping */
+uint32_t arm_calc_guard_bits(uint32_t num_adds);
+void arm_apply_guard_bits (float32_t * pIn, uint32_t numSamples, uint32_t guard_bits);
+
+/* Largest absolute element-wise difference between two fixed-point buffers */
+uint32_t arm_compare_fixed_q15(q15_t *pIn, q15_t * pOut, uint32_t numSamples);
+uint32_t arm_compare_fixed_q31(q31_t *pIn, q31_t *pOut, uint32_t numSamples);
+
+/* pow(2, guard_bits) as an unsigned 32-bit value */
+uint32_t arm_calc_2pow(uint32_t guard_bits);
+#endif
+

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Common/Source/math_helper.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,419 @@
+/* ----------------------------------------------------------------------
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library 
+*
+* Title:	    math_helper.c
+*
+* Description:	Definition of all helper functions required.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done 
+* -------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+*		Include standard header files  
+* -------------------------------------------------------------------- */
+#include<math.h>
+
+/* ----------------------------------------------------------------------
+*		Include project header files  
+* -------------------------------------------------------------------- */
+#include "math_helper.h"
+
+/** 
+ * @brief  Calculation of SNR
+ * @param  pRef      Pointer to the reference buffer
+ * @param  pTest     Pointer to the test buffer
+ * @param  buffSize  total number of samples
+ * @return SNR, computed as 10 * log10(signal energy / error energy)
+ * The function calculates the signal to noise ratio of the test output
+ * with respect to the reference output.  The signal energy is the sum of
+ * squares of the reference; the error energy is the sum of squares of
+ * (reference - test).
+ */
+
+float arm_snr_f32(float *pRef, float *pTest, uint32_t buffSize)
+{
+  float signalEnergy = 0.0;
+  float errorEnergy = 0.0;
+  float *pR = pRef;
+  float *pT = pTest;
+  uint32_t remaining = buffSize;
+
+  while (remaining > 0u)
+    {
+      float ref = *pR++;
+      float err = ref - *pT++;
+
+      signalEnergy += ref * ref;
+      errorEnergy += err * err;
+
+      remaining--;
+    }
+
+  return (float) (10 * log10 (signalEnergy / errorEnergy));
+}
+
+
+/** 
+ * @brief  Provide guard bits for Input buffer
+ * @param  input_buf   Pointer to the Q15 buffer, modified in place
+ * @param  blockSize   number of samples in the buffer
+ * @param  guard_bits  number of bits each sample is shifted right by
+ * @return none
+ * The function provides the guard bits for the buffer 
+ * to avoid overflow 
+ */
+
+void arm_provide_guard_bits_q15 (q15_t * input_buf, uint32_t blockSize,
+                            uint32_t guard_bits)
+{
+  q15_t *pSample = input_buf;
+  uint32_t remaining = blockSize;
+
+  while (remaining > 0u)
+    {
+      *pSample = *pSample >> guard_bits;
+      pSample++;
+      remaining--;
+    }
+}
+
+/** 
+ * @brief  Converts float to fixed in q12.20 format
+ * @param  pIn         Pointer to the floating-point input buffer
+ * @param  pOut        Pointer to the q12.20 output buffer
+ * @param  numSamples  number of samples in the buffer
+ * @return none
+ * The function converts floating point values to fixed point(q12.20) values.
+ * The scaled value is truncated toward zero.  An input of exactly 1.0 is
+ * mapped to 0x000FFFFF, and values outside the 32-bit range saturate.
+ *
+ * NOTE(review): earlier code added +/-0.5 to the already truncated integer,
+ * which had no effect on the result; truncation is kept so outputs do not change.
+ */
+
+void arm_float_to_q12_20(float *pIn, q31_t * pOut, uint32_t numSamples)
+{
+  uint32_t i;
+  float scaled;
+
+  for (i = 0; i < numSamples; i++)
+    {
+      /* 1048576.0f corresponds to pow(2, 20) */
+      scaled = pIn[i] * 1048576.0f;
+
+      if (pIn[i] == (float) 1.0)
+        {
+          pOut[i] = 0x000FFFFF;
+        }
+      else if (scaled >= 2147483648.0f)
+        {
+          /* converting an out-of-range float to an integer is undefined */
+          pOut[i] = 0x7FFFFFFF;
+        }
+      else if (scaled <= -2147483648.0f)
+        {
+          pOut[i] = (q31_t) (-2147483647 - 1);
+        }
+      else
+        {
+          pOut[i] = (q31_t) scaled;
+        }
+    }
+}
+
+/** 
+ * @brief  Compare MATLAB Reference Output and ARM Test output
+ * @param  pIn         Pointer to Ref buffer
+ * @param  pOut        Pointer to Test buffer
+ * @param  numSamples  number of samples in the buffer
+ * @return largest absolute difference between corresponding samples
+ */
+
+uint32_t arm_compare_fixed_q15(q15_t *pIn, q15_t * pOut, uint32_t numSamples)
+{
+  q15_t *pRefSample = pIn;
+  q15_t *pTestSample = pOut;
+  uint32_t remaining;
+  uint32_t worst = 0;
+
+  for (remaining = numSamples; remaining > 0u; remaining--)
+  {
+    int32_t delta = *pRefSample++ - *pTestSample++;
+    int32_t magnitude = (delta > 0) ? delta : -delta;
+
+    if((uint32_t) magnitude > worst)
+    {
+      worst = magnitude;
+    }
+  }
+
+  return(worst);
+}
+
+/** 
+ * @brief  Compare MATLAB Reference Output and ARM Test output
+ * @param  pIn         Pointer to Ref buffer
+ * @param  pOut        Pointer to Test buffer
+ * @param  numSamples  number of samples in the buffer
+ * @return largest absolute difference between corresponding samples
+ *
+ * The difference of two 32-bit values needs 33 bits, so it is formed in
+ * 64-bit arithmetic; its magnitude always fits the unsigned 32-bit result.
+ */
+
+uint32_t arm_compare_fixed_q31(q31_t *pIn, q31_t * pOut, uint32_t numSamples)
+{
+  uint32_t i; 
+  q63_t diff;
+  uint32_t diffCrnt = 0;
+  uint32_t maxDiff = 0;
+
+  for (i = 0; i < numSamples; i++)
+  {
+    /* widen before subtracting so the difference cannot overflow */
+    diff = (q63_t) pIn[i] - (q63_t) pOut[i];
+    diffCrnt = (uint32_t) ((diff > 0) ? diff : -diff);
+
+    if(diffCrnt > maxDiff)
+    {
+      maxDiff = diffCrnt;
+    }
+  }
+
+  return(maxDiff);
+}
+
+/** 
+ * @brief  Provide guard bits for Input buffer
+ * @param  input_buf   Pointer to the Q31 buffer, modified in place
+ * @param  blockSize   number of samples in the buffer
+ * @param  guard_bits  number of bits each sample is shifted right by
+ * @return none
+ * The function provides the guard bits for the buffer 
+ * to avoid overflow 
+ */
+
+void arm_provide_guard_bits_q31 (q31_t * input_buf, 
+                                 uint32_t blockSize,
+                                 uint32_t guard_bits)
+{
+  q31_t *pSample = input_buf;
+  uint32_t remaining = blockSize;
+
+  while (remaining > 0u)
+    {
+      *pSample = *pSample >> guard_bits;
+      pSample++;
+      remaining--;
+    }
+}
+
+/** 
+ * @brief  Provide guard bits for Input buffer
+ * @param  input_buf   Pointer to the Q7 buffer, modified in place
+ * @param  blockSize   number of samples in the buffer
+ * @param  guard_bits  number of bits each sample is shifted right by
+ * @return none
+ * The function provides the guard bits for the buffer 
+ * to avoid overflow 
+ */
+
+void arm_provide_guard_bits_q7 (q7_t * input_buf, 
+                                uint32_t blockSize,
+                                uint32_t guard_bits)
+{
+  q7_t *pSample = input_buf;
+  uint32_t remaining = blockSize;
+
+  while (remaining > 0u)
+    {
+      *pSample = *pSample >> guard_bits;
+      pSample++;
+      remaining--;
+    }
+}
+
+
+
+/** 
+ * @brief  Calculates number of guard bits 
+ * @param  num_adds  number of additions
+ * @return smallest j such that pow(2, j) >= num_adds (0 for num_adds of 0 or 1)
+ * The function calculates the number of guard bits  
+ * depending on the number of additions 
+ */
+
+uint32_t arm_calc_guard_bits (uint32_t num_adds)
+{
+  uint32_t j = 0;
+
+  if (num_adds <= 1u)
+    {
+      return (0);
+    }
+
+  /* (num_adds - 1) >> j is non-zero exactly while pow(2, j) < num_adds. */
+  /* Testing it this way never doubles a counter past 32 bits, so the    */
+  /* loop also terminates for num_adds above pow(2, 31).                 */
+  while ((j < 32u) && (((num_adds - 1u) >> j) != 0u))
+    {
+      j++;
+    }
+
+  return (j);
+}
+
+/** 
+ * @brief  Scales a floating-point buffer by pow(2, guard_bits)
+ * @param  pIn         Pointer to the buffer, modified in place
+ * @param  numSamples  number of samples in the buffer
+ * @param  guard_bits  exponent of the power-of-two scale factor
+ * @return none
+ */
+
+void arm_apply_guard_bits (float32_t * pIn, 
+                           uint32_t numSamples, 
+                           uint32_t guard_bits)
+{
+  uint32_t i;
+  /* The scale factor does not depend on the sample, so compute it once */
+  float32_t scale = (float32_t) arm_calc_2pow(guard_bits);
+
+  for (i = 0; i < numSamples; i++)
+    {
+      pIn[i] = pIn[i] * scale;
+    }
+}
+
+/** 
+ * @brief  Calculates pow(2, numShifts)
+ * @param  numShifts  number of shifts
+ * @return pow(2, numShifts) in 32-bit unsigned arithmetic; the result
+ *         wraps to 0 once numShifts reaches 32
+ */
+uint32_t arm_calc_2pow(uint32_t numShifts)
+{
+  /* Doubling 1 numShifts times equals a left shift by numShifts; */
+  /* from 32 doublings on, the single set bit has left the word.  */
+  if (numShifts >= 32u)
+    {
+      return(0u);
+    }
+
+  return((uint32_t) 1 << numShifts);
+}
+
+
+
+/** 
+ * @brief  Converts float to fixed q14 
+ * @param  pIn         Pointer to the floating-point input buffer
+ * @param  pOut        Pointer to the q14 output buffer
+ * @param  numSamples  number of samples in the buffer
+ * @return none
+ * The function converts floating point values to fixed point values.
+ * The scaled value is truncated toward zero and saturates at the 16-bit
+ * limits, so an input of 2.0 yields 0x7FFF.
+ *
+ * NOTE(review): earlier code added +/-0.5 to the already truncated integer,
+ * which had no effect on the result; truncation is kept so outputs do not change.
+ */
+
+void arm_float_to_q14 (float *pIn, q15_t * pOut, 
+                       uint32_t numSamples)
+{
+  uint32_t i;
+  float scaled;
+
+  for (i = 0; i < numSamples; i++)
+    {
+      /* 16384.0f corresponds to pow(2, 14) */
+      scaled = pIn[i] * 16384.0f;
+
+      /* Saturate before converting: an out-of-range float to integer */
+      /* conversion is undefined */
+      if (scaled >= 32768.0f)
+        {
+          pOut[i] = 0x7FFF;
+        }
+      else if (scaled <= -32768.0f)
+        {
+          pOut[i] = (q15_t) (-32767 - 1);
+        }
+      else
+        {
+          pOut[i] = (q15_t) scaled;
+        }
+    }
+
+}
+
+ 
+/** 
+ * @brief  Converts float to fixed q30 format
+ * @param  pIn         Pointer to the floating-point input buffer
+ * @param  pOut        Pointer to the q30 output buffer
+ * @param  numSamples  number of samples in the buffer
+ * @return none
+ * The function converts floating point values to fixed point values.
+ * The scaled value is truncated toward zero and saturates at the 32-bit
+ * limits, so an input of 2.0 yields 0x7FFFFFFF.
+ *
+ * NOTE(review): earlier code added +/-0.5 to the already truncated integer,
+ * which had no effect on the result; truncation is kept so outputs do not change.
+ */
+
+void arm_float_to_q30 (float *pIn, q31_t * pOut, 
+                       uint32_t numSamples)
+{
+  uint32_t i;
+  float scaled;
+
+  for (i = 0; i < numSamples; i++)
+    {
+      /* 1073741824.0f corresponds to pow(2, 30) */
+      scaled = pIn[i] * 1073741824.0f;
+
+      /* Saturate before converting: an out-of-range float to integer */
+      /* conversion is undefined */
+      if (scaled >= 2147483648.0f)
+        {
+          pOut[i] = 0x7FFFFFFF;
+        }
+      else if (scaled <= -2147483648.0f)
+        {
+          pOut[i] = (q31_t) (-2147483647 - 1);
+        }
+      else
+        {
+          pOut[i] = (q31_t) scaled;
+        }
+    }
+}
+
+/** 
+ * @brief  Converts float to fixed q29 format
+ * @param  pIn         Pointer to the floating-point input buffer
+ * @param  pOut        Pointer to the q29 output buffer
+ * @param  numSamples  number of samples in the buffer
+ * @return none
+ * The function converts floating point values to fixed point values.
+ * The scaled value is truncated toward zero and saturates at the 32-bit
+ * limits, so an input of 4.0 yields 0x7FFFFFFF.
+ *
+ * NOTE(review): earlier code added +/-0.5 to the already truncated integer,
+ * which had no effect on the result; truncation is kept so outputs do not change.
+ */
+
+void arm_float_to_q29 (float *pIn, q31_t * pOut, 
+                       uint32_t numSamples)
+{
+  uint32_t i;
+  float scaled;
+
+  for (i = 0; i < numSamples; i++)
+    {
+      /* 536870912.0f corresponds to pow(2, 29) */
+      scaled = pIn[i] * 536870912.0f;
+
+      /* Saturate before converting: an out-of-range float to integer */
+      /* conversion is undefined */
+      if (scaled >= 2147483648.0f)
+        {
+          pOut[i] = 0x7FFFFFFF;
+        }
+      else if (scaled <= -2147483648.0f)
+        {
+          pOut[i] = (q31_t) (-2147483647 - 1);
+        }
+      else
+        {
+          pOut[i] = (q31_t) scaled;
+        }
+    }
+}
+
+
+/** 
+ * @brief  Converts float to fixed q28 format
+ * @param  pIn         Pointer to the floating-point input buffer
+ * @param  pOut        Pointer to the q28 output buffer
+ * @param  numSamples  number of samples in the buffer
+ * @return none
+ * The function converts floating point values to fixed point values.
+ * The scaled value is truncated toward zero and saturates at the 32-bit
+ * limits, so an input of 8.0 yields 0x7FFFFFFF.
+ *
+ * NOTE(review): earlier code added +/-0.5 to the already truncated integer,
+ * which had no effect on the result; truncation is kept so outputs do not change.
+ */
+
+void arm_float_to_q28 (float *pIn, q31_t * pOut, 
+                       uint32_t numSamples)
+{
+  uint32_t i;
+  float scaled;
+
+  for (i = 0; i < numSamples; i++)
+    {
+      /* 268435456.0f corresponds to pow(2, 28) */
+      scaled = pIn[i] * 268435456.0f;
+
+      /* Saturate before converting: an out-of-range float to integer */
+      /* conversion is undefined */
+      if (scaled >= 2147483648.0f)
+        {
+          pOut[i] = 0x7FFFFFFF;
+        }
+      else if (scaled <= -2147483648.0f)
+        {
+          pOut[i] = (q31_t) (-2147483647 - 1);
+        }
+      else
+        {
+          pOut[i] = (q31_t) scaled;
+        }
+    }
+}
+
+/** 
+ * @brief  Clip the float values to +/- 1 
+ * @param  pIn 	input buffer, modified in place
+ * @param  numSamples 	number of samples in the buffer
+ * @return none
+ * Samples above 1.0 are replaced by 1.0 and samples below -1.0 by -1.0;
+ * all other samples are left untouched.
+ */
+
+void arm_clip_f32 (float *pIn, uint32_t numSamples)
+{
+  float *pSample = pIn;
+  uint32_t remaining;
+
+  for (remaining = numSamples; remaining > 0u; remaining--)
+    {
+      float value = *pSample;
+
+      if (value > 1.0f)
+        {
+          *pSample = 1.0;
+        }
+      else if (value < -1.0f)
+        {
+          *pSample = -1.0;
+        }
+
+      pSample++;
+    }
+}
+
+
+
+

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/BasicMathFunctions/arm_abs_f32.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,105 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_abs_f32.c  
+*  
+* Description:	Vector absolute value.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* ---------------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+#include <math.h> 
+ 
+/**  
+ * @ingroup groupMath  
+ */ 
+ 
+/**  
+ * @defgroup BasicAbs Vector Absolute Value  
+ *  
+ * Computes the absolute value of a vector on an element-by-element basis.  
+ *  
+ * <pre>  
+ *     pDst[n] = abs(pSrc[n]),   0 <= n < blockSize.  
+ * </pre>  
+ *  
+ * The operation can be done in-place by setting the input and output pointers to the same buffer.  
+ * There are separate functions for floating-point, Q7, Q15, and Q31 data types.  
+ */ 
+ 
+/**  
+ * @addtogroup BasicAbs  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief Floating-point vector absolute value.  
+ * @param[in]       *pSrc points to the input buffer  
+ * @param[out]      *pDst points to the output buffer  
+ * @param[in]       blockSize number of samples in each vector  
+ * @return none.  
+ */ 
+ 
+void arm_abs_f32( 
+  float32_t * pSrc, 
+  float32_t * pDst, 
+  uint32_t blockSize) 
+{ 
+  uint32_t groups = blockSize >> 2u;             /* number of complete groups of four samples */ 
+  uint32_t leftover = blockSize % 0x4u;          /* samples remaining after the groups */ 
+ 
+  /* Unrolled part: four outputs per pass, written in input order. */ 
+  for(; groups > 0u; groups--) 
+  { 
+    /* C = |A| */ 
+    pDst[0] = fabsf(pSrc[0]); 
+    pDst[1] = fabsf(pSrc[1]); 
+    pDst[2] = fabsf(pSrc[2]); 
+    pDst[3] = fabsf(pSrc[3]); 
+ 
+    pSrc += 4; 
+    pDst += 4; 
+  } 
+ 
+  /* Tail: the 0 to 3 samples that do not fill a group of four. */ 
+  for(; leftover > 0u; leftover--) 
+  { 
+    /* C = |A| */ 
+    *pDst++ = fabsf(*pSrc++); 
+  } 
+} 
+ 
+/**  
+ * @} end of BasicAbs group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/BasicMathFunctions/arm_abs_q15.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,110 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_abs_q15.c  
+*  
+* Description:	Q15 vector absolute value.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupMath  
+ */ 
+ 
+/**  
+ * @addtogroup BasicAbs  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief Q15 vector absolute value.  
+ * @param[in]       *pSrc points to the input buffer  
+ * @param[out]      *pDst points to the output buffer  
+ * @param[in]       blockSize number of samples in each vector  
+ * @return none.  
+ *  
+ * <b>Scaling and Overflow Behavior:</b>  
+ * \par  
+ * The function uses saturating arithmetic.  
+ * The Q15 value -1 (0x8000) will be saturated to the maximum allowable positive value 0x7FFF.  
+ *  
+ * \par  
+ * Samples are processed four at a time; each pair of results is packed into  
+ * one 32-bit word and stored through <code>pDst</code>.  The remaining  
+ * <code>blockSize % 4</code> samples are processed one by one.  
+ */ 
+ 
+void arm_abs_q15( 
+  q15_t * pSrc, 
+  q15_t * pDst, 
+  uint32_t blockSize) 
+{ 
+  uint32_t blkCnt;                               /* loop counter */ 
+  q15_t in1;                                     /* Input value1 */ 
+  q15_t in2;                                     /* Input value2 */ 
+ 
+ 
+  /* Number of complete groups of four samples */ 
+  blkCnt = blockSize >> 2u; 
+ 
+  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.  
+   ** a second loop below computes the remaining 1 to 3 samples. */ 
+  while(blkCnt > 0u) 
+  { 
+    /* C = |A| */ 
+    /* Read two inputs */ 
+    in1 = *pSrc++; 
+    in2 = *pSrc++; 
+    /* Positive inputs pass through unchanged; zero and negative inputs are  
+     ** negated and passed through __SSAT(.., 16).  Both results are combined  
+     ** with __PKHBT and written with one store through __SIMD32(pDst)++.  
+     ** NOTE(review): __SIMD32, __PKHBT and __SSAT are defined outside this  
+     ** file; presumably __SIMD32 views pDst as a pointer to a 32-bit word, so  
+     ** the store relies on the target accepting that access - confirm. */ 
+    *__SIMD32(pDst)++ = 
+      __PKHBT(((in1 > 0) ? in1 : __SSAT(-in1, 16)), 
+              ((in2 > 0) ? in2 : __SSAT(-in2, 16)), 16); 
+ 
+    /* Second pair of the group, handled the same way */ 
+    in1 = *pSrc++; 
+    in2 = *pSrc++; 
+    *__SIMD32(pDst)++ = 
+      __PKHBT(((in1 > 0) ? in1 : __SSAT(-in1, 16)), 
+              ((in2 > 0) ? in2 : __SSAT(-in2, 16)), 16); 
+ 
+    /* Decrement the loop counter */ 
+    blkCnt--; 
+  } 
+ 
+  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.  
+   ** No loop unrolling is used. */ 
+  blkCnt = blockSize % 0x4u; 
+ 
+  while(blkCnt > 0u) 
+  { 
+    /* C = |A| */ 
+    /* Read the input */ 
+    in1 = *pSrc++; 
+ 
+    /* Calculate absolute value of input and then store the result in the destination buffer. */ 
+    *pDst++ = (in1 > 0) ? in1 : __SSAT(-in1, 16); 
+ 
+    /* Decrement the loop counter */ 
+    blkCnt--; 
+  } 
+} 
+ 
+/**  
+ * @} end of BasicAbs group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/BasicMathFunctions/arm_abs_q31.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,103 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_abs_q31.c  
+*  
+* Description:	Q31 vector absolute value.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupMath  
+ */ 
+ 
+/**  
+ * @addtogroup BasicAbs  
+ * @{  
+ */ 
+ 
+ 
+/**  
+ * @brief Q31 vector absolute value.  
+ * @param[in]       *pSrc points to the input buffer  
+ * @param[out]      *pDst points to the output buffer  
+ * @param[in]       blockSize number of samples in each vector  
+ * @return none.  
+ *  
+ * <b>Scaling and Overflow Behavior:</b>  
+ * \par  
+ * The function uses saturating arithmetic.  
+ * The Q31 value -1 (0x80000000) will be saturated to the maximum allowable positive value 0x7FFFFFFF.  
+ */ 
+ 
+void arm_abs_q31(
+  q31_t * pSrc,
+  q31_t * pDst,
+  uint32_t blockSize)
+{
+  /* Both loop counters are derived from blockSize up front: the unrolled
+   ** loop runs blockSize / 4 times, the scalar loop blockSize % 4 times. */
+  uint32_t quads = blockSize >> 2u;              /* count of 4-sample groups */
+  uint32_t tail = blockSize % 0x4u;              /* samples left over after the groups */
+  q31_t val;                                     /* current input sample */
+
+  /* Unrolled section: four samples per pass, each computing C = |A|.
+   ** Positive inputs are copied unchanged.  The input 0x80000000 is
+   ** replaced by 0x7fffffff rather than negated; every other
+   ** non-positive input is negated. */
+  for (; quads > 0u; quads--)
+  {
+    /* sample 0 of the group */
+    val = *pSrc++;
+    *pDst++ = (val > 0) ? val : ((val != 0x80000000) ? -val : 0x7fffffff);
+
+    /* sample 1 of the group */
+    val = *pSrc++;
+    *pDst++ = (val > 0) ? val : ((val != 0x80000000) ? -val : 0x7fffffff);
+
+    /* sample 2 of the group */
+    val = *pSrc++;
+    *pDst++ = (val > 0) ? val : ((val != 0x80000000) ? -val : 0x7fffffff);
+
+    /* sample 3 of the group */
+    val = *pSrc++;
+    *pDst++ = (val > 0) ? val : ((val != 0x80000000) ? -val : 0x7fffffff);
+  }
+
+  /* Scalar section: the 0 to 3 samples that remain when blockSize is not
+   ** a multiple of 4, handled one per pass. */
+  for (; tail > 0u; tail--)
+  {
+    /* Read one input */
+    val = *pSrc++;
+
+    /* C = |A| with the same special case for 0x80000000 */
+    *pDst++ = (val > 0) ? val : ((val != 0x80000000) ? -val : 0x7fffffff);
+  }
+}
+ 
+/**  
+ * @} end of BasicAbs group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/BasicMathFunctions/arm_abs_q7.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,111 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_abs_q7.c  
+*  
+* Description:	Q7 vector absolute value.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupMath  
+ */ 
+ 
+/**  
+ * @addtogroup BasicAbs  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief Q7 vector absolute value.  
+ * @param[in]       *pSrc points to the input buffer  
+ * @param[out]      *pDst points to the output buffer  
+ * @param[in]       blockSize number of samples in each vector  
+ * @return none.  
+ *  
+ * <b>Scaling and Overflow Behavior:</b>  
+ * \par  
+ * The function uses saturating arithmetic.  
+ * The Q7 value -1 (0x80) will be saturated to the maximum allowable positive value 0x7F.  
+ */ 
+ 
+void arm_abs_q7(
+  q7_t * pSrc,
+  q7_t * pDst,
+  uint32_t blockSize)
+{
+  /* Both loop counters are derived from blockSize up front: the packed loop
+   ** runs blockSize / 4 times, the scalar loop blockSize % 4 times. */
+  uint32_t quads = blockSize >> 2u;              /* count of 4-sample groups */
+  uint32_t tail = blockSize % 0x4u;              /* samples left over after the groups */
+
+  /* Scratch storage for one group of four samples */
+  q7_t s0;                                       /* sample 0 of a group */
+  q7_t s1;                                       /* sample 1 of a group */
+  q7_t s2;                                       /* sample 2 of a group */
+  q7_t s3;                                       /* sample 3 of a group */
+
+  /* Unrolled section.
+   ** Each pass reads four inputs and writes their absolute values back as
+   ** one packed 32-bit word.
+   ** All four reads happen before the single store. */
+  for (; quads > 0u; quads--)
+  {
+    /* Read the four inputs of the group */
+    s0 = *pSrc++;
+    s1 = *pSrc++;
+    s2 = *pSrc++;
+    s3 = *pSrc++;
+
+    /* C = |A|.  Non-positive inputs are negated through __SSAT(..., 8), so
+     ** the Q7 value 0x80 comes out as 0x7F instead of wrapping. */
+    s0 = (s0 > 0) ? s0 : __SSAT(-s0, 8);
+    s1 = (s1 > 0) ? s1 : __SSAT(-s1, 8);
+    s2 = (s2 > 0) ? s2 : __SSAT(-s2, 8);
+    s3 = (s3 > 0) ? s3 : __SSAT(-s3, 8);
+
+    /* Pack the four results and store them with one 32-bit write.
+     ** NOTE(review): the store goes through __SIMD32(pDst); presumably pDst
+     ** has to be suitably aligned for that access - confirm. */
+    *__SIMD32(pDst)++ = __PACKq7(s0, s1, s2, s3);
+  }
+
+  /* Scalar section.
+   ** Handles the 0 to 3 samples that do not fill a complete group;
+   ** results are stored one Q7 value at a time. */
+  for (; tail > 0u; tail--)
+  {
+    /* Read one input */
+    s0 = *pSrc++;
+
+    /* C = |A| with the same saturation rule as the packed loop;
+     ** the result is written as a single Q7 value. */
+    *pDst++ = (s0 > 0) ? s0 : __SSAT(-s0, 8);
+  }
+}
+ 
+/**  
+ * @} end of BasicAbs group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/BasicMathFunctions/arm_add_f32.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,105 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_add_f32.c  
+*  
+* Description:	Floating-point vector addition.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* ---------------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupMath  
+ */ 
+ 
+/**  
+ * @defgroup BasicAdd Vector Addition  
+ *  
+ * Element-by-element addition of two vectors.  
+ *  
+ * <pre>  
+ *     pDst[n] = pSrcA[n] + pSrcB[n],   0 <= n < blockSize.  
+ * </pre>  
+ *  
+ * There are separate functions for floating-point, Q7, Q15, and Q31 data types.  
+ */ 
+ 
+/**  
+ * @addtogroup BasicAdd  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief Floating-point vector addition.  
+ * @param[in]       *pSrcA points to the first input vector  
+ * @param[in]       *pSrcB points to the second input vector  
+ * @param[out]      *pDst points to the output vector  
+ * @param[in]       blockSize number of samples in each vector  
+ * @return none.  
+ */ 
+ 
+void arm_add_f32(
+  float32_t * pSrcA,
+  float32_t * pSrcB,
+  float32_t * pDst,
+  uint32_t blockSize)
+{
+  /* Both loop counters are derived from blockSize up front: the unrolled
+   ** loop runs blockSize / 4 times, the scalar loop blockSize % 4 times. */
+  uint32_t quads = blockSize >> 2u;              /* count of 4-sample groups */
+  uint32_t tail = blockSize % 0x4u;              /* samples left over after the groups */
+
+  /* Unrolled section: four sums per pass, C = A + B.
+   ** The elements are processed in ascending order and the three
+   ** pointers are advanced together once the group is complete. */
+  for (; quads > 0u; quads--)
+  {
+    pDst[0] = pSrcA[0] + pSrcB[0];
+    pDst[1] = pSrcA[1] + pSrcB[1];
+    pDst[2] = pSrcA[2] + pSrcB[2];
+    pDst[3] = pSrcA[3] + pSrcB[3];
+
+    /* Step to the next group of four */
+    pSrcA += 4;
+    pSrcB += 4;
+    pDst += 4;
+  }
+
+  /* Scalar section: the 0 to 3 samples that remain when blockSize is not
+   ** a multiple of 4, one sum per pass. */
+  for (; tail > 0u; tail--)
+  {
+    /* C = A + B */
+    *pDst = *pSrcA + *pSrcB;
+
+    /* Step to the next sample */
+    pSrcA++;
+    pSrcB++;
+    pDst++;
+  }
+}
+ 
+/**  
+ * @} end of BasicAdd group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/BasicMathFunctions/arm_add_q15.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,98 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_add_q15.c  
+*  
+* Description:	Q15 vector addition  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupMath  
+ */ 
+ 
+/**  
+ * @addtogroup BasicAdd  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief Q15 vector addition.  
+ * @param[in]       *pSrcA points to the first input vector  
+ * @param[in]       *pSrcB points to the second input vector  
+ * @param[out]      *pDst points to the output vector  
+ * @param[in]       blockSize number of samples in each vector  
+ * @return none.  
+ *  
+ * <b>Scaling and Overflow Behavior:</b>  
+ * \par  
+ * The function uses saturating arithmetic.  
+ * Results outside of the allowable Q15 range [0x8000 0x7FFF] will be saturated.  
+ */ 
+ 
+void arm_add_q15(
+  q15_t * pSrcA,
+  q15_t * pSrcB,
+  q15_t * pDst,
+  uint32_t blockSize)
+{
+  /* Both loop counters are derived from blockSize up front: the packed loop
+   ** runs blockSize / 4 times, the scalar loop blockSize % 4 times. */
+  uint32_t quads = blockSize >> 2u;              /* count of 4-sample groups */
+  uint32_t tail = blockSize % 0x4u;              /* samples left over after the groups */
+  q31_t pairA;                                   /* operand taken from pSrcA */
+  q31_t pairB;                                   /* operand taken from pSrcB */
+
+  /* Unrolled section: four sums per pass, C = A + B.
+   ** The buffers are accessed through __SIMD32 and added with __QADD16,
+   ** so each store below covers two Q15 samples. */
+  for (; quads > 0u; quads--)
+  {
+    /* first two samples of the group */
+    pairA = *__SIMD32(pSrcA)++;
+    pairB = *__SIMD32(pSrcB)++;
+    *__SIMD32(pDst)++ = __QADD16(pairA, pairB);
+
+    /* last two samples of the group */
+    pairA = *__SIMD32(pSrcA)++;
+    pairB = *__SIMD32(pSrcB)++;
+    *__SIMD32(pDst)++ = __QADD16(pairA, pairB);
+  }
+
+  /* Scalar section: the 0 to 3 samples that remain when blockSize is not
+   ** a multiple of 4, one sum per pass. */
+  for (; tail > 0u; tail--)
+  {
+    /* C = A + B; the __QADD16 result is narrowed back to a single Q15 value */
+    pairA = *pSrcA++;
+    pairB = *pSrcB++;
+    *pDst++ = (q15_t) __QADD16(pairA, pairB);
+  }
+
+}
+ 
+/**  
+ * @} end of BasicAdd group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/BasicMathFunctions/arm_add_q31.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,100 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_add_q31.c  
+*  
+* Description:	Q31 vector addition.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupMath  
+ */ 
+ 
+/**  
+ * @addtogroup BasicAdd  
+ * @{  
+ */ 
+ 
+ 
+/**  
+ * @brief Q31 vector addition.  
+ * @param[in]       *pSrcA points to the first input vector  
+ * @param[in]       *pSrcB points to the second input vector  
+ * @param[out]      *pDst points to the output vector  
+ * @param[in]       blockSize number of samples in each vector  
+ * @return none.  
+ *  
+ * <b>Scaling and Overflow Behavior:</b>  
+ * \par  
+ * The function uses saturating arithmetic.  
+ * Results outside of the allowable Q31 range [0x80000000 0x7FFFFFFF] will be saturated.  
+ */ 
+ 
+void arm_add_q31(
+  q31_t * pSrcA,
+  q31_t * pSrcB,
+  q31_t * pDst,
+  uint32_t blockSize)
+{
+  /* Both loop counters are derived from blockSize up front: the unrolled
+   ** loop runs blockSize / 4 times, the scalar loop blockSize % 4 times. */
+  uint32_t quads = blockSize >> 2u;              /* count of 4-sample groups */
+  uint32_t tail = blockSize % 0x4u;              /* samples left over after the groups */
+
+  /* Unrolled section: four sums per pass, C = A + B.
+   ** The elements are processed in ascending order and the three
+   ** pointers are advanced together once the group is complete. */
+  for (; quads > 0u; quads--)
+  {
+    /* Every element is added with __QADD */
+    pDst[0] = __QADD(pSrcA[0], pSrcB[0]);
+    pDst[1] = __QADD(pSrcA[1], pSrcB[1]);
+    pDst[2] = __QADD(pSrcA[2], pSrcB[2]);
+    pDst[3] = __QADD(pSrcA[3], pSrcB[3]);
+
+    /* Step to the next group of four */
+    pSrcA += 4;
+    pSrcB += 4;
+    pDst += 4;
+  }
+
+  /* Scalar section: the 0 to 3 samples that remain when blockSize is not
+   ** a multiple of 4, one sum per pass. */
+  for (; tail > 0u; tail--)
+  {
+    /* C = A + B */
+    *pDst = __QADD(*pSrcA, *pSrcB);
+
+    /* Step to the next sample */
+    pSrcA++;
+    pSrcB++;
+    pDst++;
+  }
+}
+ 
+/**  
+ * @} end of BasicAdd group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/BasicMathFunctions/arm_add_q7.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,97 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_add_q7.c  
+*  
+* Description:	Q7 vector addition.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupMath  
+ */ 
+ 
+/**  
+ * @addtogroup BasicAdd  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief Q7 vector addition.  
+ * @param[in]       *pSrcA points to the first input vector  
+ * @param[in]       *pSrcB points to the second input vector  
+ * @param[out]      *pDst points to the output vector  
+ * @param[in]       blockSize number of samples in each vector  
+ * @return none.  
+ *  
+ * <b>Scaling and Overflow Behavior:</b>  
+ * \par  
+ * The function uses saturating arithmetic.  
+ * Results outside of the allowable Q7 range [0x80 0x7F] will be saturated.  
+ */ 
+ 
+void arm_add_q7(
+  q7_t * pSrcA,
+  q7_t * pSrcB,
+  q7_t * pDst,
+  uint32_t blockSize)
+{
+  /* Both loop counters are derived from blockSize up front: the packed loop
+   ** runs blockSize / 4 times, the scalar loop blockSize % 4 times. */
+  uint32_t quads = blockSize >> 2u;              /* count of 4-sample groups */
+  uint32_t tail = blockSize % 0x4u;              /* samples left over after the groups */
+  q31_t wordA;                                   /* operand taken from pSrcA */
+  q31_t wordB;                                   /* operand taken from pSrcB */
+
+  /* Unrolled section: four sums per pass, C = A + B.
+   ** The buffers are accessed through __SIMD32 and added with __QADD8,
+   ** so the single store below covers all four Q7 samples of a group. */
+  for (; quads > 0u; quads--)
+  {
+    /* Fetch one group from each input */
+    wordA = *__SIMD32(pSrcA)++;
+    wordB = *__SIMD32(pSrcB)++;
+
+    /* Add and store the whole group */
+    *__SIMD32(pDst)++ = __QADD8(wordA, wordB);
+  }
+
+  /* Scalar section: the 0 to 3 samples that remain when blockSize is not
+   ** a multiple of 4, one sum per pass. */
+  for (; tail > 0u; tail--)
+  {
+    /* Fetch one sample from each input */
+    wordA = *pSrcA++;
+    wordB = *pSrcB++;
+
+    /* C = A + B, limited to 8 bits by __SSAT before the store */
+    *pDst++ = (q7_t) __SSAT(wordA + wordB, 8);
+  }
+
+}
+ 
+/**  
+ * @} end of BasicAdd group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/BasicMathFunctions/arm_dot_prod_f32.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,106 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_dot_prod_f32.c  
+*  
+* Description:	Floating-point dot product.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* ---------------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupMath  
+ */ 
+ 
+/**  
+ * @defgroup dot_prod Vector Dot Product  
+ *  
+ * Computes the dot product of two vectors.  
+ * The vectors are multiplied element-by-element and then summed.  
+ * There are separate functions for floating-point, Q7, Q15, and Q31 data types.  
+ */ 
+ 
+/**  
+ * @addtogroup dot_prod  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief Dot product of floating-point vectors.  
+ * @param[in]       *pSrcA points to the first input vector  
+ * @param[in]       *pSrcB points to the second input vector  
+ * @param[in]       blockSize number of samples in each vector  
+ * @param[out]      *result output result returned here  
+ * @return none.  
+ */ 
+ 
+ 
+void arm_dot_prod_f32(
+  float32_t * pSrcA,
+  float32_t * pSrcB,
+  uint32_t blockSize,
+  float32_t * result)
+{
+  /* Both loop counters are derived from blockSize up front: the unrolled
+   ** loop runs blockSize / 4 times, the scalar loop blockSize % 4 times. */
+  float32_t acc = 0.0f;                          /* running sum of products */
+  uint32_t quads = blockSize >> 2u;              /* count of 4-sample groups */
+  uint32_t tail = blockSize % 0x4u;              /* samples left over after the groups */
+
+  /* Unrolled section: four products per pass.
+   ** The two input pointers are advanced together once the group has
+   ** been accumulated. */
+  for (; quads > 0u; quads--)
+  {
+    /* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
+    /* Products are added to the running sum in ascending index order */
+    acc += pSrcA[0] * pSrcB[0];
+    acc += pSrcA[1] * pSrcB[1];
+    acc += pSrcA[2] * pSrcB[2];
+    acc += pSrcA[3] * pSrcB[3];
+
+    /* Step to the next group of four */
+    pSrcA += 4;
+    pSrcB += 4;
+  }
+
+  /* Scalar section: the 0 to 3 samples that remain when blockSize is not
+   ** a multiple of 4, one product per pass. */
+  for (; tail > 0u; tail--)
+  {
+    /* Accumulate a single product */
+    acc += pSrcA[0] * pSrcB[0];
+
+    /* Step to the next sample */
+    pSrcA++;
+    pSrcB++;
+  }
+
+  /* Hand the accumulated sum back through the output pointer */
+  *result = acc;
+}
+ 
+/**  
+ * @} end of dot_prod group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/BasicMathFunctions/arm_dot_prod_q15.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,104 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_dot_prod_q15.c  
+*  
+* Description:	Q15 dot product.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupMath  
+ */ 
+ 
+/**  
+ * @addtogroup dot_prod  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief Dot product of Q15 vectors.  
+ * @param[in]       *pSrcA points to the first input vector  
+ * @param[in]       *pSrcB points to the second input vector  
+ * @param[in]       blockSize number of samples in each vector  
+ * @param[out]      *result output result returned here  
+ * @return none.  
+ *  
+ * <b>Scaling and Overflow Behavior:</b>  
+ * \par  
+ * The intermediate multiplications are in 1.15 x 1.15 = 2.30 format and these  
+ * results are added to a 64-bit accumulator in 34.30 format.  
+ * Nonsaturating additions are used and given that there are 33 guard bits in the accumulator  
+ * there is no risk of overflow.  
+ * The return result is in 34.30 format.  
+ */ 
+ 
+void arm_dot_prod_q15(
+  q15_t * pSrcA,
+  q15_t * pSrcB,
+  uint32_t blockSize,
+  q63_t * result)
+{
+  q63_t sum = 0;                                 /* 64-bit accumulator (34.30 format) */
+  uint32_t blkCnt;                               /* loop counter */
+
+  /* loop unrolling: number of 4-sample groups */
+  blkCnt = blockSize >> 2u;
+
+  /* First part of the processing with loop unrolling.  Compute 4 products at a time.
+   ** A second loop below handles the remaining 0 to 3 samples. */
+  while(blkCnt > 0u)
+  {
+    /* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
+    /* Each __SMLALD takes one packed pair of Q15 samples per input and accumulates both products. */
+    sum = __SMLALD(*__SIMD32(pSrcA)++, *__SIMD32(pSrcB)++, sum);
+    sum = __SMLALD(*__SIMD32(pSrcA)++, *__SIMD32(pSrcB)++, sum);
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* If the blockSize is not a multiple of 4, accumulate the remaining samples here.
+   ** No loop unrolling is used. */
+  blkCnt = blockSize % 0x4u;
+
+  while(blkCnt > 0u)
+  {
+    /* A single sample must not go through __SMLALD: a negative sample sign-extends
+     ** to 0xFFFF in its upper halfword, and the dual multiply would add the spurious
+     ** (-1 * -1) product to the sum.  Use a plain 16 x 16 multiply and a 64-bit add. */
+    sum += (q63_t) ((q31_t) *pSrcA++ * *pSrcB++);
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* Store the result in the destination buffer in 34.30 format */
+  *result = sum;
+}
+ 
+/**  
+ * @} end of dot_prod group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/BasicMathFunctions/arm_dot_prod_q31.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,107 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_dot_prod_q31.c  
+*  
+* Description:	Q31 dot product.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupMath  
+ */ 
+ 
+/**  
+ * @addtogroup dot_prod  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief Dot product of Q31 vectors.  
+ * @param[in]       *pSrcA points to the first input vector  
+ * @param[in]       *pSrcB points to the second input vector  
+ * @param[in]       blockSize number of samples in each vector  
+ * @param[out]      *result output result returned here  
+ * @return none.  
+ *  
+ * <b>Scaling and Overflow Behavior:</b>  
+ * \par  
+ * The intermediate multiplications are in 1.31 x 1.31 = 2.62 format and these  
+ * are truncated to 2.48 format by discarding the lower 14 bits.  
+ * The 2.48 result is then added without saturation to a 64-bit accumulator in 16.48 format.  
+ * There are 15 guard bits in the accumulator and there is no risk of overflow as long as  
+ * the length of the vectors is less than 2^16 elements.  
+ * The return result is in 16.48 format.  
+ */ 
+ 
+void arm_dot_prod_q31(
+  q31_t * pSrcA,
+  q31_t * pSrcB,
+  uint32_t blockSize,
+  q63_t * result)
+{
+  /* Both loop counters are derived from blockSize up front: the unrolled
+   ** loop runs blockSize / 4 times, the scalar loop blockSize % 4 times. */
+  q63_t acc = 0;                                 /* running 64-bit sum of products */
+  uint32_t quads = blockSize >> 2u;              /* count of 4-sample groups */
+  uint32_t tail = blockSize % 0x4u;              /* samples left over after the groups */
+
+  /* Unrolled section: four products per pass.
+   ** Each product is formed in 64 bits and shifted right by 14 before it
+   ** is added to the running sum.  The two input pointers are advanced
+   ** together once the group has been accumulated. */
+  for (; quads > 0u; quads--)
+  {
+    /* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
+    /* Products are added to the running sum in ascending index order */
+    acc += ((q63_t) pSrcA[0] * pSrcB[0]) >> 14u;
+    acc += ((q63_t) pSrcA[1] * pSrcB[1]) >> 14u;
+    acc += ((q63_t) pSrcA[2] * pSrcB[2]) >> 14u;
+    acc += ((q63_t) pSrcA[3] * pSrcB[3]) >> 14u;
+
+    /* Step to the next group of four */
+    pSrcA += 4;
+    pSrcB += 4;
+  }
+
+  /* Scalar section: the 0 to 3 samples that remain when blockSize is not
+   ** a multiple of 4, one product per pass. */
+  for (; tail > 0u; tail--)
+  {
+    /* Accumulate a single shifted product */
+    acc += ((q63_t) pSrcA[0] * pSrcB[0]) >> 14u;
+
+    /* Step to the next sample */
+    pSrcA++;
+    pSrcB++;
+  }
+
+  /* Hand the accumulated sum (16.48 format) back through the output pointer */
+  *result = acc;
+}
+ 
+/**  
+ * @} end of dot_prod group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/BasicMathFunctions/arm_dot_prod_q7.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,132 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_dot_prod_q7.c  
+*  
+* Description:	Q7 dot product.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupMath  
+ */ 
+ 
+/**  
+ * @addtogroup dot_prod  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief Dot product of Q7 vectors.  
+ * @param[in]       *pSrcA points to the first input vector  
+ * @param[in]       *pSrcB points to the second input vector  
+ * @param[in]       blockSize number of samples in each vector  
+ * @param[out]      *result output result returned here  
+ * @return none.  
+ *  
+ * <b>Scaling and Overflow Behavior:</b>  
+ * \par  
+ * The intermediate multiplications are in 1.7 x 1.7 = 2.14 format and these  
+ * results are added to an accumulator in 18.14 format.  
+ * Nonsaturating additions are used and there is no danger of wrap around as long as  
+ * the vectors are less than 2^18 elements long.  
+ * The return result is in 18.14 format.  
+ */ 
+ 
+void arm_dot_prod_q7(
+  q7_t * pSrcA,
+  q7_t * pSrcB,
+  uint32_t blockSize,
+  q31_t * result)
+{
+  q31_t input1, input2;                          /* Packed pairs of widened inputs */
+  q15_t in1, in2;                                /* Inputs widened from Q7 to 16 bits */
+  q31_t sum = 0;                                 /* Accumulator (18.14 format) */
+  uint32_t blkCnt;                               /* loop counter */
+
+  /* loop unrolling: number of 4-sample groups */
+  blkCnt = blockSize >> 2u;
+
+  /* First part of the processing with loop unrolling.  Compute 4 products at a time.
+   ** A second loop below handles the remaining 0 to 3 samples.
+   ** The halfword packing is done on uint32_t values because left-shifting a
+   ** negative signed value is undefined behaviour in C. */
+  while(blkCnt > 0u)
+  {
+    /* Reading two inputs of SrcA buffer and packing */
+    in1 = (q15_t) * pSrcA++;
+    in2 = (q15_t) * pSrcA++;
+    input1 = (q31_t) (((uint32_t) in1 & 0x0000FFFFu) | ((uint32_t) in2 << 16));
+
+    /* Reading two inputs of SrcB buffer and packing */
+    in1 = (q15_t) * pSrcB++;
+    in2 = (q15_t) * pSrcB++;
+    input2 = (q31_t) (((uint32_t) in1 & 0x0000FFFFu) | ((uint32_t) in2 << 16));
+
+    /* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
+    /* Perform Dot product of 2 packed inputs using SMLAD and store the result in a temporary variable. */
+    sum = __SMLAD(input1, input2, sum);
+
+    /* Reading two inputs of SrcA buffer and packing */
+    in1 = (q15_t) * pSrcA++;
+    in2 = (q15_t) * pSrcA++;
+    input1 = (q31_t) (((uint32_t) in1 & 0x0000FFFFu) | ((uint32_t) in2 << 16));
+
+    /* Reading two inputs of SrcB buffer and packing */
+    in1 = (q15_t) * pSrcB++;
+    in2 = (q15_t) * pSrcB++;
+    input2 = (q31_t) (((uint32_t) in1 & 0x0000FFFFu) | ((uint32_t) in2 << 16));
+
+    /* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
+    /* Perform Dot product of 2 packed inputs using SMLAD and store the result in a temporary variable. */
+    sum = __SMLAD(input1, input2, sum);
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* If the blockSize is not a multiple of 4, accumulate the remaining samples here.
+   ** No loop unrolling is used. */
+  blkCnt = blockSize % 0x4u;
+
+  while(blkCnt > 0u)
+  {
+    /* A single sample must not go through __SMLAD: a negative sample sign-extends
+     ** to 0xFFFF in its upper halfword, and the dual multiply would add the spurious
+     ** (-1 * -1) product to the sum.  Use a plain multiply-accumulate instead:
+     ** sum += A[n] * B[n]. */
+    sum += (q31_t) ((q15_t) *pSrcA++ * *pSrcB++);
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* Store the result in the destination buffer in 18.14 format */
+  *result = sum;
+}
+ 
+/**  
+ * @} end of dot_prod group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/BasicMathFunctions/arm_mult_f32.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,108 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:	    arm_mult_f32.c  
+*  
+* Description:	Floating-point vector multiplication.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.5  2010/04/26   
+*    incorporated review comments and updated with latest CMSIS layer  
+*  
+* Version 0.0.3  2010/03/10   
+*    Initial version  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupMath  
+ */ 
+ 
+/**  
+ * @defgroup BasicMult Vector Multiplication  
+ *  
+ * Element-by-element multiplication of two vectors.  
+ *  
+ * <pre>  
+ *     pDst[n] = pSrcA[n] * pSrcB[n],   0 <= n < blockSize.  
+ * </pre>  
+ *  
+ * There are separate functions for floating-point, Q7, Q15, and Q31 data types.  
+ */ 
+ 
+/**  
+ * @addtogroup BasicMult  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief Element-by-element product of two floating-point vectors.  
+ * @param[in]       *pSrcA points to the first input vector  
+ * @param[in]       *pSrcB points to the second input vector  
+ * @param[out]      *pDst points to the output vector, pDst[n] = pSrcA[n] * pSrcB[n]  
+ * @param[in]       blockSize number of samples in each vector  
+ * @return none.  
+ *  
+ * The vectors are processed four samples at a time; the 0 to 3 samples that  
+ * do not fill a group of four are processed one at a time afterwards.  
+ */ 
+ 
+void arm_mult_f32( 
+  float32_t * pSrcA, 
+  float32_t * pSrcB, 
+  float32_t * pDst, 
+  uint32_t blockSize) 
+{ 
+  uint32_t quads = blockSize >> 2u;              /* complete groups of four samples */ 
+  uint32_t tail = blockSize & 0x3u;              /* 0 to 3 samples left after the groups */ 
+ 
+  /* Unrolled part: every pass produces four outputs, lowest index first.  
+   ** The samples that do not fill a group are left for the loop further down. */ 
+  for(; quads > 0u; quads--) 
+  { 
+    /* C = A * B */ 
+    pDst[0] = pSrcA[0] * pSrcB[0]; 
+    pDst[1] = pSrcA[1] * pSrcB[1]; 
+    pDst[2] = pSrcA[2] * pSrcB[2]; 
+    pDst[3] = pSrcA[3] * pSrcB[3]; 
+ 
+    /* Step all three pointers past the group just handled */ 
+    pSrcA += 4; 
+    pSrcB += 4; 
+    pDst += 4; 
+  } 
+ 
+  /* Remainder: one output per pass until the leftover samples are used up. */ 
+  for(; tail > 0u; tail--) 
+  { 
+    /* C = A * B */ 
+    *pDst = (*pSrcA) * (*pSrcB); 
+ 
+    /* Move on to the next sample */ 
+    pSrcA++; 
+    pSrcB++; 
+    pDst++; 
+  } 
+} 
+ 
+/**  
+ * @} end of BasicMult group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/BasicMathFunctions/arm_mult_q15.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,102 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:	    arm_mult_q15.c  
+*  
+* Description:	Q15 vector multiplication.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.5  2010/04/26   
+*    incorporated review comments and updated with latest CMSIS layer  
+*  
+* Version 0.0.3  2010/03/10   
+*    Initial version  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupMath  
+ */ 
+ 
+/**  
+ * @addtogroup BasicMult  
+ * @{  
+ */ 
+ 
+ 
+/**  
+ * @brief           Element-by-element product of two Q15 vectors.  
+ * @param[in]       *pSrcA points to the first input vector  
+ * @param[in]       *pSrcB points to the second input vector  
+ * @param[out]      *pDst points to the output vector  
+ * @param[in]       blockSize number of samples in each vector  
+ * @return none.  
+ *  
+ * <b>Scaling and Overflow Behavior:</b>  
+ * \par  
+ * Each product is shifted right by 15 bits and then saturated to 16 bits, so  
+ * results outside of the allowable Q15 range [0x8000 0x7FFF] will be saturated.  
+ */ 
+ 
+void arm_mult_q15( 
+  q15_t * pSrcA, 
+  q15_t * pSrcB, 
+  q15_t * pDst, 
+  uint32_t blockSize) 
+{ 
+  uint32_t quads = blockSize >> 2u;              /* complete groups of four samples */ 
+  uint32_t tail = blockSize & 0x3u;              /* 0 to 3 samples left after the groups */ 
+  q31_t wide;                                    /* unscaled product of one sample pair */ 
+ 
+  /* Unrolled part: every pass produces four outputs, lowest index first. */ 
+  for(; quads > 0u; quads--) 
+  { 
+    /* C = A * B */ 
+    wide = (q31_t) pSrcA[0] * pSrcB[0]; 
+    pDst[0] = (q15_t) __SSAT((wide >> 15), 16); 
+    wide = (q31_t) pSrcA[1] * pSrcB[1]; 
+    pDst[1] = (q15_t) __SSAT((wide >> 15), 16); 
+    wide = (q31_t) pSrcA[2] * pSrcB[2]; 
+    pDst[2] = (q15_t) __SSAT((wide >> 15), 16); 
+    wide = (q31_t) pSrcA[3] * pSrcB[3]; 
+    pDst[3] = (q15_t) __SSAT((wide >> 15), 16); 
+ 
+    /* Step all three pointers past the group just handled */ 
+    pSrcA += 4; 
+    pSrcB += 4; 
+    pDst += 4; 
+  } 
+ 
+  /* Remainder: one output per pass until the leftover samples are used up. */ 
+  for(; tail > 0u; tail--) 
+  { 
+    /* C = A * B */ 
+    wide = (q31_t) (*pSrcA) * (*pSrcB); 
+    *pDst = (q15_t) __SSAT((wide >> 15), 16); 
+    pSrcA++; 
+    pSrcB++; 
+    pDst++; 
+  } 
+} 
+ 
+/**  
+ * @} end of BasicMult group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/BasicMathFunctions/arm_mult_q31.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,101 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:	    arm_mult_q31.c  
+*  
+* Description:	Q31 vector multiplication.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.5  2010/04/26   
+*    incorporated review comments and updated with latest CMSIS layer  
+*  
+* Version 0.0.3  2010/03/10   
+*    Initial version  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupMath  
+ */ 
+ 
+/**  
+ * @addtogroup BasicMult  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief Element-by-element product of two Q31 vectors.  
+ * @param[in]       *pSrcA points to the first input vector  
+ * @param[in]       *pSrcB points to the second input vector  
+ * @param[out]      *pDst points to the output vector  
+ * @param[in]       blockSize number of samples in each vector  
+ * @return none.  
+ *  
+ * <b>Scaling and Overflow Behavior:</b>  
+ * \par  
+ * Each 64-bit product is shifted right by 31 bits and passed through clip_q63_to_q31(), so  
+ * results outside of the allowable Q31 range[0x80000000 0x7FFFFFFF] will be saturated.  
+ */ 
+ 
+void arm_mult_q31( 
+  q31_t * pSrcA, 
+  q31_t * pSrcB, 
+  q31_t * pDst, 
+  uint32_t blockSize) 
+{ 
+  uint32_t quads = blockSize >> 2u;              /* complete groups of four samples */ 
+  uint32_t tail = blockSize & 0x3u;              /* 0 to 3 samples left after the groups */ 
+  q63_t wide;                                    /* unscaled product of one sample pair */ 
+ 
+  /* Unrolled part: every pass produces four outputs, lowest index first. */ 
+  for(; quads > 0u; quads--) 
+  { 
+    /* C = A * B */ 
+    wide = (q63_t) pSrcA[0] * pSrcB[0]; 
+    pDst[0] = (q31_t) clip_q63_to_q31(wide >> 31); 
+    wide = (q63_t) pSrcA[1] * pSrcB[1]; 
+    pDst[1] = (q31_t) clip_q63_to_q31(wide >> 31); 
+    wide = (q63_t) pSrcA[2] * pSrcB[2]; 
+    pDst[2] = (q31_t) clip_q63_to_q31(wide >> 31); 
+    wide = (q63_t) pSrcA[3] * pSrcB[3]; 
+    pDst[3] = (q31_t) clip_q63_to_q31(wide >> 31); 
+ 
+    /* Step all three pointers past the group just handled */ 
+    pSrcA += 4; 
+    pSrcB += 4; 
+    pDst += 4; 
+  } 
+ 
+  /* Remainder: one output per pass until the leftover samples are used up. */ 
+  for(; tail > 0u; tail--) 
+  { 
+    /* C = A * B */ 
+    wide = (q63_t) (*pSrcA) * (*pSrcB); 
+    *pDst = (q31_t) clip_q63_to_q31(wide >> 31); 
+    pSrcA++; 
+    pSrcB++; 
+    pDst++; 
+  } 
+} 
+ 
+/**  
+ * @} end of BasicMult group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/BasicMathFunctions/arm_mult_q7.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,108 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:	    arm_mult_q7.c  
+*  
+* Description:	Q7 vector multiplication.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+*  
+* Version 0.0.5  2010/04/26   
+*    incorporated review comments and updated with latest CMSIS layer  
+*  
+* Version 0.0.3  2010/03/10 DP  
+*    Initial version  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupMath  
+ */ 
+ 
+/**  
+ * @addtogroup BasicMult  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief           Q7 vector multiplication  
+ * @param[in]       *pSrcA points to the first input vector  
+ * @param[in]       *pSrcB points to the second input vector  
+ * @param[out]      *pDst points to the output vector  
+ * @param[in]       blockSize number of samples in each vector  
+ * @return none.  
+ *  
+ * <b>Scaling and Overflow Behavior:</b>  
+ * \par  
+ * The function uses saturating arithmetic: each product is shifted right by 7 bits and saturated to 8 bits.  
+ * Results outside of the allowable Q7 range [0x80 0x7F] will be saturated (0x80 * 0x80 gives 0x7F).  
+ */ 
+ 
+void arm_mult_q7( 
+  q7_t * pSrcA, 
+  q7_t * pSrcB, 
+  q7_t * pDst, 
+  uint32_t blockSize) 
+{ 
+  uint32_t blkCnt;                               /* iterations left in the current loop */ 
+  q7_t out1, out2, out3, out4;                   /* saturated products of one group of four */ 
+ 
+  /* Number of complete groups of four samples */ 
+  blkCnt = blockSize >> 2u; 
+ 
+  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.  
+   ** a second loop below computes the remaining 0 to 3 samples. */ 
+  while(blkCnt > 0u) 
+  { 
+    /* C = A * B */ 
+    /* Saturate before narrowing: (-128 * -128) >> 7 is 128, which a plain q7_t cast would not clamp to 127 */ 
+    out1 = (q7_t) __SSAT((((q15_t) (*pSrcA++) * (*pSrcB++)) >> 7), 8); 
+    out2 = (q7_t) __SSAT((((q15_t) (*pSrcA++) * (*pSrcB++)) >> 7), 8); 
+    out3 = (q7_t) __SSAT((((q15_t) (*pSrcA++) * (*pSrcB++)) >> 7), 8); 
+    out4 = (q7_t) __SSAT((((q15_t) (*pSrcA++) * (*pSrcB++)) >> 7), 8); 
+ 
+    /* Store the four results in the destination buffer with one packed write */ 
+    *__SIMD32(pDst)++ = __PACKq7(out1, out2, out3, out4); 
+ 
+    /* Decrement the blockSize loop counter */ 
+    blkCnt--; 
+  } 
+ 
+  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.  
+   ** No loop unrolling is used. */ 
+  blkCnt = blockSize % 0x4u; 
+ 
+  while(blkCnt > 0u) 
+  { 
+    /* C = A * B */ 
+    /* Multiply the inputs, saturate to 8 bits and store the result in the destination buffer */ 
+    *pDst++ = (q7_t) __SSAT((((q15_t) (*pSrcA++) * (*pSrcB++)) >> 7), 8); 
+ 
+    /* Decrement the blockSize loop counter */ 
+    blkCnt--; 
+  } 
+} 
+ 
+/**  
+ * @} end of BasicMult group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/BasicMathFunctions/arm_negate_f32.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,101 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_negate_f32.c  
+*  
+* Description:	Negates floating-point vectors.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* ---------------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupMath  
+ */ 
+ 
+/**  
+ * @defgroup negate Vector Negate  
+ *  
+ * Negates the elements of a vector.  
+ *  
+ * <pre>  
+ *     pDst[n] = -pSrc[n],   0 <= n < blockSize.  
+ * </pre>  
+ */ 
+ 
+/**  
+ * @addtogroup negate  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief  Writes the negated elements of a floating-point vector to the output vector.  
+ * @param  *pSrc points to the input vector  
+ * @param  *pDst points to the output vector  
+ * @param  blockSize number of samples in the vector  
+ * @return none.  
+ *  
+ * The vector is processed four samples at a time; the 0 to 3 samples that  
+ * do not fill a group of four are processed one at a time afterwards.  
+ */ 
+ 
+void arm_negate_f32( 
+  float32_t * pSrc, 
+  float32_t * pDst, 
+  uint32_t blockSize) 
+{ 
+  uint32_t quads = blockSize >> 2u;              /* complete groups of four samples */ 
+  uint32_t tail = blockSize & 0x3u;              /* 0 to 3 samples left after the groups */ 
+ 
+  /* Unrolled part: every pass produces four outputs, lowest index first.  
+   ** The samples that do not fill a group are left for the loop further down. */ 
+  for(; quads > 0u; quads--) 
+  { 
+    /* C = -A */ 
+    /* Flip the sign of each sample and store it in the destination buffer. */ 
+    pDst[0] = -pSrc[0]; 
+    pDst[1] = -pSrc[1]; 
+    pDst[2] = -pSrc[2]; 
+    pDst[3] = -pSrc[3]; 
+ 
+    /* Step both pointers past the group just handled */ 
+    pSrc += 4; 
+    pDst += 4; 
+  } 
+ 
+  /* Remainder: one output per pass until the leftover samples are used up. */ 
+  for(; tail > 0u; tail--) 
+  { 
+    /* C = -A */ 
+    /* Flip the sign of the sample and store it in the destination buffer. */ 
+    *pDst = -(*pSrc); 
+ 
+    /* Move on to the next sample */ 
+    pSrc++; 
+    pDst++; 
+  } 
+} 
+ 
+/**  
+ * @} end of negate group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/BasicMathFunctions/arm_negate_q15.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,103 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_negate_q15.c  
+*  
+* Description:	Negates Q15 vectors.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupMath  
+ */ 
+ 
+/**  
+ * @addtogroup negate  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief  Negates the elements of a Q15 vector.  
+ * @param  *pSrc points to the input vector; blockSize samples are read  
+ * @param  *pDst points to the output vector; blockSize samples are written  
+ * @param  blockSize number of samples in the vector  
+ * @return none.  
+ *  
+ * <b>Scaling and Overflow Behavior:</b>  
+ * \par  
+ * The function uses saturating arithmetic: every negated sample is passed through __SSAT(..., 16).  
+ * The Q15 value -1 (0x8000) will be saturated to the maximum allowable positive value 0x7FFF.  
+ */ 
+ 
+void arm_negate_q15( 
+  q15_t * pSrc, 
+  q15_t * pDst, 
+  uint32_t blockSize) 
+{ 
+  uint32_t blkCnt;                               /* iterations left in the current loop */ 
+  q15_t in1, in2;                                /* pair of input samples being negated */ 
+ 
+ 
+  /* Number of complete groups of four samples */ 
+  blkCnt = blockSize >> 2u; 
+ 
+  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.  
+   ** a second loop below computes the remaining 0 to 3 samples. */ 
+  while(blkCnt > 0u) 
+  { 
+    /* C = -A */ 
+    /* Read the first two inputs of the group */ 
+    in1 = *pSrc++; 
+    in2 = *pSrc++; 
+    /* Negate, saturate to 16 bits, combine both results with __PKHBT and store them through __SIMD32(pDst). */ 
+    *__SIMD32(pDst)++ = __PKHBT(__SSAT(-in1, 16), __SSAT(-in2, 16), 16); 
+ 
+    in1 = *pSrc++; 
+    in2 = *pSrc++; 
+ 
+    *__SIMD32(pDst)++ = __PKHBT(__SSAT(-in1, 16), __SSAT(-in2, 16), 16); 
+ 
+    /* One group of four samples done */ 
+    blkCnt--; 
+  } 
+ 
+  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.  
+   ** No loop unrolling is used. */ 
+  blkCnt = blockSize % 0x4u; 
+ 
+  while(blkCnt > 0u) 
+  { 
+    /* C = -A */ 
+    /* Negate, saturate to 16 bits and store the single result in the destination buffer. */ 
+    *pDst++ = __SSAT(-*pSrc++, 16); 
+ 
+    /* One leftover sample done */ 
+    blkCnt--; 
+  } 
+} 
+ 
+/**  
+ * @} end of negate group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/BasicMathFunctions/arm_negate_q31.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,103 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_negate_q31.c  
+*  
+* Description:	Negates Q31 vectors.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupMath  
+ */ 
+ 
+/**  
+ * @addtogroup negate  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief  Writes the negated elements of a Q31 vector to the output vector.  
+ * @param  *pSrc points to the input vector  
+ * @param  *pDst points to the output vector  
+ * @param  blockSize number of samples in the vector  
+ * @return none.  
+ *  
+ * <b>Scaling and Overflow Behavior:</b>  
+ * \par  
+ * The function uses saturating arithmetic.  
+ * The Q31 value -1 (0x80000000) will be saturated to the maximum allowable positive value 0x7FFFFFFF.  
+ *  
+ * The vector is processed four samples at a time; the 0 to 3 samples that  
+ * do not fill a group of four are processed one at a time afterwards.  
+ */ 
+ 
+void arm_negate_q31( 
+  q31_t * pSrc, 
+  q31_t * pDst, 
+  uint32_t blockSize) 
+{ 
+  uint32_t quads = blockSize >> 2u;              /* complete groups of four samples */ 
+  uint32_t tail = blockSize & 0x3u;              /* 0 to 3 samples left after the groups */ 
+  q31_t in;                                      /* sample currently being negated */ 
+ 
+ 
+  /* Unrolled part: every pass produces four outputs, lowest index first.  
+   ** The samples that do not fill a group are left for the loop further down. */ 
+  for(; quads > 0u; quads--) 
+  { 
+    /* C = -A */ 
+    /* 0x80000000 is the only input that is replaced by a fixed value (0x7fffffff) instead of -in. */ 
+    in = pSrc[0]; 
+    pDst[0] = (in != 0x80000000) ? -in : 0x7fffffff; 
+    in = pSrc[1]; 
+    pDst[1] = (in != 0x80000000) ? -in : 0x7fffffff; 
+    in = pSrc[2]; 
+    pDst[2] = (in != 0x80000000) ? -in : 0x7fffffff; 
+    in = pSrc[3]; 
+    pDst[3] = (in != 0x80000000) ? -in : 0x7fffffff; 
+ 
+    /* Step both pointers past the group just handled */ 
+    pSrc += 4; 
+    pDst += 4; 
+  } 
+ 
+  /* Remainder: one output per pass until the leftover samples are used up. */ 
+  for(; tail > 0u; tail--) 
+  { 
+    /* C = -A */ 
+    /* Same special case as above for the input 0x80000000. */ 
+    in = *pSrc; 
+    *pDst = (in != 0x80000000) ? -in : 0x7fffffff; 
+ 
+    /* Move on to the next sample */ 
+    pSrc++; 
+    pDst++; 
+  } 
+} 
+ 
+/**  
+ * @} end of negate group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/BasicMathFunctions/arm_negate_q7.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,106 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_negate_q7.c  
+*  
+* Description:	Negates Q7 vectors.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupMath  
+ */ 
+ 
+/**  
+ * @addtogroup negate  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief  Negates the elements of a Q7 vector.  
+ * @param  *pSrc points to the input vector; blockSize samples are read  
+ * @param  *pDst points to the output vector; blockSize samples are written  
+ * @param  blockSize number of samples in the vector  
+ * @return none.  
+ *  
+ * <b>Scaling and Overflow Behavior:</b>  
+ * \par  
+ * The function uses saturating arithmetic: every negated sample is passed through __SSAT(..., 8).  
+ * The Q7 value -1 (0x80) will be saturated to the maximum allowable positive value 0x7F.  
+ */ 
+ 
+void arm_negate_q7( 
+  q7_t * pSrc, 
+  q7_t * pDst, 
+  uint32_t blockSize) 
+{ 
+  uint32_t blkCnt;                               /* iterations left in the current loop */ 
+  q7_t in1;                                      /* 1st input sample of the current group */ 
+  q7_t in2;                                      /* 2nd input sample of the current group */ 
+  q7_t in3;                                      /* 3rd input sample of the current group */ 
+  q7_t in4;                                      /* 4th input sample of the current group */ 
+ 
+ 
+  /* Number of complete groups of four samples */ 
+  blkCnt = blockSize >> 2u; 
+ 
+  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.  
+   ** a second loop below computes the remaining 0 to 3 samples. */ 
+  while(blkCnt > 0u) 
+  { 
+    /* C = -A */ 
+    /* Read four inputs */ 
+    in1 = *pSrc++; 
+    in2 = *pSrc++; 
+    in3 = *pSrc++; 
+    in4 = *pSrc++; 
+ 
+    /* Negate, saturate to 8 bits, combine the four results with __PACKq7 and store them through __SIMD32(pDst) */ 
+    *__SIMD32(pDst)++ = 
+      __PACKq7(__SSAT(-in1, 8), __SSAT(-in2, 8), __SSAT(-in3, 8), 
+               __SSAT(-in4, 8)); 
+ 
+    /* One group of four samples done */ 
+    blkCnt--; 
+  } 
+ 
+  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.  
+   ** No loop unrolling is used. */ 
+  blkCnt = blockSize % 0x4u; 
+ 
+  while(blkCnt > 0u) 
+  { 
+    /* C = -A */ 
+    /* Negate, saturate to 8 bits and store the single result in the destination buffer. */ 
+    *pDst++ = __SSAT(-*pSrc++, 8); 
+ 
+    /* One leftover sample done */ 
+    blkCnt--; 
+  } 
+} 
+ 
+/**  
+ * @} end of negate group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/BasicMathFunctions/arm_offset_f32.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,106 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_offset_f32.c  
+*  
+* Description:	Floating-point vector offset.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* ---------------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupMath  
+ */ 
+ 
+/**  
+ * @defgroup offset Vector Offset  
+ *  
+ * Adds a constant offset to each element of a vector.  
+ *  
+ * <pre>  
+ *     pDst[n] = pSrc[n] + offset,   0 <= n < blockSize.  
+ * </pre>  
+ *  
+ * There are separate functions for floating-point, Q7, Q15, and Q31 data types.  
+ */ 
+ 
+/**  
+ * @addtogroup offset  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief  Adds a constant offset to every element of a floating-point vector.  
+ * @param  *pSrc points to the input vector  
+ * @param  offset is the offset to be added  
+ * @param  *pDst points to the output vector  
+ * @param  blockSize number of samples in the vector  
+ * @return none.  
+ *  
+ * The vector is processed four samples at a time; the 0 to 3 samples that  
+ * do not fill a group of four are processed one at a time afterwards.  
+ */ 
+ 
+ 
+void arm_offset_f32( 
+  float32_t * pSrc, 
+  float32_t offset, 
+  float32_t * pDst, 
+  uint32_t blockSize) 
+{ 
+  uint32_t quads = blockSize >> 2u;              /* complete groups of four samples */ 
+  uint32_t tail = blockSize & 0x3u;              /* 0 to 3 samples left after the groups */ 
+ 
+  /* Unrolled part: every pass produces four outputs, lowest index first.  
+   ** The samples that do not fill a group are left for the loop further down. */ 
+  for(; quads > 0u; quads--) 
+  { 
+    /* C = A + offset */ 
+    /* Shift each sample by the offset and store it in the destination buffer. */ 
+    pDst[0] = pSrc[0] + offset; 
+    pDst[1] = pSrc[1] + offset; 
+    pDst[2] = pSrc[2] + offset; 
+    pDst[3] = pSrc[3] + offset; 
+ 
+    /* Step both pointers past the group just handled */ 
+    pSrc += 4; 
+    pDst += 4; 
+  } 
+ 
+  /* Remainder: one output per pass until the leftover samples are used up. */ 
+  for(; tail > 0u; tail--) 
+  { 
+    /* C = A + offset */ 
+    /* Shift the sample by the offset and store it in the destination buffer. */ 
+    *pDst = (*pSrc) + offset; 
+ 
+    /* Move on to the next sample */ 
+    pSrc++; 
+    pDst++; 
+  } 
+} 
+ 
+/**  
+ * @} end of offset group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/BasicMathFunctions/arm_offset_q15.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,101 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_offset_q15.c  
+*  
+* Description:	Q15 vector offset.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupMath  
+ */ 
+ 
+/**  
+ * @addtogroup offset  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief  Adds a constant offset to a Q15 vector.  
+ * @param  *pSrc points to the input vector; blockSize samples are read  
+ * @param  offset is the offset to be added to every sample  
+ * @param  *pDst points to the output vector; blockSize samples are written  
+ * @param  blockSize number of samples in the vector  
+ * @return none.  
+ *  
+ * <b>Scaling and Overflow Behavior:</b>  
+ * \par  
+ * The function uses saturating arithmetic: every addition is performed with __QADD16.  
+ * Results outside of the allowable Q15 range [0x8000 0x7FFF] are saturated.  
+ */ 
+ 
+void arm_offset_q15( 
+  q15_t * pSrc, 
+  q15_t offset, 
+  q15_t * pDst, 
+  uint32_t blockSize) 
+{ 
+  uint32_t blkCnt;                               /* iterations left in the current loop */ 
+  q31_t offset_packed;                           /* offset combined with itself by __PKHBT */ 
+ 
+ 
+  /* Number of complete groups of four samples */ 
+  blkCnt = blockSize >> 2u; 
+ 
+  /* offset is given as both operands of __PKHBT so that one packed value can be added to two samples at once */ 
+  offset_packed = __PKHBT(offset, offset, 16); 
+ 
+  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.  
+   ** a second loop below computes the remaining 0 to 3 samples. */ 
+  while(blkCnt > 0u) 
+  { 
+    /* C = A + offset */ 
+    /* Add offset and then store the results in the destination buffer, 2 samples per statement. */ 
+    *__SIMD32(pDst)++ = __QADD16(*__SIMD32(pSrc)++, offset_packed); 
+    *__SIMD32(pDst)++ = __QADD16(*__SIMD32(pSrc)++, offset_packed); 
+ 
+    /* One group of four samples done */ 
+    blkCnt--; 
+  } 
+ 
+  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.  
+   ** No loop unrolling is used. */ 
+  blkCnt = blockSize % 0x4u; 
+ 
+  while(blkCnt > 0u) 
+  { 
+    /* C = A + offset */ 
+    /* NOTE(review): single samples also go through __QADD16; only what the (q15_t) cast keeps is stored - confirm lane semantics. */ 
+    *pDst++ = (q15_t) __QADD16(*pSrc++, offset); 
+ 
+    /* One leftover sample done */ 
+    blkCnt--; 
+  } 
+} 
+ 
+/**  
+ * @} end of offset group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/BasicMathFunctions/arm_offset_q31.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,99 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_offset_q31.c  
+*  
+* Description:	Q31 vector offset.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupMath  
+ */ 
+ 
+/**  
+ * @addtogroup offset  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief  Adds a constant offset to a Q31 vector.  
+ * @param  *pSrc points to the input vector  
+ * @param  offset is the offset to be added  
+ * @param  *pDst points to the output vector  
+ * @param  blockSize number of samples in the vector  
+ * @return none.  
+ *  
+ * <b>Scaling and Overflow Behavior:</b>  
+ * \par  
+ * The function uses saturating arithmetic.  
+ * Results outside of the allowable Q31 range [0x80000000 0x7FFFFFFF] are saturated.  
+ */ 
+ 
+void arm_offset_q31(
+  q31_t * pSrc,
+  q31_t offset,
+  q31_t * pDst,
+  uint32_t blockSize)
+{
+  uint32_t quads = blockSize >> 2u;              /* number of 4-sample groups */
+  uint32_t tail = blockSize % 0x4u;              /* samples left after the groups */
+  q31_t sample;                                  /* input sample being processed */
+
+  /* Adds 'offset' to each of the blockSize samples read from pSrc and
+   * writes the sums to pDst.  Every addition goes through __QADD. */
+
+  /* Main pass: unrolled by hand, four outputs per iteration. */
+  for(; quads > 0u; quads--)
+  {
+    /* C = A + offset, first sample of the group */
+    sample = *pSrc++;
+    *pDst++ = __QADD(sample, offset);
+
+    /* C = A + offset, second sample of the group */
+    sample = *pSrc++;
+    *pDst++ = __QADD(sample, offset);
+
+    /* C = A + offset, third sample of the group */
+    sample = *pSrc++;
+    *pDst++ = __QADD(sample, offset);
+
+    /* C = A + offset, fourth sample of the group */
+    sample = *pSrc++;
+    *pDst++ = __QADD(sample, offset);
+  }
+
+  /* Tail pass: when blockSize is not a multiple of 4 the remaining
+   * 1 to 3 samples are processed one at a time. */
+  for(; tail > 0u; tail--)
+  {
+    /* C = A + offset */
+    *pDst++ = __QADD(*pSrc++, offset);
+  }
+}
+ 
+/**  
+ * @} end of offset group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/BasicMathFunctions/arm_offset_q7.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,100 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_offset_q7.c  
+*  
+* Description:	Q7 vector offset.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupMath  
+ */ 
+ 
+/**  
+ * @addtogroup offset  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief  Adds a constant offset to a Q7 vector.  
+ * @param  *pSrc points to the input vector  
+ * @param  offset is the offset to be added  
+ * @param  *pDst points to the output vector  
+ * @param  blockSize number of samples in the vector  
+ * @return none.  
+ *  
+ * <b>Scaling and Overflow Behavior:</b>  
+ * \par  
+ * The function uses saturating arithmetic.  
+ * Results outside of the allowable Q7 range [0x80 0x7F] are saturated.  
+ */ 
+ 
+void arm_offset_q7(
+  q7_t * pSrc,
+  q7_t offset,
+  q7_t * pDst,
+  uint32_t blockSize)
+{
+  uint32_t quads = blockSize >> 2u;              /* number of 4-sample groups */
+  uint32_t tail = blockSize % 0x4u;              /* samples left after the groups */
+  q31_t offsetQuad;                              /* offset held in all four bytes */
+  q31_t srcQuad;                                 /* four packed input samples */
+
+  /* Adds 'offset' to each of the blockSize samples read from pSrc and
+   * writes the saturated sums to pDst.  Full groups of four samples are
+   * handled as packed words, any samples left over are handled
+   * individually. */
+
+  /* Replicate the offset into every byte of one word, so that one
+   * __QADD8 applies it to four packed samples at once. */
+  offsetQuad = __PACKq7(offset, offset, offset, offset);
+
+  /* Main pass: each iteration moves one group of four samples.  Both
+   * buffers are accessed through __SIMD32, so a group is fetched with
+   * one packed read and stored with one packed write, and the
+   * post-increment steps the pointer past the whole group. */
+  for(; quads > 0u; quads--)
+  {
+    /* Fetch samples 0..3 of the group */
+    srcQuad = *__SIMD32(pSrc)++;
+
+    /* C = A + offset for the whole group */
+    *__SIMD32(pDst)++ = __QADD8(srcQuad, offsetQuad);
+  }
+
+  /* Tail pass: when blockSize is not a multiple of 4 the remaining
+   * 1 to 3 samples are processed one at a time. */
+  for(; tail > 0u; tail--)
+  {
+    /* C = A + offset.  The sum is saturated to 8 bits by __SSAT and
+     * then narrowed back to q7_t by the cast. */
+    *pDst++ = (q7_t) __SSAT(*pSrc++ + offset, 8);
+  }
+}
+ 
+/**  
+ * @} end of offset group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/BasicMathFunctions/arm_scale_f32.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,118 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_scale_f32.c  
+*  
+* Description:	Multiplies a floating-point vector by a scalar.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* ---------------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupMath  
+ */ 
+ 
+/**  
+ * @defgroup scale Vector Scale  
+ *  
+ * Multiply a vector by a scalar value.  For floating-point data, the algorithm used is:  
+ *  
+ * <pre>  
+ *     pDst[n] = pSrc[n] * scale,   0 <= n < blockSize.  
+ * </pre>  
+ *  
+ * In the fixed-point Q7, Q15, and Q31 functions, <code>scale</code> is represented by  
+ * a fractional multiplication <code>scaleFract</code> and an arithmetic shift <code>shift</code>.  
+ * The shift allows the gain of the scaling operation to exceed 1.0.  
+ * The algorithm used with fixed-point data is:  
+ *  
+ * <pre>  
+ *     pDst[n] = (pSrc[n] * scaleFract) << shift,   0 <= n < blockSize.  
+ * </pre>  
+ *  
+ * The overall scale factor applied to the fixed-point data is  
+ * <pre>  
+ *     scale = scaleFract * 2^shift.  
+ * </pre>  
+ */ 
+ 
+/**  
+ * @addtogroup scale  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief Multiplies a floating-point vector by a scalar.  
+ * @param[in]       *pSrc points to the input vector  
+ * @param[in]       scale scale factor to be applied  
+ * @param[out]      *pDst points to the output vector  
+ * @param[in]       blockSize number of samples in the vector  
+ * @return none.  
+ */ 
+ 
+ 
+void arm_scale_f32(
+  float32_t * pSrc,
+  float32_t scale,
+  float32_t * pDst,
+  uint32_t blockSize)
+{
+  uint32_t quads = blockSize >> 2u;              /* number of 4-sample groups */
+  uint32_t tail = blockSize % 0x4u;              /* samples left after the groups */
+  float32_t sample;                              /* input sample being processed */
+
+  /* Multiplies each of the blockSize samples read from pSrc by 'scale'
+   * and writes the products to pDst. */
+
+  /* Main pass: unrolled by hand, four outputs per iteration. */
+  for(; quads > 0u; quads--)
+  {
+    /* C = A * scale, first sample of the group */
+    sample = *pSrc++;
+    *pDst++ = sample * scale;
+
+    /* C = A * scale, second sample of the group */
+    sample = *pSrc++;
+    *pDst++ = sample * scale;
+
+    /* C = A * scale, third sample of the group */
+    sample = *pSrc++;
+    *pDst++ = sample * scale;
+
+    /* C = A * scale, fourth sample of the group */
+    sample = *pSrc++;
+    *pDst++ = sample * scale;
+  }
+
+  /* Tail pass: the remaining 0 to 3 samples, one at a time. */
+  for(; tail > 0u; tail--)
+  {
+    /* C = A * scale */
+    *pDst++ = (*pSrc++) * scale;
+  }
+}
+ 
+/**  
+ * @} end of scale group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/BasicMathFunctions/arm_scale_q15.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,114 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_scale_q15.c  
+*  
+* Description:	Multiplies a Q15 vector by a scalar.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupMath  
+ */ 
+ 
+/**  
+ * @addtogroup scale  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief Multiplies a Q15 vector by a scalar.  
+ * @param[in]       *pSrc points to the input vector  
+ * @param[in]       scaleFract fractional portion of the scale value  
+ * @param[in]       shift number of bits to shift the result by  
+ * @param[out]      *pDst points to the output vector  
+ * @param[in]       blockSize number of samples in the vector  
+ * @return none.  
+ *  
+ * <b>Scaling and Overflow Behavior:</b>  
+ * \par  
+ * The input data <code>*pSrc</code> and <code>scaleFract</code> are in 1.15 format.  
+ * These are multiplied to yield a 2.30 intermediate result and this is shifted with saturation to 1.15 format.  
+ */ 
+ 
+ 
+void arm_scale_q15(
+  q15_t * pSrc,
+  q15_t scaleFract,
+  int8_t shift,
+  q15_t * pDst,
+  uint32_t blockSize)
+{
+  int8_t kShift = 15 - shift;                    /* right shift applied to each product */
+  uint32_t quads = blockSize >> 2u;              /* number of 4-sample groups */
+  uint32_t tail = blockSize % 0x4u;              /* samples left after the groups */
+  q15_t inLo, inHi;                              /* inputs for the low/high halfword */
+  q31_t outLo, outHi;                            /* saturated results for the pair */
+
+  /* Multiplies each of the blockSize samples read from pSrc by
+   * scaleFract, shifts the product right by kShift bits, saturates the
+   * result to 16 bits and writes it to pDst.  Because kShift is
+   * 15 - shift, a larger 'shift' yields a larger overall gain. */
+
+  /* Main pass: each iteration produces four outputs, stored as two
+   * packed pairs. */
+  for(; quads > 0u; quads--)
+  {
+    /* Samples 0 and 1 of the group */
+    inLo = *pSrc++;
+    inHi = *pSrc++;
+
+    /* C = A * scale */
+    outLo = __SSAT((inLo * scaleFract) >> kShift, 16);
+    outHi = __SSAT((inHi * scaleFract) >> kShift, 16);
+
+    /* Pack both results into one word and store them with one write */
+    *__SIMD32(pDst)++ = __PKHBT(outLo, outHi, 16);
+
+    /* Samples 2 and 3 of the group */
+    inLo = *pSrc++;
+    inHi = *pSrc++;
+
+    /* C = A * scale */
+    outLo = __SSAT((inLo * scaleFract) >> kShift, 16);
+    outHi = __SSAT((inHi * scaleFract) >> kShift, 16);
+
+    /* Pack both results into one word and store them with one write */
+    *__SIMD32(pDst)++ = __PKHBT(outLo, outHi, 16);
+  }
+
+  /* Tail pass: when blockSize is not a multiple of 4 the remaining
+   * 1 to 3 samples are processed one at a time. */
+  for(; tail > 0u; tail--)
+  {
+    /* C = A * scale, computed with the same expression as in the main
+     * pass but stored as a single q15_t. */
+    *pDst++ = (q15_t) (__SSAT(((*pSrc++) * scaleFract) >> kShift, 16));
+  }
+}
+ 
+/**  
+ * @} end of scale group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/BasicMathFunctions/arm_scale_q31.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,102 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_scale_q31.c  
+*  
+* Description:	Multiplies a Q31 vector by a scalar.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupMath  
+ */ 
+ 
+/**  
+ * @addtogroup scale  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief Multiplies a Q31 vector by a scalar.  
+ * @param[in]       *pSrc points to the input vector  
+ * @param[in]       scaleFract fractional portion of the scale value  
+ * @param[in]       shift number of bits to shift the result by  
+ * @param[out]      *pDst points to the output vector  
+ * @param[in]       blockSize number of samples in the vector  
+ * @return none.  
+ *  
+ * <b>Scaling and Overflow Behavior:</b>  
+ * \par  
+ * The input data <code>*pSrc</code> and <code>scaleFract</code> are in 1.31 format.  
+ * These are multiplied to yield a 2.62 intermediate result and this is shifted with saturation to 1.31 format.  
+ */ 
+ 
+void arm_scale_q31(
+  q31_t * pSrc,
+  q31_t scaleFract,
+  int8_t shift,
+  q31_t * pDst,
+  uint32_t blockSize)
+{
+  int8_t kShift = 31 - shift;                    /* right shift applied to each product */
+  uint32_t quads = blockSize >> 2u;              /* number of 4-sample groups */
+  uint32_t tail = blockSize % 0x4u;              /* samples left after the groups */
+  q63_t product;                                 /* 64-bit product of sample and scaleFract */
+
+  /* Multiplies each of the blockSize samples read from pSrc by
+   * scaleFract in 64-bit arithmetic, shifts the product right by kShift
+   * bits and writes the result, passed through clip_q63_to_q31, to pDst. */
+
+  /* Main pass: unrolled by hand, four outputs per iteration. */
+  for(; quads > 0u; quads--)
+  {
+    /* C = A * scale, first sample of the group */
+    product = (q63_t) * pSrc++ * scaleFract;
+    *pDst++ = clip_q63_to_q31(product >> kShift);
+
+    /* C = A * scale, second sample of the group */
+    product = (q63_t) * pSrc++ * scaleFract;
+    *pDst++ = clip_q63_to_q31(product >> kShift);
+
+    /* C = A * scale, third sample of the group */
+    product = (q63_t) * pSrc++ * scaleFract;
+    *pDst++ = clip_q63_to_q31(product >> kShift);
+
+    /* C = A * scale, fourth sample of the group */
+    product = (q63_t) * pSrc++ * scaleFract;
+    *pDst++ = clip_q63_to_q31(product >> kShift);
+  }
+
+  /* Tail pass: the remaining 0 to 3 samples, one at a time. */
+  for(; tail > 0u; tail--)
+  {
+    /* C = A * scale */
+    *pDst++ = clip_q63_to_q31(((q63_t) * pSrc++ * scaleFract) >> kShift);
+  }
+}
+ 
+/**  
+ * @} end of scale group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/BasicMathFunctions/arm_scale_q7.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,114 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_scale_q7.c  
+*  
+* Description:	Multiplies a Q7 vector by a scalar.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupMath  
+ */ 
+ 
+/**  
+ * @addtogroup scale  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief Multiplies a Q7 vector by a scalar.  
+ * @param[in]       *pSrc points to the input vector  
+ * @param[in]       scaleFract fractional portion of the scale value  
+ * @param[in]       shift number of bits to shift the result by  
+ * @param[out]      *pDst points to the output vector  
+ * @param[in]       blockSize number of samples in the vector  
+ * @return none.  
+ *  
+ * <b>Scaling and Overflow Behavior:</b>  
+ * \par  
+ * The input data <code>*pSrc</code> and <code>scaleFract</code> are in 1.7 format.  
+ * These are multiplied to yield a 2.14 intermediate result and this is shifted with saturation to 1.7 format.  
+ */ 
+ 
+void arm_scale_q7(
+  q7_t * pSrc,
+  q7_t scaleFract,
+  int8_t shift,
+  q7_t * pDst,
+  uint32_t blockSize)
+{
+  int8_t kShift = 7 - shift;                     /* right shift applied to each product */
+  uint32_t quads = blockSize >> 2u;              /* number of 4-sample groups */
+  uint32_t tail = blockSize % 0x4u;              /* samples left after the groups */
+  q7_t sample;                                   /* input sample being processed */
+  q7_t outA;                                     /* result for sample 0 of the group */
+  q7_t outB;                                     /* result for sample 1 of the group */
+  q7_t outC;                                     /* result for sample 2 of the group */
+  q7_t outD;                                     /* result for sample 3 of the group */
+
+  /* Multiplies each of the blockSize samples read from pSrc by
+   * scaleFract, shifts the product right by kShift bits, saturates the
+   * result to 8 bits and writes it to pDst.  Because kShift is
+   * 7 - shift, a larger 'shift' yields a larger overall gain. */
+
+  /* Main pass: each iteration produces four outputs, which are packed
+   * into one word and stored with a single write. */
+  for(; quads > 0u; quads--)
+  {
+    /* C = A * scale, sample 0 of the group */
+    sample = *pSrc++;
+    outA = (q7_t) (__SSAT(((sample) * scaleFract) >> kShift, 8));
+
+    /* C = A * scale, sample 1 of the group */
+    sample = *pSrc++;
+    outB = (q7_t) (__SSAT(((sample) * scaleFract) >> kShift, 8));
+
+    /* C = A * scale, sample 2 of the group */
+    sample = *pSrc++;
+    outC = (q7_t) (__SSAT(((sample) * scaleFract) >> kShift, 8));
+
+    /* C = A * scale, sample 3 of the group */
+    sample = *pSrc++;
+    outD = (q7_t) (__SSAT(((sample) * scaleFract) >> kShift, 8));
+
+    /* Pack the four individual results into one word and store them
+     * in the destination buffer with a single write */
+    *__SIMD32(pDst)++ = __PACKq7(outA, outB, outC, outD);
+  }
+
+  /* Tail pass: when blockSize is not a multiple of 4 the remaining
+   * 1 to 3 samples are processed one at a time. */
+  for(; tail > 0u; tail--)
+  {
+    /* C = A * scale, computed with the same expression as in the main
+     * pass but stored as a single q7_t. */
+    *pDst++ = (q7_t) (__SSAT(((*pSrc++) * scaleFract) >> kShift, 8));
+  }
+}
+ 
+/**  
+ * @} end of scale group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/BasicMathFunctions/arm_shift_q15.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,152 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_shift_q15.c  
+*  
+* Description:	Shifts elements of a Q15 vector a specified number of bits.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupMath  
+ */ 
+ 
+/**  
+ * @addtogroup shift  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief  Shifts the elements of a Q15 vector a specified number of bits.  
+ * @param  *pSrc points to the input vector  
+ * @param  shiftBits number of bits to shift.  A positive value shifts left; a negative value shifts right.  
+ * @param  *pDst points to the output vector  
+ * @param  blockSize number of samples in the vector  
+ * @return none.  
+ *  
+ * <b>Scaling and Overflow Behavior:</b>  
+ * \par  
+ * The function uses saturating arithmetic.  
+ * Results outside of the allowable Q15 range [0x8000 0x7FFF] will be saturated.  
+ */ 
+ 
+void arm_shift_q15(
+  q15_t * pSrc,
+  int8_t shiftBits,
+  q15_t * pDst,
+  uint32_t blockSize)
+{
+  uint32_t blkCnt;                               /* loop counter */
+  uint32_t shift;                                /* shift magnitude after clamping */
+  q15_t in1, in2;                                /* Temporary variables */
+
+  /* Shifts each of the blockSize samples read from pSrc by shiftBits and
+   * writes the result to pDst.  A positive shiftBits shifts left and
+   * saturates the result to 16 bits; a negative shiftBits shifts right
+   * by -shiftBits. */
+
+  /*loop Unrolling */
+  blkCnt = blockSize >> 2u;
+
+  /* A non-negative shift value selects the left shift, a negative one
+   * the right shift */
+  if(shiftBits >= 0)
+  {
+    /* Clamp the left shift to 16 bits.  Any non-zero Q15 sample already
+     * saturates when shifted left by 16, so the clamp does not alter a
+     * result; it keeps larger counts from pushing bits out of the 32-bit
+     * intermediate before __SSAT is applied, or from reaching its width,
+     * which is undefined behaviour in C. */
+    shift = (shiftBits > 16) ? 16u : (uint32_t) shiftBits;
+
+    /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
+     ** a second loop below computes the remaining 1 to 3 samples. */
+    for(; blkCnt > 0u; blkCnt--)
+    {
+      /* Read 2 inputs */
+      in1 = *pSrc++;
+      in2 = *pSrc++;
+      /* C = A << shiftBits */
+      /* Shift the inputs, saturate and store both results with one write. */
+      *__SIMD32(pDst)++ = __PKHBT(__SSAT((in1 << shift), 16),
+                                  __SSAT((in2 << shift), 16), 16);
+
+      in1 = *pSrc++;
+      in2 = *pSrc++;
+
+      *__SIMD32(pDst)++ = __PKHBT(__SSAT((in1 << shift), 16),
+                                  __SSAT((in2 << shift), 16), 16);
+    }
+
+    /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
+     ** No loop unrolling is used. */
+    for(blkCnt = blockSize % 0x4u; blkCnt > 0u; blkCnt--)
+    {
+      /* C = A << shiftBits */
+      /* Shift, saturate and then store the result in the destination buffer. */
+      *pDst++ = __SSAT((*pSrc++ << shift), 16);
+    }
+  }
+  else
+  {
+    /* Clamp the right shift to 15 bits.  Assuming an arithmetic right
+     * shift, 15 bits already reduce a Q15 sample to its sign (0 or -1),
+     * so the clamp does not alter a result; it keeps the count below the
+     * width of the promoted operand (shiftBits can be as low as -128). */
+    shift = (shiftBits < -15) ? 15u : (uint32_t) (-shiftBits);
+
+    /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
+     ** a second loop below computes the remaining 1 to 3 samples. */
+    for(; blkCnt > 0u; blkCnt--)
+    {
+      /* Read 2 inputs */
+      in1 = *pSrc++;
+      in2 = *pSrc++;
+      /* C = A >> -shiftBits */
+      /* Shift the inputs and store both results with one write. */
+      *__SIMD32(pDst)++ = __PKHBT((in1 >> shift),
+                                  (in2 >> shift), 16);
+
+      in1 = *pSrc++;
+      in2 = *pSrc++;
+
+      *__SIMD32(pDst)++ = __PKHBT((in1 >> shift),
+                                  (in2 >> shift), 16);
+    }
+
+    /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
+     ** No loop unrolling is used. */
+    for(blkCnt = blockSize % 0x4u; blkCnt > 0u; blkCnt--)
+    {
+      /* C = A >> -shiftBits */
+      /* Shift the input and then store the result in the destination buffer. */
+      *pDst++ = (*pSrc++ >> shift);
+    }
+  }
+}
+ 
+/**  
+ * @} end of shift group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/BasicMathFunctions/arm_shift_q31.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,124 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_shift_q31.c  
+*  
+* Description:	Shifts the elements of a Q31 vector  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupMath  
+ */ 
+/**  
+ * @defgroup shift Vector Shift  
+ *  
+ * Shifts the elements of a fixed-point vector by a specified number of bits.  
+ * There are separate functions for Q7, Q15, and Q31 data types.  
+ * The underlying algorithm used is:  
+ *  
+ * <pre>  
+ *     pDst[n] = pSrc[n] << shift,   0 <= n < blockSize.  
+ * </pre>  
+ *  
+ * If <code>shift</code> is positive then the elements of the vector are shifted to the left.  
+ * If <code>shift</code> is negative then the elements of the vector are shifted to the right.  
+ */ 
+ 
+/**  
+ * @addtogroup shift  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief  Shifts the elements of a Q31 vector a specified number of bits.  
+ * @param  *pSrc points to the input vector  
+ * @param  shiftBits number of bits to shift.  A positive value shifts left; a negative value shifts right.  
+ * @param  *pDst points to the output vector  
+ * @param  blockSize number of samples in the vector  
+ * @return none.  
+ *  
+ *  
+ * <b>Scaling and Overflow Behavior:</b>  
+ * \par  
+ * The function uses saturating arithmetic.  
+ * Results outside of the allowable Q31 range [0x80000000 0x7FFFFFFF] will be saturated.  
+ */ 
+ 
+void arm_shift_q31(
+  q31_t * pSrc,
+  int8_t shiftBits,
+  q31_t * pDst,
+  uint32_t blockSize)
+{
+  uint32_t blkCnt;                               /* loop counter */
+  uint32_t shift;                                /* shift magnitude after clamping */
+
+  /* Shifts each of the blockSize samples read from pSrc by shiftBits and
+   * writes the result to pDst.  A positive shiftBits shifts left and
+   * saturates the result to 32 bits; a negative shiftBits shifts right
+   * by -shiftBits. */
+
+  /*loop Unrolling */
+  blkCnt = blockSize >> 2u;
+
+  /* Clamp the shift magnitude.  A non-zero Q31 sample shifted left by 32
+   * already saturates, and a right shift by 31 (assuming it is arithmetic)
+   * already leaves only the sign, so neither clamp alters a result; they
+   * keep larger counts from overflowing the 64-bit intermediate or from
+   * reaching the operand width, which is undefined behaviour in C. */
+  shift = (shiftBits >= 0) ? ((shiftBits > 32) ? 32u : (uint32_t) shiftBits) :
+                             ((shiftBits < -31) ? 31u : (uint32_t) (-shiftBits));
+
+  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
+   ** a second loop below computes the remaining 1 to 3 samples. */
+  for(; blkCnt > 0u; blkCnt--)
+  {
+    /* C = A (>> or <<) shiftBits */
+    /* Shift the input and then store the results in the destination buffer. */
+    *pDst++ = (shiftBits >= 0) ? clip_q63_to_q31((q63_t) * pSrc++ << shift) :
+                                 (*pSrc++ >> shift);
+    *pDst++ = (shiftBits >= 0) ? clip_q63_to_q31((q63_t) * pSrc++ << shift) :
+                                 (*pSrc++ >> shift);
+    *pDst++ = (shiftBits >= 0) ? clip_q63_to_q31((q63_t) * pSrc++ << shift) :
+                                 (*pSrc++ >> shift);
+    *pDst++ = (shiftBits >= 0) ? clip_q63_to_q31((q63_t) * pSrc++ << shift) :
+                                 (*pSrc++ >> shift);
+  }
+
+  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
+   ** No loop unrolling is used. */
+  for(blkCnt = blockSize % 0x4u; blkCnt > 0u; blkCnt--)
+  {
+    /* C = A (>> or <<) shiftBits */
+    /* Shift the input and then store the result in the destination buffer. */
+    *pDst++ = (shiftBits >= 0) ? clip_q63_to_q31((q63_t) * pSrc++ << shift) :
+                                 (*pSrc++ >> shift);
+  }
+}
+ 
+/**  
+ * @} end of shift group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/BasicMathFunctions/arm_shift_q7.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,153 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_shift_q7.c  
+*  
+* Description:	Processing function for the Q7 Shifting  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupMath  
+ */ 
+ 
+/**  
+ * @addtogroup shift  
+ * @{  
+ */ 
+ 
+ 
+/**  
+ * @brief  Shifts the elements of a Q7 vector a specified number of bits.  
+ * @param  *pSrc points to the input vector  
+ * @param  shiftBits number of bits to shift.  A positive value shifts left; a negative value shifts right.  
+ * @param  *pDst points to the output vector  
+ * @param  blockSize number of samples in the vector  
+ * @return none.  
+ *  
+ * <b>Scaling and Overflow Behavior:</b>  
+ * \par  
+ * The function uses saturating arithmetic.  
+ * Results outside of the allowable Q7 range [0x80 0x7F] will be saturated.  
+ */ 
+ 
+void arm_shift_q7(
+  q7_t * pSrc,
+  int8_t shiftBits,
+  q7_t * pDst,
+  uint32_t blockSize)
+{
+  uint32_t blkCnt;                               /* loop counter */
+  uint32_t shift;                                /* shift magnitude after clamping */
+  q7_t in1;                                      /* Input value1 */
+  q7_t in2;                                      /* Input value2 */
+  q7_t in3;                                      /* Input value3 */
+  q7_t in4;                                      /* Input value4 */
+
+  /* Shifts each of the blockSize samples read from pSrc by shiftBits and
+   * writes the result to pDst.  A positive shiftBits shifts left and
+   * saturates the result to 8 bits; a negative shiftBits shifts right
+   * by -shiftBits. */
+
+  /*loop Unrolling */
+  blkCnt = blockSize >> 2u;
+
+  /* A non-negative shift value selects the left shift, a negative one
+   * the right shift */
+  if(shiftBits >= 0)
+  {
+    /* Clamp the left shift to 8 bits.  Any non-zero Q7 sample already
+     * saturates when shifted left by 8, so the clamp does not alter a
+     * result; it keeps larger counts from pushing bits out of the 32-bit
+     * intermediate before __SSAT is applied, or from reaching its width,
+     * which is undefined behaviour in C. */
+    shift = (shiftBits > 8) ? 8u : (uint32_t) shiftBits;
+
+    /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
+     ** a second loop below computes the remaining 1 to 3 samples. */
+    for(; blkCnt > 0u; blkCnt--)
+    {
+      /* C = A << shiftBits */
+      /* Read 4 inputs */
+      in1 = *pSrc++;
+      in2 = *pSrc++;
+      in3 = *pSrc++;
+      in4 = *pSrc++;
+
+      /* Shift and saturate, then store all four results with one write by packing them */
+      *__SIMD32(pDst)++ = __PACKq7(__SSAT((in1 << shift), 8),
+                                   __SSAT((in2 << shift), 8),
+                                   __SSAT((in3 << shift), 8),
+                                   __SSAT((in4 << shift), 8));
+    }
+
+    /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
+     ** No loop unrolling is used. */
+    for(blkCnt = blockSize % 0x4u; blkCnt > 0u; blkCnt--)
+    {
+      /* C = A << shiftBits */
+      /* Shift, saturate and then store the result in the destination buffer. */
+      *pDst++ = (q7_t) __SSAT((*pSrc++ << shift), 8);
+    }
+  }
+  else
+  {
+    /* Clamp the right shift to 7 bits.  Assuming an arithmetic right
+     * shift, 7 bits already reduce a Q7 sample to its sign (0 or -1),
+     * so the clamp does not alter a result; it keeps the count below
+     * the width of the promoted operand (shiftBits can be as low as
+     * -128). */
+    shift = (shiftBits < -7) ? 7u : (uint32_t) (-shiftBits);
+
+    /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
+     ** a second loop below computes the remaining 1 to 3 samples. */
+    for(; blkCnt > 0u; blkCnt--)
+    {
+      /* C = A >> -shiftBits */
+      /* Read 4 inputs */
+      in1 = *pSrc++;
+      in2 = *pSrc++;
+      in3 = *pSrc++;
+      in4 = *pSrc++;
+
+      /* Shift, then store all four results with one write by packing them */
+      *__SIMD32(pDst)++ = __PACKq7((in1 >> shift), (in2 >> shift),
+                                   (in3 >> shift), (in4 >> shift));
+    }
+
+    /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
+     ** No loop unrolling is used. */
+    for(blkCnt = blockSize % 0x4u; blkCnt > 0u; blkCnt--)
+    {
+      /* C = A >> -shiftBits */
+      /* Shift the input and then store the result in the destination buffer. */
+      *pDst++ = (*pSrc++ >> shift);
+    }
+  }
+}
+ 
+/**  
+ * @} end of shift group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/BasicMathFunctions/arm_sub_f32.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,106 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_sub_f32.c  
+*  
+* Description:	Floating-point vector subtraction.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* ---------------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupMath  
+ */ 
+ 
+/**  
+ * @defgroup BasicSub Vector Subtraction  
+ *  
+ * Element-by-element subtraction of two vectors.  
+ *  
+ * <pre>  
+ *     pDst[n] = pSrcA[n] - pSrcB[n],   0 <= n < blockSize.  
+ * </pre>  
+ *  
+ * There are separate functions for floating-point, Q7, Q15, and Q31 data types.  
+ */ 
+ 
+/**  
+ * @addtogroup BasicSub  
+ * @{  
+ */ 
+ 
+ 
+/**  
+ * @brief Floating-point vector subtraction.  
+ * @param[in]       *pSrcA points to the first input vector  
+ * @param[in]       *pSrcB points to the second input vector  
+ * @param[out]      *pDst points to the output vector  
+ * @param[in]       blockSize number of samples in each vector  
+ * @return none.  
+ */ 
+ 
+void arm_sub_f32(
+  float32_t * pSrcA,
+  float32_t * pSrcB,
+  float32_t * pDst,
+  uint32_t blockSize)
+{
+  /* Writes pDst[n] = pSrcA[n] - pSrcB[n] for n = 0 .. blockSize-1. */
+
+  uint32_t quads = blockSize >> 2u;              /* count of complete 4-sample groups */
+  uint32_t tail = blockSize % 0x4u;              /* 0 to 3 samples left after the groups */
+
+  /* Unrolled section: each pass produces four differences, addressed by
+   ** offset from the current positions, then moves all three pointers on. */
+  for(; quads > 0u; quads--)
+  {
+    /* C = A - B */
+    pDst[0] = pSrcA[0] - pSrcB[0];
+    pDst[1] = pSrcA[1] - pSrcB[1];
+    pDst[2] = pSrcA[2] - pSrcB[2];
+    pDst[3] = pSrcA[3] - pSrcB[3];
+
+    /* Step past the group just processed */
+    pSrcA += 4;
+    pSrcB += 4;
+    pDst += 4;
+  }
+
+  /* Remainder section: when blockSize is not a multiple of 4 the last
+   ** samples are handled one at a time. */
+  for(; tail > 0u; tail--)
+  {
+    /* C = A - B */
+    *pDst = *pSrcA - *pSrcB;
+
+    /* Step to the next sample */
+    pSrcA++;
+    pSrcB++;
+    pDst++;
+  }
+}
+ 
+/**  
+ * @} end of BasicSub group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/BasicMathFunctions/arm_sub_q15.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,98 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_sub_q15.c  
+*  
+* Description:	Q15 vector subtraction.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupMath  
+ */ 
+ 
+/**  
+ * @addtogroup BasicSub  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief Q15 vector subtraction.  
+ * @param[in]       *pSrcA points to the first input vector  
+ * @param[in]       *pSrcB points to the second input vector  
+ * @param[out]      *pDst points to the output vector  
+ * @param[in]       blockSize number of samples in each vector  
+ * @return none.  
+ *  
+ * <b>Scaling and Overflow Behavior:</b>  
+ * \par  
+ * The function uses saturating arithmetic.  
+ * Results outside of the allowable Q15 range [0x8000 0x7FFF] will be saturated.  
+ */ 
+ 
+void arm_sub_q15(
+  q15_t * pSrcA,
+  q15_t * pSrcB,
+  q15_t * pDst,
+  uint32_t blockSize)
+{
+  /* Writes the saturated difference pSrcA[n] - pSrcB[n] to pDst[n]
+   ** for n = 0 .. blockSize-1. */
+
+  uint32_t quads = blockSize >> 2u;              /* count of complete 4-sample groups */
+  uint32_t tail = blockSize % 0x4u;              /* 0 to 3 samples left after the groups */
+
+  /* Unrolled section: every pass covers four samples.  Each of the two
+   ** statements reads, subtracts and stores two samples at a time through
+   ** __SIMD32 and __QSUB16. */
+  for(; quads > 0u; quads--)
+  {
+    /* C = A - B for the first two samples of the group */
+    *__SIMD32(pDst)++ =
+      __QSUB16(*__SIMD32(pSrcA)++, *__SIMD32(pSrcB)++);
+
+    /* C = A - B for the last two samples of the group */
+    *__SIMD32(pDst)++ =
+      __QSUB16(*__SIMD32(pSrcA)++, *__SIMD32(pSrcB)++);
+  }
+
+  /* Remainder section: when blockSize is not a multiple of 4 the last
+   ** samples are handled one at a time. */
+  for(; tail > 0u; tail--)
+  {
+    /* C = A - B, result converted back to q15_t */
+    *pDst = (q15_t) __QSUB16(*pSrcA, *pSrcB);
+
+    /* Step to the next sample */
+    pSrcA++;
+    pSrcB++;
+    pDst++;
+  }
+
+}
+ 
+/**  
+ * @} end of BasicSub group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/BasicMathFunctions/arm_sub_q31.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,99 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_sub_q31.c  
+*  
+* Description:	Q31 vector subtraction.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupMath  
+ */ 
+ 
+/**  
+ * @addtogroup BasicSub  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief Q31 vector subtraction.  
+ * @param[in]       *pSrcA points to the first input vector  
+ * @param[in]       *pSrcB points to the second input vector  
+ * @param[out]      *pDst points to the output vector  
+ * @param[in]       blockSize number of samples in each vector  
+ * @return none.  
+ *  
+ * <b>Scaling and Overflow Behavior:</b>  
+ * \par  
+ * The function uses saturating arithmetic.  
+ * Results outside of the allowable Q31 range [0x80000000 0x7FFFFFFF] will be saturated.  
+ */ 
+ 
+void arm_sub_q31(
+  q31_t * pSrcA,
+  q31_t * pSrcB,
+  q31_t * pDst,
+  uint32_t blockSize)
+{
+  /* Writes the saturated difference pSrcA[n] - pSrcB[n] to pDst[n]
+   ** for n = 0 .. blockSize-1. */
+
+  uint32_t quads = blockSize >> 2u;              /* count of complete 4-sample groups */
+  uint32_t tail = blockSize % 0x4u;              /* 0 to 3 samples left after the groups */
+
+  /* Unrolled section: each pass produces four differences, addressed by
+   ** offset from the current positions, then moves all three pointers on. */
+  for(; quads > 0u; quads--)
+  {
+    /* C = A - B */
+    pDst[0] = __QSUB(pSrcA[0], pSrcB[0]);
+    pDst[1] = __QSUB(pSrcA[1], pSrcB[1]);
+    pDst[2] = __QSUB(pSrcA[2], pSrcB[2]);
+    pDst[3] = __QSUB(pSrcA[3], pSrcB[3]);
+
+    /* Step past the group just processed */
+    pSrcA += 4;
+    pSrcB += 4;
+    pDst += 4;
+  }
+
+  /* Remainder section: when blockSize is not a multiple of 4 the last
+   ** samples are handled one at a time. */
+  for(; tail > 0u; tail--)
+  {
+    /* C = A - B */
+    *pDst = __QSUB(*pSrcA, *pSrcB);
+
+    /* Step to the next sample */
+    pSrcA++;
+    pSrcB++;
+    pDst++;
+  }
+}
+ 
+/**  
+ * @} end of BasicSub group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/BasicMathFunctions/arm_sub_q7.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,97 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_sub_q7.c  
+*  
+* Description:	Q7 vector subtraction.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupMath  
+ */ 
+ 
+/**  
+ * @addtogroup BasicSub  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief Q7 vector subtraction.  
+ * @param[in]       *pSrcA points to the first input vector  
+ * @param[in]       *pSrcB points to the second input vector  
+ * @param[out]      *pDst points to the output vector  
+ * @param[in]       blockSize number of samples in each vector  
+ * @return none.  
+ *  
+ * <b>Scaling and Overflow Behavior:</b>  
+ * \par  
+ * The function uses saturating arithmetic.  
+ * Results outside of the allowable Q7 range [0x80 0x7F] will be saturated.  
+ */ 
+ 
+void arm_sub_q7(
+  q7_t * pSrcA,
+  q7_t * pSrcB,
+  q7_t * pDst,
+  uint32_t blockSize)
+{
+  /* Writes the saturated difference pSrcA[n] - pSrcB[n] to pDst[n]
+   ** for n = 0 .. blockSize-1. */
+
+  uint32_t quads;                                /* count of complete 4-sample groups */
+  uint32_t tail;                                 /* 0 to 3 samples left after the groups */
+
+  /* Split the block into whole groups of four plus a remainder */
+  quads = blockSize >> 2u;
+  tail = blockSize % 0x4u;
+
+  /* Unrolled section: one statement per pass reads, subtracts and stores
+   ** 4 samples at a time through __SIMD32 and __QSUB8. */
+  for(; quads > 0u; quads--)
+  {
+    /* C = A - B for the whole group */
+    *__SIMD32(pDst)++ =
+      __QSUB8(*__SIMD32(pSrcA)++, *__SIMD32(pSrcB)++);
+  }
+
+  /* Remainder section: when blockSize is not a multiple of 4 the last
+   ** samples are handled one at a time. */
+  for(; tail > 0u; tail--)
+  {
+    /* C = A - B, passed through __SSAT with a width of 8 bits */
+    *pDst = __SSAT(*pSrcA - *pSrcB, 8);
+
+    /* Step to the next sample */
+    pSrcA++;
+    pSrcB++;
+    pDst++;
+  }
+
+}
+ 
+/**  
+ * @} end of BasicSub group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/CommonTables/arm_common_tables.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,142 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:	    arm_common_tables.c  
+*  
+* Description:	This file has common tables like Bitreverse, reciprocal etc which are used across different functions  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+* -------------------------------------------------------------------- */ 
+ 
+ 
+#include "arm_math.h" 
+#include "arm_common_tables.h"
+
+/**  
+ * @ingroup groupTransforms  
+ */ 
+ 
+/**  
+ * @addtogroup CFFT_CIFFT  
+ * @{  
+ */ 
+ 
+/**  
+* \par  
+* Pseudo code for Generation of Bit reversal Table is  
+* \par  
+* <pre>for(l=1;l <= N/4;l++)  
+* {  
+*   for(i=0;i<logN2;i++)  
+*   {   
+*     a[i]=l&(1<<i);  
+*   }  
+*   for(j=0; j<logN2; j++)  
+*   {  
+*     if (a[j]!=0)  
+*     y[l]+=(1<<((logN2-1)-j));  
+*   }  
+*   y[l] = y[l] >> 1;  
+*  } </pre>  
+* \par  
+* where N = 1024	logN2 = 10  
+* \par  
+* N is the maximum FFT Size supported  
+*/ 
+ 
+/*  
+* @brief  Table for bit reversal process: per the pseudo code above, entry k (0-based) is the 10-bit reversal of (k + 1) shifted right by one bit
+*/ 
+const uint16_t armBitRevTable[256] = { 
+  0x100, 0x80, 0x180, 0x40, 0x140, 0xc0, 0x1c0, 
+  0x20, 0x120, 0xa0, 0x1a0, 0x60, 0x160, 0xe0, 
+  0x1e0, 0x10, 0x110, 0x90, 0x190, 0x50, 0x150, 
+  0xd0, 0x1d0, 0x30, 0x130, 0xb0, 0x1b0, 0x70, 
+  0x170, 0xf0, 0x1f0, 0x8, 0x108, 0x88, 0x188, 
+  0x48, 0x148, 0xc8, 0x1c8, 0x28, 0x128, 0xa8, 
+  0x1a8, 0x68, 0x168, 0xe8, 0x1e8, 0x18, 0x118, 
+  0x98, 0x198, 0x58, 0x158, 0xd8, 0x1d8, 0x38, 
+  0x138, 0xb8, 0x1b8, 0x78, 0x178, 0xf8, 0x1f8, 
+  0x4, 0x104, 0x84, 0x184, 0x44, 0x144, 0xc4, 
+  0x1c4, 0x24, 0x124, 0xa4, 0x1a4, 0x64, 0x164, 
+  0xe4, 0x1e4, 0x14, 0x114, 0x94, 0x194, 0x54, 
+  0x154, 0xd4, 0x1d4, 0x34, 0x134, 0xb4, 0x1b4, 
+  0x74, 0x174, 0xf4, 0x1f4, 0xc, 0x10c, 0x8c, 
+  0x18c, 0x4c, 0x14c, 0xcc, 0x1cc, 0x2c, 0x12c, 
+  0xac, 0x1ac, 0x6c, 0x16c, 0xec, 0x1ec, 0x1c, 
+  0x11c, 0x9c, 0x19c, 0x5c, 0x15c, 0xdc, 0x1dc, 
+  0x3c, 0x13c, 0xbc, 0x1bc, 0x7c, 0x17c, 0xfc, 
+  0x1fc, 0x2, 0x102, 0x82, 0x182, 0x42, 0x142, 
+  0xc2, 0x1c2, 0x22, 0x122, 0xa2, 0x1a2, 0x62, 
+  0x162, 0xe2, 0x1e2, 0x12, 0x112, 0x92, 0x192, 
+  0x52, 0x152, 0xd2, 0x1d2, 0x32, 0x132, 0xb2, 
+  0x1b2, 0x72, 0x172, 0xf2, 0x1f2, 0xa, 0x10a, 
+  0x8a, 0x18a, 0x4a, 0x14a, 0xca, 0x1ca, 0x2a, 
+  0x12a, 0xaa, 0x1aa, 0x6a, 0x16a, 0xea, 0x1ea, 
+  0x1a, 0x11a, 0x9a, 0x19a, 0x5a, 0x15a, 0xda, 
+  0x1da, 0x3a, 0x13a, 0xba, 0x1ba, 0x7a, 0x17a, 
+  0xfa, 0x1fa, 0x6, 0x106, 0x86, 0x186, 0x46, 
+  0x146, 0xc6, 0x1c6, 0x26, 0x126, 0xa6, 0x1a6, 
+  0x66, 0x166, 0xe6, 0x1e6, 0x16, 0x116, 0x96, 
+  0x196, 0x56, 0x156, 0xd6, 0x1d6, 0x36, 0x136, 
+  0xb6, 0x1b6, 0x76, 0x176, 0xf6, 0x1f6, 0xe, 
+  0x10e, 0x8e, 0x18e, 0x4e, 0x14e, 0xce, 0x1ce, 
+  0x2e, 0x12e, 0xae, 0x1ae, 0x6e, 0x16e, 0xee, 
+  0x1ee, 0x1e, 0x11e, 0x9e, 0x19e, 0x5e, 0x15e, 
+  0xde, 0x1de, 0x3e, 0x13e, 0xbe, 0x1be, 0x7e, 
+  0x17e, 0xfe, 0x1fe, 0x1 
+}; 
+ 
+/**  
+ * @} end of CFFT_CIFFT group  
+ */ 
+ 
+/*  
+* @brief  Q15 table for reciprocal: 64 entries in decreasing order from 0x7F03 down to 0x4040 (NOTE(review): how callers index it is not visible in this file)
+*/ 
+const q15_t armRecipTableQ15[64] = { 
+  0x7F03, 0x7D13, 0x7B31, 0x795E, 0x7798, 0x75E0, 
+  0x7434, 0x7294, 0x70FF, 0x6F76, 0x6DF6, 0x6C82, 
+  0x6B16, 0x69B5, 0x685C, 0x670C, 0x65C4, 0x6484, 
+  0x634C, 0x621C, 0x60F3, 0x5FD0, 0x5EB5, 0x5DA0, 
+  0x5C91, 0x5B88, 0x5A85, 0x5988, 0x5890, 0x579E, 
+  0x56B0, 0x55C8, 0x54E4, 0x5405, 0x532B, 0x5255, 
+  0x5183, 0x50B6, 0x4FEC, 0x4F26, 0x4E64, 0x4DA6, 
+  0x4CEC, 0x4C34, 0x4B81, 0x4AD0, 0x4A23, 0x4978, 
+  0x48D1, 0x482D, 0x478C, 0x46ED, 0x4651, 0x45B8, 
+  0x4521, 0x448D, 0x43FC, 0x436C, 0x42DF, 0x4255, 
+  0x41CC, 0x4146, 0x40C2, 0x4040 
+}; 
+ 
+/*  
+* @brief  Q31 table for reciprocal: 64 entries in decreasing order from 0x7F03F03F down to 0x40408102 (NOTE(review): how callers index it is not visible in this file)
+*/ 
+const q31_t armRecipTableQ31[64] = { 
+  0x7F03F03F, 0x7D137420, 0x7B31E739, 0x795E9F94, 0x7798FD29, 0x75E06928, 
+  0x7434554D, 0x72943B4B, 0x70FF9C40, 0x6F760031, 0x6DF6F593, 0x6C8210E3, 
+  0x6B16EC3A, 0x69B526F6, 0x685C655F, 0x670C505D, 0x65C4952D, 0x6484E519, 
+  0x634CF53E, 0x621C7E4F, 0x60F33C61, 0x5FD0EEB3, 0x5EB55785, 0x5DA03BEB, 
+  0x5C9163A1, 0x5B8898E6, 0x5A85A85A, 0x598860DF, 0x58909373, 0x579E1318, 
+  0x56B0B4B8, 0x55C84F0B, 0x54E4BA80, 0x5405D124, 0x532B6E8F, 0x52556FD0, 
+  0x5183B35A, 0x50B618F3, 0x4FEC81A2, 0x4F26CFA2, 0x4E64E64E, 0x4DA6AA1D, 
+  0x4CEC008B, 0x4C34D010, 0x4B810016, 0x4AD078EF, 0x4A2323C4, 0x4978EA96, 
+  0x48D1B827, 0x482D77FE, 0x478C1657, 0x46ED801D, 0x4651A2E5, 0x45B86CE2, 
+  0x4521CCE1, 0x448DB244, 0x43FC0CFA, 0x436CCD78, 0x42DFE4B4, 0x42554426, 
+  0x41CCDDB6, 0x4146A3C6, 0x40C28923, 0x40408102 
+};

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/ComplexMathFunctions/arm_cmplx_conj_f32.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,116 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_cmplx_conj_f32.c  
+*  
+* Description:	Floating-point complex conjugate.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+* ---------------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupCmplxMath  
+ */ 
+ 
+/**  
+ * @defgroup cmplx_conj Complex Conjugate  
+ *  
+ * Conjugates the elements of a complex data vector.  
+ * 
+ * <code>pSrc</code> points to the source data and
+ * <code>pDst</code> points to where the result should be written.
+ * <code>numSamples</code> specifies the number of complex samples  
+ * and the data in each array is stored in an interleaved fashion  
+ * (real, imag, real, imag, ...).  
+ * Each array has a total of <code>2*numSamples</code> values.  
+ * The following algorithm is used:
+ *  
+ * <pre>  
+ * for(n=0; n<numSamples; n++) {  
+ *     pDst[(2*n)+0] = pSrc[(2*n)+0];     // real part
+ *     pDst[(2*n)+1] = -pSrc[(2*n)+1];    // imag part
+ * }  
+ * </pre>  
+ *  
+ * There are separate functions for floating-point, Q15, and Q31 data types.  
+ */ 
+ 
+/**  
+ * @addtogroup cmplx_conj  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief  Floating-point complex conjugate.  
+ * @param  *pSrc points to the input vector  
+ * @param  *pDst points to the output vector  
+ * @param  numSamples number of complex samples in each vector  
+ * @return none.  
+ */ 
+ 
+void arm_cmplx_conj_f32(
+  float32_t * pSrc,
+  float32_t * pDst,
+  uint32_t numSamples)
+{
+  /* Copies each real part unchanged and negates each imaginary part of the
+   ** interleaved (real, imag) input. */
+
+  uint32_t quads = numSamples >> 2u;             /* count of complete 4-sample groups */
+  uint32_t tail = numSamples % 0x4u;             /* 0 to 3 complex samples left over */
+
+  /* Unrolled section: each pass conjugates four complex samples, i.e. eight
+   ** consecutive values addressed by offset, then moves both pointers past
+   ** them.  Even offsets are real parts, odd offsets imaginary parts. */
+  for(; quads > 0u; quads--)
+  {
+    /* C[0]+jC[1] = A[0]+ j (-1) A[1] */
+    pDst[0] = pSrc[0];
+    pDst[1] = -pSrc[1];
+    pDst[2] = pSrc[2];
+    pDst[3] = -pSrc[3];
+    pDst[4] = pSrc[4];
+    pDst[5] = -pSrc[5];
+    pDst[6] = pSrc[6];
+    pDst[7] = -pSrc[7];
+
+    /* Step past the four complex samples just processed */
+    pSrc += 8;
+    pDst += 8;
+  }
+
+  /* Remainder section: when numSamples is not a multiple of 4 the last
+   ** complex samples are handled one at a time. */
+  for(; tail > 0u; tail--)
+  {
+    /* C[0]+jC[1] = A[0]+ j (-1) A[1] */
+    pDst[0] = pSrc[0];
+    pDst[1] = -pSrc[1];
+
+    /* Step to the next complex sample */
+    pSrc += 2;
+    pDst += 2;
+  }
+}
+ 
+/**  
+ * @} end of cmplx_conj group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/ComplexMathFunctions/arm_cmplx_conj_q15.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,98 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_cmplx_conj_q15.c  
+*  
+* Description:	Q15 complex conjugate.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+* ---------------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupCmplxMath  
+ */ 
+ 
+/**  
+ * @addtogroup cmplx_conj  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief  Q15 complex conjugate.  
+ * @param  *pSrc points to the input vector  
+ * @param  *pDst points to the output vector  
+ * @param  numSamples number of complex samples in each vector  
+ * @return none.  
+ *  
+ * <b>Scaling and Overflow Behavior:</b>  
+ * \par  
+ * The function uses saturating arithmetic.  
+ * The Q15 value -1 (0x8000) will be saturated to the maximum allowable positive value 0x7FFF.  
+ */ 
+ 
+void arm_cmplx_conj_q15(
+  q15_t * pSrc,
+  q15_t * pDst,
+  uint32_t numSamples)
+{
+  /* Copies each real part unchanged and stores the saturated negation of
+   ** each imaginary part of the interleaved (real, imag) input. */
+
+  uint32_t quads = numSamples >> 2u;             /* count of complete 4-sample groups */
+  uint32_t tail = numSamples % 0x4u;             /* 0 to 3 complex samples left over */
+
+  /* Unrolled section: each pass conjugates four complex samples, i.e. eight
+   ** consecutive values addressed by offset, then moves both pointers past
+   ** them.  Even offsets are real parts, odd offsets imaginary parts. */
+  for(; quads > 0u; quads--)
+  {
+    /* C[0]+jC[1] = A[0]+ j (-1) A[1] */
+    pDst[0] = pSrc[0];
+    pDst[1] = __SSAT(-pSrc[1], 16);
+    pDst[2] = pSrc[2];
+    pDst[3] = __SSAT(-pSrc[3], 16);
+    pDst[4] = pSrc[4];
+    pDst[5] = __SSAT(-pSrc[5], 16);
+    pDst[6] = pSrc[6];
+    pDst[7] = __SSAT(-pSrc[7], 16);
+
+    /* Step past the four complex samples just processed */
+    pSrc += 8;
+    pDst += 8;
+  }
+
+  /* Remainder section: when numSamples is not a multiple of 4 the last
+   ** complex samples are handled one at a time. */
+  for(; tail > 0u; tail--)
+  {
+    /* C[0]+jC[1] = A[0]+ j (-1) A[1] */
+    pDst[0] = pSrc[0];
+    pDst[1] = __SSAT(-pSrc[1], 16);
+
+    /* Step to the next complex sample */
+    pSrc += 2;
+    pDst += 2;
+  }
+}
+ 
+/**  
+ * @} end of cmplx_conj group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/ComplexMathFunctions/arm_cmplx_conj_q31.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,106 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_cmplx_conj_q31.c  
+*  
+* Description:	Q31 complex conjugate.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+* ---------------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupCmplxMath  
+ */ 
+ 
+/**  
+ * @addtogroup cmplx_conj  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief  Q31 complex conjugate.  
+ * @param  *pSrc points to the input vector  
+ * @param  *pDst points to the output vector  
+ * @param  numSamples number of complex samples in each vector  
+ * @return none.  
+ *  
+ * <b>Scaling and Overflow Behavior:</b>  
+ * \par  
+ * The function uses saturating arithmetic.  
+ * The Q31 value -1 (0x80000000) will be saturated to the maximum allowable positive value 0x7FFFFFFF.  
+ */ 
+ 
+void arm_cmplx_conj_q31(
+  q31_t * pSrc,
+  q31_t * pDst,
+  uint32_t numSamples)
+{
+  /* Copies each real part unchanged and stores the saturated negation of
+   ** each imaginary part of the interleaved (real, imag) input. */
+
+  uint32_t quads = numSamples >> 2u;             /* count of complete 4-sample groups */
+  uint32_t tail = numSamples % 0x4u;             /* 0 to 3 complex samples left over */
+  q31_t in;                                      /* imaginary part being negated */
+
+  /* Unrolled section: each pass conjugates four complex samples, i.e. eight
+   ** consecutive values addressed by offset, then moves both pointers past
+   ** them.  Even offsets are real parts, odd offsets imaginary parts. */
+  for(; quads > 0u; quads--)
+  {
+    /* C[0]+jC[1] = A[0]+ j (-1) A[1] */
+    /* An imaginary part of 0x80000000 is stored as 0x7fffffff, not negated */
+    pDst[0] = pSrc[0];
+    in = pSrc[1];
+    pDst[1] = (in != 0x80000000) ? -in : 0x7fffffff;
+    pDst[2] = pSrc[2];
+    in = pSrc[3];
+    pDst[3] = (in != 0x80000000) ? -in : 0x7fffffff;
+    pDst[4] = pSrc[4];
+    in = pSrc[5];
+    pDst[5] = (in != 0x80000000) ? -in : 0x7fffffff;
+    pDst[6] = pSrc[6];
+    in = pSrc[7];
+    pDst[7] = (in != 0x80000000) ? -in : 0x7fffffff;
+
+    /* Step past the four complex samples just processed */
+    pSrc += 8;
+    pDst += 8;
+  }
+
+  /* Remainder section: when numSamples is not a multiple of 4 the last
+   ** complex samples are handled one at a time. */
+  for(; tail > 0u; tail--)
+  {
+    /* C[0]+jC[1] = A[0]+ j (-1) A[1] */
+    /* An imaginary part of 0x80000000 is stored as 0x7fffffff, not negated */
+    pDst[0] = pSrc[0];
+    in = pSrc[1];
+    pDst[1] = (in != 0x80000000) ? -in : 0x7fffffff;
+
+    /* Step to the next complex sample */
+    pSrc += 2;
+    pDst += 2;
+  }
+}
+ 
+/**  
+ * @} end of cmplx_conj group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/ComplexMathFunctions/arm_cmplx_dot_prod_f32.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,132 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_cmplx_dot_prod_f32.c  
+*  
+* Description:	Floating-point complex dot product  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+* ---------------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupCmplxMath  
+ */ 
+ 
+/**  
+ * @defgroup cmplx_dot_prod Complex Dot Product  
+ *  
+ * Computes the dot product of two complex vectors.  
+ * The vectors are multiplied element-by-element and then summed.  
+ * 
+ * The <code>pSrcA</code> points to the first complex input vector and  
+ * <code>pSrcB</code> points to the second complex input vector.  
+ * <code>numSamples</code> specifies the number of complex samples  
+ * and the data in each array is stored in an interleaved fashion  
+ * (real, imag, real, imag, ...).  
+ * Each array has a total of <code>2*numSamples</code> values.  
+ *  
+ * The following algorithm is used:
+ * <pre>  
+ * realResult=0;  
+ * imagResult=0;  
+ * for(n=0; n<numSamples; n++) {  
+ *     realResult += pSrcA[(2*n)+0]*pSrcB[(2*n)+0] - pSrcA[(2*n)+1]*pSrcB[(2*n)+1];  
+ *     imagResult += pSrcA[(2*n)+0]*pSrcB[(2*n)+1] + pSrcA[(2*n)+1]*pSrcB[(2*n)+0];  
+ * }  
+ * </pre>  
+ *  
+ * There are separate functions for floating-point, Q15, and Q31 data types.  
+ */ 
+ 
+/**  
+ * @addtogroup cmplx_dot_prod  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief  Floating-point complex dot product  
+ * @param  *pSrcA points to the first input vector  
+ * @param  *pSrcB points to the second input vector  
+ * @param  numSamples number of complex samples in each vector  
+ * @param  *realResult real part of the result returned here  
+ * @param  *imagResult imaginary part of the result returned here  
+ * @return none.  
+ */ 
+ 
+void arm_cmplx_dot_prod_f32(
+  float32_t * pSrcA,
+  float32_t * pSrcB,
+  uint32_t numSamples,
+  float32_t * realResult,
+  float32_t * imagResult)
+{
+  float32_t real_sum = 0.0f, imag_sum = 0.0f;    /* Running real and imaginary sums */
+  uint32_t blkCnt;                               /* loop counter */
+
+  /* Both inputs are interleaved (real, imag, real, imag, ...), so for every
+   ** sample n the operands are A = a + jb and B = c + jd with
+   **   a = pSrcA[2n], b = pSrcA[2n+1], c = pSrcB[2n], d = pSrcB[2n+1].
+   ** Each sample contributes the full complex product A*B to the sums:
+   **   real_sum += a*c - b*d
+   **   imag_sum += a*d + b*c
+   ** which is the algorithm given in the cmplx_dot_prod group documentation. */
+
+  /* First part of the processing with loop unrolling.  Accumulate 4 complex
+   ** products per pass; the second loop below handles the remaining 1 to 3. */
+  for(blkCnt = numSamples >> 2u; blkCnt > 0u; blkCnt--)
+  {
+    real_sum += (pSrcA[0] * pSrcB[0]) - (pSrcA[1] * pSrcB[1]);
+    imag_sum += (pSrcA[0] * pSrcB[1]) + (pSrcA[1] * pSrcB[0]);
+
+    real_sum += (pSrcA[2] * pSrcB[2]) - (pSrcA[3] * pSrcB[3]);
+    imag_sum += (pSrcA[2] * pSrcB[3]) + (pSrcA[3] * pSrcB[2]);
+
+    real_sum += (pSrcA[4] * pSrcB[4]) - (pSrcA[5] * pSrcB[5]);
+    imag_sum += (pSrcA[4] * pSrcB[5]) + (pSrcA[5] * pSrcB[4]);
+
+    real_sum += (pSrcA[6] * pSrcB[6]) - (pSrcA[7] * pSrcB[7]);
+    imag_sum += (pSrcA[6] * pSrcB[7]) + (pSrcA[7] * pSrcB[6]);
+
+    /* Step past the 4 complex samples (8 values) just consumed */
+    pSrcA += 8;
+    pSrcB += 8;
+  }
+
+  /* If numSamples is not a multiple of 4, accumulate the remaining complex
+   ** products here.  No loop unrolling is used. */
+  for(blkCnt = numSamples % 0x4u; blkCnt > 0u; blkCnt--)
+  {
+    real_sum += (pSrcA[0] * pSrcB[0]) - (pSrcA[1] * pSrcB[1]);
+    imag_sum += (pSrcA[0] * pSrcB[1]) + (pSrcA[1] * pSrcB[0]);
+
+    /* Step to the next complex sample */
+    pSrcA += 2;
+    pSrcB += 2;
+  }
+
+  /* Store the real and imaginary results in the destination buffers */
+  *realResult = real_sum;
+  *imagResult = imag_sum;
+}
+ 
+/**  
+ * @} end of cmplx_dot_prod group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/ComplexMathFunctions/arm_cmplx_dot_prod_q15.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,117 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_cmplx_dot_prod_q15.c  
+*  
+* Description:	Processing function for the Q15 Complex Dot product  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupCmplxMath  
+ */ 
+ 
+/**  
+ * @addtogroup cmplx_dot_prod  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief  Q15 complex dot product  
+ * @param  *pSrcA points to the first input vector  
+ * @param  *pSrcB points to the second input vector  
+ * @param  numSamples number of complex samples in each vector  
+ * @param  *realResult real part of the result returned here  
+ * @param  *imagResult imaginary part of the result returned here  
+ * @return none.  
+ *  
+ * <b>Scaling and Overflow Behavior:</b>  
+ * \par  
+ * The function is implemented using an internal 64-bit accumulator.  
+ * The intermediate 1.15 by 1.15 multiplications are performed with full precision and yield a 2.30 result.  
+ * These are accumulated in a 64-bit accumulator with 34.30 precision.  
+ * As a final step, the accumulators are converted to 8.24 format.  
+ * The return results <code>realResult</code> and <code>imagResult</code> are in 8.24 format.  
+ */ 
+ 
+void arm_cmplx_dot_prod_q15(
+  q15_t * pSrcA,
+  q15_t * pSrcB,
+  uint32_t numSamples,
+  q31_t * realResult,
+  q31_t * imagResult)
+{
+  q63_t real_sum = 0, imag_sum = 0;              /* 64-bit accumulators for the two running sums */
+  uint32_t blkCnt;                               /* loop counter */
+
+  /* Both inputs are walked element by element: products of even-indexed
+   * elements are added to real_sum and products of odd-indexed elements
+   * are added to imag_sum.  Each loop step below therefore consumes two
+   * q15 values from pSrcA and two from pSrcB. */
+
+  /* Loop unrolling: four (even, odd) pairs per iteration. */
+  blkCnt = numSamples >> 2u;
+
+  while(blkCnt > 0u)
+  {
+    /* CReal = A[0]* B[0] + A[2]* B[2] + A[4]* B[4] + .....+ A[numSamples-2]* B[numSamples-2] */
+    real_sum += ((q31_t) * pSrcA++ * *pSrcB++);
+
+    /* CImag = A[1]* B[1] + A[3]* B[3] + A[5]* B[5] + .....+ A[numSamples-1]* B[numSamples-1] */
+    imag_sum += ((q31_t) * pSrcA++ * *pSrcB++);
+
+    real_sum += ((q31_t) * pSrcA++ * *pSrcB++);
+    imag_sum += ((q31_t) * pSrcA++ * *pSrcB++);
+
+    real_sum += ((q31_t) * pSrcA++ * *pSrcB++);
+    imag_sum += ((q31_t) * pSrcA++ * *pSrcB++);
+
+    real_sum += ((q31_t) * pSrcA++ * *pSrcB++);
+    imag_sum += ((q31_t) * pSrcA++ * *pSrcB++);
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* If numSamples is not a multiple of 4, process the remaining 1 to 3
+   * pairs here without unrolling. */
+  blkCnt = numSamples % 0x4u;
+
+  while(blkCnt > 0u)
+  {
+    /* Same even/odd accumulation as in the unrolled loop */
+    real_sum += ((q31_t) * pSrcA++ * *pSrcB++);
+    imag_sum += ((q31_t) * pSrcA++ * *pSrcB++);
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* Convert the 34.30 accumulators to 8.24 by 6 right shifts.
+   * The shift has to be applied to the full 64-bit accumulator BEFORE it is
+   * narrowed to q31_t.  A cast binds tighter than >>, so the former
+   * "(q31_t) (sum) >> 6" truncated the accumulator to its low 32 bits first
+   * and only then shifted, discarding the upper bits of the sum. */
+  *realResult = (q31_t) (real_sum >> 6);
+  *imagResult = (q31_t) (imag_sum >> 6);
+}
+ 
+/**  
+ * @} end of cmplx_dot_prod group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/ComplexMathFunctions/arm_cmplx_dot_prod_q31.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,118 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_cmplx_dot_prod_q31.c  
+*  
+* Description:	Q31 complex dot product  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupCmplxMath  
+ */ 
+ 
+/**  
+ * @addtogroup cmplx_dot_prod  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief  Q31 complex dot product  
+ * @param  *pSrcA points to the first input vector  
+ * @param  *pSrcB points to the second input vector  
+ * @param  numSamples number of complex samples in each vector  
+ * @param  *realResult real part of the result returned here  
+ * @param  *imagResult imaginary part of the result returned here  
+ * @return none.  
+ *  
+ * <b>Scaling and Overflow Behavior:</b>  
+ * \par  
+ * The function is implemented using an internal 64-bit accumulator.  
+ * The intermediate 1.31 by 1.31 multiplications are performed with 64-bit precision and then shifted to 16.48 format.  
+ * The internal real and imaginary accumulators are in 16.48 format and provide 15 guard bits.  
+ * Additions are nonsaturating and no overflow will occur as long as <code>numSamples</code> is less than 32768.  
+ * The return results <code>realResult</code> and <code>imagResult</code> are in 16.48 format.  
+ * Input down scaling is not required.  
+ */ 
+ 
+void arm_cmplx_dot_prod_q31(
+  q31_t * pSrcA,
+  q31_t * pSrcB,
+  uint32_t numSamples,
+  q63_t * realResult,
+  q63_t * imagResult)
+{
+  q63_t accRe = 0;                               /* sum of the even-indexed products */
+  q63_t accIm = 0;                               /* sum of the odd-indexed products */
+  uint32_t groups = numSamples >> 2u;            /* passes through the 4x unrolled loop */
+  uint32_t leftover = numSamples % 0x4u;         /* pairs left for the tail loop */
+
+  /* Unrolled section: four (even, odd) pairs per pass.  Every product is
+   * formed in 64 bits and shifted right by 14 before it is accumulated. */
+  for(; groups > 0u; groups--)
+  {
+    accRe += ((q63_t) pSrcA[0] * pSrcB[0]) >> 14;
+    accIm += ((q63_t) pSrcA[1] * pSrcB[1]) >> 14;
+
+    accRe += ((q63_t) pSrcA[2] * pSrcB[2]) >> 14;
+    accIm += ((q63_t) pSrcA[3] * pSrcB[3]) >> 14;
+
+    accRe += ((q63_t) pSrcA[4] * pSrcB[4]) >> 14;
+    accIm += ((q63_t) pSrcA[5] * pSrcB[5]) >> 14;
+
+    accRe += ((q63_t) pSrcA[6] * pSrcB[6]) >> 14;
+    accIm += ((q63_t) pSrcA[7] * pSrcB[7]) >> 14;
+
+    /* Step past the eight values consumed from each input */
+    pSrcA += 8;
+    pSrcB += 8;
+  }
+
+  /* Tail section: the remaining 0 to 3 pairs, one pair per pass. */
+  for(; leftover > 0u; leftover--)
+  {
+    accRe += ((q63_t) pSrcA[0] * pSrcB[0]) >> 14;
+    accIm += ((q63_t) pSrcA[1] * pSrcB[1]) >> 14;
+
+    pSrcA += 2;
+    pSrcB += 2;
+  }
+
+  /* Hand both 64-bit sums back to the caller unchanged */
+  *realResult = accRe;
+  *imagResult = accIm;
+}
+ 
+/**  
+ * @} end of cmplx_dot_prod group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/ComplexMathFunctions/arm_cmplx_mag_f32.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,129 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_cmplx_mag_f32.c  
+*  
+* Description:	Floating-point complex magnitude.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+* ---------------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupCmplxMath  
+ */ 
+ 
+/**  
+ * @defgroup cmplx_mag Complex Magnitude  
+ *  
+ * Computes the magnitude of the elements of a complex data vector.  
+ * 
+ * The <code>pSrc</code> points to the source data and  
+ * <code>pDst</code> points to where the result should be written.  
+ * <code>numSamples</code> specifies the number of complex samples  
+ * in the input array and the data is stored in an interleaved fashion  
+ * (real, imag, real, imag, ...).  
+ * The input array has a total of <code>2*numSamples</code> values;  
+ * the output array has a total of <code>numSamples</code> values.  
+ * The underlying algorithm is:  
+ *  
+ * <pre>  
+ * for(n=0; n<numSamples; n++) {  
+ *     pDst[n] = sqrt(pSrc[(2*n)+0]^2 + pSrc[(2*n)+1]^2);  
+ * }  
+ * </pre>  
+ *  
+ * There are separate functions for floating-point, Q15, and Q31 data types.  
+ */ 
+ 
+/**  
+ * @addtogroup cmplx_mag  
+ * @{  
+ */ 
+/**  
+ * @brief Floating-point complex magnitude.  
+ * @param[in]       *pSrc points to complex input buffer  
+ * @param[out]      *pDst points to real output buffer  
+ * @param[in]       numSamples number of complex samples in the input vector  
+ * @return none.  
+ *  
+ */ 
+ 
+ 
+void arm_cmplx_mag_f32(
+  float32_t * pSrc,
+  float32_t * pDst,
+  uint32_t numSamples)
+{
+  float32_t re, im;                              /* current input pair */
+  uint32_t groups = numSamples >> 2u;            /* passes through the 4x unrolled loop */
+  uint32_t leftover = numSamples % 0x4u;         /* samples left for the tail loop */
+
+  /* Unrolled section: four outputs per pass.  Each output is produced by
+   * arm_sqrt_f32() from re*re + im*im; the input pair is read before the
+   * corresponding output is written. */
+  for(; groups > 0u; groups--)
+  {
+    re = pSrc[0];
+    im = pSrc[1];
+    arm_sqrt_f32((re * re) + (im * im), &pDst[0]);
+
+    re = pSrc[2];
+    im = pSrc[3];
+    arm_sqrt_f32((re * re) + (im * im), &pDst[1]);
+
+    re = pSrc[4];
+    im = pSrc[5];
+    arm_sqrt_f32((re * re) + (im * im), &pDst[2]);
+
+    re = pSrc[6];
+    im = pSrc[7];
+    arm_sqrt_f32((re * re) + (im * im), &pDst[3]);
+
+    /* Eight inputs consumed, four outputs produced */
+    pSrc += 8;
+    pDst += 4;
+  }
+
+  /* Tail section: the remaining 0 to 3 samples, one per pass. */
+  for(; leftover > 0u; leftover--)
+  {
+    re = pSrc[0];
+    im = pSrc[1];
+    arm_sqrt_f32((re * re) + (im * im), &pDst[0]);
+
+    pSrc += 2;
+    pDst += 1;
+  }
+}
+ 
+/**  
+ * @} end of cmplx_mag group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/ComplexMathFunctions/arm_cmplx_mag_q15.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,123 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_cmplx_mag_q15.c  
+*  
+* Description:	Q15 complex magnitude.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+* ---------------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupCmplxMath  
+ */ 
+ 
+/**  
+ * @addtogroup cmplx_mag  
+ * @{  
+ */ 
+ 
+ 
+/**  
+ * @brief  Q15 complex magnitude  
+ * @param  *pSrc points to the complex input vector  
+ * @param  *pDst points to the real output vector  
+ * @param  numSamples number of complex samples in the input vector  
+ * @return none.  
+ *  
+ * <b>Scaling and Overflow Behavior:</b>  
+ * \par  
+ * The function implements 1.15 by 1.15 multiplications and finally output is converted into 2.14 format.  
+ */ 
+ 
+void arm_cmplx_mag_q15(
+  q15_t * pSrc,
+  q15_t * pDst,
+  uint32_t numSamples)
+{
+  q15_t re, im;                                  /* current input pair */
+  q31_t sqRe, sqIm;                              /* __SMUAD results for re and im */
+  uint32_t groups = numSamples >> 2u;            /* passes through the 4x unrolled loop */
+  uint32_t leftover = numSamples % 0x4u;         /* samples left for the tail loop */
+
+  /* Unrolled section: four outputs per pass.  For every pair, __SMUAD is
+   * applied to each component with itself, the two results are added in
+   * 64 bits, shifted right by 17, narrowed to q15_t and handed to
+   * arm_sqrt_q15(), which writes the output element. */
+  for(; groups > 0u; groups--)
+  {
+    re = pSrc[0];
+    im = pSrc[1];
+    sqRe = __SMUAD(re, re);
+    sqIm = __SMUAD(im, im);
+    arm_sqrt_q15((q15_t) (((q63_t) sqRe + sqIm) >> 17), &pDst[0]);
+
+    re = pSrc[2];
+    im = pSrc[3];
+    sqRe = __SMUAD(re, re);
+    sqIm = __SMUAD(im, im);
+    arm_sqrt_q15((q15_t) (((q63_t) sqRe + sqIm) >> 17), &pDst[1]);
+
+    re = pSrc[4];
+    im = pSrc[5];
+    sqRe = __SMUAD(re, re);
+    sqIm = __SMUAD(im, im);
+    arm_sqrt_q15((q15_t) (((q63_t) sqRe + sqIm) >> 17), &pDst[2]);
+
+    re = pSrc[6];
+    im = pSrc[7];
+    sqRe = __SMUAD(re, re);
+    sqIm = __SMUAD(im, im);
+    arm_sqrt_q15((q15_t) (((q63_t) sqRe + sqIm) >> 17), &pDst[3]);
+
+    /* Eight inputs consumed, four outputs produced */
+    pSrc += 8;
+    pDst += 4;
+  }
+
+  /* Tail section: the remaining 0 to 3 samples, one per pass. */
+  for(; leftover > 0u; leftover--)
+  {
+    re = pSrc[0];
+    im = pSrc[1];
+    sqRe = __SMUAD(re, re);
+    sqIm = __SMUAD(im, im);
+    arm_sqrt_q15((q15_t) (((q63_t) sqRe + sqIm) >> 17), &pDst[0]);
+
+    pSrc += 2;
+    pDst += 1;
+  }
+}
+ 
+/**  
+ * @} end of cmplx_mag group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/ComplexMathFunctions/arm_cmplx_mag_q31.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,123 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_cmplx_mag_q31.c  
+*  
+* Description:	Q31 complex magnitude  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+* ---------------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupCmplxMath  
+ */ 
+ 
+/**  
+ * @addtogroup cmplx_mag  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief  Q31 complex magnitude  
+ * @param  *pSrc points to the complex input vector  
+ * @param  *pDst points to the real output vector  
+ * @param  numSamples number of complex samples in the input vector  
+ * @return none.  
+ *  
+ * <b>Scaling and Overflow Behavior:</b>  
+ * \par  
+ * The function implements 1.31 by 1.31 multiplications and finally output is converted into 2.30 format.  
+ * Input down scaling is not required.  
+ */ 
+ 
+void arm_cmplx_mag_q31(
+  q31_t * pSrc,
+  q31_t * pDst,
+  uint32_t numSamples)
+{
+  q31_t re, im;                                  /* current input pair */
+  q31_t sqRe, sqIm;                              /* scaled squares of re and im */
+  uint32_t groups = numSamples >> 2u;            /* passes through the 4x unrolled loop */
+  uint32_t leftover = numSamples % 0x4u;         /* samples left for the tail loop */
+
+  /* Unrolled section: four outputs per pass.  Each component is squared in
+   * 64 bits and shifted right by 33 before narrowing to q31_t; the sum of
+   * the two scaled squares is passed to arm_sqrt_q31(), which writes the
+   * output element. */
+  for(; groups > 0u; groups--)
+  {
+    re = pSrc[0];
+    im = pSrc[1];
+    sqRe = (q31_t) (((q63_t) re * re) >> 33);
+    sqIm = (q31_t) (((q63_t) im * im) >> 33);
+    arm_sqrt_q31(sqRe + sqIm, &pDst[0]);
+
+    re = pSrc[2];
+    im = pSrc[3];
+    sqRe = (q31_t) (((q63_t) re * re) >> 33);
+    sqIm = (q31_t) (((q63_t) im * im) >> 33);
+    arm_sqrt_q31(sqRe + sqIm, &pDst[1]);
+
+    re = pSrc[4];
+    im = pSrc[5];
+    sqRe = (q31_t) (((q63_t) re * re) >> 33);
+    sqIm = (q31_t) (((q63_t) im * im) >> 33);
+    arm_sqrt_q31(sqRe + sqIm, &pDst[2]);
+
+    re = pSrc[6];
+    im = pSrc[7];
+    sqRe = (q31_t) (((q63_t) re * re) >> 33);
+    sqIm = (q31_t) (((q63_t) im * im) >> 33);
+    arm_sqrt_q31(sqRe + sqIm, &pDst[3]);
+
+    /* Eight inputs consumed, four outputs produced */
+    pSrc += 8;
+    pDst += 4;
+  }
+
+  /* Tail section: the remaining 0 to 3 samples, one per pass. */
+  for(; leftover > 0u; leftover--)
+  {
+    re = pSrc[0];
+    im = pSrc[1];
+    sqRe = (q31_t) (((q63_t) re * re) >> 33);
+    sqIm = (q31_t) (((q63_t) im * im) >> 33);
+    arm_sqrt_q31(sqRe + sqIm, &pDst[0]);
+
+    pSrc += 2;
+    pDst += 1;
+  }
+}
+ 
+/**  
+ * @} end of cmplx_mag group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/ComplexMathFunctions/arm_cmplx_mag_squared_f32.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,127 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_cmplx_mag_squared_f32.c  
+*  
+* Description:	Floating-point complex magnitude squared.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+* ---------------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupCmplxMath  
+ */ 
+ 
+/**  
+ * @defgroup cmplx_mag_squared Complex Magnitude Squared  
+ *  
+ * Computes the magnitude squared of the elements of a complex data vector.  
+ * 
+ * The <code>pSrc</code> points to the source data and  
+ * <code>pDst</code> points to where the result should be written.  
+ * <code>numSamples</code> specifies the number of complex samples  
+ * in the input array and the data is stored in an interleaved fashion  
+ * (real, imag, real, imag, ...).  
+ * The input array has a total of <code>2*numSamples</code> values;  
+ * the output array has a total of <code>numSamples</code> values.  
+ *  
+ * The underlying algorithm is:  
+ *  
+ * <pre>  
+ * for(n=0; n<numSamples; n++) {  
+ *     pDst[n] = pSrc[(2*n)+0]^2 + pSrc[(2*n)+1]^2;  
+ * }  
+ * </pre>  
+ *  
+ * There are separate functions for floating-point, Q15, and Q31 data types.  
+ */ 
+ 
+/**  
+ * @addtogroup cmplx_mag_squared  
+ * @{  
+ */ 
+ 
+ 
+/**  
+ * @brief  Floating-point complex magnitude squared  
+ * @param[in]  *pSrc points to the complex input vector  
+ * @param[out]  *pDst points to the real output vector  
+ * @param[in]  numSamples number of complex samples in the input vector  
+ * @return none.  
+ */ 
+ 
+void arm_cmplx_mag_squared_f32(
+  float32_t * pSrc,
+  float32_t * pDst,
+  uint32_t numSamples)
+{
+  float32_t re, im;                              /* current input pair */
+  uint32_t groups = numSamples >> 2u;            /* passes through the 4x unrolled loop */
+  uint32_t leftover = numSamples % 0x4u;         /* samples left for the tail loop */
+
+  /* Unrolled section: four outputs per pass, each equal to re*re + im*im.
+   * The input pair is read before the corresponding output is written. */
+  for(; groups > 0u; groups--)
+  {
+    re = pSrc[0];
+    im = pSrc[1];
+    pDst[0] = (re * re) + (im * im);
+
+    re = pSrc[2];
+    im = pSrc[3];
+    pDst[1] = (re * re) + (im * im);
+
+    re = pSrc[4];
+    im = pSrc[5];
+    pDst[2] = (re * re) + (im * im);
+
+    re = pSrc[6];
+    im = pSrc[7];
+    pDst[3] = (re * re) + (im * im);
+
+    /* Eight inputs consumed, four outputs produced */
+    pSrc += 8;
+    pDst += 4;
+  }
+
+  /* Tail section: the remaining 0 to 3 samples, one per pass. */
+  for(; leftover > 0u; leftover--)
+  {
+    re = pSrc[0];
+    im = pSrc[1];
+    pDst[0] = (re * re) + (im * im);
+
+    pSrc += 2;
+    pDst += 1;
+  }
+}
+ 
+/**  
+ * @} end of cmplx_mag_squared group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/ComplexMathFunctions/arm_cmplx_mag_squared_q15.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,120 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_cmplx_mag_squared_q15.c  
+*  
+* Description:	Q15 complex magnitude squared.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+* ---------------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupCmplxMath  
+ */ 
+ 
+/**  
+ * @addtogroup cmplx_mag_squared  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief  Q15 complex magnitude squared  
+ * @param  *pSrc points to the complex input vector  
+ * @param  *pDst points to the real output vector  
+ * @param  numSamples number of complex samples in the input vector  
+ * @return none.  
+ *  
+ * <b>Scaling and Overflow Behavior:</b>  
+ * \par  
+ * The function implements 1.15 by 1.15 multiplications and finally output is converted into 3.13 format.  
+ */ 
+ 
+void arm_cmplx_mag_squared_q15(
+  q15_t * pSrc,
+  q15_t * pDst,
+  uint32_t numSamples)
+{
+  q15_t re, im;                                  /* current input pair */
+  q31_t sqRe, sqIm;                              /* __SMUAD results for re and im */
+  uint32_t groups = numSamples >> 2u;            /* passes through the 4x unrolled loop */
+  uint32_t leftover = numSamples % 0x4u;         /* samples left for the tail loop */
+
+  /* Unrolled section: four outputs per pass.  For every pair, __SMUAD is
+   * applied to each component with itself, the two results are added in
+   * 64 bits, shifted right by 17 and narrowed to q15_t for storage. */
+  for(; groups > 0u; groups--)
+  {
+    re = pSrc[0];
+    im = pSrc[1];
+    sqRe = __SMUAD(re, re);
+    sqIm = __SMUAD(im, im);
+    pDst[0] = (q15_t) (((q63_t) sqRe + sqIm) >> 17);
+
+    re = pSrc[2];
+    im = pSrc[3];
+    sqRe = __SMUAD(re, re);
+    sqIm = __SMUAD(im, im);
+    pDst[1] = (q15_t) (((q63_t) sqRe + sqIm) >> 17);
+
+    re = pSrc[4];
+    im = pSrc[5];
+    sqRe = __SMUAD(re, re);
+    sqIm = __SMUAD(im, im);
+    pDst[2] = (q15_t) (((q63_t) sqRe + sqIm) >> 17);
+
+    re = pSrc[6];
+    im = pSrc[7];
+    sqRe = __SMUAD(re, re);
+    sqIm = __SMUAD(im, im);
+    pDst[3] = (q15_t) (((q63_t) sqRe + sqIm) >> 17);
+
+    /* Eight inputs consumed, four outputs produced */
+    pSrc += 8;
+    pDst += 4;
+  }
+
+  /* Tail section: the remaining 0 to 3 samples, one per pass. */
+  for(; leftover > 0u; leftover--)
+  {
+    re = pSrc[0];
+    im = pSrc[1];
+    sqRe = __SMUAD(re, re);
+    sqIm = __SMUAD(im, im);
+    pDst[0] = (q15_t) (((q63_t) sqRe + sqIm) >> 17);
+
+    pSrc += 2;
+    pDst += 1;
+  }
+}
+ 
+/**  
+ * @} end of cmplx_mag_squared group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/ComplexMathFunctions/arm_cmplx_mag_squared_q31.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,122 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_cmplx_mag_squared_q31.c  
+*  
+* Description:	Q31 complex magnitude squared.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+* ---------------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupCmplxMath  
+ */ 
+ 
+/**  
+ * @addtogroup cmplx_mag_squared  
+ * @{  
+ */ 
+ 
+ 
+/**  
+ * @brief  Q31 complex magnitude squared  
+ * @param  *pSrc points to the complex input vector  
+ * @param  *pDst points to the real output vector  
+ * @param  numSamples number of complex samples in the input vector  
+ * @return none.  
+ *  
+ * <b>Scaling and Overflow Behavior:</b>  
+ * \par  
+ * The function implements 1.31 by 1.31 multiplications and finally output is converted into 3.29 format.  
+ * Input down scaling is not required.  
+ */ 
+ 
+void arm_cmplx_mag_squared_q31(
+  q31_t * pSrc,
+  q31_t * pDst,
+  uint32_t numSamples)
+{
+  q31_t re, im;                                  /* current input pair */
+  q31_t sqRe, sqIm;                              /* scaled squares of re and im */
+  uint32_t groups = numSamples >> 2u;            /* passes through the 4x unrolled loop */
+  uint32_t leftover = numSamples % 0x4u;         /* samples left for the tail loop */
+
+  /* Unrolled section: four outputs per pass.  Each component is squared in
+   * 64 bits and shifted right by 33 before narrowing to q31_t; the output
+   * element is the sum of the two scaled squares. */
+  for(; groups > 0u; groups--)
+  {
+    re = pSrc[0];
+    im = pSrc[1];
+    sqRe = (q31_t) (((q63_t) re * re) >> 33);
+    sqIm = (q31_t) (((q63_t) im * im) >> 33);
+    pDst[0] = sqRe + sqIm;
+
+    re = pSrc[2];
+    im = pSrc[3];
+    sqRe = (q31_t) (((q63_t) re * re) >> 33);
+    sqIm = (q31_t) (((q63_t) im * im) >> 33);
+    pDst[1] = sqRe + sqIm;
+
+    re = pSrc[4];
+    im = pSrc[5];
+    sqRe = (q31_t) (((q63_t) re * re) >> 33);
+    sqIm = (q31_t) (((q63_t) im * im) >> 33);
+    pDst[2] = sqRe + sqIm;
+
+    re = pSrc[6];
+    im = pSrc[7];
+    sqRe = (q31_t) (((q63_t) re * re) >> 33);
+    sqIm = (q31_t) (((q63_t) im * im) >> 33);
+    pDst[3] = sqRe + sqIm;
+
+    /* Eight inputs consumed, four outputs produced */
+    pSrc += 8;
+    pDst += 4;
+  }
+
+  /* Tail section: the remaining 0 to 3 samples, one per pass. */
+  for(; leftover > 0u; leftover--)
+  {
+    re = pSrc[0];
+    im = pSrc[1];
+    sqRe = (q31_t) (((q63_t) re * re) >> 33);
+    sqIm = (q31_t) (((q63_t) im * im) >> 33);
+    pDst[0] = sqRe + sqIm;
+
+    pSrc += 2;
+    pDst += 1;
+  }
+}
+ 
+/**  
+ * @} end of cmplx_mag_squared group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/ComplexMathFunctions/arm_cmplx_mult_cmplx_f32.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,149 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:	    arm_cmplx_mult_cmplx_f32.c  
+*  
+* Description:	Floating-point complex-by-complex multiplication  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupCmplxMath  
+ */ 
+ 
+/**  
+ * @defgroup CmplxByCmplxMult Complex-by-Complex Multiplication  
+ *  
+ * Multiplies a complex vector by another complex vector and generates a complex result.  
+ * The data in the complex arrays is stored in an interleaved fashion  
+ * (real, imag, real, imag, ...).  
+ * The parameter <code>numSamples</code> represents the number of complex  
+ * samples processed.  The complex arrays have a total of <code>2*numSamples</code>  
+ * real values.  
+ *  
+ * The following underlying algorithm is used:  
+ *  
+ * <pre>  
+ * for(n=0; n<numSamples; n++) {  
+ *     pDst[(2*n)+0] = pSrcA[(2*n)+0] * pSrcB[(2*n)+0] - pSrcA[(2*n)+1] * pSrcB[(2*n)+1];  
+ *     pDst[(2*n)+1] = pSrcA[(2*n)+0] * pSrcB[(2*n)+1] + pSrcA[(2*n)+1] * pSrcB[(2*n)+0];  
+ * }  
+ * </pre>  
+ *  
+ * There are separate functions for floating-point, Q15, and Q31 data types.  
+ */ 
+ 
+/**  
+ * @addtogroup CmplxByCmplxMult  
+ * @{  
+ */ 
+ 
+ 
+/**  
+ * @brief  Floating-point complex-by-complex multiplication  
+ * @param[in]  *pSrcA points to the first input vector  
+ * @param[in]  *pSrcB points to the second input vector  
+ * @param[out]  *pDst  points to the output vector  
+ * @param[in]  numSamples number of complex samples in each vector  
+ * @return none.  
+ */ 
+ 
+void arm_cmplx_mult_cmplx_f32(
+  float32_t * pSrcA,
+  float32_t * pSrcB,
+  float32_t * pDst,
+  uint32_t numSamples)
+{
+  float32_t reA, imA, reB, imB;                  /* Real and imaginary parts of the current A and B samples */
+  uint32_t count;                                /* Iterations left in the current loop */
+
+  /* Unrolled section: every pass multiplies four complex samples.
+   ** For each sample:  Re = reA * reB - imA * imB,  Im = reA * imB + imA * reB.
+   ** A sample is read completely before its result is stored. */
+  for(count = numSamples >> 2u; count > 0u; count--)
+  {
+    /* 1st sample of the group */
+    reA = pSrcA[0];
+    imA = pSrcA[1];
+    reB = pSrcB[0];
+    imB = pSrcB[1];
+    pDst[0] = (reA * reB) - (imA * imB);
+    pDst[1] = (reA * imB) + (imA * reB);
+
+    /* 2nd sample of the group */
+    reA = pSrcA[2];
+    imA = pSrcA[3];
+    reB = pSrcB[2];
+    imB = pSrcB[3];
+    pDst[2] = (reA * reB) - (imA * imB);
+    pDst[3] = (reA * imB) + (imA * reB);
+
+    /* 3rd sample of the group */
+    reA = pSrcA[4];
+    imA = pSrcA[5];
+    reB = pSrcB[4];
+    imB = pSrcB[5];
+    pDst[4] = (reA * reB) - (imA * imB);
+    pDst[5] = (reA * imB) + (imA * reB);
+
+    /* 4th sample of the group */
+    reA = pSrcA[6];
+    imA = pSrcA[7];
+    reB = pSrcB[6];
+    imB = pSrcB[7];
+    pDst[6] = (reA * reB) - (imA * imB);
+    pDst[7] = (reA * imB) + (imA * reB);
+
+    /* Step all three vectors past the four samples just handled */
+    pSrcA += 8;
+    pSrcB += 8;
+    pDst += 8;
+  }
+
+  /* Tail: the 0 to 3 samples left when numSamples is not a multiple of 4,
+   ** handled one complex sample per pass. */
+  for(count = numSamples % 0x4u; count > 0u; count--)
+  {
+    reA = pSrcA[0];
+    imA = pSrcA[1];
+    reB = pSrcB[0];
+    imB = pSrcB[1];
+    pDst[0] = (reA * reB) - (imA * imB);
+    pDst[1] = (reA * imB) + (imA * reB);
+
+    pSrcA += 2;
+    pSrcB += 2;
+    pDst += 2;
+  }
+}
+ 
+/**  
+ * @} end of CmplxByCmplxMult group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/ComplexMathFunctions/arm_cmplx_mult_cmplx_q15.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,148 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:	    arm_cmplx_mult_cmplx_q15.c  
+*  
+* Description:	Q15 complex-by-complex multiplication  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupCmplxMath  
+ */ 
+ 
+/**  
+ * @addtogroup CmplxByCmplxMult  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief  Q15 complex-by-complex multiplication  
+ * @param[in]  *pSrcA points to the first input vector  
+ * @param[in]  *pSrcB points to the second input vector  
+ * @param[out]  *pDst  points to the output vector  
+ * @param[in]  numSamples number of complex samples in each vector  
+ * @return none.  
+ *  
+ * <b>Scaling and Overflow Behavior:</b>  
+ * \par  
+ * The function implements 1.15 by 1.15 multiplications and finally output is converted into 3.13 format.  
+ */ 
+ 
+void arm_cmplx_mult_cmplx_q15(
+  q15_t * pSrcA,
+  q15_t * pSrcB,
+  q15_t * pDst,
+  uint32_t numSamples)
+{
+  q15_t reA, imA, reB, imB;                      /* Real and imaginary parts of the current A and B samples */
+  uint32_t count;                                /* Iterations left in the current loop */
+
+  /* Unrolled section: every pass multiplies four complex samples.
+   ** Each product is formed in q31_t and shifted right by 17 before the
+   ** pair is combined, so results are stored in 3.13 format.
+   ** A sample is read completely before its result is stored. */
+  for(count = numSamples >> 2u; count > 0u; count--)
+  {
+    /* 1st sample of the group */
+    reA = pSrcA[0];
+    imA = pSrcA[1];
+    reB = pSrcB[0];
+    imB = pSrcB[1];
+    pDst[0] = (q15_t) ((((q31_t) reA * reB) >> 17) - (((q31_t) imA * imB) >> 17));
+    pDst[1] = (q15_t) ((((q31_t) reA * imB) >> 17) + (((q31_t) imA * reB) >> 17));
+
+    /* 2nd sample of the group */
+    reA = pSrcA[2];
+    imA = pSrcA[3];
+    reB = pSrcB[2];
+    imB = pSrcB[3];
+    pDst[2] = (q15_t) ((((q31_t) reA * reB) >> 17) - (((q31_t) imA * imB) >> 17));
+    pDst[3] = (q15_t) ((((q31_t) reA * imB) >> 17) + (((q31_t) imA * reB) >> 17));
+
+    /* 3rd sample of the group */
+    reA = pSrcA[4];
+    imA = pSrcA[5];
+    reB = pSrcB[4];
+    imB = pSrcB[5];
+    pDst[4] = (q15_t) ((((q31_t) reA * reB) >> 17) - (((q31_t) imA * imB) >> 17));
+    pDst[5] = (q15_t) ((((q31_t) reA * imB) >> 17) + (((q31_t) imA * reB) >> 17));
+
+    /* 4th sample of the group */
+    reA = pSrcA[6];
+    imA = pSrcA[7];
+    reB = pSrcB[6];
+    imB = pSrcB[7];
+    pDst[6] = (q15_t) ((((q31_t) reA * reB) >> 17) - (((q31_t) imA * imB) >> 17));
+    pDst[7] = (q15_t) ((((q31_t) reA * imB) >> 17) + (((q31_t) imA * reB) >> 17));
+
+    /* Step all three vectors past the four samples just handled */
+    pSrcA += 8;
+    pSrcB += 8;
+    pDst += 8;
+  }
+
+  /* Tail: the 0 to 3 samples left when numSamples is not a multiple of 4,
+   ** handled one complex sample per pass. */
+  for(count = numSamples % 0x4u; count > 0u; count--)
+  {
+    reA = pSrcA[0];
+    imA = pSrcA[1];
+    reB = pSrcB[0];
+    imB = pSrcB[1];
+    pDst[0] = (q15_t) ((((q31_t) reA * reB) >> 17) - (((q31_t) imA * imB) >> 17));
+    pDst[1] = (q15_t) ((((q31_t) reA * imB) >> 17) + (((q31_t) imA * reB) >> 17));
+
+    pSrcA += 2;
+    pSrcB += 2;
+    pDst += 2;
+  }
+}
+ 
+/**  
+ * @} end of CmplxByCmplxMult group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/ComplexMathFunctions/arm_cmplx_mult_cmplx_q31.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,140 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:	    arm_cmplx_mult_cmplx_q31.c  
+*  
+* Description:	Q31 complex-by-complex multiplication  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupCmplxMath  
+ */ 
+ 
+/**  
+ * @addtogroup CmplxByCmplxMult  
+ * @{  
+ */ 
+ 
+ 
+/**  
+ * @brief  Q31 complex-by-complex multiplication  
+ * @param[in]  *pSrcA points to the first input vector  
+ * @param[in]  *pSrcB points to the second input vector  
+ * @param[out]  *pDst  points to the output vector  
+ * @param[in]  numSamples number of complex samples in each vector  
+ * @return none.  
+ *  
+ * <b>Scaling and Overflow Behavior:</b>  
+ * \par  
+ * The function implements 1.31 by 1.31 multiplications and finally output is converted into 3.29 format.  
+ * Input down scaling is not required.  
+ */ 
+ 
+void arm_cmplx_mult_cmplx_q31(
+  q31_t * pSrcA,
+  q31_t * pSrcB,
+  q31_t * pDst,
+  uint32_t numSamples)
+{
+  q31_t reA, imA, reB, imB;                      /* Real and imaginary parts of the current A and B samples */
+  uint32_t count;                                /* Iterations left in the current loop */
+
+  /* Unrolled section: every pass multiplies four complex samples.
+   ** Each product is formed in q63_t and shifted right by 33 before the
+   ** pair is combined, so results are stored in 3.29 format.
+   ** A sample is read completely before its result is stored. */
+  for(count = numSamples >> 2u; count > 0u; count--)
+  {
+    /* 1st sample of the group */
+    reA = pSrcA[0];
+    imA = pSrcA[1];
+    reB = pSrcB[0];
+    imB = pSrcB[1];
+    pDst[0] = (q31_t) ((((q63_t) reA * reB) >> 33) - (((q63_t) imA * imB) >> 33));
+    pDst[1] = (q31_t) ((((q63_t) reA * imB) >> 33) + (((q63_t) imA * reB) >> 33));
+
+    /* 2nd sample of the group */
+    reA = pSrcA[2];
+    imA = pSrcA[3];
+    reB = pSrcB[2];
+    imB = pSrcB[3];
+    pDst[2] = (q31_t) ((((q63_t) reA * reB) >> 33) - (((q63_t) imA * imB) >> 33));
+    pDst[3] = (q31_t) ((((q63_t) reA * imB) >> 33) + (((q63_t) imA * reB) >> 33));
+
+    /* 3rd sample of the group */
+    reA = pSrcA[4];
+    imA = pSrcA[5];
+    reB = pSrcB[4];
+    imB = pSrcB[5];
+    pDst[4] = (q31_t) ((((q63_t) reA * reB) >> 33) - (((q63_t) imA * imB) >> 33));
+    pDst[5] = (q31_t) ((((q63_t) reA * imB) >> 33) + (((q63_t) imA * reB) >> 33));
+
+    /* 4th sample of the group */
+    reA = pSrcA[6];
+    imA = pSrcA[7];
+    reB = pSrcB[6];
+    imB = pSrcB[7];
+    pDst[6] = (q31_t) ((((q63_t) reA * reB) >> 33) - (((q63_t) imA * imB) >> 33));
+    pDst[7] = (q31_t) ((((q63_t) reA * imB) >> 33) + (((q63_t) imA * reB) >> 33));
+
+    /* Step all three vectors past the four samples just handled */
+    pSrcA += 8;
+    pSrcB += 8;
+    pDst += 8;
+  }
+
+  /* Tail: the 0 to 3 samples left when numSamples is not a multiple of 4,
+   ** handled one complex sample per pass. */
+  for(count = numSamples % 0x4u; count > 0u; count--)
+  {
+    reA = pSrcA[0];
+    imA = pSrcA[1];
+    reB = pSrcB[0];
+    imB = pSrcB[1];
+    pDst[0] = (q31_t) ((((q63_t) reA * reB) >> 33) - (((q63_t) imA * imB) >> 33));
+    pDst[1] = (q31_t) ((((q63_t) reA * imB) >> 33) + (((q63_t) imA * reB) >> 33));
+
+    pSrcA += 2;
+    pSrcB += 2;
+    pDst += 2;
+  }
+}
+ 
+/**  
+ * @} end of CmplxByCmplxMult group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/ComplexMathFunctions/arm_cmplx_mult_real_f32.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,130 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:	    arm_cmplx_mult_real_f32.c  
+*  
+* Description:	Floating-point complex by real multiplication  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupCmplxMath  
+ */ 
+ 
+/**  
+ * @defgroup CmplxByRealMult Complex-by-Real Multiplication  
+ *  
+ * Multiplies a complex vector by a real vector and generates a complex result.  
+ * The data in the complex arrays is stored in an interleaved fashion  
+ * (real, imag, real, imag, ...).  
+ * The parameter <code>numSamples</code> represents the number of complex  
+ * samples processed.  The complex arrays have a total of <code>2*numSamples</code>  
+ * real values while the real array has a total of <code>numSamples</code>  
+ * real values.  
+ *  
+ * The following underlying algorithm is used:  
+ *  
+ * <pre>  
+ * for(n=0; n<numSamples; n++) {  
+ *     pCmplxDst[(2*n)+0] = pSrcCmplx[(2*n)+0] * pSrcReal[n];  
+ *     pCmplxDst[(2*n)+1] = pSrcCmplx[(2*n)+1] * pSrcReal[n];  
+ * }  
+ * </pre>  
+ *  
+ * There are separate functions for floating-point, Q15, and Q31 data types.  
+ */ 
+ 
+/**  
+ * @addtogroup CmplxByRealMult  
+ * @{  
+ */ 
+ 
+ 
+/**  
+ * @brief  Floating-point complex-by-real multiplication  
+ * @param[in]  *pSrcCmplx points to the complex input vector  
+ * @param[in]  *pSrcReal points to the real input vector  
+ * @param[out]  *pCmplxDst points to the complex output vector  
+ * @param[in]  numSamples number of samples in each vector  
+ * @return none.  
+ */ 
+ 
+void arm_cmplx_mult_real_f32(
+  float32_t * pSrcCmplx,
+  float32_t * pSrcReal,
+  float32_t * pCmplxDst,
+  uint32_t numSamples)
+{
+  float32_t scale;                               /* Real factor applied to the current complex sample */
+  uint32_t count;                                /* Iterations left in the current loop */
+
+  /* Unrolled section: every pass scales four complex samples.
+   ** Both the real and the imaginary part of a sample are multiplied
+   ** by the matching entry of the real vector. */
+  for(count = numSamples >> 2u; count > 0u; count--)
+  {
+    /* 1st sample of the group */
+    scale = pSrcReal[0];
+    pCmplxDst[0] = pSrcCmplx[0] * scale;
+    pCmplxDst[1] = pSrcCmplx[1] * scale;
+
+    /* 2nd sample of the group */
+    scale = pSrcReal[1];
+    pCmplxDst[2] = pSrcCmplx[2] * scale;
+    pCmplxDst[3] = pSrcCmplx[3] * scale;
+
+    /* 3rd sample of the group */
+    scale = pSrcReal[2];
+    pCmplxDst[4] = pSrcCmplx[4] * scale;
+    pCmplxDst[5] = pSrcCmplx[5] * scale;
+
+    /* 4th sample of the group */
+    scale = pSrcReal[3];
+    pCmplxDst[6] = pSrcCmplx[6] * scale;
+    pCmplxDst[7] = pSrcCmplx[7] * scale;
+
+    /* Step the vectors past the four samples just handled */
+    pSrcReal += 4;
+    pSrcCmplx += 8;
+    pCmplxDst += 8;
+  }
+
+  /* Tail: the 0 to 3 samples left when numSamples is not a multiple of 4,
+   ** handled one complex sample per pass. */
+  for(count = numSamples % 0x4u; count > 0u; count--)
+  {
+    scale = pSrcReal[0];
+    pCmplxDst[0] = pSrcCmplx[0] * scale;
+    pCmplxDst[1] = pSrcCmplx[1] * scale;
+
+    pSrcReal += 1;
+    pSrcCmplx += 2;
+    pCmplxDst += 2;
+  }
+}
+ 
+/**  
+ * @} end of CmplxByRealMult group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/ComplexMathFunctions/arm_cmplx_mult_real_q15.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,112 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:	    arm_cmplx_mult_real_q15.c  
+*  
+* Description:	Q15 complex by real multiplication  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupCmplxMath  
+ */ 
+ 
+/**  
+ * @addtogroup CmplxByRealMult  
+ * @{  
+ */ 
+ 
+ 
+/**  
+ * @brief  Q15 complex-by-real multiplication  
+ * @param[in]  *pSrcCmplx points to the complex input vector  
+ * @param[in]  *pSrcReal points to the real input vector  
+ * @param[out]  *pCmplxDst points to the complex output vector  
+ * @param[in]  numSamples number of samples in each vector  
+ * @return none.  
+ *  
+ * <b>Scaling and Overflow Behavior:</b>  
+ * \par  
+ * The function uses saturating arithmetic.  
+ * Results outside of the allowable Q15 range [0x8000 0x7FFF] will be saturated.  
+ */ 
+ 
+void arm_cmplx_mult_real_q15(
+  q15_t * pSrcCmplx,
+  q15_t * pSrcReal,
+  q15_t * pCmplxDst,
+  uint32_t numSamples)
+{
+  q15_t scale;                                   /* Real factor applied to the current complex sample */
+  uint32_t count;                                /* Iterations left in the current loop */
+
+  /* Unrolled section: every pass scales four complex samples.
+   ** Each product is formed in q31_t, shifted right by 15 and passed
+   ** through __SSAT with a width of 16 before being stored. */
+  for(count = numSamples >> 2u; count > 0u; count--)
+  {
+    /* 1st sample of the group */
+    scale = pSrcReal[0];
+    pCmplxDst[0] = (q15_t) __SSAT((((q31_t) pSrcCmplx[0] * scale) >> 15), 16);
+    pCmplxDst[1] = (q15_t) __SSAT((((q31_t) pSrcCmplx[1] * scale) >> 15), 16);
+
+    /* 2nd sample of the group */
+    scale = pSrcReal[1];
+    pCmplxDst[2] = (q15_t) __SSAT((((q31_t) pSrcCmplx[2] * scale) >> 15), 16);
+    pCmplxDst[3] = (q15_t) __SSAT((((q31_t) pSrcCmplx[3] * scale) >> 15), 16);
+
+    /* 3rd sample of the group */
+    scale = pSrcReal[2];
+    pCmplxDst[4] = (q15_t) __SSAT((((q31_t) pSrcCmplx[4] * scale) >> 15), 16);
+    pCmplxDst[5] = (q15_t) __SSAT((((q31_t) pSrcCmplx[5] * scale) >> 15), 16);
+
+    /* 4th sample of the group */
+    scale = pSrcReal[3];
+    pCmplxDst[6] = (q15_t) __SSAT((((q31_t) pSrcCmplx[6] * scale) >> 15), 16);
+    pCmplxDst[7] = (q15_t) __SSAT((((q31_t) pSrcCmplx[7] * scale) >> 15), 16);
+
+    /* Step the vectors past the four samples just handled */
+    pSrcReal += 4;
+    pSrcCmplx += 8;
+    pCmplxDst += 8;
+  }
+
+  /* Tail: the 0 to 3 samples left when numSamples is not a multiple of 4,
+   ** handled one complex sample per pass. */
+  for(count = numSamples % 0x4u; count > 0u; count--)
+  {
+    scale = pSrcReal[0];
+    pCmplxDst[0] = (q15_t) __SSAT((((q31_t) pSrcCmplx[0] * scale) >> 15), 16);
+    pCmplxDst[1] = (q15_t) __SSAT((((q31_t) pSrcCmplx[1] * scale) >> 15), 16);
+
+    pSrcReal += 1;
+    pSrcCmplx += 2;
+    pCmplxDst += 2;
+  }
+}
+ 
+/**  
+ * @} end of CmplxByRealMult group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/ComplexMathFunctions/arm_cmplx_mult_real_q31.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,112 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:	    arm_cmplx_mult_real_q31.c  
+*  
+* Description:	Q31 complex by real multiplication  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupCmplxMath  
+ */ 
+ 
+/**  
+ * @addtogroup CmplxByRealMult  
+ * @{  
+ */ 
+ 
+ 
+/**  
+ * @brief  Q31 complex-by-real multiplication  
+ * @param[in]  *pSrcCmplx points to the complex input vector  
+ * @param[in]  *pSrcReal points to the real input vector  
+ * @param[out]  *pCmplxDst points to the complex output vector  
+ * @param[in]  numSamples number of samples in each vector  
+ * @return none.  
+ *  
+ * <b>Scaling and Overflow Behavior:</b>  
+ * \par  
+ * The function uses saturating arithmetic.  
+ * Results outside of the allowable Q31 range[0x80000000 0x7FFFFFFF] will be saturated.  
+ */ 
+ 
+void arm_cmplx_mult_real_q31(
+  q31_t * pSrcCmplx,
+  q31_t * pSrcReal,
+  q31_t * pCmplxDst,
+  uint32_t numSamples)
+{
+  q31_t scale;                                   /* Real factor applied to the current complex sample */
+  uint32_t count;                                /* Iterations left in the current loop */
+
+  /* Unrolled section: every pass scales four complex samples.
+   ** Each product is formed in q63_t, shifted right by 31 and passed
+   ** through clip_q63_to_q31 before being stored. */
+  for(count = numSamples >> 2u; count > 0u; count--)
+  {
+    /* 1st sample of the group */
+    scale = pSrcReal[0];
+    pCmplxDst[0] = (q31_t) clip_q63_to_q31(((q63_t) pSrcCmplx[0] * scale) >> 31);
+    pCmplxDst[1] = (q31_t) clip_q63_to_q31(((q63_t) pSrcCmplx[1] * scale) >> 31);
+
+    /* 2nd sample of the group */
+    scale = pSrcReal[1];
+    pCmplxDst[2] = (q31_t) clip_q63_to_q31(((q63_t) pSrcCmplx[2] * scale) >> 31);
+    pCmplxDst[3] = (q31_t) clip_q63_to_q31(((q63_t) pSrcCmplx[3] * scale) >> 31);
+
+    /* 3rd sample of the group */
+    scale = pSrcReal[2];
+    pCmplxDst[4] = (q31_t) clip_q63_to_q31(((q63_t) pSrcCmplx[4] * scale) >> 31);
+    pCmplxDst[5] = (q31_t) clip_q63_to_q31(((q63_t) pSrcCmplx[5] * scale) >> 31);
+
+    /* 4th sample of the group */
+    scale = pSrcReal[3];
+    pCmplxDst[6] = (q31_t) clip_q63_to_q31(((q63_t) pSrcCmplx[6] * scale) >> 31);
+    pCmplxDst[7] = (q31_t) clip_q63_to_q31(((q63_t) pSrcCmplx[7] * scale) >> 31);
+
+    /* Step the vectors past the four samples just handled */
+    pSrcReal += 4;
+    pSrcCmplx += 8;
+    pCmplxDst += 8;
+  }
+
+  /* Tail: the 0 to 3 samples left when numSamples is not a multiple of 4,
+   ** handled one complex sample per pass. */
+  for(count = numSamples % 0x4u; count > 0u; count--)
+  {
+    scale = pSrcReal[0];
+    pCmplxDst[0] = (q31_t) clip_q63_to_q31(((q63_t) pSrcCmplx[0] * scale) >> 31);
+    pCmplxDst[1] = (q31_t) clip_q63_to_q31(((q63_t) pSrcCmplx[1] * scale) >> 31);
+
+    pSrcReal += 1;
+    pSrcCmplx += 2;
+    pCmplxDst += 2;
+  }
+}
+ 
+/**  
+ * @} end of CmplxByRealMult group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/ControllerFunctions/arm_pid_init_f32.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,73 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:	    arm_pid_init_f32.c  
+*  
+* Description:	Floating-point PID Control initialization function  
+*				 
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+* ------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+ /**  
+ * @addtogroup PID  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief  Initialization function for the floating-point PID Control. 
+ * @param[in,out] *S points to an instance of the PID structure. 
+ * @param[in]     resetStateFlag  flag to reset the state. 0 = no change in state & 1 = reset the state. 
+ * @return none. 
+ * \par Description: 
+ * \par  
+ * The <code>resetStateFlag</code> specifies whether to set state to zero or not. \n 
+ * The function computes the structure fields <code>A0</code>, <code>A1</code> and <code>A2</code>  
+ * from the proportional gain( \c Kp), integral gain( \c Ki) and derivative gain( \c Kd).  
+ * The state variables are set to zero only when <code>resetStateFlag</code> is non-zero.  
+ */  
+  
+void arm_pid_init_f32(
+  arm_pid_instance_f32 * S,
+  int32_t resetStateFlag)
+{
+  /* A2 is the derivative gain on its own */
+  S->A2 = S->Kd;
+
+  /* A1 = -Kp - 2 * Kd */
+  S->A1 = (-S->Kp) - ((float32_t) 2.0 * S->Kd);
+
+  /* A0 = Kp + Ki + Kd */
+  S->A0 = S->Kp + S->Ki + S->Kd;
+
+  /* Nothing more to do unless the caller asked for a state reset */
+  if(!resetStateFlag)
+  {
+    return;
+  }
+
+  /* Zero the state buffer, which always holds 3 samples */
+  memset(S->state, 0, 3u * sizeof(float32_t));
+}
+  
+/**  
+ * @} end of PID group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/ControllerFunctions/arm_pid_init_q15.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,68 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:	    arm_pid_init_q15.c  
+*  
+* Description:	Q15 PID Control initialization function  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+ /**  
+ * @addtogroup PID  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief  Initialization function for the Q15 PID Control.  
+ * @param[in,out] *S points to an instance of the Q15 PID structure.  
+ * @param[in]     resetStateFlag  flag to reset the state. 0 = no change in state 1 = reset the state.  
+ * @return none.  
+ * \par Description: 
+ * \par  
+ * The <code>resetStateFlag</code> specifies whether to set state to zero or not. \n 
+ * The function computes the structure fields <code>A0</code> and <code>A1</code>  
+ * (<code>A1</code> is a packed word built from two derived coefficients) using the  
+ * proportional gain( \c Kp), integral gain( \c Ki) and derivative gain( \c Kd).  
+ * The state variables are set to zero only when <code>resetStateFlag</code> is non-zero.  
+ */  
+  
+void arm_pid_init_q15(  
+  arm_pid_instance_q15 * S,  
+  int32_t resetStateFlag)  
+{  
+  /* Derived coefficient A0: Kp, Ki and Kd combined with two nested __QADD16 calls. */  
+  /* NOTE(review): __QADD16 is presumably the saturating 16-bit add intrinsic - confirm in arm_math.h */  
+  S->A0 = __QADD16(__QADD16(S->Kp, S->Ki), S->Kd);  
+  
+  /* Derived coefficients packed into the single field A1: */  
+  /* the first __PKHBT operand is -(Kd + Kd + Kp), the second is Kd, with a shift of 16. */  
+  /* NOTE(review): presumably the first operand lands in the low halfword and Kd in the high one - confirm */  
+  S->A1 = __PKHBT(-__QADD16(__QADD16(S->Kd, S->Kd), S->Kp), S->Kd, 16);  
+  
+  /* The state is only touched when the caller passes a non-zero resetStateFlag */  
+  if(resetStateFlag) 
+  {  
+    /* Clear the state buffer.  The size is always 3 samples */  
+    memset(S->state, 0, 3u * sizeof(q15_t));  
+  }  
+  
+}  
+  
+/**  
+ * @} end of PID group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/ControllerFunctions/arm_pid_init_q31.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,71 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:	    arm_pid_init_q31.c  
+*  
+* Description:	Q31 PID Control initialization function   
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+* ------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+ /**  
+ * @addtogroup PID  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief  Initialization function for the Q31 PID Control. 
+ * @param[in,out] *S points to an instance of the Q31 PID structure. 
+ * @param[in]     resetStateFlag  flag to reset the state. 0 = no change in state 1 = reset the state. 
+ * @return none.  
+ * \par Description: 
+ * \par  
+ * The <code>resetStateFlag</code> specifies whether to set state to zero or not. \n 
+ * The function computes the structure fields <code>A0</code>, <code>A1</code> and <code>A2</code>  
+ * from the proportional gain( \c Kp), integral gain( \c Ki) and derivative gain( \c Kd).  
+ * The state variables are set to zero only when <code>resetStateFlag</code> is non-zero.  
+ */  
+  
+void arm_pid_init_q31(
+  arm_pid_instance_q31 * S,
+  int32_t resetStateFlag)
+{
+  /* A2 is the derivative gain on its own */
+  S->A2 = S->Kd;
+
+  /* A1 = -(Kd + Kd + Kp), with the sum built from __QADD calls */
+  S->A1 = -__QADD(__QADD(S->Kd, S->Kd), S->Kp);
+
+  /* A0 = Kp + Ki + Kd, built from __QADD calls */
+  S->A0 = __QADD(__QADD(S->Kp, S->Ki), S->Kd);
+
+  /* Nothing more to do unless the caller asked for a state reset */
+  if(!resetStateFlag)
+  {
+    return;
+  }
+
+  /* Zero the state buffer, which always holds 3 samples */
+  memset(S->state, 0, 3u * sizeof(q31_t));
+}
+  
+/**  
+ * @} end of PID group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/ControllerFunctions/arm_pid_reset_f32.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,51 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:	    arm_pid_reset_f32.c  
+*  
+* Description:	Floating-point PID Control reset function 
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+* ------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+ /**  
+ * @addtogroup PID  
+ * @{  
+ */ 
+ 
+/**  
+* @brief  Reset function for the floating-point PID Control. 
+* @param[in,out] *S points to an instance of the floating-point PID structure. 
+* @return none.  
+* \par Description: 
+* Zeroes the 3-sample state buffer of the instance; no other field of \c S is written.  
+*/  
+void arm_pid_reset_f32(  
+  arm_pid_instance_f32 * S)  
+{  
+  /* The PID state buffer always holds exactly 3 samples */  
+  const uint32_t stateBytes = 3u * sizeof(float32_t);  
+  memset(S->state, 0, stateBytes);  
+}  
+  
+/**  
+ * @} end of PID group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/ControllerFunctions/arm_pid_reset_q15.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,50 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:	    arm_pid_reset_q15.c  
+*  
+* Description:	Q15 PID Control reset function 
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+ /**  
+ * @addtogroup PID  
+ * @{  
+ */ 
+ 
+/**  
+* @brief  Reset function for the Q15 PID Control. 
+* @param[in,out] *S points to an instance of the Q15 PID structure. 
+* @return none.  
+* \par Description: 
+* Zeroes the 3-sample state buffer of the instance; no other field of \c S is written.  
+*/ 
+void arm_pid_reset_q15( 
+  arm_pid_instance_q15 * S) 
+{ 
+  /* The PID state buffer always holds exactly 3 samples */ 
+  const uint32_t stateBytes = 3u * sizeof(q15_t); 
+  memset(S->state, 0, stateBytes);  
+} 
+ 
+/**  
+ * @} end of PID group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/ControllerFunctions/arm_pid_reset_q31.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,51 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:	    arm_pid_reset_q31.c  
+*  
+* Description:	Q31 PID Control reset function 
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+* ------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+ /**  
+ * @addtogroup PID  
+ * @{  
+ */ 
+ 
+/**  
+* @brief  Reset function for the Q31 PID Control. 
+* @param[in,out] *S points to an instance of the Q31 PID structure. 
+* @return none.  
+* \par Description: 
+* Zeroes the 3-sample state buffer of the instance; no other field of \c S is written.  
+*/ 
+void arm_pid_reset_q31( 
+  arm_pid_instance_q31 * S) 
+{ 
+  /* The PID state buffer always holds exactly 3 samples */ 
+  const uint32_t stateBytes = 3u * sizeof(q31_t); 
+  memset(S->state, 0, stateBytes);  
+} 
+ 
+/**  
+ * @} end of PID group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/ControllerFunctions/arm_sin_cos_f32.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,405 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_sin_cos_f32.c  
+*  
+* Description:	Sine and Cosine calculation for floating-point values. 
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupController  
+ */ 
+ 
+/**  
+ * @defgroup SinCos Sine Cosine 
+ *  
+ * Computes the trigonometric sine and cosine values using a combination of table lookup 
+ * and linear interpolation.   
+ * There are separate functions for Q31 and floating-point data types. 
+ * The input to the floating-point version is in degrees, while the 
+ * fixed-point Q31 version has a scaled input with the range 
+ * [-1 1) mapping to [-180 180) degrees. 
+ * 
+ * The implementation is based on table lookup using 360 values together with linear interpolation. 
+ * The steps used are: 
+ *  -# Calculation of the nearest integer table index. 
+ *  -# Compute the fractional portion (fract) of the input. 
+ *  -# Fetch the value corresponding to \c index from sine table to \c y0 and also value from \c index+1 to \c y1.    
+ *  -# Sine value is computed as <code> *psinVal = y0 + (fract * (y1 - y0))</code>.  
+ *  -# Fetch the value corresponding to \c index from cosine table to \c y0 and also value from \c index+1 to \c y1.    
+ *  -# Cosine value is computed as <code> *pcosVal = y0 + (fract * (y1 - y0))</code>.  
+ */ 
+ 
+ /**  
+ * @addtogroup SinCos  
+ * @{  
+ */ 
+ 
+ 
+/**  
+* \par  
+* Cosine Table (360 entries, -179 to +180 degrees in 1 degree steps) is generated from following loop  
+* <pre>for(i = 0; i < 360; i++)  
+* {  
+*    cosTable[i]= cos((i-179) * PI/180.0);  
+* } </pre> 
+*/ 
+ 
+static const float32_t cosTable[360] = { 
+  -0.999847695156391270f, -0.999390827019095760f, -0.998629534754573830f, 
+  -0.997564050259824200f, -0.996194698091745550f, -0.994521895368273290f, 
+  -0.992546151641321980f, -0.990268068741570250f, 
+  -0.987688340595137660f, -0.984807753012208020f, -0.981627183447663980f, 
+  -0.978147600733805690f, -0.974370064785235250f, -0.970295726275996470f, 
+  -0.965925826289068200f, -0.961261695938318670f, 
+  -0.956304755963035440f, -0.951056516295153530f, -0.945518575599316740f, 
+  -0.939692620785908320f, -0.933580426497201740f, -0.927183854566787310f, 
+  -0.920504853452440150f, -0.913545457642600760f, 
+  -0.906307787036649940f, -0.898794046299167040f, -0.891006524188367790f, 
+  -0.882947592858926770f, -0.874619707139395740f, -0.866025403784438710f, 
+  -0.857167300702112220f, -0.848048096156425960f, 
+  -0.838670567945424160f, -0.829037572555041620f, -0.819152044288991580f, 
+  -0.809016994374947340f, -0.798635510047292940f, -0.788010753606721900f, 
+  -0.777145961456970680f, -0.766044443118977900f, 
+  -0.754709580222772010f, -0.743144825477394130f, -0.731353701619170460f, 
+  -0.719339800338651300f, -0.707106781186547460f, -0.694658370458997030f, 
+  -0.681998360062498370f, -0.669130606358858240f, 
+  -0.656059028990507500f, -0.642787609686539360f, -0.629320391049837280f, 
+  -0.615661475325658290f, -0.601815023152048380f, -0.587785252292473030f, 
+  -0.573576436351045830f, -0.559192903470746680f, 
+  -0.544639035015027080f, -0.529919264233204790f, -0.515038074910054270f, 
+  -0.499999999999999780f, -0.484809620246337000f, -0.469471562785890530f, 
+  -0.453990499739546750f, -0.438371146789077510f, 
+  -0.422618261740699330f, -0.406736643075800100f, -0.390731128489273600f, 
+  -0.374606593415912070f, -0.358367949545300270f, -0.342020143325668710f, 
+  -0.325568154457156420f, -0.309016994374947340f, 
+  -0.292371704722736660f, -0.275637355816999050f, -0.258819045102520850f, 
+  -0.241921895599667790f, -0.224951054343864810f, -0.207911690817759120f, 
+  -0.190808995376544800f, -0.173648177666930300f, 
+  -0.156434465040231040f, -0.139173100960065350f, -0.121869343405147370f, 
+  -0.104528463267653330f, -0.087155742747658235f, -0.069756473744125330f, 
+  -0.052335956242943620f, -0.034899496702500733f, 
+  -0.017452406437283477f, 0.000000000000000061f, 0.017452406437283376f, 
+  0.034899496702501080f, 0.052335956242943966f, 0.069756473744125455f, 
+  0.087155742747658138f, 0.104528463267653460f, 
+  0.121869343405147490f, 0.139173100960065690f, 0.156434465040230920f, 
+  0.173648177666930410f, 0.190808995376544920f, 0.207911690817759450f, 
+  0.224951054343864920f, 0.241921895599667900f, 
+  0.258819045102520740f, 0.275637355816999160f, 0.292371704722736770f, 
+  0.309016994374947450f, 0.325568154457156760f, 0.342020143325668820f, 
+  0.358367949545300380f, 0.374606593415911960f, 
+  0.390731128489273940f, 0.406736643075800210f, 0.422618261740699440f, 
+  0.438371146789077460f, 0.453990499739546860f, 0.469471562785890860f, 
+  0.484809620246337110f, 0.500000000000000110f, 
+  0.515038074910054380f, 0.529919264233204900f, 0.544639035015027200f, 
+  0.559192903470746790f, 0.573576436351046050f, 0.587785252292473140f, 
+  0.601815023152048270f, 0.615661475325658290f, 
+  0.629320391049837500f, 0.642787609686539360f, 0.656059028990507280f, 
+  0.669130606358858240f, 0.681998360062498480f, 0.694658370458997370f, 
+  0.707106781186547570f, 0.719339800338651190f, 
+  0.731353701619170570f, 0.743144825477394240f, 0.754709580222772010f, 
+  0.766044443118978010f, 0.777145961456970900f, 0.788010753606722010f, 
+  0.798635510047292830f, 0.809016994374947450f, 
+  0.819152044288991800f, 0.829037572555041620f, 0.838670567945424050f, 
+  0.848048096156425960f, 0.857167300702112330f, 0.866025403784438710f, 
+  0.874619707139395740f, 0.882947592858926990f, 
+  0.891006524188367900f, 0.898794046299167040f, 0.906307787036649940f, 
+  0.913545457642600870f, 0.920504853452440370f, 0.927183854566787420f, 
+  0.933580426497201740f, 0.939692620785908430f, 
+  0.945518575599316850f, 0.951056516295153530f, 0.956304755963035440f, 
+  0.961261695938318890f, 0.965925826289068310f, 0.970295726275996470f, 
+  0.974370064785235250f, 0.978147600733805690f, 
+  0.981627183447663980f, 0.984807753012208020f, 0.987688340595137770f, 
+  0.990268068741570360f, 0.992546151641321980f, 0.994521895368273290f, 
+  0.996194698091745550f, 0.997564050259824200f, 
+  0.998629534754573830f, 0.999390827019095760f, 0.999847695156391270f, 
+  1.000000000000000000f, 0.999847695156391270f, 0.999390827019095760f, 
+  0.998629534754573830f, 0.997564050259824200f, 
+  0.996194698091745550f, 0.994521895368273290f, 0.992546151641321980f, 
+  0.990268068741570360f, 0.987688340595137770f, 0.984807753012208020f, 
+  0.981627183447663980f, 0.978147600733805690f, 
+  0.974370064785235250f, 0.970295726275996470f, 0.965925826289068310f, 
+  0.961261695938318890f, 0.956304755963035440f, 0.951056516295153530f, 
+  0.945518575599316850f, 0.939692620785908430f, 
+  0.933580426497201740f, 0.927183854566787420f, 0.920504853452440370f, 
+  0.913545457642600870f, 0.906307787036649940f, 0.898794046299167040f, 
+  0.891006524188367900f, 0.882947592858926990f, 
+  0.874619707139395740f, 0.866025403784438710f, 0.857167300702112330f, 
+  0.848048096156425960f, 0.838670567945424050f, 0.829037572555041620f, 
+  0.819152044288991800f, 0.809016994374947450f, 
+  0.798635510047292830f, 0.788010753606722010f, 0.777145961456970900f, 
+  0.766044443118978010f, 0.754709580222772010f, 0.743144825477394240f, 
+  0.731353701619170570f, 0.719339800338651190f, 
+  0.707106781186547570f, 0.694658370458997370f, 0.681998360062498480f, 
+  0.669130606358858240f, 0.656059028990507280f, 0.642787609686539360f, 
+  0.629320391049837500f, 0.615661475325658290f, 
+  0.601815023152048270f, 0.587785252292473140f, 0.573576436351046050f, 
+  0.559192903470746790f, 0.544639035015027200f, 0.529919264233204900f, 
+  0.515038074910054380f, 0.500000000000000110f, 
+  0.484809620246337110f, 0.469471562785890860f, 0.453990499739546860f, 
+  0.438371146789077460f, 0.422618261740699440f, 0.406736643075800210f, 
+  0.390731128489273940f, 0.374606593415911960f, 
+  0.358367949545300380f, 0.342020143325668820f, 0.325568154457156760f, 
+  0.309016994374947450f, 0.292371704722736770f, 0.275637355816999160f, 
+  0.258819045102520740f, 0.241921895599667900f, 
+  0.224951054343864920f, 0.207911690817759450f, 0.190808995376544920f, 
+  0.173648177666930410f, 0.156434465040230920f, 0.139173100960065690f, 
+  0.121869343405147490f, 0.104528463267653460f, 
+  0.087155742747658138f, 0.069756473744125455f, 0.052335956242943966f, 
+  0.034899496702501080f, 0.017452406437283376f, 0.000000000000000061f, 
+  -0.017452406437283477f, -0.034899496702500733f, 
+  -0.052335956242943620f, -0.069756473744125330f, -0.087155742747658235f, 
+  -0.104528463267653330f, -0.121869343405147370f, -0.139173100960065350f, 
+  -0.156434465040231040f, -0.173648177666930300f, 
+  -0.190808995376544800f, -0.207911690817759120f, -0.224951054343864810f, 
+  -0.241921895599667790f, -0.258819045102520850f, -0.275637355816999050f, 
+  -0.292371704722736660f, -0.309016994374947340f, 
+  -0.325568154457156420f, -0.342020143325668710f, -0.358367949545300270f, 
+  -0.374606593415912070f, -0.390731128489273600f, -0.406736643075800100f, 
+  -0.422618261740699330f, -0.438371146789077510f, 
+  -0.453990499739546750f, -0.469471562785890530f, -0.484809620246337000f, 
+  -0.499999999999999780f, -0.515038074910054270f, -0.529919264233204790f, 
+  -0.544639035015027080f, -0.559192903470746680f, 
+  -0.573576436351045830f, -0.587785252292473030f, -0.601815023152048380f, 
+  -0.615661475325658290f, -0.629320391049837280f, -0.642787609686539360f, 
+  -0.656059028990507500f, -0.669130606358858240f, 
+  -0.681998360062498370f, -0.694658370458997030f, -0.707106781186547460f, 
+  -0.719339800338651300f, -0.731353701619170460f, -0.743144825477394130f, 
+  -0.754709580222772010f, -0.766044443118977900f, 
+  -0.777145961456970680f, -0.788010753606721900f, -0.798635510047292940f, 
+  -0.809016994374947340f, -0.819152044288991580f, -0.829037572555041620f, 
+  -0.838670567945424160f, -0.848048096156425960f, 
+  -0.857167300702112220f, -0.866025403784438710f, -0.874619707139395740f, 
+  -0.882947592858926770f, -0.891006524188367790f, -0.898794046299167040f, 
+  -0.906307787036649940f, -0.913545457642600760f, 
+  -0.920504853452440150f, -0.927183854566787310f, -0.933580426497201740f, 
+  -0.939692620785908320f, -0.945518575599316740f, -0.951056516295153530f, 
+  -0.956304755963035440f, -0.961261695938318670f, 
+  -0.965925826289068200f, -0.970295726275996470f, -0.974370064785235250f, 
+  -0.978147600733805690f, -0.981627183447663980f, -0.984807753012208020f, 
+  -0.987688340595137660f, -0.990268068741570250f, 
+  -0.992546151641321980f, -0.994521895368273290f, -0.996194698091745550f, 
+  -0.997564050259824200f, -0.998629534754573830f, -0.999390827019095760f, 
+  -0.999847695156391270f, -1.000000000000000000f 
+}; 
+ 
+/**  
+* \par  
+* Sine Table (360 entries, -179 to +180 degrees in 1 degree steps) is generated from following loop  
+* <pre>for(i = 0; i < 360; i++)  
+* {  
+*    sinTable[i]= sin((i-179) * PI/180.0);  
+* } </pre>  
+*/ 
+ 
+ 
+static const float32_t sinTable[360] = { 
+  -0.017452406437283439f, -0.034899496702500699f, -0.052335956242943807f, 
+  -0.069756473744125524f, -0.087155742747658638f, -0.104528463267653730f, 
+  -0.121869343405147550f, -0.139173100960065740f, 
+  -0.156434465040230980f, -0.173648177666930280f, -0.190808995376544970f, 
+  -0.207911690817759310f, -0.224951054343864780f, -0.241921895599667730f, 
+  -0.258819045102521020f, -0.275637355816999660f, 
+  -0.292371704722737050f, -0.309016994374947510f, -0.325568154457156980f, 
+  -0.342020143325668880f, -0.358367949545300210f, -0.374606593415912240f, 
+  -0.390731128489274160f, -0.406736643075800430f, 
+  -0.422618261740699500f, -0.438371146789077290f, -0.453990499739546860f, 
+  -0.469471562785891080f, -0.484809620246337170f, -0.499999999999999940f, 
+  -0.515038074910054380f, -0.529919264233204900f, 
+  -0.544639035015026860f, -0.559192903470746900f, -0.573576436351046380f, 
+  -0.587785252292473250f, -0.601815023152048160f, -0.615661475325658400f, 
+  -0.629320391049837720f, -0.642787609686539470f, 
+  -0.656059028990507280f, -0.669130606358858350f, -0.681998360062498590f, 
+  -0.694658370458997140f, -0.707106781186547570f, -0.719339800338651410f, 
+  -0.731353701619170570f, -0.743144825477394240f, 
+  -0.754709580222771790f, -0.766044443118978010f, -0.777145961456971010f, 
+  -0.788010753606722010f, -0.798635510047292720f, -0.809016994374947450f, 
+  -0.819152044288992020f, -0.829037572555041740f, 
+  -0.838670567945424050f, -0.848048096156426070f, -0.857167300702112330f, 
+  -0.866025403784438710f, -0.874619707139395850f, -0.882947592858927100f, 
+  -0.891006524188367900f, -0.898794046299166930f, 
+  -0.906307787036650050f, -0.913545457642600980f, -0.920504853452440370f, 
+  -0.927183854566787420f, -0.933580426497201740f, -0.939692620785908430f, 
+  -0.945518575599316850f, -0.951056516295153640f, 
+  -0.956304755963035550f, -0.961261695938318890f, -0.965925826289068310f, 
+  -0.970295726275996470f, -0.974370064785235250f, -0.978147600733805690f, 
+  -0.981627183447663980f, -0.984807753012208020f, 
+  -0.987688340595137660f, -0.990268068741570360f, -0.992546151641322090f, 
+  -0.994521895368273400f, -0.996194698091745550f, -0.997564050259824200f, 
+  -0.998629534754573830f, -0.999390827019095760f, 
+  -0.999847695156391270f, -1.000000000000000000f, -0.999847695156391270f, 
+  -0.999390827019095760f, -0.998629534754573830f, -0.997564050259824200f, 
+  -0.996194698091745550f, -0.994521895368273290f, 
+  -0.992546151641321980f, -0.990268068741570250f, -0.987688340595137770f, 
+  -0.984807753012208020f, -0.981627183447663980f, -0.978147600733805580f, 
+  -0.974370064785235250f, -0.970295726275996470f, 
+  -0.965925826289068310f, -0.961261695938318890f, -0.956304755963035440f, 
+  -0.951056516295153530f, -0.945518575599316740f, -0.939692620785908320f, 
+  -0.933580426497201740f, -0.927183854566787420f, 
+  -0.920504853452440260f, -0.913545457642600870f, -0.906307787036649940f, 
+  -0.898794046299167040f, -0.891006524188367790f, -0.882947592858926880f, 
+  -0.874619707139395740f, -0.866025403784438600f, 
+  -0.857167300702112220f, -0.848048096156426070f, -0.838670567945423940f, 
+  -0.829037572555041740f, -0.819152044288991800f, -0.809016994374947450f, 
+  -0.798635510047292830f, -0.788010753606722010f, 
+  -0.777145961456970790f, -0.766044443118978010f, -0.754709580222772010f, 
+  -0.743144825477394240f, -0.731353701619170460f, -0.719339800338651080f, 
+  -0.707106781186547460f, -0.694658370458997250f, 
+  -0.681998360062498480f, -0.669130606358858240f, -0.656059028990507160f, 
+  -0.642787609686539250f, -0.629320391049837390f, -0.615661475325658180f, 
+  -0.601815023152048270f, -0.587785252292473140f, 
+  -0.573576436351046050f, -0.559192903470746900f, -0.544639035015027080f, 
+  -0.529919264233204900f, -0.515038074910054160f, -0.499999999999999940f, 
+  -0.484809620246337060f, -0.469471562785890810f, 
+  -0.453990499739546750f, -0.438371146789077400f, -0.422618261740699440f, 
+  -0.406736643075800150f, -0.390731128489273720f, -0.374606593415912010f, 
+  -0.358367949545300270f, -0.342020143325668710f, 
+  -0.325568154457156640f, -0.309016994374947400f, -0.292371704722736770f, 
+  -0.275637355816999160f, -0.258819045102520740f, -0.241921895599667730f, 
+  -0.224951054343865000f, -0.207911690817759310f, 
+  -0.190808995376544800f, -0.173648177666930330f, -0.156434465040230870f, 
+  -0.139173100960065440f, -0.121869343405147480f, -0.104528463267653460f, 
+  -0.087155742747658166f, -0.069756473744125302f, 
+  -0.052335956242943828f, -0.034899496702500969f, -0.017452406437283512f, 
+  0.000000000000000000f, 0.017452406437283512f, 0.034899496702500969f, 
+  0.052335956242943828f, 0.069756473744125302f, 
+  0.087155742747658166f, 0.104528463267653460f, 0.121869343405147480f, 
+  0.139173100960065440f, 0.156434465040230870f, 0.173648177666930330f, 
+  0.190808995376544800f, 0.207911690817759310f, 
+  0.224951054343865000f, 0.241921895599667730f, 0.258819045102520740f, 
+  0.275637355816999160f, 0.292371704722736770f, 0.309016994374947400f, 
+  0.325568154457156640f, 0.342020143325668710f, 
+  0.358367949545300270f, 0.374606593415912010f, 0.390731128489273720f, 
+  0.406736643075800150f, 0.422618261740699440f, 0.438371146789077400f, 
+  0.453990499739546750f, 0.469471562785890810f, 
+  0.484809620246337060f, 0.499999999999999940f, 0.515038074910054160f, 
+  0.529919264233204900f, 0.544639035015027080f, 0.559192903470746900f, 
+  0.573576436351046050f, 0.587785252292473140f, 
+  0.601815023152048270f, 0.615661475325658180f, 0.629320391049837390f, 
+  0.642787609686539250f, 0.656059028990507160f, 0.669130606358858240f, 
+  0.681998360062498480f, 0.694658370458997250f, 
+  0.707106781186547460f, 0.719339800338651080f, 0.731353701619170460f, 
+  0.743144825477394240f, 0.754709580222772010f, 0.766044443118978010f, 
+  0.777145961456970790f, 0.788010753606722010f, 
+  0.798635510047292830f, 0.809016994374947450f, 0.819152044288991800f, 
+  0.829037572555041740f, 0.838670567945423940f, 0.848048096156426070f, 
+  0.857167300702112220f, 0.866025403784438600f, 
+  0.874619707139395740f, 0.882947592858926880f, 0.891006524188367790f, 
+  0.898794046299167040f, 0.906307787036649940f, 0.913545457642600870f, 
+  0.920504853452440260f, 0.927183854566787420f, 
+  0.933580426497201740f, 0.939692620785908320f, 0.945518575599316740f, 
+  0.951056516295153530f, 0.956304755963035440f, 0.961261695938318890f, 
+  0.965925826289068310f, 0.970295726275996470f, 
+  0.974370064785235250f, 0.978147600733805580f, 0.981627183447663980f, 
+  0.984807753012208020f, 0.987688340595137770f, 0.990268068741570250f, 
+  0.992546151641321980f, 0.994521895368273290f, 
+  0.996194698091745550f, 0.997564050259824200f, 0.998629534754573830f, 
+  0.999390827019095760f, 0.999847695156391270f, 1.000000000000000000f, 
+  0.999847695156391270f, 0.999390827019095760f, 
+  0.998629534754573830f, 0.997564050259824200f, 0.996194698091745550f, 
+  0.994521895368273400f, 0.992546151641322090f, 0.990268068741570360f, 
+  0.987688340595137660f, 0.984807753012208020f, 
+  0.981627183447663980f, 0.978147600733805690f, 0.974370064785235250f, 
+  0.970295726275996470f, 0.965925826289068310f, 0.961261695938318890f, 
+  0.956304755963035550f, 0.951056516295153640f, 
+  0.945518575599316850f, 0.939692620785908430f, 0.933580426497201740f, 
+  0.927183854566787420f, 0.920504853452440370f, 0.913545457642600980f, 
+  0.906307787036650050f, 0.898794046299166930f, 
+  0.891006524188367900f, 0.882947592858927100f, 0.874619707139395850f, 
+  0.866025403784438710f, 0.857167300702112330f, 0.848048096156426070f, 
+  0.838670567945424050f, 0.829037572555041740f, 
+  0.819152044288992020f, 0.809016994374947450f, 0.798635510047292720f, 
+  0.788010753606722010f, 0.777145961456971010f, 0.766044443118978010f, 
+  0.754709580222771790f, 0.743144825477394240f, 
+  0.731353701619170570f, 0.719339800338651410f, 0.707106781186547570f, 
+  0.694658370458997140f, 0.681998360062498590f, 0.669130606358858350f, 
+  0.656059028990507280f, 0.642787609686539470f, 
+  0.629320391049837720f, 0.615661475325658400f, 0.601815023152048160f, 
+  0.587785252292473250f, 0.573576436351046380f, 0.559192903470746900f, 
+  0.544639035015026860f, 0.529919264233204900f, 
+  0.515038074910054380f, 0.499999999999999940f, 0.484809620246337170f, 
+  0.469471562785891080f, 0.453990499739546860f, 0.438371146789077290f, 
+  0.422618261740699500f, 0.406736643075800430f, 
+  0.390731128489274160f, 0.374606593415912240f, 0.358367949545300210f, 
+  0.342020143325668880f, 0.325568154457156980f, 0.309016994374947510f, 
+  0.292371704722737050f, 0.275637355816999660f, 
+  0.258819045102521020f, 0.241921895599667730f, 0.224951054343864780f, 
+  0.207911690817759310f, 0.190808995376544970f, 0.173648177666930280f, 
+  0.156434465040230980f, 0.139173100960065740f, 
+  0.121869343405147550f, 0.104528463267653730f, 0.087155742747658638f, 
+  0.069756473744125524f, 0.052335956242943807f, 0.034899496702500699f, 
+  0.017452406437283439f, 0.000000000000000122f 
+}; 
+ 
+ 
+/**  
+ * @brief  Floating-point sin_cos function. 
+ * @param[in]  theta    input value in degrees. Any finite angle whose whole-degree part fits in an 
+ *                      int32_t is accepted; it is wrapped onto the 360-entry tables (-179 to +180 degrees). 
+ * @param[out] *pSinVal points to the processed sine output.  
+ * @param[out] *pCosVal points to the processed cos output.  
+ * @return none. 
+ * \par 
+ * Both results are linearly interpolated between the two table samples that bracket \c theta. 
+ */ 
+ 
+ 
+void arm_sin_cos_f32( 
+  float32_t theta, 
+  float32_t * pSinVal, 
+  float32_t * pCosVal) 
+{ 
+  int32_t deg;                                   /* floor(theta) in whole degrees */ 
+  int32_t i0, i1;                                /* Indices of the table samples bracketing theta */ 
+  float32_t y0, y1;                              /* nearest output values */ 
+  float32_t fract;                               /* fractional part of input, in [0, 1) */ 
+ 
+  /* The cast truncates toward zero; step down once for negative non-integer 
+   * inputs so that deg = floor(theta) and fract below is never negative */ 
+  deg = (int32_t) theta; 
+  if((float32_t) deg > theta) 
+  { 
+    deg--; 
+  } 
+ 
+  /* Calculation of fractional part: exactly 0 for whole-degree inputs */ 
+  fract = theta - (float32_t) deg; 
+ 
+  /* Table entry 0 holds -179 degrees; reduce modulo 360 so every angle maps to 0..359 */ 
+  i0 = (deg % 360) + 179; 
+  if(i0 < 0) 
+  { 
+    i0 += 360; 
+  } 
+  else if(i0 >= 360) 
+  { 
+    i0 -= 360; 
+  } 
+ 
+  /* The sample following +180 degrees (entry 359) is -179 degrees (entry 0) */ 
+  i1 = (i0 == 359) ? 0 : (i0 + 1); 
+ 
+  /* Calculation of sine value */ 
+  y0 = sinTable[i0]; 
+  y1 = sinTable[i1]; 
+  *pSinVal = y0 + (fract * (y1 - y0)); 
+ 
+  /* Calculation of cosine value */ 
+  y0 = cosTable[i0]; 
+  y1 = cosTable[i1]; 
+  *pCosVal = y0 + (fract * (y1 - y0)); 
+} 
+ 
+/**  
+ * @} end of SinCos group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/ControllerFunctions/arm_sin_cos_q31.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,307 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_sin_cos_q31.c  
+*  
+* Description:	Cosine & Sine calculation for Q31 values. 
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupController  
+ */ 
+ 
+ /**  
+ * @addtogroup SinCos  
+ * @{  
+ */ 
+ 
+/**  
+* \par  
+* Sine table with 360 entries, one per degree, in fixed point 1.31 format.  
+* Entry i holds sin((i-180) degrees): entry 0 is -180 degrees and entry 359  
+* is +179 degrees.  
+* \par  
+* The table is generated from the following loop  
+* <pre>for(i = 0; i < 360; i++)  
+* {  
+*    sinTable[i]= sin((i-180) * PI/180.0);  
+* } </pre> 
+* and the resulting coefficients are converted to fixed point 1.31 format.  
+*/ 
+ 
+static const int32_t sinTableQ31[360] = { 
+ 
+  0x0, 0xfdc41e9b, 0xfb8869ce, 0xf94d0e2e, 0xf7123849, 0xf4d814a4, 0xf29ecfb2, 
+  0xf06695da, 
+  0xee2f9369, 0xebf9f498, 0xe9c5e582, 0xe7939223, 0xe5632654, 0xe334cdc9, 
+  0xe108b40d, 0xdedf047d, 
+  0xdcb7ea46, 0xda939061, 0xd8722192, 0xd653c860, 0xd438af17, 0xd220ffc0, 
+  0xd00ce422, 0xcdfc85bb, 
+  0xcbf00dbe, 0xc9e7a512, 0xc7e3744b, 0xc5e3a3a9, 0xc3e85b18, 0xc1f1c224, 
+  0xc0000000, 0xbe133b7c, 
+  0xbc2b9b05, 0xba4944a2, 0xb86c5df0, 0xb6950c1e, 0xb4c373ee, 0xb2f7b9af, 
+  0xb1320139, 0xaf726def, 
+  0xadb922b7, 0xac0641fb, 0xaa59eda4, 0xa8b4471a, 0xa7156f3c, 0xa57d8666, 
+  0xa3ecac65, 0xa263007d, 
+  0xa0e0a15f, 0x9f65ad2d, 0x9df24175, 0x9c867b2c, 0x9b2276b0, 0x99c64fc5, 
+  0x98722192, 0x9726069c, 
+  0x95e218c9, 0x94a6715d, 0x937328f5, 0x92485786, 0x9126145f, 0x900c7621, 
+  0x8efb92c2, 0x8df37f8b, 
+  0x8cf45113, 0x8bfe1b3f, 0x8b10f144, 0x8a2ce59f, 0x89520a1a, 0x88806fc4, 
+  0x87b826f7, 0x86f93f50, 
+  0x8643c7b3, 0x8597ce46, 0x84f56073, 0x845c8ae3, 0x83cd5982, 0x8347d77b, 
+  0x82cc0f36, 0x825a0a5b, 
+  0x81f1d1ce, 0x81936daf, 0x813ee55b, 0x80f43f69, 0x80b381ac, 0x807cb130, 
+  0x804fd23a, 0x802ce84c, 
+  0x8013f61d, 0x8004fda0, 0x80000000, 0x8004fda0, 0x8013f61d, 0x802ce84c, 
+  0x804fd23a, 0x807cb130, 
+  0x80b381ac, 0x80f43f69, 0x813ee55b, 0x81936daf, 0x81f1d1ce, 0x825a0a5b, 
+  0x82cc0f36, 0x8347d77b, 
+  0x83cd5982, 0x845c8ae3, 0x84f56073, 0x8597ce46, 0x8643c7b3, 0x86f93f50, 
+  0x87b826f7, 0x88806fc4, 
+  0x89520a1a, 0x8a2ce59f, 0x8b10f144, 0x8bfe1b3f, 0x8cf45113, 0x8df37f8b, 
+  0x8efb92c2, 0x900c7621, 
+  0x9126145f, 0x92485786, 0x937328f5, 0x94a6715d, 0x95e218c9, 0x9726069c, 
+  0x98722192, 0x99c64fc5, 
+  0x9b2276b0, 0x9c867b2c, 0x9df24175, 0x9f65ad2d, 0xa0e0a15f, 0xa263007d, 
+  0xa3ecac65, 0xa57d8666, 
+  0xa7156f3c, 0xa8b4471a, 0xaa59eda4, 0xac0641fb, 0xadb922b7, 0xaf726def, 
+  0xb1320139, 0xb2f7b9af, 
+  0xb4c373ee, 0xb6950c1e, 0xb86c5df0, 0xba4944a2, 0xbc2b9b05, 0xbe133b7c, 
+  0xc0000000, 0xc1f1c224, 
+  0xc3e85b18, 0xc5e3a3a9, 0xc7e3744b, 0xc9e7a512, 0xcbf00dbe, 0xcdfc85bb, 
+  0xd00ce422, 0xd220ffc0, 
+  0xd438af17, 0xd653c860, 0xd8722192, 0xda939061, 0xdcb7ea46, 0xdedf047d, 
+  0xe108b40d, 0xe334cdc9, 
+  0xe5632654, 0xe7939223, 0xe9c5e582, 0xebf9f498, 0xee2f9369, 0xf06695da, 
+  0xf29ecfb2, 0xf4d814a4, 
+  0xf7123849, 0xf94d0e2e, 0xfb8869ce, 0xfdc41e9b, 0x0, 0x23be165, 0x4779632, 
+  0x6b2f1d2, 
+  0x8edc7b7, 0xb27eb5c, 0xd61304e, 0xf996a26, 0x11d06c97, 0x14060b68, 
+  0x163a1a7e, 0x186c6ddd, 
+  0x1a9cd9ac, 0x1ccb3237, 0x1ef74bf3, 0x2120fb83, 0x234815ba, 0x256c6f9f, 
+  0x278dde6e, 0x29ac37a0, 
+  0x2bc750e9, 0x2ddf0040, 0x2ff31bde, 0x32037a45, 0x340ff242, 0x36185aee, 
+  0x381c8bb5, 0x3a1c5c57, 
+  0x3c17a4e8, 0x3e0e3ddc, 0x40000000, 0x41ecc484, 0x43d464fb, 0x45b6bb5e, 
+  0x4793a210, 0x496af3e2, 
+  0x4b3c8c12, 0x4d084651, 0x4ecdfec7, 0x508d9211, 0x5246dd49, 0x53f9be05, 
+  0x55a6125c, 0x574bb8e6, 
+  0x58ea90c4, 0x5a82799a, 0x5c13539b, 0x5d9cff83, 0x5f1f5ea1, 0x609a52d3, 
+  0x620dbe8b, 0x637984d4, 
+  0x64dd8950, 0x6639b03b, 0x678dde6e, 0x68d9f964, 0x6a1de737, 0x6b598ea3, 
+  0x6c8cd70b, 0x6db7a87a, 
+  0x6ed9eba1, 0x6ff389df, 0x71046d3e, 0x720c8075, 0x730baeed, 0x7401e4c1, 
+  0x74ef0ebc, 0x75d31a61, 
+  0x76adf5e6, 0x777f903c, 0x7847d909, 0x7906c0b0, 0x79bc384d, 0x7a6831ba, 
+  0x7b0a9f8d, 0x7ba3751d, 
+  0x7c32a67e, 0x7cb82885, 0x7d33f0ca, 0x7da5f5a5, 0x7e0e2e32, 0x7e6c9251, 
+  0x7ec11aa5, 0x7f0bc097, 
+  0x7f4c7e54, 0x7f834ed0, 0x7fb02dc6, 0x7fd317b4, 0x7fec09e3, 0x7ffb0260, 
+  0x7fffffff, 0x7ffb0260, 
+  0x7fec09e3, 0x7fd317b4, 0x7fb02dc6, 0x7f834ed0, 0x7f4c7e54, 0x7f0bc097, 
+  0x7ec11aa5, 0x7e6c9251, 
+  0x7e0e2e32, 0x7da5f5a5, 0x7d33f0ca, 0x7cb82885, 0x7c32a67e, 0x7ba3751d, 
+  0x7b0a9f8d, 0x7a6831ba, 
+  0x79bc384d, 0x7906c0b0, 0x7847d909, 0x777f903c, 0x76adf5e6, 0x75d31a61, 
+  0x74ef0ebc, 0x7401e4c1, 
+  0x730baeed, 0x720c8075, 0x71046d3e, 0x6ff389df, 0x6ed9eba1, 0x6db7a87a, 
+  0x6c8cd70b, 0x6b598ea3, 
+  0x6a1de737, 0x68d9f964, 0x678dde6e, 0x6639b03b, 0x64dd8950, 0x637984d4, 
+  0x620dbe8b, 0x609a52d3, 
+  0x5f1f5ea1, 0x5d9cff83, 0x5c13539b, 0x5a82799a, 0x58ea90c4, 0x574bb8e6, 
+  0x55a6125c, 0x53f9be05, 
+  0x5246dd49, 0x508d9211, 0x4ecdfec7, 0x4d084651, 0x4b3c8c12, 0x496af3e2, 
+  0x4793a210, 0x45b6bb5e, 
+  0x43d464fb, 0x41ecc484, 0x40000000, 0x3e0e3ddc, 0x3c17a4e8, 0x3a1c5c57, 
+  0x381c8bb5, 0x36185aee, 
+  0x340ff242, 0x32037a45, 0x2ff31bde, 0x2ddf0040, 0x2bc750e9, 0x29ac37a0, 
+  0x278dde6e, 0x256c6f9f, 
+  0x234815ba, 0x2120fb83, 0x1ef74bf3, 0x1ccb3237, 0x1a9cd9ac, 0x186c6ddd, 
+  0x163a1a7e, 0x14060b68, 
+  0x11d06c97, 0xf996a26, 0xd61304e, 0xb27eb5c, 0x8edc7b7, 0x6b2f1d2, 
+  0x4779632, 0x23be165, 
+ 
+ 
+}; 
+ 
+/**  
+* \par  
+* Cosine table with 360 entries, one per degree, in fixed point 1.31 format.  
+* Entry i holds cos((i-180) degrees): entry 0 is -180 degrees and entry 359  
+* is +179 degrees.  
+* \par  
+* The table is generated from the following loop  
+* <pre>for(i = 0; i < 360; i++)  
+* {  
+*    cosTable[i]= cos((i-180) * PI/180.0);  
+* } </pre> 
+* \par  
+* and the resulting coefficients are converted to fixed point 1.31 format.  
+*/ 
+static const int32_t cosTableQ31[360] = { 
+  0x80000000, 0x8004fda0, 0x8013f61d, 0x802ce84c, 0x804fd23a, 0x807cb130, 
+  0x80b381ac, 0x80f43f69, 
+  0x813ee55b, 0x81936daf, 0x81f1d1ce, 0x825a0a5b, 0x82cc0f36, 0x8347d77b, 
+  0x83cd5982, 0x845c8ae3, 
+  0x84f56073, 0x8597ce46, 0x8643c7b3, 0x86f93f50, 0x87b826f7, 0x88806fc4, 
+  0x89520a1a, 0x8a2ce59f, 
+  0x8b10f144, 0x8bfe1b3f, 0x8cf45113, 0x8df37f8b, 0x8efb92c2, 0x900c7621, 
+  0x9126145f, 0x92485786, 
+  0x937328f5, 0x94a6715d, 0x95e218c9, 0x9726069c, 0x98722192, 0x99c64fc5, 
+  0x9b2276b0, 0x9c867b2c, 
+  0x9df24175, 0x9f65ad2d, 0xa0e0a15f, 0xa263007d, 0xa3ecac65, 0xa57d8666, 
+  0xa7156f3c, 0xa8b4471a, 
+  0xaa59eda4, 0xac0641fb, 0xadb922b7, 0xaf726def, 0xb1320139, 0xb2f7b9af, 
+  0xb4c373ee, 0xb6950c1e, 
+  0xb86c5df0, 0xba4944a2, 0xbc2b9b05, 0xbe133b7c, 0xc0000000, 0xc1f1c224, 
+  0xc3e85b18, 0xc5e3a3a9, 
+  0xc7e3744b, 0xc9e7a512, 0xcbf00dbe, 0xcdfc85bb, 0xd00ce422, 0xd220ffc0, 
+  0xd438af17, 0xd653c860, 
+  0xd8722192, 0xda939061, 0xdcb7ea46, 0xdedf047d, 0xe108b40d, 0xe334cdc9, 
+  0xe5632654, 0xe7939223, 
+  0xe9c5e582, 0xebf9f498, 0xee2f9369, 0xf06695da, 0xf29ecfb2, 0xf4d814a4, 
+  0xf7123849, 0xf94d0e2e, 
+  0xfb8869ce, 0xfdc41e9b, 0x0, 0x23be165, 0x4779632, 0x6b2f1d2, 0x8edc7b7, 
+  0xb27eb5c, 
+  0xd61304e, 0xf996a26, 0x11d06c97, 0x14060b68, 0x163a1a7e, 0x186c6ddd, 
+  0x1a9cd9ac, 0x1ccb3237, 
+  0x1ef74bf3, 0x2120fb83, 0x234815ba, 0x256c6f9f, 0x278dde6e, 0x29ac37a0, 
+  0x2bc750e9, 0x2ddf0040, 
+  0x2ff31bde, 0x32037a45, 0x340ff242, 0x36185aee, 0x381c8bb5, 0x3a1c5c57, 
+  0x3c17a4e8, 0x3e0e3ddc, 
+  0x40000000, 0x41ecc484, 0x43d464fb, 0x45b6bb5e, 0x4793a210, 0x496af3e2, 
+  0x4b3c8c12, 0x4d084651, 
+  0x4ecdfec7, 0x508d9211, 0x5246dd49, 0x53f9be05, 0x55a6125c, 0x574bb8e6, 
+  0x58ea90c4, 0x5a82799a, 
+  0x5c13539b, 0x5d9cff83, 0x5f1f5ea1, 0x609a52d3, 0x620dbe8b, 0x637984d4, 
+  0x64dd8950, 0x6639b03b, 
+  0x678dde6e, 0x68d9f964, 0x6a1de737, 0x6b598ea3, 0x6c8cd70b, 0x6db7a87a, 
+  0x6ed9eba1, 0x6ff389df, 
+  0x71046d3e, 0x720c8075, 0x730baeed, 0x7401e4c1, 0x74ef0ebc, 0x75d31a61, 
+  0x76adf5e6, 0x777f903c, 
+  0x7847d909, 0x7906c0b0, 0x79bc384d, 0x7a6831ba, 0x7b0a9f8d, 0x7ba3751d, 
+  0x7c32a67e, 0x7cb82885, 
+  0x7d33f0ca, 0x7da5f5a5, 0x7e0e2e32, 0x7e6c9251, 0x7ec11aa5, 0x7f0bc097, 
+  0x7f4c7e54, 0x7f834ed0, 
+  0x7fb02dc6, 0x7fd317b4, 0x7fec09e3, 0x7ffb0260, 0x7fffffff, 0x7ffb0260, 
+  0x7fec09e3, 0x7fd317b4, 
+  0x7fb02dc6, 0x7f834ed0, 0x7f4c7e54, 0x7f0bc097, 0x7ec11aa5, 0x7e6c9251, 
+  0x7e0e2e32, 0x7da5f5a5, 
+  0x7d33f0ca, 0x7cb82885, 0x7c32a67e, 0x7ba3751d, 0x7b0a9f8d, 0x7a6831ba, 
+  0x79bc384d, 0x7906c0b0, 
+  0x7847d909, 0x777f903c, 0x76adf5e6, 0x75d31a61, 0x74ef0ebc, 0x7401e4c1, 
+  0x730baeed, 0x720c8075, 
+  0x71046d3e, 0x6ff389df, 0x6ed9eba1, 0x6db7a87a, 0x6c8cd70b, 0x6b598ea3, 
+  0x6a1de737, 0x68d9f964, 
+  0x678dde6e, 0x6639b03b, 0x64dd8950, 0x637984d4, 0x620dbe8b, 0x609a52d3, 
+  0x5f1f5ea1, 0x5d9cff83, 
+  0x5c13539b, 0x5a82799a, 0x58ea90c4, 0x574bb8e6, 0x55a6125c, 0x53f9be05, 
+  0x5246dd49, 0x508d9211, 
+  0x4ecdfec7, 0x4d084651, 0x4b3c8c12, 0x496af3e2, 0x4793a210, 0x45b6bb5e, 
+  0x43d464fb, 0x41ecc484, 
+  0x40000000, 0x3e0e3ddc, 0x3c17a4e8, 0x3a1c5c57, 0x381c8bb5, 0x36185aee, 
+  0x340ff242, 0x32037a45, 
+  0x2ff31bde, 0x2ddf0040, 0x2bc750e9, 0x29ac37a0, 0x278dde6e, 0x256c6f9f, 
+  0x234815ba, 0x2120fb83, 
+  0x1ef74bf3, 0x1ccb3237, 0x1a9cd9ac, 0x186c6ddd, 0x163a1a7e, 0x14060b68, 
+  0x11d06c97, 0xf996a26, 
+  0xd61304e, 0xb27eb5c, 0x8edc7b7, 0x6b2f1d2, 0x4779632, 0x23be165, 0x0, 
+  0xfdc41e9b, 
+  0xfb8869ce, 0xf94d0e2e, 0xf7123849, 0xf4d814a4, 0xf29ecfb2, 0xf06695da, 
+  0xee2f9369, 0xebf9f498, 
+  0xe9c5e582, 0xe7939223, 0xe5632654, 0xe334cdc9, 0xe108b40d, 0xdedf047d, 
+  0xdcb7ea46, 0xda939061, 
+  0xd8722192, 0xd653c860, 0xd438af17, 0xd220ffc0, 0xd00ce422, 0xcdfc85bb, 
+  0xcbf00dbe, 0xc9e7a512, 
+  0xc7e3744b, 0xc5e3a3a9, 0xc3e85b18, 0xc1f1c224, 0xc0000000, 0xbe133b7c, 
+  0xbc2b9b05, 0xba4944a2, 
+  0xb86c5df0, 0xb6950c1e, 0xb4c373ee, 0xb2f7b9af, 0xb1320139, 0xaf726def, 
+  0xadb922b7, 0xac0641fb, 
+  0xaa59eda4, 0xa8b4471a, 0xa7156f3c, 0xa57d8666, 0xa3ecac65, 0xa263007d, 
+  0xa0e0a15f, 0x9f65ad2d, 
+  0x9df24175, 0x9c867b2c, 0x9b2276b0, 0x99c64fc5, 0x98722192, 0x9726069c, 
+  0x95e218c9, 0x94a6715d, 
+  0x937328f5, 0x92485786, 0x9126145f, 0x900c7621, 0x8efb92c2, 0x8df37f8b, 
+  0x8cf45113, 0x8bfe1b3f, 
+  0x8b10f144, 0x8a2ce59f, 0x89520a1a, 0x88806fc4, 0x87b826f7, 0x86f93f50, 
+  0x8643c7b3, 0x8597ce46, 
+  0x84f56073, 0x845c8ae3, 0x83cd5982, 0x8347d77b, 0x82cc0f36, 0x825a0a5b, 
+  0x81f1d1ce, 0x81936daf, 
+  0x813ee55b, 0x80f43f69, 0x80b381ac, 0x807cb130, 0x804fd23a, 0x802ce84c, 
+  0x8013f61d, 0x8004fda0, 
+ 
+}; 
+ 
+ 
+/**  
+ * @brief  Q31 sin_cos function. 
+ * @param[in]  theta    scaled input value in degrees  
+ * @param[out] *pSinVal points to the processed sine output.  
+ * @param[out] *pCosVal points to the processed cosine output.  
+ * @return none. 
+ *  
+ * The Q31 input value is in the range [-1 +1) and is mapped to a degree value in the range [-180 180). 
+ *  
+ * Both outputs are produced by linear interpolation between two neighbouring 
+ * entries of sinTableQ31 / cosTableQ31.  When theta falls in the last table 
+ * interval the upper neighbour is entry 0: the tables cover one full period, 
+ * so the entry following the last one is the first one again. 
+ */ 
+ 
+ 
+void arm_sin_cos_q31( 
+  q31_t theta, 
+  q31_t * pSinVal, 
+  q31_t * pCosVal) 
+{ 
+  q31_t y0, y1;                                  /* Nearest output values */ 
+  uint32_t xSpacing = (uint32_t) INPUT_SPACING;  /* Spacing between inputs */ 
+  uint32_t firstX = 0x80000000;                  /* First X value (-1.0 in Q31) */ 
+  uint32_t tableLen;                             /* Number of entries in each table */ 
+  uint32_t offset;                               /* Distance of theta from firstX */ 
+  uint32_t i, iNext;                             /* Indices of lower / upper neighbour */ 
+  uint32_t delta;                                /* theta - x0, 0 <= delta < xSpacing */ 
+  q31_t oneByXSpacing = 0x5A000000;              /* 1/ xSpacing value */ 
+  uint32_t sign_bits = 8u;                       /* No.of sign bits */ 
+  q31_t out;                                     /* Interpolation factor */ 
+ 
+  tableLen = (uint32_t) (sizeof(sinTableQ31) / sizeof(sinTableQ31[0])); 
+ 
+  /* Distance from the first table input, computed modulo 2^32 so that it is 
+     well defined over the whole q31_t range */ 
+  offset = (uint32_t) theta - firstX; 
+ 
+  /* Calculation of index */ 
+  i = offset / xSpacing; 
+ 
+  /* Distance of theta from the nearest lower table input x0.  Taking the 
+     remainder in unsigned arithmetic avoids forming x0 itself, which can 
+     overflow a signed 32-bit intermediate for large indices. */ 
+  delta = offset - (i * xSpacing); 
+ 
+  /* Upper neighbour; wrap instead of reading one element past the table */ 
+  iNext = i + 1u; 
+  if(iNext >= tableLen) 
+  { 
+    iNext = 0u; 
+  } 
+ 
+  /* Calculation of (theta - x0)/(x1-x0) */ 
+  /* (x1-x0) is xSpacing which is fixed value */ 
+  out = (((q31_t) (((q63_t) delta * oneByXSpacing) >> 32)) << sign_bits); 
+ 
+  /* Reading nearest sine output values from table */ 
+  y0 = sinTableQ31[i]; 
+  y1 = sinTableQ31[iNext]; 
+ 
+  /* Calculation of y0 + (y1 - y0) * ((theta - x0)/(x1-x0)) */ 
+  *pSinVal = y0 + ((q31_t) (((q63_t) (y1 - y0) * out) >> 30)); 
+ 
+  /* Reading nearest cosine output values from table */ 
+  y0 = cosTableQ31[i]; 
+  y1 = cosTableQ31[iNext]; 
+ 
+  /* Calculation of y0 + (y1 - y0) * ((theta - x0)/(x1-x0)) */ 
+  *pCosVal = y0 + ((q31_t) (((q63_t) (y1 - y0) * out) >> 30)); 
+ 
+} 
+ 
+/**  
+ * @} end of SinCos group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FastMathFunctions/arm_cos_f32.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,251 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_cos_f32.c  
+*  
+* Description:	Fast cosine calculation for floating-point values. 
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+/**  
+ * @ingroup groupFastMath  
+ */ 
+ 
+/**  
+ * @defgroup cos Cosine  
+ *  
+ * Computes the trigonometric cosine function using a combination of table lookup 
+ * and cubic interpolation.  There are separate functions for 
+ * Q15, Q31, and floating-point data types. 
+ * The input to the floating-point version is in radians while the 
+ * fixed-point Q15 and Q31 have a scaled input with the range 
+ * [0 1) mapping to [0 2*pi). 
+ * 
+ * The implementation is based on table lookup using 256 values together with cubic interpolation. 
+ * The steps used are: 
+ *  -# Calculation of the nearest integer table index 
+ *  -# Fetch the four table values a, b, c, and d   
+ *  -# Compute the fractional portion (fract) of the table index. 
+ *  -# Calculation of wa, wb, wc, wd  
+ *  -# The final result equals <code>a*wa + b*wb + c*wc + d*wd</code> 
+ * 
+ * where 
+ * <pre>  
+ *    a=Table[index-1];  
+ *    b=Table[index+0];  
+ *    c=Table[index+1];  
+ *    d=Table[index+2];  
+ * </pre> 
+ * and 
+ * <pre>  
+ *    wa=-(1/6)*fract.^3 + (1/2)*fract.^2 - (1/3)*fract;  
+ *    wb=(1/2)*fract.^3 - fract.^2 - (1/2)*fract + 1;  
+ *    wc=-(1/2)*fract.^3+(1/2)*fract.^2+fract;  
+ *    wd=(1/6)*fract.^3 - (1/6)*fract;  
+ * </pre>  
+ */ 
+ 
+ /**  
+ * @addtogroup cos  
+ * @{  
+ */ 
+ 
+ 
+/**  
+* \par  
+* <b>Example code for Generation of Cos Table:</b> 
+* tableSize = 256;  
+* <pre>for(n = -1; n < (tableSize + 2); n++)  
+* {  
+*	cosTable[n+1]= cos(2*pi*n/tableSize);  
+* } </pre>  
+* where pi value is  3.14159265358979  
+* \par  
+* The table holds 259 entries (n = -1 ... 257): one sample before the start  
+* of the period and two samples past its end, in addition to the 256 samples  
+* of the period itself.  The interpolation in arm_cos_f32() reads the four  
+* consecutive entries cosTable[index] ... cosTable[index+3].  
+*/ 
+ 
+static const float32_t cosTable[259] = { 
+  0.999698817729949950f, 1.000000000000000000f, 0.999698817729949950f, 
+  0.998795449733734130f, 0.997290432453155520f, 0.995184719562530520f, 
+  0.992479562759399410f, 0.989176511764526370f, 
+  0.985277652740478520f, 0.980785250663757320f, 0.975702106952667240f, 
+  0.970031261444091800f, 0.963776051998138430f, 0.956940352916717530f, 
+  0.949528157711029050f, 0.941544055938720700f, 
+  0.932992815971374510f, 0.923879504203796390f, 0.914209783077239990f, 
+  0.903989315032958980f, 0.893224298954010010f, 0.881921291351318360f, 
+  0.870086967945098880f, 0.857728600502014160f, 
+  0.844853579998016360f, 0.831469595432281490f, 0.817584812641143800f, 
+  0.803207516670227050f, 0.788346409797668460f, 0.773010432720184330f, 
+  0.757208824157714840f, 0.740951120853424070f, 
+  0.724247097969055180f, 0.707106769084930420f, 0.689540565013885500f, 
+  0.671558976173400880f, 0.653172850608825680f, 0.634393274784088130f, 
+  0.615231573581695560f, 0.595699310302734380f, 
+  0.575808167457580570f, 0.555570244789123540f, 0.534997642040252690f, 
+  0.514102756977081300f, 0.492898195981979370f, 0.471396744251251220f, 
+  0.449611335992813110f, 0.427555084228515630f, 
+  0.405241310596466060f, 0.382683426141738890f, 0.359895050525665280f, 
+  0.336889863014221190f, 0.313681751489639280f, 0.290284663438797000f, 
+  0.266712754964828490f, 0.242980182170867920f, 
+  0.219101235270500180f, 0.195090323686599730f, 0.170961886644363400f, 
+  0.146730467677116390f, 0.122410677373409270f, 0.098017141222953796f, 
+  0.073564566671848297f, 0.049067676067352295f, 
+  0.024541229009628296f, 0.000000000000000061f, -0.024541229009628296f, 
+  -0.049067676067352295f, -0.073564566671848297f, -0.098017141222953796f, 
+  -0.122410677373409270f, -0.146730467677116390f, 
+  -0.170961886644363400f, -0.195090323686599730f, -0.219101235270500180f, 
+  -0.242980182170867920f, -0.266712754964828490f, -0.290284663438797000f, 
+  -0.313681751489639280f, -0.336889863014221190f, 
+  -0.359895050525665280f, -0.382683426141738890f, -0.405241310596466060f, 
+  -0.427555084228515630f, -0.449611335992813110f, -0.471396744251251220f, 
+  -0.492898195981979370f, -0.514102756977081300f, 
+  -0.534997642040252690f, -0.555570244789123540f, -0.575808167457580570f, 
+  -0.595699310302734380f, -0.615231573581695560f, -0.634393274784088130f, 
+  -0.653172850608825680f, -0.671558976173400880f, 
+  -0.689540565013885500f, -0.707106769084930420f, -0.724247097969055180f, 
+  -0.740951120853424070f, -0.757208824157714840f, -0.773010432720184330f, 
+  -0.788346409797668460f, -0.803207516670227050f, 
+  -0.817584812641143800f, -0.831469595432281490f, -0.844853579998016360f, 
+  -0.857728600502014160f, -0.870086967945098880f, -0.881921291351318360f, 
+  -0.893224298954010010f, -0.903989315032958980f, 
+  -0.914209783077239990f, -0.923879504203796390f, -0.932992815971374510f, 
+  -0.941544055938720700f, -0.949528157711029050f, -0.956940352916717530f, 
+  -0.963776051998138430f, -0.970031261444091800f, 
+  -0.975702106952667240f, -0.980785250663757320f, -0.985277652740478520f, 
+  -0.989176511764526370f, -0.992479562759399410f, -0.995184719562530520f, 
+  -0.997290432453155520f, -0.998795449733734130f, 
+  -0.999698817729949950f, -1.000000000000000000f, -0.999698817729949950f, 
+  -0.998795449733734130f, -0.997290432453155520f, -0.995184719562530520f, 
+  -0.992479562759399410f, -0.989176511764526370f, 
+  -0.985277652740478520f, -0.980785250663757320f, -0.975702106952667240f, 
+  -0.970031261444091800f, -0.963776051998138430f, -0.956940352916717530f, 
+  -0.949528157711029050f, -0.941544055938720700f, 
+  -0.932992815971374510f, -0.923879504203796390f, -0.914209783077239990f, 
+  -0.903989315032958980f, -0.893224298954010010f, -0.881921291351318360f, 
+  -0.870086967945098880f, -0.857728600502014160f, 
+  -0.844853579998016360f, -0.831469595432281490f, -0.817584812641143800f, 
+  -0.803207516670227050f, -0.788346409797668460f, -0.773010432720184330f, 
+  -0.757208824157714840f, -0.740951120853424070f, 
+  -0.724247097969055180f, -0.707106769084930420f, -0.689540565013885500f, 
+  -0.671558976173400880f, -0.653172850608825680f, -0.634393274784088130f, 
+  -0.615231573581695560f, -0.595699310302734380f, 
+  -0.575808167457580570f, -0.555570244789123540f, -0.534997642040252690f, 
+  -0.514102756977081300f, -0.492898195981979370f, -0.471396744251251220f, 
+  -0.449611335992813110f, -0.427555084228515630f, 
+  -0.405241310596466060f, -0.382683426141738890f, -0.359895050525665280f, 
+  -0.336889863014221190f, -0.313681751489639280f, -0.290284663438797000f, 
+  -0.266712754964828490f, -0.242980182170867920f, 
+  -0.219101235270500180f, -0.195090323686599730f, -0.170961886644363400f, 
+  -0.146730467677116390f, -0.122410677373409270f, -0.098017141222953796f, 
+  -0.073564566671848297f, -0.049067676067352295f, 
+  -0.024541229009628296f, -0.000000000000000184f, 0.024541229009628296f, 
+  0.049067676067352295f, 0.073564566671848297f, 0.098017141222953796f, 
+  0.122410677373409270f, 0.146730467677116390f, 
+  0.170961886644363400f, 0.195090323686599730f, 0.219101235270500180f, 
+  0.242980182170867920f, 0.266712754964828490f, 0.290284663438797000f, 
+  0.313681751489639280f, 0.336889863014221190f, 
+  0.359895050525665280f, 0.382683426141738890f, 0.405241310596466060f, 
+  0.427555084228515630f, 0.449611335992813110f, 0.471396744251251220f, 
+  0.492898195981979370f, 0.514102756977081300f, 
+  0.534997642040252690f, 0.555570244789123540f, 0.575808167457580570f, 
+  0.595699310302734380f, 0.615231573581695560f, 0.634393274784088130f, 
+  0.653172850608825680f, 0.671558976173400880f, 
+  0.689540565013885500f, 0.707106769084930420f, 0.724247097969055180f, 
+  0.740951120853424070f, 0.757208824157714840f, 0.773010432720184330f, 
+  0.788346409797668460f, 0.803207516670227050f, 
+  0.817584812641143800f, 0.831469595432281490f, 0.844853579998016360f, 
+  0.857728600502014160f, 0.870086967945098880f, 0.881921291351318360f, 
+  0.893224298954010010f, 0.903989315032958980f, 
+  0.914209783077239990f, 0.923879504203796390f, 0.932992815971374510f, 
+  0.941544055938720700f, 0.949528157711029050f, 0.956940352916717530f, 
+  0.963776051998138430f, 0.970031261444091800f, 
+  0.975702106952667240f, 0.980785250663757320f, 0.985277652740478520f, 
+  0.989176511764526370f, 0.992479562759399410f, 0.995184719562530520f, 
+  0.997290432453155520f, 0.998795449733734130f, 
+  0.999698817729949950f, 1.000000000000000000f, 0.999698817729949950f 
+}; 
+ 
+/** 
+ * @brief  Fast approximation to the trigonometric cosine function for floating-point data. 
+ * @param[in] x input value in radians. 
+ * @return cos(x). 
+ * 
+ * The input is divided by 2*pi and reduced to one period, the position inside 
+ * the period selects four consecutive entries of cosTable, and the result is 
+ * the cubic interpolation of those four entries. 
+ * 
+ * When the reduced phase comes out as exactly 1.0 (a very small negative x, 
+ * or a negative whole number of periods) the table index is folded back to 
+ * the start of the period, so the lookup never reads past the end of cosTable. 
+ * 
+ * NOTE(review): TABLE_SIZE is defined outside this file; the table comment 
+ * and the 259-entry table imply it is 256 - confirm. 
+ */ 
+ 
+float32_t arm_cos_f32( 
+  float32_t x) 
+{ 
+  float32_t cosVal, fract, in; 
+  uint32_t index; 
+  uint32_t tableSize = (uint32_t) TABLE_SIZE; 
+  float32_t wa, wb, wc, wd; 
+  float32_t a, b, c, d; 
+  const float32_t *tablePtr; 
+  int32_t n; 
+ 
+  /* input x is in radians */ 
+  /* Scale the input to [0 1] range from [0 2*PI] , divide input by 2*pi */ 
+  in = x * 0.159154943092f; 
+ 
+  /* Calculation of floor value of input */ 
+  n = (int32_t) in; 
+ 
+  /* Make negative values towards -infinity */ 
+  if(x < 0.0f) 
+  { 
+    n = n - 1; 
+  } 
+ 
+  /* Map input value to [0 1] */ 
+  in = in - (float32_t) n; 
+ 
+  /* Calculation of index of the table */ 
+  index = (uint32_t) (tableSize * in); 
+ 
+  /* fractional value calculation */ 
+  fract = ((float32_t) tableSize * in) - (float32_t) index; 
+ 
+  /* in can be exactly 1.0 here, which gives index == tableSize.  That phase 
+     is equivalent to phase 0, so wrap the index; fract is already 0. */ 
+  if(index >= tableSize) 
+  { 
+    index -= tableSize; 
+  } 
+ 
+  /* Initialise table pointer */ 
+  tablePtr = &cosTable[index]; 
+ 
+  /* Read four nearest values of input value from the cos table */ 
+  a = *tablePtr++; 
+  b = *tablePtr++; 
+  c = *tablePtr++; 
+  d = *tablePtr++; 
+ 
+  /* Cubic interpolation process */ 
+  wa = -(((0.166666667f) * fract) * (fract * fract)) + 
+        (((0.5f) * (fract * fract)) - ((0.3333333333333f) * fract)); 
+  wb = ((((0.5f) * fract) * (fract * fract)) - (fract * fract)) + 
+       (-((0.5f) * fract) + 1.0f); 
+  wc = -(((0.5f) * fract) * (fract * fract)) +  
+	    (((0.5f) * (fract * fract)) + fract); 
+  wd = (((0.166666667f) * fract) * (fract * fract)) -  
+	   ((0.166666667f) * fract); 
+ 
+  /* Calculate cos value */ 
+  cosVal = ((a * wa) + (b * wb)) + ((c * wc) + (d * wd)); 
+ 
+  /* Return the output value */ 
+  return (cosVal); 
+ 
+} 
+ 
+/**  
+ * @} end of cos group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FastMathFunctions/arm_cos_q15.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,186 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_cos_q15.c  
+*  
+* Description:	Fast cosine calculation for Q15 values. 
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupFastMath  
+ */ 
+ 
+ /**  
+ * @addtogroup cos  
+ * @{  
+ */ 
+ 
+/** 
+* \par  
+* Table Values are in Q15(1.15 Fixed point format) and generation is done in three steps  
+* \par  
+* First Generate cos values in floating point:  
+* tableSize = 256;   
+* <pre>for(n = -1; n < (tableSize + 2); n++)  
+* {  
+*	cosTable[n+1]= cos(2*pi*n/tableSize);  
+* }</pre> 
+* where pi value is  3.14159265358979  
+* \par  
+* Secondly Convert Floating point to Q15(Fixed point):  
+*	round(cosTable[i] * pow(2, 15))  
+* \par  
+* Finally Rounding to nearest integer is done  
+* 	cosTable[i] += (cosTable[i] > 0 ? 0.5 :-0.5);  
+* \par  
+* The table holds 259 entries (n = -1 ... 257): one sample before the start  
+* of the period and two samples past its end, in addition to the 256 samples  
+* of the period itself.  The interpolation in arm_cos_q15() reads four  
+* consecutive entries starting at cosTableQ15[index].  
+*/ 
+ 
+static const q15_t cosTableQ15[259] = { 
+  0x7ff6, 0x7fff, 0x7ff6, 0x7fd9, 0x7fa7, 0x7f62, 0x7f0a, 0x7e9d, 
+  0x7e1e, 0x7d8a, 0x7ce4, 0x7c2a, 0x7b5d, 0x7a7d, 0x798a, 0x7885, 
+  0x776c, 0x7642, 0x7505, 0x73b6, 0x7255, 0x70e3, 0x6f5f, 0x6dca, 
+  0x6c24, 0x6a6e, 0x68a7, 0x66d0, 0x64e9, 0x62f2, 0x60ec, 0x5ed7, 
+  0x5cb4, 0x5a82, 0x5843, 0x55f6, 0x539b, 0x5134, 0x4ec0, 0x4c40, 
+  0x49b4, 0x471d, 0x447b, 0x41ce, 0x3f17, 0x3c57, 0x398d, 0x36ba, 
+  0x33df, 0x30fc, 0x2e11, 0x2b1f, 0x2827, 0x2528, 0x2224, 0x1f1a, 
+  0x1c0c, 0x18f9, 0x15e2, 0x12c8, 0xfab, 0xc8c, 0x96b, 0x648, 
+  0x324, 0x0, 0xfcdc, 0xf9b8, 0xf695, 0xf374, 0xf055, 0xed38, 
+  0xea1e, 0xe707, 0xe3f4, 0xe0e6, 0xdddc, 0xdad8, 0xd7d9, 0xd4e1, 
+  0xd1ef, 0xcf04, 0xcc21, 0xc946, 0xc673, 0xc3a9, 0xc0e9, 0xbe32, 
+  0xbb85, 0xb8e3, 0xb64c, 0xb3c0, 0xb140, 0xaecc, 0xac65, 0xaa0a, 
+  0xa7bd, 0xa57e, 0xa34c, 0xa129, 0x9f14, 0x9d0e, 0x9b17, 0x9930, 
+  0x9759, 0x9592, 0x93dc, 0x9236, 0x90a1, 0x8f1d, 0x8dab, 0x8c4a, 
+  0x8afb, 0x89be, 0x8894, 0x877b, 0x8676, 0x8583, 0x84a3, 0x83d6, 
+  0x831c, 0x8276, 0x81e2, 0x8163, 0x80f6, 0x809e, 0x8059, 0x8027, 
+  0x800a, 0x8000, 0x800a, 0x8027, 0x8059, 0x809e, 0x80f6, 0x8163, 
+  0x81e2, 0x8276, 0x831c, 0x83d6, 0x84a3, 0x8583, 0x8676, 0x877b, 
+  0x8894, 0x89be, 0x8afb, 0x8c4a, 0x8dab, 0x8f1d, 0x90a1, 0x9236, 
+  0x93dc, 0x9592, 0x9759, 0x9930, 0x9b17, 0x9d0e, 0x9f14, 0xa129, 
+  0xa34c, 0xa57e, 0xa7bd, 0xaa0a, 0xac65, 0xaecc, 0xb140, 0xb3c0, 
+  0xb64c, 0xb8e3, 0xbb85, 0xbe32, 0xc0e9, 0xc3a9, 0xc673, 0xc946, 
+  0xcc21, 0xcf04, 0xd1ef, 0xd4e1, 0xd7d9, 0xdad8, 0xdddc, 0xe0e6, 
+  0xe3f4, 0xe707, 0xea1e, 0xed38, 0xf055, 0xf374, 0xf695, 0xf9b8, 
+  0xfcdc, 0x0, 0x324, 0x648, 0x96b, 0xc8c, 0xfab, 0x12c8, 
+  0x15e2, 0x18f9, 0x1c0c, 0x1f1a, 0x2224, 0x2528, 0x2827, 0x2b1f, 
+  0x2e11, 0x30fc, 0x33df, 0x36ba, 0x398d, 0x3c57, 0x3f17, 0x41ce, 
+  0x447b, 0x471d, 0x49b4, 0x4c40, 0x4ec0, 0x5134, 0x539b, 0x55f6, 
+  0x5843, 0x5a82, 0x5cb4, 0x5ed7, 0x60ec, 0x62f2, 0x64e9, 0x66d0, 
+  0x68a7, 0x6a6e, 0x6c24, 0x6dca, 0x6f5f, 0x70e3, 0x7255, 0x73b6, 
+  0x7505, 0x7642, 0x776c, 0x7885, 0x798a, 0x7a7d, 0x7b5d, 0x7c2a, 
+  0x7ce4, 0x7d8a, 0x7e1e, 0x7e9d, 0x7f0a, 0x7f62, 0x7fa7, 0x7fd9, 
+  0x7ff6, 0x7fff, 0x7ff6 
+}; 
+ 
+ 
+/** 
+ * @brief Fast approximation to the trigonometric cosine function for Q15 data. 
+ * @param[in] x Scaled input value in radians. 
+ * @return  cos(x). 
+ * 
+ * The Q15 input value is in the range [0 +1) and is mapped to a radian value in the range [0 2*pi). 
+ * 
+ * The input is split into a table slot and a fractional position inside that 
+ * slot; four consecutive table samples starting at the slot are then combined 
+ * with cubic interpolation weights, all in fixed-point arithmetic. 
+ * NOTE(review): TABLE_SPACING_Q15 is defined outside this file; the shift by 8 
+ * applied to the remainder suggests a spacing of 128 - confirm. 
+ */ 
+ 
+q15_t arm_cos_q15( 
+  q15_t x) 
+{ 
+  const q15_t oneSixth = 0x1555;                 /* 1/6 in Q15 */ 
+  const q15_t step = TABLE_SPACING_Q15;          /* Input distance between table samples */ 
+  const q15_t *samples;                          /* First of the four samples used */ 
+  int32_t slot;                                  /* Table slot containing x */ 
+  q15_t base;                                    /* Input value at the start of the slot */ 
+  q15_t frac, frac2, frac3;                      /* Position inside the slot and its powers */ 
+  q31_t wa, wb, wc, wd;                          /* Cubic interpolation weights */ 
+  q31_t acc;                                     /* Weighted sum of the samples */ 
+ 
+  /* Locate the slot and the input value it starts at */ 
+  slot = (int32_t) x / step; 
+  base = (q15_t) slot * step; 
+ 
+  /* Position inside the slot, scaled up by 8 bits */ 
+  frac = (x - base) << 8; 
+ 
+  /* Square and cube of the fractional position */ 
+  frac2 = (q15_t) ((frac * frac) >> 15); 
+  frac3 = (q15_t) ((frac2 * frac) >> 15); 
+ 
+  /* The four samples are samples[0] ... samples[3] */ 
+  samples = &cosTableQ15[slot]; 
+ 
+  /* wa = -(1/6)*frac^3 + (1/2)*frac^2 - (1/3)*frac */ 
+  wa = ((q31_t) oneSixth * frac3) + ((q31_t) 0x2AAA * frac); 
+  wa = (frac2 >> 1u) - (wa >> 15); 
+ 
+  /* wb = (1/2)*frac^3 - frac^2 - (1/2)*frac + 1 */ 
+  wb = (((frac3 >> 1u) - frac2) - (frac >> 1u)) + 0x7FFF; 
+ 
+  /* wc = -(1/2)*frac^3 + (1/2)*frac^2 + frac */ 
+  wc = ((frac2 - (q31_t) frac3) >> 1u) + frac; 
+ 
+  /* wd = (1/6)*frac^3 - (1/6)*frac */ 
+  wd = ((q15_t) (((q31_t) oneSixth * (q15_t) (frac3 - frac)) >> 15)); 
+ 
+  /* Weighted sum of the four samples */ 
+  acc = samples[0] * wa; 
+  acc += samples[1] * wb; 
+  acc += samples[2] * wc; 
+  acc += samples[3] * wd; 
+ 
+  /* Return the output value in 1.15(q15) format */ 
+  return ((q15_t) (acc >> 15u)); 
+ 
+} 
+ 
+/**  
+ * @} end of cos group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FastMathFunctions/arm_cos_q31.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,222 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_cos_q31.c  
+*  
+* Description:	Fast cosine calculation for Q31 values. 
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupFastMath  
+ */ 
+ 
+ /**  
+ * @addtogroup cos  
+ * @{  
+ */ 
+ 
+/**  
+ * \par  
+ * Table Values are in Q31(1.31 Fixed point format) and generation is done in three steps  
+ * First Generate cos values in floating point:  
+ * tableSize = 256;    
+ * <pre>for(n = -1; n < (tableSize + 2); n++)  
+ * {  
+ *	cosTable[n+1]= cos(2*pi*n/tableSize);  
+ * } </pre>   
+ * where pi value is  3.14159265358979  
+ * \par  
+ * Secondly Convert Floating point to Q31(Fixed point):  
+ *	(cosTable[i] * pow(2, 31))  
+ * \par  
+ * Finally Rounding to nearest integer is done  
+ * 	cosTable[i] += (cosTable[i] > 0 ? 0.5 :-0.5);  
+ */ 
+ 
+ 
+static const q31_t cosTableQ31[259] = { /* Q31 cosine samples spaced 1/256 of a period apart, starting one step before angle 0 and running past a full period; arm_cos_q31() reads four consecutive entries starting at cosTableQ31[index] */
+  0x7ff62182, 0x7fffffff, 0x7ff62182, 0x7fd8878e, 0x7fa736b4, 0x7f62368f, 
+    0x7f0991c4, 0x7e9d55fc, 
+  0x7e1d93ea, 0x7d8a5f40, 0x7ce3ceb2, 0x7c29fbee, 0x7b5d039e, 0x7a7d055b, 
+    0x798a23b1, 0x78848414, 
+  0x776c4edb, 0x7641af3d, 0x7504d345, 0x73b5ebd1, 0x72552c85, 0x70e2cbc6, 
+    0x6f5f02b2, 0x6dca0d14, 
+  0x6c242960, 0x6a6d98a4, 0x68a69e81, 0x66cf8120, 0x64e88926, 0x62f201ac, 
+    0x60ec3830, 0x5ed77c8a, 
+  0x5cb420e0, 0x5a82799a, 0x5842dd54, 0x55f5a4d2, 0x539b2af0, 0x5133cc94, 
+    0x4ebfe8a5, 0x4c3fdff4, 
+  0x49b41533, 0x471cece7, 0x447acd50, 0x41ce1e65, 0x3f1749b8, 0x3c56ba70, 
+    0x398cdd32, 0x36ba2014, 
+  0x33def287, 0x30fbc54d, 0x2e110a62, 0x2b1f34eb, 0x2826b928, 0x25280c5e, 
+    0x2223a4c5, 0x1f19f97b, 
+  0x1c0b826a, 0x18f8b83c, 0x15e21445, 0x12c8106f, 0xfab272b, 0xc8bd35e, 
+    0x96a9049, 0x647d97c, 
+  0x3242abf, 0x0, 0xfcdbd541, 0xf9b82684, 0xf6956fb7, 0xf3742ca2, 0xf054d8d5, 
+    0xed37ef91, 
+  0xea1debbb, 0xe70747c4, 0xe3f47d96, 0xe0e60685, 0xdddc5b3b, 0xdad7f3a2, 
+    0xd7d946d8, 0xd4e0cb15, 
+  0xd1eef59e, 0xcf043ab3, 0xcc210d79, 0xc945dfec, 0xc67322ce, 0xc3a94590, 
+    0xc0e8b648, 0xbe31e19b, 
+  0xbb8532b0, 0xb8e31319, 0xb64beacd, 0xb3c0200c, 0xb140175b, 0xaecc336c, 
+    0xac64d510, 0xaa0a5b2e, 
+  0xa7bd22ac, 0xa57d8666, 0xa34bdf20, 0xa1288376, 0x9f13c7d0, 0x9d0dfe54, 
+    0x9b1776da, 0x99307ee0, 
+  0x9759617f, 0x9592675c, 0x93dbd6a0, 0x9235f2ec, 0x90a0fd4e, 0x8f1d343a, 
+    0x8daad37b, 0x8c4a142f, 
+  0x8afb2cbb, 0x89be50c3, 0x8893b125, 0x877b7bec, 0x8675dc4f, 0x8582faa5, 
+    0x84a2fc62, 0x83d60412, 
+  0x831c314e, 0x8275a0c0, 0x81e26c16, 0x8162aa04, 0x80f66e3c, 0x809dc971, 
+    0x8058c94c, 0x80277872, 
+  0x8009de7e, 0x80000000, 0x8009de7e, 0x80277872, 0x8058c94c, 0x809dc971, 
+    0x80f66e3c, 0x8162aa04, 
+  0x81e26c16, 0x8275a0c0, 0x831c314e, 0x83d60412, 0x84a2fc62, 0x8582faa5, 
+    0x8675dc4f, 0x877b7bec, 
+  0x8893b125, 0x89be50c3, 0x8afb2cbb, 0x8c4a142f, 0x8daad37b, 0x8f1d343a, 
+    0x90a0fd4e, 0x9235f2ec, 
+  0x93dbd6a0, 0x9592675c, 0x9759617f, 0x99307ee0, 0x9b1776da, 0x9d0dfe54, 
+    0x9f13c7d0, 0xa1288376, 
+  0xa34bdf20, 0xa57d8666, 0xa7bd22ac, 0xaa0a5b2e, 0xac64d510, 0xaecc336c, 
+    0xb140175b, 0xb3c0200c, 
+  0xb64beacd, 0xb8e31319, 0xbb8532b0, 0xbe31e19b, 0xc0e8b648, 0xc3a94590, 
+    0xc67322ce, 0xc945dfec, 
+  0xcc210d79, 0xcf043ab3, 0xd1eef59e, 0xd4e0cb15, 0xd7d946d8, 0xdad7f3a2, 
+    0xdddc5b3b, 0xe0e60685, 
+  0xe3f47d96, 0xe70747c4, 0xea1debbb, 0xed37ef91, 0xf054d8d5, 0xf3742ca2, 
+    0xf6956fb7, 0xf9b82684, 
+  0xfcdbd541, 0x0, 0x3242abf, 0x647d97c, 0x96a9049, 0xc8bd35e, 0xfab272b, 
+    0x12c8106f, 
+  0x15e21445, 0x18f8b83c, 0x1c0b826a, 0x1f19f97b, 0x2223a4c5, 0x25280c5e, 
+    0x2826b928, 0x2b1f34eb, 
+  0x2e110a62, 0x30fbc54d, 0x33def287, 0x36ba2014, 0x398cdd32, 0x3c56ba70, 
+    0x3f1749b8, 0x41ce1e65, 
+  0x447acd50, 0x471cece7, 0x49b41533, 0x4c3fdff4, 0x4ebfe8a5, 0x5133cc94, 
+    0x539b2af0, 0x55f5a4d2, 
+  0x5842dd54, 0x5a82799a, 0x5cb420e0, 0x5ed77c8a, 0x60ec3830, 0x62f201ac, 
+    0x64e88926, 0x66cf8120, 
+  0x68a69e81, 0x6a6d98a4, 0x6c242960, 0x6dca0d14, 0x6f5f02b2, 0x70e2cbc6, 
+    0x72552c85, 0x73b5ebd1, 
+  0x7504d345, 0x7641af3d, 0x776c4edb, 0x78848414, 0x798a23b1, 0x7a7d055b, 
+    0x7b5d039e, 0x7c29fbee, 
+  0x7ce3ceb2, 0x7d8a5f40, 0x7e1d93ea, 0x7e9d55fc, 0x7f0991c4, 0x7f62368f, 
+    0x7fa736b4, 0x7fd8878e, 
+  0x7ff62182, 0x7fffffff, 0x7ff62182 
+}; 
+ 
+/** 
+ * @brief Fast approximation to the trigonometric cosine function for Q31 data. 
+ * @param[in] x Scaled input value in radians. 
+ * @return  cos(x). 
+ * 
+ * The Q31 input value is in the range [0 +1) and is mapped to a radian value in the range [0 2*pi). 
+ * A negative input is wrapped forward by one period (its sign bit is cleared, 
+ * which equals x + 1.0 in Q31) before the table lookup, so that it cannot 
+ * produce a table index outside cosTableQ31. 
+ * 
+ * The result is a*wa + b*wb + c*wc + d*wd, where a..d are four consecutive 
+ * entries starting at cosTableQ31[index] and wa..wd are cubic weights 
+ * computed from the fractional position between table points. 
+ */ 
+ 
+q31_t arm_cos_q31( 
+  q31_t x) 
+{ 
+  q31_t cosVal, fract, in, in2;                  /* Temporary variables for input, output */ 
+  q31_t wa, wb, wc, wd;                          /* Cubic interpolation coefficients */ 
+  q31_t a, b, c, d;                              /* Four nearest output values */ 
+  const q31_t *tablePtr;                         /* Read-only pointer into the table */ 
+  q31_t fractCube, fractSquare;                  /* Temporary values for fractional value */ 
+  q31_t oneBy6 = 0x15555555;                     /* Fixed point value of 1/6 */ 
+  q31_t tableSpacing = TABLE_SPACING_Q31;        /* Table spacing */ 
+  q31_t temp;                                    /* Temporary variable for intermediate process */ 
+  uint32_t index;                                /* Index variable */ 
+ 
+  /* The scaled input has period 1.0, so a negative x is equivalent to 
+     x + 1.0; in Q31 that is the same as clearing the sign bit.  Doing this 
+     first keeps "in" non-negative, so the quotient below can no longer be 
+     negative and index stays within the table. 
+     NOTE(review): assumes TABLE_SPACING_Q31 * 256 == 2^31 -- confirm in arm_math.h. */ 
+  in = (q31_t) ((uint32_t) x & 0x7FFFFFFFu); 
+ 
+  /* Calculate the nearest index (unsigned division, as in arm_sin_q31) */ 
+  index = (uint32_t) in / (uint32_t) tableSpacing; 
+ 
+  /* Calculate the nearest value of input */ 
+  in2 = ((q31_t) index) * tableSpacing; 
+ 
+  /* Calculation of fractional value, scaled up to the full Q31 range */ 
+  fract = (in - in2) << 8; 
+ 
+  /* fractSquare = fract * fract */ 
+  fractSquare = ((q31_t) (((q63_t) fract * fract) >> 32)); 
+  fractSquare = fractSquare << 1; 
+ 
+  /* fractCube = fract * fract * fract */ 
+  fractCube = ((q31_t) (((q63_t) fractSquare * fract) >> 32)); 
+  fractCube = fractCube << 1; 
+ 
+  /* Initialise table pointer */ 
+  tablePtr = &cosTableQ31[index]; 
+ 
+  /* Cubic interpolation process */ 
+  /* Calculation of wa */ 
+  /* wa = -(oneBy6)*fractCube + (fractSquare >> 1u) - (0x2AAAAAAA)*fract; */ 
+  wa = ((q31_t) (((q63_t) oneBy6 * fractCube) >> 32)); 
+  temp = 0x2AAAAAAA; 
+  wa = (q31_t) ((((q63_t) wa << 32) + ((q63_t) temp * fract)) >> 32); 
+  wa = -(wa << 1u); 
+  wa += (fractSquare >> 1u); 
+ 
+  /* Read first nearest value of output from the cos table */ 
+  a = *tablePtr++; 
+ 
+  /* cosVal = a*wa */ 
+  cosVal = ((q31_t) (((q63_t) a * wa) >> 32)); 
+ 
+  /* q31(1.31) Fixed point value of 1 */ 
+  temp = 0x7FFFFFFF; 
+ 
+  /* Calculation of wb */ 
+  wb = ((fractCube >> 1u) - (fractSquare + (fract >> 1u))) + temp; 
+  /* Read second nearest value of output from the cos table */ 
+  b = *tablePtr++; 
+ 
+  /*      cosVal += b*wb */ 
+  cosVal = (q31_t) ((((q63_t) cosVal << 32) + ((q63_t) b * (wb))) >> 32); 
+ 
+  /* Calculation of wc */ 
+  wc = -fractCube + fractSquare; 
+  wc = (wc >> 1u) + fract; 
+  /* Read third nearest values of output value from the cos table */ 
+  c = *tablePtr++; 
+ 
+  /*      cosVal += c*wc */ 
+  cosVal = (q31_t) ((((q63_t) cosVal << 32) + ((q63_t) c * (wc))) >> 32); 
+ 
+  /* Calculation of wd */ 
+  /* wd = (oneBy6)*fractCube - (oneBy6)*fract; */ 
+  fractCube = fractCube - fract; 
+  wd = ((q31_t) (((q63_t) oneBy6 * fractCube) >> 32)); 
+  wd = (wd << 1u); 
+ 
+  /* Read fourth nearest value of output from the cos table */ 
+  d = *tablePtr++; 
+ 
+  /* cosVal += d*wd; */ 
+  cosVal = (q31_t) ((((q63_t) cosVal << 32) + ((q63_t) d * (wd))) >> 32); 
+ 
+  /* convert cosVal in 2.30 format to 1.31 format */ 
+  return (cosVal << 1u); 
+ 
+} 
+ 
+/**  
+ * @} end of cos group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FastMathFunctions/arm_sin_f32.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,254 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_sin_f32.c  
+*  
+* Description:	Fast sine calculation for floating-point values. 
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupFastMath  
+ */ 
+ 
+/**  
+ * @defgroup sin Sine  
+ *  
+ * Computes the trigonometric sine function using a combination of table lookup 
+ * and cubic interpolation.  There are separate functions for 
+ * Q15, Q31, and floating-point data types. 
+ * The input to the floating-point version is in radians while the 
+ * fixed-point Q15 and Q31 have a scaled input with the range 
+ * [0 1) mapping to [0 2*pi). 
+ * 
+ * The implementation is based on table lookup using 256 values together with cubic interpolation. 
+ * The steps used are: 
+ *  -# Calculation of the nearest integer table index 
+ *  -# Fetch the four table values a, b, c, and d   
+ *  -# Compute the fractional portion (fract) of the table index. 
+ *  -# Calculation of wa, wb, wc, wd  
+ *  -# The final result equals <code>a*wa + b*wb + c*wc + d*wd</code> 
+ * 
+ * where 
+ * <pre>  
+ *    a=Table[index-1];  
+ *    b=Table[index+0];  
+ *    c=Table[index+1];  
+ *    d=Table[index+2];  
+ * </pre> 
+ * and 
+ * <pre>  
+ *    wa=-(1/6)*fract.^3 + (1/2)*fract.^2 - (1/3)*fract;  
+ *    wb=(1/2)*fract.^3 - fract.^2 - (1/2)*fract + 1;  
+ *    wc=-(1/2)*fract.^3+(1/2)*fract.^2+fract;  
+ *    wd=(1/6)*fract.^3 - (1/6)*fract;  
+ * </pre>  
+ */ 
+ 
+/**  
+ * @addtogroup sin  
+ * @{  
+ */ 
+ 
+ 
+/** 
+ * \par  
+ * Example code for Generation of Floating-point Sin Table: 
+ * tableSize = 256;  
+ * <pre>for(n = -1; n < (tableSize + 2); n++)  
+ * {  
+ *	sinTable[n+1]=sin(2*pi*n/tableSize);  
+ * }</pre>  
+ * \par  
+ * where pi value is  3.14159265358979  
+ */ 
+ 
+static const float32_t sinTable[259] = { /* Sine samples spaced 1/256 of a period apart, starting one step before angle 0 and running past a full period; arm_sin_f32() reads four consecutive entries starting at sinTable[index]. The entries at the half-period and full-period zero crossings hold tiny non-zero residues rather than exact 0. */
+  -0.024541229009628296f, 0.000000000000000000f, 0.024541229009628296f, 
+  0.049067676067352295f, 0.073564566671848297f, 0.098017141222953796f, 
+  0.122410677373409270f, 0.146730467677116390f, 
+  0.170961886644363400f, 0.195090323686599730f, 0.219101235270500180f, 
+  0.242980182170867920f, 0.266712754964828490f, 0.290284663438797000f, 
+  0.313681751489639280f, 0.336889863014221190f, 
+  0.359895050525665280f, 0.382683426141738890f, 0.405241310596466060f, 
+  0.427555084228515630f, 0.449611335992813110f, 0.471396744251251220f, 
+  0.492898195981979370f, 0.514102756977081300f, 
+  0.534997642040252690f, 0.555570244789123540f, 0.575808167457580570f, 
+  0.595699310302734380f, 0.615231573581695560f, 0.634393274784088130f, 
+  0.653172850608825680f, 0.671558976173400880f, 
+  0.689540565013885500f, 0.707106769084930420f, 0.724247097969055180f, 
+  0.740951120853424070f, 0.757208824157714840f, 0.773010432720184330f, 
+  0.788346409797668460f, 0.803207516670227050f, 
+  0.817584812641143800f, 0.831469595432281490f, 0.844853579998016360f, 
+  0.857728600502014160f, 0.870086967945098880f, 0.881921291351318360f, 
+  0.893224298954010010f, 0.903989315032958980f, 
+  0.914209783077239990f, 0.923879504203796390f, 0.932992815971374510f, 
+  0.941544055938720700f, 0.949528157711029050f, 0.956940352916717530f, 
+  0.963776051998138430f, 0.970031261444091800f, 
+  0.975702106952667240f, 0.980785250663757320f, 0.985277652740478520f, 
+  0.989176511764526370f, 0.992479562759399410f, 0.995184719562530520f, 
+  0.997290432453155520f, 0.998795449733734130f, 
+  0.999698817729949950f, 1.000000000000000000f, 0.999698817729949950f, 
+  0.998795449733734130f, 0.997290432453155520f, 0.995184719562530520f, 
+  0.992479562759399410f, 0.989176511764526370f, 
+  0.985277652740478520f, 0.980785250663757320f, 0.975702106952667240f, 
+  0.970031261444091800f, 0.963776051998138430f, 0.956940352916717530f, 
+  0.949528157711029050f, 0.941544055938720700f, 
+  0.932992815971374510f, 0.923879504203796390f, 0.914209783077239990f, 
+  0.903989315032958980f, 0.893224298954010010f, 0.881921291351318360f, 
+  0.870086967945098880f, 0.857728600502014160f, 
+  0.844853579998016360f, 0.831469595432281490f, 0.817584812641143800f, 
+  0.803207516670227050f, 0.788346409797668460f, 0.773010432720184330f, 
+  0.757208824157714840f, 0.740951120853424070f, 
+  0.724247097969055180f, 0.707106769084930420f, 0.689540565013885500f, 
+  0.671558976173400880f, 0.653172850608825680f, 0.634393274784088130f, 
+  0.615231573581695560f, 0.595699310302734380f, 
+  0.575808167457580570f, 0.555570244789123540f, 0.534997642040252690f, 
+  0.514102756977081300f, 0.492898195981979370f, 0.471396744251251220f, 
+  0.449611335992813110f, 0.427555084228515630f, 
+  0.405241310596466060f, 0.382683426141738890f, 0.359895050525665280f, 
+  0.336889863014221190f, 0.313681751489639280f, 0.290284663438797000f, 
+  0.266712754964828490f, 0.242980182170867920f, 
+  0.219101235270500180f, 0.195090323686599730f, 0.170961886644363400f, 
+  0.146730467677116390f, 0.122410677373409270f, 0.098017141222953796f, 
+  0.073564566671848297f, 0.049067676067352295f, 
+  0.024541229009628296f, 0.000000000000000122f, -0.024541229009628296f, 
+  -0.049067676067352295f, -0.073564566671848297f, -0.098017141222953796f, 
+  -0.122410677373409270f, -0.146730467677116390f, 
+  -0.170961886644363400f, -0.195090323686599730f, -0.219101235270500180f, 
+  -0.242980182170867920f, -0.266712754964828490f, -0.290284663438797000f, 
+  -0.313681751489639280f, -0.336889863014221190f, 
+  -0.359895050525665280f, -0.382683426141738890f, -0.405241310596466060f, 
+  -0.427555084228515630f, -0.449611335992813110f, -0.471396744251251220f, 
+  -0.492898195981979370f, -0.514102756977081300f, 
+  -0.534997642040252690f, -0.555570244789123540f, -0.575808167457580570f, 
+  -0.595699310302734380f, -0.615231573581695560f, -0.634393274784088130f, 
+  -0.653172850608825680f, -0.671558976173400880f, 
+  -0.689540565013885500f, -0.707106769084930420f, -0.724247097969055180f, 
+  -0.740951120853424070f, -0.757208824157714840f, -0.773010432720184330f, 
+  -0.788346409797668460f, -0.803207516670227050f, 
+  -0.817584812641143800f, -0.831469595432281490f, -0.844853579998016360f, 
+  -0.857728600502014160f, -0.870086967945098880f, -0.881921291351318360f, 
+  -0.893224298954010010f, -0.903989315032958980f, 
+  -0.914209783077239990f, -0.923879504203796390f, -0.932992815971374510f, 
+  -0.941544055938720700f, -0.949528157711029050f, -0.956940352916717530f, 
+  -0.963776051998138430f, -0.970031261444091800f, 
+  -0.975702106952667240f, -0.980785250663757320f, -0.985277652740478520f, 
+  -0.989176511764526370f, -0.992479562759399410f, -0.995184719562530520f, 
+  -0.997290432453155520f, -0.998795449733734130f, 
+  -0.999698817729949950f, -1.000000000000000000f, -0.999698817729949950f, 
+  -0.998795449733734130f, -0.997290432453155520f, -0.995184719562530520f, 
+  -0.992479562759399410f, -0.989176511764526370f, 
+  -0.985277652740478520f, -0.980785250663757320f, -0.975702106952667240f, 
+  -0.970031261444091800f, -0.963776051998138430f, -0.956940352916717530f, 
+  -0.949528157711029050f, -0.941544055938720700f, 
+  -0.932992815971374510f, -0.923879504203796390f, -0.914209783077239990f, 
+  -0.903989315032958980f, -0.893224298954010010f, -0.881921291351318360f, 
+  -0.870086967945098880f, -0.857728600502014160f, 
+  -0.844853579998016360f, -0.831469595432281490f, -0.817584812641143800f, 
+  -0.803207516670227050f, -0.788346409797668460f, -0.773010432720184330f, 
+  -0.757208824157714840f, -0.740951120853424070f, 
+  -0.724247097969055180f, -0.707106769084930420f, -0.689540565013885500f, 
+  -0.671558976173400880f, -0.653172850608825680f, -0.634393274784088130f, 
+  -0.615231573581695560f, -0.595699310302734380f, 
+  -0.575808167457580570f, -0.555570244789123540f, -0.534997642040252690f, 
+  -0.514102756977081300f, -0.492898195981979370f, -0.471396744251251220f, 
+  -0.449611335992813110f, -0.427555084228515630f, 
+  -0.405241310596466060f, -0.382683426141738890f, -0.359895050525665280f, 
+  -0.336889863014221190f, -0.313681751489639280f, -0.290284663438797000f, 
+  -0.266712754964828490f, -0.242980182170867920f, 
+  -0.219101235270500180f, -0.195090323686599730f, -0.170961886644363400f, 
+  -0.146730467677116390f, -0.122410677373409270f, -0.098017141222953796f, 
+  -0.073564566671848297f, -0.049067676067352295f, 
+  -0.024541229009628296f, -0.000000000000000245f, 0.024541229009628296f 
+}; 
+ 
+ 
+/** 
+ * @brief  Fast approximation to the trigonometric sine function for floating-point data. 
+ * @param[in] x input value in radians. 
+ * @return  sin(x). 
+ * 
+ * The input is scaled by 1/(2*pi), reduced to a phase in [0 1] by subtracting 
+ * its floor, and converted to a table index plus a fractional offset.  The 
+ * result is a*wa + b*wb + c*wc + d*wd, where a..d are four consecutive 
+ * entries starting at sinTable[index] and wa..wd are cubic weights of the 
+ * fractional offset.  A phase that reduces to exactly 1.0 is wrapped back to 
+ * the start of the table so the four reads always stay inside sinTable. 
+ */ 
+ 
+float32_t arm_sin_f32( 
+  float32_t x) 
+{ 
+  float32_t sinVal, fract, in;                   /* Temporary variables for input, output */ 
+  uint32_t index;                                /* Index variables */ 
+  uint32_t tableSize = (uint32_t) TABLE_SIZE;    /* Initialise tablesize */ 
+  float32_t wa, wb, wc, wd;                      /* Cubic interpolation coefficients */ 
+  float32_t a, b, c, d;                          /* Four nearest output values */ 
+  const float32_t *tablePtr;                     /* Read-only pointer into the table */ 
+  int32_t n; 
+ 
+  /* input x is in radians */ 
+  /* Scale the input to [0 1] range from [0 2*PI] , divide input by 2*pi */ 
+  in = x * 0.159154943092f; 
+ 
+  /* Calculation of floor value of input */ 
+  n = (int32_t) in; 
+ 
+  /* Make negative values towards -infinity */ 
+  if(x < 0.0f) 
+  { 
+    n = n - 1; 
+  } 
+ 
+  /* Map input value to [0 1] */ 
+  in = in - (float32_t) n; 
+ 
+  /* Calculation of index of the table */ 
+  index = (uint32_t) (tableSize * in); 
+ 
+  /* fractional value calculation */ 
+  fract = ((float32_t) tableSize * in) - (float32_t) index; 
+ 
+  /* For negative x the reduced phase can be exactly 1.0 (x is a negative 
+     multiple of 2*pi, or so close to zero that in + 1 rounds up to 1), which 
+     makes index equal to tableSize.  Reading four entries from there would 
+     run past the end of sinTable, so wrap back by one period; fract is 0 in 
+     that case and the interpolated value is the same point of the sine. 
+     NOTE(review): assumes TABLE_SIZE is 256, matching the 259-entry table -- confirm in arm_math.h. */ 
+  if(index >= tableSize) 
+  { 
+    index -= tableSize; 
+  } 
+ 
+  /* Initialise table pointer */ 
+  tablePtr = &sinTable[index]; 
+ 
+  /* Read four nearest values of output value from the sin table */ 
+  a = *tablePtr++; 
+  b = *tablePtr++; 
+  c = *tablePtr++; 
+  d = *tablePtr++; 
+ 
+  /* Cubic interpolation process */ 
+  wa = -(((0.166666667f) * (fract * (fract * fract))) + 
+        ((0.3333333333333f) * fract)) + ((0.5f) * (fract * fract)); 
+  wb = (((0.5f) * (fract * (fract * fract))) - 
+       ((fract * fract) + ((0.5f) * fract))) + 1.0f; 
+  wc = (-((0.5f) * (fract * (fract * fract))) +  
+	   ((0.5f) * (fract * fract))) + fract; 
+  wd = ((0.166666667f) * (fract * (fract * fract))) -  
+	   ((0.166666667f) * fract); 
+ 
+  /* Calculate sin value */ 
+  sinVal = ((a * wa) + (b * wb)) + ((c * wc) + (d * wd)); 
+ 
+  /* Return the output value */ 
+  return (sinVal); 
+ 
+} 
+ 
+/**  
+ * @} end of sin group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FastMathFunctions/arm_sin_q15.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,189 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_sin_q15.c  
+*  
+* Description:	Fast sine calculation for Q15 values. 
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupFastMath  
+ */ 
+ 
+ /**  
+ * @addtogroup sin  
+ * @{  
+ */ 
+ 
+ 
+/**  
+ * \par  
+ * Example code for Generation of Q15 Sin Table: 
+ * \par  
+ * <pre>tableSize = 256;  
+ * for(n = -1; n < (tableSize + 2); n++)  
+ * {  
+ *	sinTable[n+1]=sin(2*pi*n/tableSize);  
+ * } </pre>  
+ * where pi value is  3.14159265358979  
+ * \par  
+ * Convert Floating point to Q15(Fixed point):  
+ *	(sinTable[i] * pow(2, 15))  
+ * \par  
+ * rounding to nearest integer is done  
+ * 	sinTable[i] += (sinTable[i] > 0 ? 0.5 :-0.5);  
+ */ 
+ 
+ 
+static const q15_t sinTableQ15[259] = { /* Q15 sine samples spaced 1/256 of a period apart, starting one step before angle 0 and running past a full period; arm_sin_q15() reads four consecutive entries starting at sinTableQ15[index] */
+  0xfcdc, 0x0, 0x324, 0x648, 0x96b, 0xc8c, 0xfab, 0x12c8, 
+  0x15e2, 0x18f9, 0x1c0c, 0x1f1a, 0x2224, 0x2528, 0x2827, 0x2b1f, 
+  0x2e11, 0x30fc, 0x33df, 0x36ba, 0x398d, 0x3c57, 0x3f17, 0x41ce, 
+  0x447b, 0x471d, 0x49b4, 0x4c40, 0x4ec0, 0x5134, 0x539b, 0x55f6, 
+  0x5843, 0x5a82, 0x5cb4, 0x5ed7, 0x60ec, 0x62f2, 0x64e9, 0x66d0, 
+  0x68a7, 0x6a6e, 0x6c24, 0x6dca, 0x6f5f, 0x70e3, 0x7255, 0x73b6, 
+  0x7505, 0x7642, 0x776c, 0x7885, 0x798a, 0x7a7d, 0x7b5d, 0x7c2a, 
+  0x7ce4, 0x7d8a, 0x7e1e, 0x7e9d, 0x7f0a, 0x7f62, 0x7fa7, 0x7fd9, 
+  0x7ff6, 0x7fff, 0x7ff6, 0x7fd9, 0x7fa7, 0x7f62, 0x7f0a, 0x7e9d, 
+  0x7e1e, 0x7d8a, 0x7ce4, 0x7c2a, 0x7b5d, 0x7a7d, 0x798a, 0x7885, 
+  0x776c, 0x7642, 0x7505, 0x73b6, 0x7255, 0x70e3, 0x6f5f, 0x6dca, 
+  0x6c24, 0x6a6e, 0x68a7, 0x66d0, 0x64e9, 0x62f2, 0x60ec, 0x5ed7, 
+  0x5cb4, 0x5a82, 0x5843, 0x55f6, 0x539b, 0x5134, 0x4ec0, 0x4c40, 
+  0x49b4, 0x471d, 0x447b, 0x41ce, 0x3f17, 0x3c57, 0x398d, 0x36ba, 
+  0x33df, 0x30fc, 0x2e11, 0x2b1f, 0x2827, 0x2528, 0x2224, 0x1f1a, 
+  0x1c0c, 0x18f9, 0x15e2, 0x12c8, 0xfab, 0xc8c, 0x96b, 0x648, 
+  0x324, 0x0, 0xfcdc, 0xf9b8, 0xf695, 0xf374, 0xf055, 0xed38, 
+  0xea1e, 0xe707, 0xe3f4, 0xe0e6, 0xdddc, 0xdad8, 0xd7d9, 0xd4e1, 
+  0xd1ef, 0xcf04, 0xcc21, 0xc946, 0xc673, 0xc3a9, 0xc0e9, 0xbe32, 
+  0xbb85, 0xb8e3, 0xb64c, 0xb3c0, 0xb140, 0xaecc, 0xac65, 0xaa0a, 
+  0xa7bd, 0xa57e, 0xa34c, 0xa129, 0x9f14, 0x9d0e, 0x9b17, 0x9930, 
+  0x9759, 0x9592, 0x93dc, 0x9236, 0x90a1, 0x8f1d, 0x8dab, 0x8c4a, 
+  0x8afb, 0x89be, 0x8894, 0x877b, 0x8676, 0x8583, 0x84a3, 0x83d6, 
+  0x831c, 0x8276, 0x81e2, 0x8163, 0x80f6, 0x809e, 0x8059, 0x8027, 
+  0x800a, 0x8000, 0x800a, 0x8027, 0x8059, 0x809e, 0x80f6, 0x8163, 
+  0x81e2, 0x8276, 0x831c, 0x83d6, 0x84a3, 0x8583, 0x8676, 0x877b, 
+  0x8894, 0x89be, 0x8afb, 0x8c4a, 0x8dab, 0x8f1d, 0x90a1, 0x9236, 
+  0x93dc, 0x9592, 0x9759, 0x9930, 0x9b17, 0x9d0e, 0x9f14, 0xa129, 
+  0xa34c, 0xa57e, 0xa7bd, 0xaa0a, 0xac65, 0xaecc, 0xb140, 0xb3c0, 
+  0xb64c, 0xb8e3, 0xbb85, 0xbe32, 0xc0e9, 0xc3a9, 0xc673, 0xc946, 
+  0xcc21, 0xcf04, 0xd1ef, 0xd4e1, 0xd7d9, 0xdad8, 0xdddc, 0xe0e6, 
+  0xe3f4, 0xe707, 0xea1e, 0xed38, 0xf055, 0xf374, 0xf695, 0xf9b8, 
+  0xfcdc, 0x0, 0x324 
+}; 
+ 
+ 
+/** 
+ * @brief Fast approximation to the trigonometric sine function for Q15 data. 
+ * @param[in] x Scaled input value in radians. 
+ * @return  sin(x). 
+ * 
+ * The Q15 input value is in the range [0 +1) and is mapped to a radian value in the range [0 2*pi). 
+ * A negative input is wrapped forward by one period (its sign bit is cleared, 
+ * which equals x + 1.0 in Q15) before the table lookup, so that it cannot 
+ * produce a negative table index. 
+ * 
+ * The result is a*wa + b*wb + c*wc + d*wd, where a..d are four consecutive 
+ * entries starting at sinTableQ15[index] and wa..wd are cubic weights 
+ * computed from the fractional position between table points. 
+ */ 
+ 
+q15_t arm_sin_q15( 
+  q15_t x) 
+{ 
+  q31_t sinVal;                                  /* Temporary variables output */ 
+  const q15_t *tablePtr;                         /* Read-only pointer into the table */ 
+  q15_t fract, in, in2;                          /* Temporary variables for input, output */ 
+  q31_t wa, wb, wc, wd;                          /* Cubic interpolation coefficients */ 
+  q15_t a, b, c, d;                              /* Four nearest output values */ 
+  q15_t fractCube, fractSquare;                  /* Temporary values for fractional value */ 
+  q15_t oneBy6 = 0x1555;                         /* Fixed point value of 1/6 */ 
+  q15_t tableSpacing = TABLE_SPACING_Q15;        /* Table spacing */ 
+  int32_t index;                                 /* Index variable */ 
+ 
+  /* The scaled input has period 1.0, so a negative x is equivalent to 
+     x + 1.0; in Q15 that is the same as clearing the sign bit.  Doing this 
+     first keeps "in" non-negative, so index below is never negative and the 
+     lookup cannot start before the beginning of sinTableQ15. 
+     NOTE(review): assumes TABLE_SPACING_Q15 * 256 == 2^15 -- confirm in arm_math.h. */ 
+  in = (q15_t) ((uint16_t) x & 0x7FFFu); 
+ 
+  /* Calculate the nearest index */ 
+  index = (int32_t) in / tableSpacing; 
+ 
+  /* Calculate the nearest value of input */ 
+  in2 = (q15_t) ((index) * tableSpacing); 
+ 
+  /* Calculation of fractional value, scaled up to the full Q15 range */ 
+  fract = (in - in2) << 8; 
+ 
+  /* fractSquare = fract * fract */ 
+  fractSquare = (q15_t) ((fract * fract) >> 15); 
+ 
+  /* fractCube = fract * fract * fract */ 
+  fractCube = (q15_t) ((fractSquare * fract) >> 15); 
+ 
+  /* Initialise table pointer */ 
+  tablePtr = &sinTableQ15[index]; 
+ 
+  /* Cubic interpolation process */ 
+  /* Calculation of wa */ 
+  /* wa = -(oneBy6)*fractCube + (fractSquare >> 1u) - (0x2AAA)*fract; */ 
+  wa = (q31_t) oneBy6 *fractCube; 
+  wa += (q31_t) 0x2AAA * fract; 
+  wa = -(wa >> 15); 
+  wa += ((q31_t) fractSquare >> 1u); 
+ 
+  /* Read first nearest value of output from the sin table */ 
+  a = *tablePtr++; 
+ 
+  /* sinVal = a * wa */ 
+  sinVal = a * wa; 
+ 
+  /* Calculation of wb */ 
+  wb = (((q31_t) fractCube >> 1u) - (q31_t) fractSquare) - 
+       (((q31_t) fract >> 1u) - 0x7FFF); 
+ 
+  /* Read second nearest value of output from the sin table */ 
+  b = *tablePtr++; 
+ 
+  /*      sinVal += b*wb */ 
+  sinVal += b * wb; 
+ 
+ 
+  /* Calculation of wc */ 
+  wc = -(q31_t) fractCube + fractSquare; 
+  wc = (wc >> 1u) + fract; 
+ 
+  /* Read third nearest value of output from the sin table */ 
+  c = *tablePtr++; 
+ 
+  /*      sinVal += c*wc */ 
+  sinVal += c * wc; 
+ 
+  /* Calculation of wd */ 
+  /* wd = (oneBy6)*fractCube - (oneBy6)*fract; */ 
+  fractCube = fractCube - fract; 
+  wd = ((q15_t) (((q31_t) oneBy6 * fractCube) >> 15)); 
+ 
+  /* Read fourth nearest value of output from the sin table */ 
+  d = *tablePtr++; 
+ 
+  /* sinVal += d*wd; */ 
+  sinVal += d * wd; 
+ 
+  /* Return the output value in 1.15(q15) format */ 
+  return ((q15_t) (sinVal >> 15u)); 
+ 
+} 
+ 
+/**  
+ * @} end of sin group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FastMathFunctions/arm_sin_q31.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,224 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_sin_q31.c  
+*  
+* Description:	Fast sine calculation for Q31 values. 
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupFastMath  
+ */ 
+ 
+ /**  
+ * @addtogroup sin  
+ * @{  
+ */ 
+ 
+/** 
+ * \par  
+ * Tables generated are in Q31(1.31 Fixed point format)  
+ * Generation of sin values in floating point:  
+ * <pre>tableSize = 256;    
+ * for(n = -1; n < (tableSize + 2); n++)  
+ * {  
+ *	sinTable[n+1]= sin(2*pi*n/tableSize);  
+ * } </pre>  
+ * where pi value is  3.14159265358979  
+ * \par  
+ * Convert Floating point to Q31(Fixed point):  
+ *	(sinTable[i] * pow(2, 31))  
+ * \par  
+ * rounding to nearest integer is done  
+ * 	sinTable[i] += (sinTable[i] > 0 ? 0.5 :-0.5);  
+ */ 
+ 
+static const q31_t sinTableQ31[259] = { /* Q31 sine samples spaced 1/256 of a period apart, starting one step before angle 0 and running past a full period; arm_sin_q31() reads four consecutive entries starting at sinTableQ31[index] */
+  0xfcdbd541, 0x0, 0x3242abf, 0x647d97c, 0x96a9049, 0xc8bd35e, 0xfab272b, 
+  0x12c8106f, 
+  0x15e21445, 0x18f8b83c, 0x1c0b826a, 0x1f19f97b, 0x2223a4c5, 0x25280c5e, 
+  0x2826b928, 0x2b1f34eb, 
+  0x2e110a62, 0x30fbc54d, 0x33def287, 0x36ba2014, 0x398cdd32, 0x3c56ba70, 
+  0x3f1749b8, 0x41ce1e65, 
+  0x447acd50, 0x471cece7, 0x49b41533, 0x4c3fdff4, 0x4ebfe8a5, 0x5133cc94, 
+  0x539b2af0, 0x55f5a4d2, 
+  0x5842dd54, 0x5a82799a, 0x5cb420e0, 0x5ed77c8a, 0x60ec3830, 0x62f201ac, 
+  0x64e88926, 0x66cf8120, 
+  0x68a69e81, 0x6a6d98a4, 0x6c242960, 0x6dca0d14, 0x6f5f02b2, 0x70e2cbc6, 
+  0x72552c85, 0x73b5ebd1, 
+  0x7504d345, 0x7641af3d, 0x776c4edb, 0x78848414, 0x798a23b1, 0x7a7d055b, 
+  0x7b5d039e, 0x7c29fbee, 
+  0x7ce3ceb2, 0x7d8a5f40, 0x7e1d93ea, 0x7e9d55fc, 0x7f0991c4, 0x7f62368f, 
+  0x7fa736b4, 0x7fd8878e, 
+  0x7ff62182, 0x7fffffff, 0x7ff62182, 0x7fd8878e, 0x7fa736b4, 0x7f62368f, 
+  0x7f0991c4, 0x7e9d55fc, 
+  0x7e1d93ea, 0x7d8a5f40, 0x7ce3ceb2, 0x7c29fbee, 0x7b5d039e, 0x7a7d055b, 
+  0x798a23b1, 0x78848414, 
+  0x776c4edb, 0x7641af3d, 0x7504d345, 0x73b5ebd1, 0x72552c85, 0x70e2cbc6, 
+  0x6f5f02b2, 0x6dca0d14, 
+  0x6c242960, 0x6a6d98a4, 0x68a69e81, 0x66cf8120, 0x64e88926, 0x62f201ac, 
+  0x60ec3830, 0x5ed77c8a, 
+  0x5cb420e0, 0x5a82799a, 0x5842dd54, 0x55f5a4d2, 0x539b2af0, 0x5133cc94, 
+  0x4ebfe8a5, 0x4c3fdff4, 
+  0x49b41533, 0x471cece7, 0x447acd50, 0x41ce1e65, 0x3f1749b8, 0x3c56ba70, 
+  0x398cdd32, 0x36ba2014, 
+  0x33def287, 0x30fbc54d, 0x2e110a62, 0x2b1f34eb, 0x2826b928, 0x25280c5e, 
+  0x2223a4c5, 0x1f19f97b, 
+  0x1c0b826a, 0x18f8b83c, 0x15e21445, 0x12c8106f, 0xfab272b, 0xc8bd35e, 
+  0x96a9049, 0x647d97c, 
+  0x3242abf, 0x0, 0xfcdbd541, 0xf9b82684, 0xf6956fb7, 0xf3742ca2, 0xf054d8d5, 
+  0xed37ef91, 
+  0xea1debbb, 0xe70747c4, 0xe3f47d96, 0xe0e60685, 0xdddc5b3b, 0xdad7f3a2, 
+  0xd7d946d8, 0xd4e0cb15, 
+  0xd1eef59e, 0xcf043ab3, 0xcc210d79, 0xc945dfec, 0xc67322ce, 0xc3a94590, 
+  0xc0e8b648, 0xbe31e19b, 
+  0xbb8532b0, 0xb8e31319, 0xb64beacd, 0xb3c0200c, 0xb140175b, 0xaecc336c, 
+  0xac64d510, 0xaa0a5b2e, 
+  0xa7bd22ac, 0xa57d8666, 0xa34bdf20, 0xa1288376, 0x9f13c7d0, 0x9d0dfe54, 
+  0x9b1776da, 0x99307ee0, 
+  0x9759617f, 0x9592675c, 0x93dbd6a0, 0x9235f2ec, 0x90a0fd4e, 0x8f1d343a, 
+  0x8daad37b, 0x8c4a142f, 
+  0x8afb2cbb, 0x89be50c3, 0x8893b125, 0x877b7bec, 0x8675dc4f, 0x8582faa5, 
+  0x84a2fc62, 0x83d60412, 
+  0x831c314e, 0x8275a0c0, 0x81e26c16, 0x8162aa04, 0x80f66e3c, 0x809dc971, 
+  0x8058c94c, 0x80277872, 
+  0x8009de7e, 0x80000000, 0x8009de7e, 0x80277872, 0x8058c94c, 0x809dc971, 
+  0x80f66e3c, 0x8162aa04, 
+  0x81e26c16, 0x8275a0c0, 0x831c314e, 0x83d60412, 0x84a2fc62, 0x8582faa5, 
+  0x8675dc4f, 0x877b7bec, 
+  0x8893b125, 0x89be50c3, 0x8afb2cbb, 0x8c4a142f, 0x8daad37b, 0x8f1d343a, 
+  0x90a0fd4e, 0x9235f2ec, 
+  0x93dbd6a0, 0x9592675c, 0x9759617f, 0x99307ee0, 0x9b1776da, 0x9d0dfe54, 
+  0x9f13c7d0, 0xa1288376, 
+  0xa34bdf20, 0xa57d8666, 0xa7bd22ac, 0xaa0a5b2e, 0xac64d510, 0xaecc336c, 
+  0xb140175b, 0xb3c0200c, 
+  0xb64beacd, 0xb8e31319, 0xbb8532b0, 0xbe31e19b, 0xc0e8b648, 0xc3a94590, 
+  0xc67322ce, 0xc945dfec, 
+  0xcc210d79, 0xcf043ab3, 0xd1eef59e, 0xd4e0cb15, 0xd7d946d8, 0xdad7f3a2, 
+  0xdddc5b3b, 0xe0e60685, 
+  0xe3f47d96, 0xe70747c4, 0xea1debbb, 0xed37ef91, 0xf054d8d5, 0xf3742ca2, 
+  0xf6956fb7, 0xf9b82684, 
+  0xfcdbd541, 0x0, 0x3242abf 
+}; 
+ 
+ 
+/** 
+ * @brief Fast approximation to the trigonometric sine function for Q31 data. 
+ * @param[in] x Scaled input value in radians. 
+ * @return  sin(x). 
+ * 
+ * The Q31 input value is in the range [0 +1) and is mapped to a radian value in the range [0 2*pi). 
+ * A negative input is wrapped forward by one period (its sign bit is cleared, 
+ * which equals x + 1.0 in Q31) before the table lookup, so that it cannot 
+ * produce a table index beyond the end of sinTableQ31. 
+ * 
+ * The result is a*wa + b*wb + c*wc + d*wd, where a..d are four consecutive 
+ * entries starting at sinTableQ31[index] and wa..wd are cubic weights 
+ * computed from the fractional position between table points. 
+ */ 
+ 
+q31_t arm_sin_q31( 
+  q31_t x) 
+{ 
+  q31_t sinVal, fract, in, in2;                  /* Temporary variables for input, output */ 
+  uint32_t index;                                /* Index variables */ 
+  q31_t wa, wb, wc, wd;                          /* Cubic interpolation coefficients */ 
+  q31_t a, b, c, d;                              /* Four nearest output values */ 
+  const q31_t *tablePtr;                         /* Read-only pointer into the table */ 
+  q31_t fractCube, fractSquare;                  /* Temporary values for fractional value */ 
+  q31_t oneBy6 = 0x15555555;                     /* Fixed point value of 1/6 */ 
+  q31_t tableSpacing = TABLE_SPACING_Q31;        /* Table spacing */ 
+  q31_t temp;                                    /* Temporary variable for intermediate process */ 
+ 
+  /* The scaled input has period 1.0, so a negative x is equivalent to 
+     x + 1.0; in Q31 that is the same as clearing the sign bit.  Without 
+     this, a negative x reinterpreted as unsigned below would give an index 
+     past the end of the 259-entry table. 
+     NOTE(review): assumes TABLE_SPACING_Q31 * 256 == 2^31 -- confirm in arm_math.h. */ 
+  in = (q31_t) ((uint32_t) x & 0x7FFFFFFFu); 
+ 
+  /* Calculate the nearest index */ 
+  index = (uint32_t) in / (uint32_t) tableSpacing; 
+ 
+  /* Calculate the nearest value of input */ 
+  in2 = (q31_t) index *tableSpacing; 
+ 
+  /* Calculation of fractional value, scaled up to the full Q31 range */ 
+  fract = (in - in2) << 8; 
+ 
+  /* fractSquare = fract * fract */ 
+  fractSquare = ((q31_t) (((q63_t) fract * fract) >> 32)); 
+  fractSquare = fractSquare << 1; 
+ 
+  /* fractCube = fract * fract * fract */ 
+  fractCube = ((q31_t) (((q63_t) fractSquare * fract) >> 32)); 
+  fractCube = fractCube << 1; 
+ 
+  /* Initialise table pointer */ 
+  tablePtr = &sinTableQ31[index]; 
+ 
+  /* Cubic interpolation process */ 
+  /* Calculation of wa */ 
+  /* wa = -(oneBy6)*fractCube + (fractSquare >> 1u) - (0x2AAAAAAA)*fract; */ 
+  wa = ((q31_t) (((q63_t) oneBy6 * fractCube) >> 32)); 
+  temp = 0x2AAAAAAA; 
+  wa = (q31_t) ((((q63_t) wa << 32) + ((q63_t) temp * fract)) >> 32); 
+  wa = -(wa << 1u); 
+  wa += (fractSquare >> 1u); 
+ 
+  /* Read first nearest value of output from the sin table */ 
+  a = *tablePtr++; 
+ 
+  /* sinVal = a*wa */ 
+  sinVal = ((q31_t) (((q63_t) a * wa) >> 32)); 
+ 
+  /* q31(1.31) Fixed point value of 1 */ 
+  temp = 0x7FFFFFFF; 
+ 
+  /* Calculation of wb */ 
+  wb = ((fractCube >> 1u) - (fractSquare + (fract >> 1u))) + temp; 
+ 
+  /* Read second nearest value of output from the sin table */ 
+  b = *tablePtr++; 
+ 
+  /*  sinVal += b*wb */ 
+  sinVal = (q31_t) ((((q63_t) sinVal << 32) + (q63_t) b * (wb)) >> 32); 
+ 
+  /* Calculation of wc */ 
+  wc = -fractCube + fractSquare; 
+  wc = (wc >> 1u) + fract; 
+ 
+  /* Read third nearest value of output from the sin table */ 
+  c = *tablePtr++; 
+ 
+  /*      sinVal += c*wc */ 
+  sinVal = (q31_t) ((((q63_t) sinVal << 32) + ((q63_t) c * wc)) >> 32); 
+ 
+  /* Calculation of wd */ 
+  /* wd = (oneBy6) * fractCube - (oneBy6) * fract; */ 
+  fractCube = fractCube - fract; 
+  wd = ((q31_t) (((q63_t) oneBy6 * fractCube) >> 32)); 
+  wd = (wd << 1u); 
+ 
+  /* Read fourth nearest value of output from the sin table */ 
+  d = *tablePtr++; 
+ 
+  /* sinVal += d*wd; */ 
+  sinVal = (q31_t) ((((q63_t) sinVal << 32) + ((q63_t) d * wd)) >> 32); 
+ 
+  /* convert sinVal in 2.30 format to 1.31 format */ 
+  return (sinVal << 1u); 
+ 
+} 
+ 
+/**  
+ * @} end of sin group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FastMathFunctions/arm_sqrt_q15.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,123 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_sqrt_q15.c  
+*  
+* Description:	Q15 square root function. 
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+#include "arm_common_tables.h" 
+ 
+ 
+/**  
+ * @ingroup groupFastMath  
+ */ 
+ 
+/**  
+ * @addtogroup SQRT  
+ * @{  
+ */ 
+ 
+  /** 
+   * @brief  Q15 square root function. 
+   * @param[in]   in     input value.  The range of the input value is [0 +1) or 0x0000 to 0x7FFF. 
+   * @param[out]  *pOut  square root of input value; set to zero when <code>in</code> is zero or negative. 
+   * @return The function returns ARM_MATH_SUCCESS if <code>in</code> is zero or positive, or 
+   * ARM_MATH_ARGUMENT_ERROR if <code>in</code> is negative (the output is then zero). 
+   */ 
+ 
+arm_status arm_sqrt_q15( 
+  q15_t in, 
+  q15_t * pOut) 
+{ 
+  q31_t out; 
+  q31_t prevOut; 
+  q15_t oneByOut; 
+  uint32_t sign_bits; 
+ 
+  if(in > 0) 
+  { 
+    /* Ten unrolled refinement steps; out and prevOut take turns holding the estimate */ 
+ 
+    /* First iteration: the estimate starts at half of the input plus 0x3FFF (about 0.5 in Q15) */ 
+    out = ((q31_t) in >> 1u) + 0x3FFF; 
+ 
+    /* Second iteration: calculation of reciprocal of out */ 
+    /* oneByOut contains reciprocal of out which is in 2.14 format  
+       and must be upscaled by the shift count returned in sign_bits */ 
+    sign_bits = arm_recip_q15((q15_t) out, &oneByOut, (q15_t*)armRecipTableQ15); 
+ 
+    /* 0.5 * (out) */ 
+    out = out >> 1u; 
+    /* prevOut = 0.5 * out + ((in * oneByOut) << sign_bits) */ 
+    prevOut = out + (((q15_t) (((q31_t) in * oneByOut) >> 16)) << sign_bits); 
+ 
+    /* Third iteration */ 
+    sign_bits = arm_recip_q15((q15_t) prevOut, &oneByOut, (q15_t*)armRecipTableQ15); 
+    prevOut = prevOut >> 1u; 
+    out = prevOut + (((q15_t) (((q31_t) in * oneByOut) >> 16)) << sign_bits); 
+    /* Fourth iteration */ 
+    sign_bits = arm_recip_q15((q15_t) out, &oneByOut, (q15_t*)armRecipTableQ15); 
+    out = out >> 1u; 
+    prevOut = out + (((q15_t) (((q31_t) in * oneByOut) >> 16)) << sign_bits); 
+ 
+    /* Fifth iteration */ 
+    sign_bits = arm_recip_q15((q15_t) prevOut, &oneByOut, (q15_t*)armRecipTableQ15); 
+    prevOut = prevOut >> 1u; 
+    out = prevOut + (((q15_t) (((q31_t) in * oneByOut) >> 16)) << sign_bits); 
+    /* Sixth iteration */ 
+    sign_bits = arm_recip_q15((q15_t) out, &oneByOut, (q15_t*)armRecipTableQ15); 
+    out = out >> 1u; 
+    prevOut = out + (((q15_t) (((q31_t) in * oneByOut) >> 16)) << sign_bits); 
+ 
+    /* Seventh iteration */ 
+    sign_bits = arm_recip_q15((q15_t) prevOut, &oneByOut, (q15_t*)armRecipTableQ15); 
+    prevOut = prevOut >> 1u; 
+    out = prevOut + (((q15_t) (((q31_t) in * oneByOut) >> 16)) << sign_bits); 
+    /* Eighth iteration */ 
+    sign_bits = arm_recip_q15((q15_t) out, &oneByOut, (q15_t*)armRecipTableQ15); 
+    out = out >> 1u; 
+    prevOut = out + (((q15_t) (((q31_t) in * oneByOut) >> 16)) << sign_bits); 
+    /* Ninth iteration */ 
+    sign_bits = arm_recip_q15((q15_t) prevOut, &oneByOut, (q15_t*)armRecipTableQ15); 
+    prevOut = prevOut >> 1u; 
+    out = prevOut + (((q15_t) (((q31_t) in * oneByOut) >> 16)) << sign_bits); 
+ 
+    /* Tenth iteration: the sum is narrowed to q15_t when stored through pOut */ 
+    sign_bits = arm_recip_q15((q15_t) out, &oneByOut, (q15_t*)armRecipTableQ15); 
+    out = out >> 1u; 
+    *pOut = out + (((q15_t) (((q31_t) in * oneByOut) >> 16)) << sign_bits); 
+ 
+    return (ARM_MATH_SUCCESS); 
+  } 
+  else 
+  { 
+    /* Zero lies inside the documented input range and its root is zero, 
+       so only a negative input is reported as an argument error */ 
+    *pOut = 0; 
+    return ((in == 0) ? ARM_MATH_SUCCESS : ARM_MATH_ARGUMENT_ERROR); 
+  } 
+} 
+ 
+/**  
+ * @} end of SQRT group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FastMathFunctions/arm_sqrt_q31.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,124 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_sqrt_q31.c  
+*  
+* Description:	Q31 square root function. 
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+#include "arm_common_tables.h" 
+ 
+/**  
+ * @ingroup groupFastMath  
+ */ 
+ 
+/**  
+ * @addtogroup SQRT  
+ * @{  
+ */ 
+ 
+/** 
+ * @brief Q31 square root function. 
+ * @param[in]   in    input value.  The range of the input value is [0 +1) or 0x00000000 to 0x7FFFFFFF. 
+ * @param[out]  *pOut square root of input value; set to zero when <code>in</code> is zero or negative. 
+ * @return The function returns ARM_MATH_SUCCESS if <code>in</code> is zero or positive, or 
+ * ARM_MATH_ARGUMENT_ERROR if <code>in</code> is negative (the output is then zero). 
+ */ 
+ 
+arm_status arm_sqrt_q31( 
+  q31_t in, 
+  q31_t * pOut) 
+{ 
+  q63_t out; 
+  q63_t prevOut; 
+  q31_t oneByOut; 
+  uint32_t signBits; 
+ 
+ 
+  if(in > 0) 
+  { 
+ 
+    /* Ten unrolled refinement steps; out and prevOut take turns holding the estimate */ 
+ 
+    /* First iteration: the estimate starts at half of the input plus 0x3FFFFFFF (about 0.5 in Q31) */ 
+    out = (in >> 1) + 0x3FFFFFFF; 
+ 
+    /* Second iteration: calculation of reciprocal of out */ 
+    /* oneByOut contains reciprocal of out which is in 2.30 format  
+       and must be upscaled by the shift count returned in signBits */ 
+    signBits = arm_recip_q31((q31_t) out, &oneByOut, (q31_t*)armRecipTableQ31); 
+ 
+    /* 0.5 * (out) */ 
+    out = out >> 1u; 
+ 
+    /* prevOut = 0.5 * out + ((in * oneByOut) << signBits) */ 
+    prevOut = out + (((q31_t) (((q63_t) in * oneByOut) >> 32)) << signBits); 
+ 
+    /* Third iteration */ 
+    signBits = arm_recip_q31((q31_t) prevOut, &oneByOut, (q31_t*)armRecipTableQ31); 
+    prevOut = prevOut >> 1u; 
+    out = prevOut + (((q31_t) (((q63_t) in * oneByOut) >> 32)) << signBits); 
+    /* Fourth iteration */ 
+    signBits = arm_recip_q31((q31_t) out, &oneByOut, (q31_t*)armRecipTableQ31); 
+    out = out >> 1u; 
+    prevOut = out + (((q31_t) (((q63_t) in * oneByOut) >> 32)) << signBits); 
+ 
+    /* Fifth iteration */ 
+    signBits = arm_recip_q31((q31_t) prevOut, &oneByOut, (q31_t*)armRecipTableQ31); 
+    prevOut = prevOut >> 1u; 
+    out = prevOut + (((q31_t) (((q63_t) in * oneByOut) >> 32)) << signBits); 
+    /* Sixth iteration */ 
+    signBits = arm_recip_q31((q31_t) out, &oneByOut, (q31_t*)armRecipTableQ31); 
+    out = out >> 1u; 
+    prevOut = out + (((q31_t) (((q63_t) in * oneByOut) >> 32)) << signBits); 
+ 
+    /* Seventh iteration */ 
+    signBits = arm_recip_q31((q31_t) prevOut, &oneByOut, (q31_t*)armRecipTableQ31); 
+    prevOut = prevOut >> 1u; 
+    out = prevOut + (((q31_t) (((q63_t) in * oneByOut) >> 32)) << signBits); 
+    /* Eighth iteration */ 
+    signBits = arm_recip_q31((q31_t) out, &oneByOut, (q31_t*)armRecipTableQ31); 
+    out = out >> 1u; 
+    prevOut = out + (((q31_t) (((q63_t) in * oneByOut) >> 32)) << signBits); 
+    /* Ninth iteration */ 
+    signBits = arm_recip_q31((q31_t) prevOut, &oneByOut, (q31_t*)armRecipTableQ31); 
+    prevOut = prevOut >> 1u; 
+    out = prevOut + (((q31_t) (((q63_t) in * oneByOut) >> 32)) << signBits); 
+ 
+    /* Tenth iteration: the sum is narrowed to q31_t when stored through pOut */ 
+    signBits = arm_recip_q31((q31_t) out, &oneByOut, (q31_t*)armRecipTableQ31); 
+    out = out >> 1u; 
+    *pOut = out + (((q31_t) (((q63_t) in * oneByOut) >> 32)) << signBits); 
+ 
+    return (ARM_MATH_SUCCESS); 
+  } 
+  else 
+  { 
+    /* Zero is a valid input whose root is zero; only a negative input is an argument error */ 
+    *pOut = 0; 
+    return ((in == 0) ? ARM_MATH_SUCCESS : ARM_MATH_ARGUMENT_ERROR); 
+  } 
+} 
+ 
+/**  
+ * @} end of SQRT group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FilteringFunctions/arm_biquad_cascade_df1_32x64_init_q31.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,99 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:	    arm_biquad_cascade_df1_32x64_init_q31.c  
+*  
+* Description:	High precision Q31 Biquad cascade filter initialization function.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupFilters  
+ */ 
+ 
+/**  
+ * @addtogroup BiquadCascadeDF1_32x64  
+ * @{  
+ */ 
+ 
+/**  
+ * @details  
+ *  
+ * @param[in,out] *S           	points to an instance of the high precision Q31 Biquad cascade filter structure.  
+ * @param[in]     numStages     number of 2nd order stages in the filter.  
+ * @param[in]     *pCoeffs      points to the filter coefficients.  
+ * @param[in]     *pState       points to the state buffer.  
+ * @param[in]     postShift     Shift to be applied after the accumulator.  Varies according to the coefficients format.  
+ * @return        none  
+ *  
+ * <b>Coefficient and State Ordering:</b>  
+ *  
+ * \par  
+ * The coefficients are stored in the array <code>pCoeffs</code> in the following order:  
+ * <pre>  
+ *     {b10, b11, b12, a11, a12, b20, b21, b22, a21, a22, ...}  
+ * </pre>  
+ * where <code>b1x</code> and <code>a1x</code> are the coefficients for the first stage,  
+ * <code>b2x</code> and <code>a2x</code> are the coefficients for the second stage,  
+ * and so on.  The <code>pCoeffs</code> array contains a total of <code>5*numStages</code> values.  
+ *  
+ * \par  
+ * <code>pState</code> points to the state variable array; each state variable is stored in 1.63 format.  
+ * Each Biquad stage has 4 state variables <code>x[n-1], x[n-2], y[n-1],</code> and <code>y[n-2]</code>.  
+ * The state variables are arranged in the state array as:  
+ * <pre>  
+ *     {x[n-1], x[n-2], y[n-1], y[n-2]}  
+ * </pre>  
+ * The 4 state variables for stage 1 are first, then the 4 state variables for stage 2, and so on.  
+ * The state array has a total length of <code>4*numStages</code> values.  
+ * The state variables are updated after each block of data is processed; the coefficients are untouched.  
+ */ 
+ 
+void arm_biquad_cas_df1_32x64_init_q31( 
+  arm_biquad_cas_df1_32x64_ins_q31 * S, 
+  uint8_t numStages, 
+  q31_t * pCoeffs, 
+  q63_t * pState, 
+  uint8_t postShift) 
+{ 
+  /* Four q63_t state words are kept for every Biquad stage */ 
+  const uint32_t stateCount = 4u * (uint32_t) numStages; 
+ 
+  /* Remember where the coefficients live */ 
+  S->pCoeffs = pCoeffs; 
+ 
+  /* Record the shift applied after the accumulator and the cascade length */ 
+  S->postShift = postShift; 
+  S->numStages = numStages; 
+ 
+  /* Zero the complete state buffer ... */ 
+  memset(pState, 0, stateCount * sizeof(q63_t)); 
+  /* ... and only then attach it to the instance */ 
+  S->pState = pState; 
+} 
+ 
+/**  
+ * @} end of BiquadCascadeDF1_32x64 group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FilteringFunctions/arm_biquad_cascade_df1_32x64_q31.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,389 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:	    arm_biquad_cascade_df1_32x64_q31.c  
+*  
+* Description:	High precision Q31 Biquad cascade filter processing function  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupFilters  
+ */ 
+ 
+/**  
+ * @defgroup BiquadCascadeDF1_32x64 High Precision Q31 Biquad Cascade Filter  
+ *  
+ * This function implements a high precision Biquad cascade filter which operates on  
+ * Q31 data values.  The filter coefficients are in 1.31 format and the state variables  
+ * are in 1.63 format.  The double precision state variables reduce quantization noise  
+ * in the filter and provide a cleaner output.  
+ * These filters are particularly useful when implementing filters in which the  
+ * singularities are close to the unit circle.  This is common for low pass or high  
+ * pass filters with very low cutoff frequencies.  
+ *  
+ * The function operates on blocks of input and output data  
+ * and each call to the function processes <code>blockSize</code> samples through  
+ * the filter. <code>pSrc</code> and <code>pDst</code> points to input and output arrays  
+ * containing <code>blockSize</code> Q31 values.  
+ *  
+ * \par Algorithm  
+ * Each Biquad stage implements a second order filter using the difference equation:  
+ * <pre>  
+ *     y[n] = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]  
+ * </pre>  
+ * A Direct Form I algorithm is used with 5 coefficients and 4 state variables per stage.  
+ * \image html Biquad.gif "Single Biquad filter stage"  
+ * Coefficients <code>b0, b1, and b2 </code> multiply the input signal <code>x[n]</code> and are referred to as the feedforward coefficients.  
+ * Coefficients <code>a1</code> and <code>a2</code> multiply the output signal <code>y[n]</code> and are referred to as the feedback coefficients.  
+ * Pay careful attention to the sign of the feedback coefficients.  
+ * Some design tools use the difference equation  
+ * <pre>  
+ *     y[n] = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] - a1 * y[n-1] - a2 * y[n-2]  
+ * </pre>  
+ * In this case the feedback coefficients <code>a1</code> and <code>a2</code> must be negated when used with the CMSIS DSP Library.  
+ *  
+ * \par  
+ * Higher order filters are realized as a cascade of second order sections.  
+ * <code>numStages</code> refers to the number of second order stages used.  
+ * For example, an 8th order filter would be realized with <code>numStages=4</code> second order stages.  
+ * \image html BiquadCascade.gif "8th order filter using a cascade of Biquad stages"  
+ * A 9th order filter would be realized with <code>numStages=5</code> second order stages with the coefficients for one of the stages configured as a first order filter (<code>b2=0</code> and <code>a2=0</code>).  
+ *  
+ * \par  
+ * <code>pState</code> points to the state variable array.  
+ * Each Biquad stage has 4 state variables <code>x[n-1], x[n-2], y[n-1],</code> and <code>y[n-2]</code>, and each state variable is stored in 1.63 format to improve precision.  
+ * The state variables are arranged in the array as:  
+ * <pre>  
+ *     {x[n-1], x[n-2], y[n-1], y[n-2]}  
+ * </pre>  
+ *  
+ * \par  
+ * The 4 state variables for stage 1 are first, then the 4 state variables for stage 2, and so on.  
+ * The state array has a total length of <code>4*numStages</code> values of data in 1.63 format.  
+ * The state variables are updated after each block of data is processed; the coefficients are untouched.  
+ *  
+ * \par Instance Structure  
+ * The coefficients and state variables for a filter are stored together in an instance data structure.  
+ * A separate instance structure must be defined for each filter.  
+ * Coefficient arrays may be shared among several instances while state variable arrays cannot be shared.  
+ *  
+ * \par Init Function  
+ * There is also an associated initialization function which performs the following operations:  
+ * - Sets the values of the internal structure fields.  
+ * - Zeros out the values in the state buffer.  
+ * \par  
+ * Use of the initialization function is optional.  
+ * However, if the initialization function is used, then the instance structure cannot be placed into a const data section.  
+ * To place an instance structure into a const data section, the instance structure must be manually initialized.  
+ * Set the values in the state buffer to zeros before static initialization.  
+ * For example, to statically initialize the filter instance structure use  
+ * <pre>  
+ *     arm_biquad_cas_df1_32x64_ins_q31 S1 = {numStages, pState, pCoeffs, postShift};  
+ * </pre>  
+ * where <code>numStages</code> is the number of Biquad stages in the filter; <code>pState</code> is the address of the state buffer;  
+ * <code>pCoeffs</code> is the address of the coefficient buffer; <code>postShift</code> shift to be applied which is described in detail below.  
+ * \par Fixed-Point Behavior  
+ * Care must be taken while using Biquad Cascade 32x64 filter function.  
+ * Following issues must be considered:  
+ * - Scaling of coefficients  
+ * - Filter gain  
+ * - Overflow and saturation  
+ *  
+ * \par  
+ * Filter coefficients are represented as fractional values and  
+ * restricted to lie in the range <code>[-1 +1)</code>.  
+ * The processing function has an additional scaling parameter <code>postShift</code>  
+ * which allows the filter coefficients to exceed the range <code>[-1 +1)</code>.  
+ * At the output of the filter's accumulator is a shift register which shifts the result by <code>postShift</code> bits.  
+ * \image html BiquadPostshift.gif "Fixed-point Biquad with shift by postShift bits after accumulator"  
+ * This essentially scales the filter coefficients by <code>2^postShift</code>.  
+ * For example, to realize the coefficients  
+ * <pre>  
+ *    {1.5, -0.8, 1.2, 1.6, -0.9}  
+ * </pre>  
+ * set the Coefficient array to:  
+ * <pre>  
+ *    {0.75, -0.4, 0.6, 0.8, -0.45}  
+ * </pre>  
+ * and set <code>postShift=1</code>  
+ *  
+ * \par  
+ * The second thing to keep in mind is the gain through the filter.  
+ * The frequency response of a Biquad filter is a function of its coefficients.  
+ * It is possible for the gain through the filter to exceed 1.0 meaning that the filter increases the amplitude of certain frequencies.  
+ * This means that an input signal with amplitude < 1.0 may result in an output > 1.0 and these are saturated or overflowed based on the implementation of the filter.  
+ * To avoid this behavior the filter needs to be scaled down such that its peak gain < 1.0 or the input signal must be scaled down so that the combination of input and filter are never overflowed.  
+ *  
+ * \par  
+ * The third item to consider is the overflow and saturation behavior of the fixed-point Q31 version.  
+ * This is described in the function specific documentation below.  
+ */ 
+ 
+/**  
+ * @addtogroup BiquadCascadeDF1_32x64  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief Processing function for the high precision Q31 Biquad cascade filter.  
+ * @details  
+ * @param[in]  *S points to an instance of the high precision Q31 Biquad cascade filter.  
+ * @param[in]  *pSrc points to the block of input data.  
+ * @param[out] *pDst points to the block of output data; every stage after the first also reads its input from this buffer.  
+ * @param[in]  blockSize number of samples to process.  
+ * @return none.  
+ *  
+ * \par  
+ * The function is implemented using an internal 64-bit accumulator.  
+ * The accumulator has a 2.62 format and maintains full precision of the intermediate multiplication results but provides only a single guard bit.  
+ * Thus, if the accumulator result overflows, it wraps around rather than being clipped.  
+ * In order to avoid overflows completely the input signal must be scaled down by 2 bits and lie in the range [-0.25 +0.25).  
+ * After all 5 multiply-accumulates are performed, the 2.62 accumulator is shifted by <code>postShift</code> bits and the result truncated to  
+ * 1.31 format by discarding the low 32 bits.  
+ *  
+ * \par  
+ * Two related functions are provided in the CMSIS DSP library.  
+ * <code>arm_biquad_cascade_df1_q31()</code> implements a Biquad cascade with 32-bit coefficients and state variables with a Q63 accumulator.  
+ * <code>arm_biquad_cascade_df1_fast_q31()</code> implements a Biquad cascade with 32-bit coefficients and state variables with a Q31 accumulator.  
+ */ 
+ 
+void arm_biquad_cas_df1_32x64_q31( 
+  const arm_biquad_cas_df1_32x64_ins_q31 * S, 
+  q31_t * pSrc, 
+  q31_t * pDst, 
+  uint32_t blockSize) 
+{ 
+  q31_t *pIn = pSrc;                             /*  input pointer initialization  */ 
+  q31_t *pOut = pDst;                            /*  output pointer initialization */ 
+  q63_t *pState = S->pState;                     /*  state pointer initialization  */ 
+  q31_t *pCoeffs = S->pCoeffs;                   /*  coeff pointer initialization  */ 
+  q63_t acc;                                     /*  accumulator                   */ 
+  q63_t Xn1, Xn2, Yn1, Yn2;                      /*  Filter state variables        */ 
+  q31_t b0, b1, b2, a1, a2;                      /*  Filter coefficients           */ 
+  q63_t Xn;                                      /*  temporary input               */ 
+  int32_t shift = (int32_t) S->postShift + 1;    /*  postShift plus one; applied to the accumulator */ 
+  uint32_t sample, stage = S->numStages;         /*  loop counters                     */ 
+ 
+  /* Each pass filters the whole block through one stage; the body runs before the stage counter is tested */ 
+  do 
+  { 
+    /* Read the five coefficients of the current stage */ 
+    b0 = *pCoeffs++; 
+    b1 = *pCoeffs++; 
+    b2 = *pCoeffs++; 
+    a1 = *pCoeffs++; 
+    a2 = *pCoeffs++; 
+ 
+    /* Read the four state values of the current stage */ 
+    Xn1 = pState[0]; 
+    Xn2 = pState[1]; 
+    Yn1 = pState[2]; 
+    Yn2 = pState[3]; 
+ 
+    /* The main loop is unrolled by four: each pass produces four consecutive outputs. */ 
+    /* The variable acc holds the output value that is being computed before it is  
+     * stored in the destination buffer  
+     * acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]  
+     */ 
+ 
+    sample = blockSize >> 2u; 
+ 
+    /* First part of the processing with loop unrolling. Compute 4 outputs at a time.  
+     ** a second loop below computes the remaining 1 to 3 samples. */ 
+    while(sample > 0u) 
+    { 
+      /* Read the first input of this group of four */ 
+      Xn = *pIn++; 
+ 
+      /* The value is shifted to the MSB to perform 32x64 multiplication */ 
+      Xn = Xn << 32; 
+ 
+      /* acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */ 
+ 
+      /* acc =  b0 * x[n] */ 
+      acc = mult32x64(Xn, b0); 
+      /* acc +=  b1 * x[n-1] */ 
+      acc += mult32x64(Xn1, b1); 
+      /* acc +=  b[2] * x[n-2] */ 
+      acc += mult32x64(Xn2, b2); 
+      /* acc +=  a1 * y[n-1] */ 
+      acc += mult32x64(Yn1, a1); 
+      /* acc +=  a2 * y[n-2] */ 
+      acc += mult32x64(Yn2, a2); 
+ 
+      /* Convert to 1.63; Yn2 is overwritten, so Yn2 now holds y[n-1] and Yn1 holds y[n-2] */ 
+      Yn2 = acc << shift; 
+ 
+      /* Store the output in the destination buffer in 1.31 format. */ 
+      *pOut++ = (q31_t) (acc >> (32 - shift)); 
+ 
+      /* Read the second input into Xn2; Xn is now x[n-1] and Xn1 is x[n-2] */ 
+      Xn2 = *pIn++; 
+ 
+      /* The value is shifted to the MSB to perform 32x64 multiplication */ 
+      Xn2 = Xn2 << 32; 
+ 
+      /* acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */ 
+ 
+      /* acc =  b0 * x[n] */ 
+      acc = mult32x64(Xn2, b0); 
+      /* acc +=  b1 * x[n-1] */ 
+      acc += mult32x64(Xn, b1); 
+      /* acc +=  b[2] * x[n-2] */ 
+      acc += mult32x64(Xn1, b2); 
+      /* acc +=  a1 * y[n-1] */ 
+      acc += mult32x64(Yn2, a1); 
+      /* acc +=  a2 * y[n-2] */ 
+      acc += mult32x64(Yn1, a2); 
+ 
+      /* Convert to 1.63; Yn1 is overwritten, so Yn1 is y[n-1] and Yn2 is y[n-2] again */ 
+      Yn1 = acc << shift; 
+ 
+      /* The result is converted to 1.31 */ 
+      /* Store the output in the destination buffer. */ 
+      *pOut++ = (q31_t) (acc >> (32 - shift)); 
+ 
+      /* Read the third input into Xn1; Xn2 is now x[n-1] and Xn is x[n-2] */ 
+      Xn1 = *pIn++; 
+ 
+      /* The value is shifted to the MSB to perform 32x64 multiplication */ 
+      Xn1 = Xn1 << 32; 
+ 
+      /* acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */ 
+      /* acc =  b0 * x[n] */ 
+      acc = mult32x64(Xn1, b0); 
+      /* acc +=  b1 * x[n-1] */ 
+      acc += mult32x64(Xn2, b1); 
+      /* acc +=  b[2] * x[n-2] */ 
+      acc += mult32x64(Xn, b2); 
+      /* acc +=  a1 * y[n-1] */ 
+      acc += mult32x64(Yn1, a1); 
+      /* acc +=  a2 * y[n-2] */ 
+      acc += mult32x64(Yn2, a2); 
+ 
+      /* Convert to 1.63; Yn2 is overwritten, so Yn2 now holds y[n-1] and Yn1 holds y[n-2] */ 
+      Yn2 = acc << shift; 
+ 
+      /* Store the output in the destination buffer in 1.31 format. */ 
+      *pOut++ = (q31_t) (acc >> (32 - shift)); 
+ 
+      /* Read the fourth input into Xn; Xn1 is now x[n-1] and Xn2 is x[n-2] */ 
+      Xn = *pIn++; 
+ 
+      /* The value is shifted to the MSB to perform 32x64 multiplication */ 
+      Xn = Xn << 32; 
+ 
+      /* acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */ 
+      /* acc =  b0 * x[n] */ 
+      acc = mult32x64(Xn, b0); 
+      /* acc +=  b1 * x[n-1] */ 
+      acc += mult32x64(Xn1, b1); 
+      /* acc +=  b[2] * x[n-2] */ 
+      acc += mult32x64(Xn2, b2); 
+      /* acc +=  a1 * y[n-1] */ 
+      acc += mult32x64(Yn2, a1); 
+      /* acc +=  a2 * y[n-2] */ 
+      acc += mult32x64(Yn1, a2); 
+ 
+      /* Convert to 1.63; Yn1 is overwritten, so Yn1 is y[n-1] and Yn2 is y[n-2] again */ 
+      Yn1 = acc << shift; 
+ 
+      /* After four samples the output history is already back in its usual order: */ 
+      /* Yn1 holds the newest output and Yn2 the one before it.  */ 
+      /* Only the input history still has to be realigned for the next pass: */ 
+      /* Xn2 = Xn1    (third input of this group)  */ 
+      /* Xn1 = Xn     (fourth input of this group) */ 
+      /* acc itself is still needed below to emit the fourth output. */ 
+      Xn2 = Xn1; 
+      Xn1 = Xn; 
+ 
+      /* Store the output in the destination buffer in 1.31 format. */ 
+      *pOut++ = (q31_t) (acc >> (32 - shift)); 
+ 
+      /* decrement the loop counter */ 
+      sample--; 
+    } 
+ 
+    /* If the blockSize is not a multiple of 4, compute any remaining output samples here.  
+     ** No loop unrolling is used. */ 
+    sample = (blockSize & 0x3u); 
+ 
+    while(sample > 0u) 
+    { 
+      /* Read the input */ 
+      Xn = *pIn++; 
+ 
+      /* The value is shifted to the MSB to perform 32x64 multiplication */ 
+      Xn = Xn << 32; 
+ 
+      /* acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */ 
+      /* acc =  b0 * x[n] */ 
+      acc = mult32x64(Xn, b0); 
+      /* acc +=  b1 * x[n-1] */ 
+      acc += mult32x64(Xn1, b1); 
+      /* acc +=  b[2] * x[n-2] */ 
+      acc += mult32x64(Xn2, b2); 
+      /* acc +=  a1 * y[n-1] */ 
+      acc += mult32x64(Yn1, a1); 
+      /* acc +=  a2 * y[n-2] */ 
+      acc += mult32x64(Yn2, a2); 
+ 
+      /* Every time after the output is computed state should be updated. */ 
+      /* The states should be updated as:  */ 
+      /* Xn2 = Xn1    */ 
+      /* Xn1 = Xn     */ 
+      /* Yn2 = Yn1    */ 
+      /* Yn1 = acc shifted left by shift (1.63 format) */ 
+      Xn2 = Xn1; 
+      Xn1 = Xn; 
+      Yn2 = Yn1; 
+      Yn1 = acc << shift; 
+ 
+      /* Store the output in the destination buffer in 1.31 format. */ 
+      *pOut++ = (q31_t) (acc >> (32 - shift)); 
+ 
+      /* decrement the loop counter */ 
+      sample--; 
+    } 
+ 
+    /*  The output of this stage, already in pDst, is the input of the next stage. */ 
+    pIn = pDst; 
+ 
+    /* Reset to destination buffer working pointer; the next stage overwrites pDst in place */ 
+    pOut = pDst; 
+ 
+    /*  Store the updated state variables back into the pState array and advance to the next stage */ 
+    *pState++ = Xn1; 
+    *pState++ = Xn2; 
+    *pState++ = Yn1; 
+    *pState++ = Yn2; 
+ 
+  } while(--stage); 
+} 
+ 
+  /**  
+   * @} end of BiquadCascadeDF1_32x64 group  
+   */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FilteringFunctions/arm_biquad_cascade_df1_f32.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,339 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:	    arm_biquad_cascade_df1_f32.c  
+*  
+* Description:	Processing function for the  
+*               floating-point Biquad cascade DirectFormI(DF1) filter.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.5  2010/04/26   
+* 	 incorporated review comments and updated with latest CMSIS layer  
+*  
+* Version 0.0.3  2010/03/10   
+*    Initial version  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupFilters  
+ */ 
+ 
+/**  
+ * @defgroup BiquadCascadeDF1 Biquad Cascade IIR Filters Using Direct Form I Structure  
+ *  
+ * This set of functions implements arbitrary order recursive (IIR) filters.  
+ * The filters are implemented as a cascade of second order Biquad sections.  
+ * The functions support Q15, Q31 and floating-point data types. Fast versions of the Q15 and Q31 functions are also provided.
+ *  
+ * The functions operate on blocks of input and output data and each call to the function  
+ * processes <code>blockSize</code> samples through the filter.  
+ * <code>pSrc</code> points to the array of input data and  
+ * <code>pDst</code> points to the array of output data.  
+ * Both arrays contain <code>blockSize</code> values.  
+ *  
+ * \par Algorithm  
+ * Each Biquad stage implements a second order filter using the difference equation:  
+ * <pre>  
+ *     y[n] = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]  
+ * </pre>  
+ * A Direct Form I algorithm is used with 5 coefficients and 4 state variables per stage.  
+ * \image html Biquad.gif "Single Biquad filter stage"  
+ * Coefficients <code>b0, b1 and b2 </code> multiply the input signal <code>x[n]</code> and are referred to as the feedforward coefficients.  
+ * Coefficients <code>a1</code> and <code>a2</code> multiply the output signal <code>y[n]</code> and are referred to as the feedback coefficients.  
+ * Pay careful attention to the sign of the feedback coefficients.  
+ * Some design tools use the difference equation  
+ * <pre>  
+ *     y[n] = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] - a1 * y[n-1] - a2 * y[n-2]  
+ * </pre>  
+ * In this case the feedback coefficients <code>a1</code> and <code>a2</code> must be negated when used with the CMSIS DSP Library.  
+ *  
+ * \par  
+ * Higher order filters are realized as a cascade of second order sections.  
+ * <code>numStages</code> refers to the number of second order stages used.  
+ * For example, an 8th order filter would be realized with <code>numStages=4</code> second order stages.  
+ * \image html BiquadCascade.gif "8th order filter using a cascade of Biquad stages"  
+ * A 9th order filter would be realized with <code>numStages=5</code> second order stages with the coefficients for one of the stages configured as a first order filter (<code>b2=0</code> and <code>a2=0</code>).  
+ *  
+ * \par  
+ * The <code>pState</code> points to state variables array.  
+ * Each Biquad stage has 4 state variables <code>x[n-1], x[n-2], y[n-1],</code> and <code>y[n-2]</code>.  
+ * The state variables are arranged in the <code>pState</code> array as:  
+ * <pre>  
+ *     {x[n-1], x[n-2], y[n-1], y[n-2]}  
+ * </pre>  
+ *  
+ * \par  
+ * The 4 state variables for stage 1 are first, then the 4 state variables for stage 2, and so on.  
+ * The state array has a total length of <code>4*numStages</code> values.  
+ * The state variables are updated after each block of data is processed, the coefficients are untouched.  
+ *  
+ * \par Instance Structure  
+ * The coefficients and state variables for a filter are stored together in an instance data structure.  
+ * A separate instance structure must be defined for each filter.  
+ * Coefficient arrays may be shared among several instances while state variable arrays cannot be shared.  
+ * There are separate instance structure declarations for each of the 3 supported data types.  
+ *  
+ * \par Init Functions  
+ * There is also an associated initialization function for each data type.  
+ * The initialization function performs following operations:  
+ * - Sets the values of the internal structure fields.  
+ * - Zeros out the values in the state buffer.  
+ *  
+ * \par  
+ * Use of the initialization function is optional.  
+ * However, if the initialization function is used, then the instance structure cannot be placed into a const data section.  
+ * To place an instance structure into a const data section, the instance structure must be manually initialized.  
+ * Set the values in the state buffer to zeros before static initialization.  
+ * The code below statically initializes each of the 3 different data type filter instance structures  
+ * <pre>  
+ *     arm_biquad_casd_df1_inst_f32 S1 = {numStages, pState, pCoeffs};  
+ *     arm_biquad_casd_df1_inst_q15 S2 = {numStages, pState, pCoeffs, postShift};  
+ *     arm_biquad_casd_df1_inst_q31 S3 = {numStages, pState, pCoeffs, postShift};  
+ * </pre>  
+ * where <code>numStages</code> is the number of Biquad stages in the filter; <code>pState</code> is the address of the state buffer;  
+ * <code>pCoeffs</code> is the address of the coefficient buffer; <code>postShift</code> is the shift to be applied.
+ *  
+ * \par Fixed-Point Behavior  
+ * Care must be taken when using the Q15 and Q31 versions of the Biquad Cascade filter functions.  
+ * Following issues must be considered:  
+ * - Scaling of coefficients  
+ * - Filter gain  
+ * - Overflow and saturation  
+ *  
+ * \par  
+ * <b>Scaling of coefficients: </b>  
+ * Filter coefficients are represented as fractional values and  
+ * coefficients are restricted to lie in the range <code>[-1 +1)</code>.  
+ * The fixed-point functions have an additional scaling parameter <code>postShift</code>
+ * which allows the filter coefficients to exceed the range <code>[-1 +1)</code>.
+ * At the output of the filter's accumulator is a shift register which shifts the result by <code>postShift</code> bits.  
+ * \image html BiquadPostshift.gif "Fixed-point Biquad with shift by postShift bits after accumulator"  
+ * This essentially scales the filter coefficients by <code>2^postShift</code>.  
+ * For example, to realize the coefficients  
+ * <pre>  
+ *    {1.5, -0.8, 1.2, 1.6, -0.9}  
+ * </pre>  
+ * set the pCoeffs array to:  
+ * <pre>  
+ *    {0.75, -0.4, 0.6, 0.8, -0.45}  
+ * </pre>  
+ * and set <code>postShift=1</code>  
+ *  
+ * \par  
+ * <b>Filter gain: </b>  
+ * The frequency response of a Biquad filter is a function of its coefficients.  
+ * It is possible for the gain through the filter to exceed 1.0 meaning that the filter increases the amplitude of certain frequencies.  
+ * This means that an input signal with amplitude < 1.0 may result in an output > 1.0 and these are saturated or overflowed based on the implementation of the filter.  
+ * To avoid this behavior the filter needs to be scaled down such that its peak gain < 1.0 or the input signal must be scaled down so that the combination of input and filter are never overflowed.  
+ *  
+ * \par  
+ * <b>Overflow and saturation: </b>  
+ * For Q15 and Q31 versions, it is described separately as part of the function specific documentation below.  
+ */ 
+ 
+/**  
+ * @addtogroup BiquadCascadeDF1  
+ * @{  
+ */ 
+ 
+/**  
+ * @param[in]  *S         points to an instance of the floating-point Biquad cascade structure.  
+ * @param[in]  *pSrc      points to the block of input data.  
+ * @param[out] *pDst      points to the block of output data.  
+ * @param[in]  blockSize  number of samples to process per call.  
+ * @return     none.  
+ *  
+ */ 
+ 
+void arm_biquad_cascade_df1_f32(
+  const arm_biquad_casd_df1_inst_f32 * S,
+  float32_t * pSrc,
+  float32_t * pDst,
+  uint32_t blockSize)
+{
+  float32_t *src = pSrc;                 /* read cursor: pSrc for stage 1, pDst afterwards */
+  float32_t *dst;                        /* write cursor, rewound to pDst for every stage  */
+  float32_t *state = S->pState;          /* {x[n-1], x[n-2], y[n-1], y[n-2]} of the stage  */
+  float32_t *coeffs = S->pCoeffs;        /* {b0, b1, b2, a1, a2} of the stage              */
+  float32_t b0, b1, b2, a1, a2;          /* coefficients of the current stage              */
+  float32_t x0, x1, x2;                  /* input history (roles rotate while unrolled)    */
+  float32_t y0, y1, y2;                  /* output history (roles rotate while unrolled)   */
+  uint32_t count;                        /* sample loop counter                            */
+  uint32_t stagesLeft = S->numStages;    /* stage loop counter                             */
+
+  do
+  {
+    /* Every stage writes its output from the start of pDst. */
+    dst = pDst;
+
+    /* Fetch the five coefficients, then step to the next stage's set. */
+    b0 = coeffs[0];
+    b1 = coeffs[1];
+    b2 = coeffs[2];
+    a1 = coeffs[3];
+    a2 = coeffs[4];
+    coeffs += 5;
+
+    /* Fetch the history this stage stored at the end of the previous block. */
+    x1 = state[0];
+    x2 = state[1];
+    y1 = state[2];
+    y2 = state[3];
+
+    /* Main loop, four outputs per pass.  Every output is
+     *
+     *    y[n] = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
+     *
+     * Rather than shifting the history after each sample, each step
+     * overwrites the variable holding the oldest value, so the variable
+     * that plays x[n], x[n-1], x[n-2] (and y[n-1], y[n-2]) changes from
+     * one step to the next. */
+    for (count = blockSize >> 2u; count > 0u; count--)
+    {
+      /* Step 1: newest input in x0, result replaces y2. */
+      x0 = *src++;
+      y2 = (b0 * x0) + (b1 * x1) + (b2 * x2) + (a1 * y1) + (a2 * y2);
+      *dst++ = y2;
+
+      /* Step 2: newest input in x2, result replaces y1. */
+      x2 = *src++;
+      y1 = (b0 * x2) + (b1 * x0) + (b2 * x1) + (a1 * y2) + (a2 * y1);
+      *dst++ = y1;
+
+      /* Step 3: newest input in x1, result replaces y2. */
+      x1 = *src++;
+      y2 = (b0 * x1) + (b1 * x2) + (b2 * x0) + (a1 * y1) + (a2 * y2);
+      *dst++ = y2;
+
+      /* Step 4: newest input in x0, result replaces y1. */
+      x0 = *src++;
+      y1 = (b0 * x0) + (b1 * x1) + (b2 * x2) + (a1 * y2) + (a2 * y1);
+      *dst++ = y1;
+
+      /* Realign the input history so x1 = x[n-1] and x2 = x[n-2] again.
+       * y1 / y2 already hold the newest and second newest outputs. */
+      x2 = x1;
+      x1 = x0;
+    }
+
+    /* Leftover samples (blockSize modulo 4), one per pass. */
+    for (count = blockSize & 0x3u; count > 0u; count--)
+    {
+      x0 = *src++;
+
+      y0 = (b0 * x0) + (b1 * x1) + (b2 * x2) + (a1 * y1) + (a2 * y2);
+      *dst++ = y0;
+
+      /* Age the history by one sample. */
+      x2 = x1;
+      x1 = x0;
+      y2 = y1;
+      y1 = y0;
+    }
+
+    /* Save the history for the next call and move on to the next
+     * stage's four state values. */
+    state[0] = x1;
+    state[1] = x2;
+    state[2] = y1;
+    state[3] = y2;
+    state += 4;
+
+    /* Only the first stage reads pSrc; all later stages filter the
+     * contents of pDst in place. */
+    src = pDst;
+
+  } while(--stagesLeft > 0u);
+
+}
+ 
+ 
+  /**  
+   * @} end of BiquadCascadeDF1 group  
+   */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FilteringFunctions/arm_biquad_cascade_df1_fast_q15.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,228 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:	    arm_biquad_cascade_df1_fast_q15.c  
+*  
+* Description:	Fast processing function for the  
+*				Q15 Biquad cascade filter.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.9  2010/08/16   
+*    Initial version  
+*  
+*  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupFilters  
+ */ 
+ 
+/**  
+ * @addtogroup BiquadCascadeDF1  
+ * @{  
+ */ 
+ 
+/**  
+ * @details  
+ * @param[in]  *S points to an instance of the Q15 Biquad cascade structure.  
+ * @param[in]  *pSrc points to the block of input data.  
+ * @param[out] *pDst points to the block of output data.  
+ * @param[in]  blockSize number of samples to process per call.  
+ * @return none.  
+ *  
+ * <b>Scaling and Overflow Behavior:</b>  
+ * \par  
+ * This fast version uses a 32-bit accumulator with 2.30 format.  
+ * The accumulator maintains full precision of the intermediate multiplication results but provides only a single guard bit.  
+ * Thus, if the accumulator result overflows it wraps around and distorts the result.  
+ * In order to avoid overflows completely the input signal must be scaled down by two bits and lie in the range [-0.25 +0.25).  
+ * The 2.30 accumulator is then shifted by <code>postShift</code> bits and the result truncated to 1.15 format by discarding the low 16 bits.  
+ *  
+ * \par  
+ * Refer to the function <code>arm_biquad_cascade_df1_q15()</code> for a slower implementation of this function which uses 64-bit accumulation to avoid wrap around distortion.  Both the slow and the fast versions use the same instance structure.  
+ * Use the function <code>arm_biquad_cascade_df1_init_q15()</code> to initialize the filter structure.  
+ *  
+ */ 
+ 
+void arm_biquad_cascade_df1_fast_q15( 
+  const arm_biquad_casd_df1_inst_q15 * S, 
+  q15_t * pSrc, 
+  q15_t * pDst, 
+  uint32_t blockSize) 
+{ 
+  q15_t *pIn = pSrc;                             /*  Read cursor: pSrc for stage 1, pDst afterwards */ 
+  q15_t *pOut = pDst;                            /*  Write cursor into pDst                         */ 
+  q31_t in;                                      /*  One or two input samples (see the loops below) */ 
+  q31_t out;                                     /*  b0 * x[n] partial sum, then the output sample  */ 
+  q15_t b0;                                      /*  b0 coefficient                                 */ 
+  q31_t b1, a1;                                  /*  Packed pairs {b1, b2} and {a1, a2}             */ 
+  q31_t state_in, state_out;                     /*  Packed input and output history                */ 
+  q31_t acc0;                                    /*  32-bit accumulator                             */ 
+  int32_t shift = (int32_t) (15 - S->postShift); /*  Right shift applied to the accumulator         */ 
+  q15_t *pState = S->pState;                     /*  State of the current stage                     */ 
+  q15_t *pCoeffs = S->pCoeffs;                   /*  Coefficients of the current stage              */ 
+  q31_t *pState_q31;                             /*  32-bit view of pState used to load the history */ 
+  uint32_t sample, stage = S->numStages;         /*  Sample and stage loop counters                 */ 
+  /* Runs the block through numStages biquad sections: the first stage reads */
+  /* pSrc, every later stage filters the contents of pDst in place.          */
+  /* Coefficients and history are handled as pairs of q15 values in a word.  */
+  do 
+  { 
+    /* View this stage's four q15 state values as two 32-bit words */ 
+    pState_q31 = (q31_t *) (pState); 
+ 
+    /* 32-bit read of the {b0, 0} pair; the q15_t b0 keeps one halfword -- NOTE(review): assumed to be b0 (little-endian), confirm */ 
+    b0 = *__SIMD32(pCoeffs)++; 
+ 
+    /* Read b1 and b2 together as one packed 32-bit word */ 
+    b1 = *__SIMD32(pCoeffs)++; 
+ 
+    /* Read a1 and a2 together as one packed 32-bit word */ 
+    a1 = *__SIMD32(pCoeffs)++; 
+ 
+    /* Load the packed input history {x[n-1], x[n-2]} and step to the next word */ 
+    state_in = (q31_t) (*pState_q31++); 
+ 
+    /* Load the packed output history {y[n-1], y[n-2]}; the pointer is not advanced */ 
+    state_out = (q31_t) (*pState_q31); 
+ 
+    /* The sample loop is unrolled by two. */ 
+    /*      Both outputs of a pass go through the single accumulator acc0:  
+     *  
+     *    acc0 =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]   (first sample)  
+     *    acc0 =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]   (second sample)  
+     */ 
+    sample = blockSize >> 1u; 
+ 
+    /* Main loop: two outputs per pass.  
+     ** The if-block after the loop handles the one sample left when blockSize is odd. */ 
+    while(sample > 0u) 
+    { 
+ 
+      /* Fetch two consecutive input samples with one 32-bit read */ 
+      in = *__SIMD32(pIn)++; 
+ 
+      /* out = b0 * x[n]; the (q15_t) cast selects the low halfword of in */ 
+      out = (q31_t) b0 * ((q15_t) in); 
+      /* acc0 = out + b1 * x[n-1] + b2 * x[n-2] */ 
+      acc0 = __SMLAD(b1, state_in, out); 
+      /* acc0 += a1 * y[n-1] + a2 * y[n-2] */ 
+      acc0 = __SMLAD(a1, state_out, acc0); 
+ 
+      /* Shift the accumulator right by (15 - postShift), then saturate the result to 16 bits */ 
+      out = __SSAT((acc0 >> shift), 16); 
+ 
+      /* Age the history by one sample without touching memory:            */ 
+      /*   x[n-2] <- x[n-1],  x[n-1] <- x[n]                               */ 
+      /*   y[n-2] <- y[n-1],  y[n-1] <- out                                */ 
+      /* Each __PKHBT call is expected to place the new value in the low   */ 
+      /* halfword and move the previous low halfword into the high one.    */ 
+      /* NOTE(review): this relies on the CMSIS definition of __PKHBT,     */ 
+      /* which is outside this chunk -- confirm.                           */ 
+      /* Afterwards state_in = {x[n], x[n-1]}, state_out = {y[n], y[n-1]}. */ 
+      state_in = __PKHBT(in, state_in, 16); 
+      state_out = __PKHBT(out, state_out, 16); 
+ 
+      /* out = b0 * x[n+1]; the second sample is taken from the upper halfword of in */ 
+      out = (q31_t) b0 *((q15_t)(in >> 16)); 
+      /* acc0 = out + b1 * x[n] + b2 * x[n-1], using the history updated above */ 
+      acc0 = __SMLAD(b1, state_in, out); 
+      /* acc0 += a1 * y[n] + a2 * y[n-1] */ 
+      acc0 = __SMLAD(a1, state_out, acc0); 
+ 
+      /* Shift the accumulator right by (15 - postShift), then saturate the result to 16 bits */ 
+      out = __SSAT((acc0 >> shift), 16); 
+ 
+      /* Write both outputs with one 32-bit store: first output from state_out, second from out */ 
+      *__SIMD32(pOut)++ = __PKHBT(state_out, out, 16); 
+ 
+      /* Age the history once more, now for the second sample of the pair: */ 
+      /*   x[n-2] <- x[n-1],  x[n-1] <- second input (in >> 16)            */ 
+      /*   y[n-2] <- y[n-1],  y[n-1] <- out                                */ 
+      /* Same packing scheme as after the first sample.                    */ 
+      /* NOTE(review): as above, the halfword placement depends on the     */ 
+      /* __PKHBT definition -- confirm.                                    */ 
+      /* The packed words then hold the two most recent inputs and the     */ 
+      /* two most recent outputs, ready for the next pass.                 */ 
+      state_in = __PKHBT(in >> 16, state_in, 16); 
+      state_out = __PKHBT(out, state_out, 16); 
+ 
+      /* One pair of samples done */ 
+      sample--; 
+ 
+    } 
+ 
+    /* When blockSize is odd one sample is still pending; it is processed here  
+     ** with the same multiply-accumulate sequence as in the loop above. */ 
+ 
+    if((blockSize & 0x1u) != 0u) 
+    { 
+      /* Fetch a single q15 sample (widened to q31_t by the assignment) */ 
+      in = *pIn++; 
+ 
+      /* out = b0 * x[n] */ 
+      out = (q31_t) in *b0; 
+      /* acc0 = out + b1 * x[n-1] + b2 * x[n-2] */ 
+      acc0 = __SMLAD(b1, state_in, out); 
+      /* acc0 += a1 * y[n-1] + a2 * y[n-2] */ 
+      acc0 = __SMLAD(a1, state_out, acc0); 
+ 
+      /* Shift the accumulator right by (15 - postShift), then saturate the result to 16 bits */ 
+      out = __SSAT((acc0 >> shift), 16); 
+ 
+      /* Store the single output sample */ 
+      *pOut++ = (q15_t) out; 
+ 
+      /* Age the history by one sample so the stored state is current:     */ 
+      /*   x[n-2] <- x[n-1],  x[n-1] <- x[n]                               */ 
+      /*   y[n-2] <- y[n-1],  y[n-1] <- out                                */ 
+      /* Same packing scheme as in the unrolled loop.                      */ 
+      /* NOTE(review): the halfword placement depends on the __PKHBT       */ 
+      /* definition, which is outside this chunk -- confirm.               */ 
+      /* After this the packed words are what gets written back to the     */ 
+      /* state buffer below.                                               */ 
+      state_in = __PKHBT(in, state_in, 16); 
+      state_out = __PKHBT(out, state_out, 16); 
+ 
+    } 
+ 
+    /*  Only the first stage reads pSrc.  */ 
+    /*  Every following stage takes its input from pDst and overwrites it in place.  */ 
+    pIn = pDst; 
+ 
+    /* Rewind the write cursor for the next stage */ 
+    pOut = pDst; 
+ 
+    /*  Write both packed history words back; __PKHBT(w, w >> 16, 16) looks like an identity repack of w -- NOTE(review): confirm */ 
+    *__SIMD32(pState)++ = __PKHBT(state_in, (state_in >> 16), 16); 
+    *__SIMD32(pState)++ = __PKHBT(state_out, (state_out >> 16), 16); 
+ 
+    /* One stage done */ 
+    stage--; 
+ 
+  } while(stage > 0u); 
+} 
+ 
+ 
+/**  
+ * @} end of BiquadCascadeDF1 group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FilteringFunctions/arm_biquad_cascade_df1_fast_q31.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,268 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:	    arm_biquad_cascade_df1_fast_q31.c  
+*  
+* Description:	Processing function for the  
+*				Q31 Fast Biquad cascade DirectFormI(DF1) filter.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.9  2010/08/27   
+*    Initial version  
+*  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupFilters  
+ */ 
+ 
+/**  
+ * @addtogroup BiquadCascadeDF1  
+ * @{  
+ */ 
+ 
+/*
+ * Multiply-accumulate step of the fast Q31 biquad: returns acc plus the
+ * upper 32 bits of the 64-bit product coeff * sample.
+ *
+ * The sum is formed in unsigned 64-bit arithmetic.  This gives the same
+ * bits as the former "((q63_t) acc << 32) + product" expression, but a
+ * negative accumulator is never left-shifted as a signed value and a sum
+ * that does not fit wraps around instead of overflowing a signed type
+ * (both are undefined behaviour in C).
+ */
+static q31_t arm_biquad_fast_q31_mac(
+  q31_t acc,
+  q31_t coeff,
+  q31_t sample)
+{
+  uint64_t sum = ((uint64_t) acc << 32) + (uint64_t) ((q63_t) coeff * sample);
+
+  return (q31_t) ((q63_t) sum >> 32);
+}
+
+/*
+ * Shifts a Q31 value left by shift bits and drops the bits that leave the
+ * 32-bit word.  The shift is carried out on the unsigned representation
+ * because left-shifting a negative signed value is undefined in C.
+ */
+static q31_t arm_biquad_fast_q31_shl(
+  q31_t value,
+  int32_t shift)
+{
+  return (q31_t) ((uint32_t) value << shift);
+}
+
+/**
+ * @details
+ * Filters one block of Q31 samples through a cascade of Direct Form I Biquad stages,
+ * trading precision and overflow protection for speed.
+ *
+ * @param[in]  *S        points to an instance of the Q31 Biquad cascade structure.
+ * @param[in]  *pSrc     points to the block of input data.
+ * @param[out] *pDst     points to the block of output data.
+ * @param[in]  blockSize number of samples to process per call.
+ * @return 	   none.
+ *
+ * <b>Scaling and Overflow Behavior:</b>
+ * \par
+ * This function is optimized for speed at the expense of fixed-point precision and overflow protection.
+ * The result of each 1.31 x 1.31 multiplication is truncated to 2.30 format.
+ * These intermediate results are added to a 2.30 accumulator.
+ * Finally, the accumulator is shifted left by <code>postShift + 1</code> bits to give the 1.31 result.
+ * No saturation is applied in this step: bits shifted out of the 32-bit word are lost, so an out-of-range result wraps around.
+ * The fast version has the same overflow behavior as the standard version and provides less precision since it discards the low 32 bits of each multiplication result.
+ * In order to avoid overflows completely the input signal must be scaled down by two bits and lie in the range [-0.25 +0.25). Use the initialization function
+ * arm_biquad_cascade_df1_init_q31() to initialize filter structure.
+ *
+ * \par
+ * The stage counter is tested only after a stage has been processed, so the instance must describe at least one stage.
+ *
+ * \par
+ * Refer to the function <code>arm_biquad_cascade_df1_q31()</code> for a slower implementation of this function which uses 64-bit accumulation to provide higher precision.  Both the slow and the fast versions use the same instance structure.
+ * Use the function <code>arm_biquad_cascade_df1_init_q31()</code> to initialize the filter structure.
+ */
+
+void arm_biquad_cascade_df1_fast_q31(
+  const arm_biquad_casd_df1_inst_q31 * S,
+  q31_t * pSrc,
+  q31_t * pDst,
+  uint32_t blockSize)
+{
+  q31_t *pIn = pSrc;                             /*  read cursor: pSrc for stage 1, pDst afterwards */
+  q31_t *pOut = pDst;                            /*  write cursor into pDst                         */
+  q31_t *pState = S->pState;                     /*  state of the current stage                     */
+  q31_t *pCoeffs = S->pCoeffs;                   /*  coefficients of the current stage              */
+  q31_t acc;                                     /*  32-bit accumulator                             */
+  q31_t Xn1, Xn2, Yn1, Yn2;                      /*  Filter state variables                         */
+  q31_t b0, b1, b2, a1, a2;                      /*  Filter coefficients                            */
+  q31_t Xn;                                      /*  temporary input                                */
+  int32_t shift = (int32_t) S->postShift + 1;    /*  left shift applied to the accumulator          */
+  uint32_t sample, stage = S->numStages;         /*  loop counters                                  */
+
+
+  do
+  {
+    /* Reading the coefficients */
+    b0 = *pCoeffs++;
+    b1 = *pCoeffs++;
+    b2 = *pCoeffs++;
+    a1 = *pCoeffs++;
+    a2 = *pCoeffs++;
+
+    /* Reading the state values */
+    Xn1 = pState[0];
+    Xn2 = pState[1];
+    Yn1 = pState[2];
+    Yn2 = pState[3];
+
+    /* The sample loop is unrolled by four.  Every output is
+     *
+     *    acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
+     *
+     * Instead of shifting the history after each sample, each step
+     * overwrites the variable holding the oldest value, so the roles of
+     * Xn, Xn1, Xn2 and of Yn1, Yn2 rotate from one step to the next.
+     */
+
+    sample = blockSize >> 2u;
+
+    /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
+     ** a second loop below computes the remaining 1 to 3 samples. */
+    while(sample > 0u)
+    {
+      /* Read the first input */
+      Xn = *pIn++;
+
+      /* acc =  b0 * x[n]  (upper 32 bits of the product) */
+      acc = (q31_t) (((q63_t) b0 * Xn) >> 32);
+      /* acc +=  b1 * x[n-1] */
+      acc = arm_biquad_fast_q31_mac(acc, b1, Xn1);
+      /* acc +=  b2 * x[n-2] */
+      acc = arm_biquad_fast_q31_mac(acc, b2, Xn2);
+      /* acc +=  a1 * y[n-1] */
+      acc = arm_biquad_fast_q31_mac(acc, a1, Yn1);
+      /* acc +=  a2 * y[n-2] */
+      acc = arm_biquad_fast_q31_mac(acc, a2, Yn2);
+
+      /* The result is converted to 1.31; Yn2 (the oldest output) is reused */
+      Yn2 = arm_biquad_fast_q31_shl(acc, shift);
+
+      /* Store the output in the destination buffer. */
+      *pOut++ = Yn2;
+
+      /* Read the second input; Xn2 (the oldest input) is reused */
+      Xn2 = *pIn++;
+
+      /* acc =  b0 * x[n] */
+      acc = (q31_t) (((q63_t) b0 * (Xn2)) >> 32);
+      /* acc +=  b1 * x[n-1] */
+      acc = arm_biquad_fast_q31_mac(acc, b1, Xn);
+      /* acc +=  b2 * x[n-2] */
+      acc = arm_biquad_fast_q31_mac(acc, b2, Xn1);
+      /* acc +=  a1 * y[n-1] */
+      acc = arm_biquad_fast_q31_mac(acc, a1, Yn2);
+      /* acc +=  a2 * y[n-2] */
+      acc = arm_biquad_fast_q31_mac(acc, a2, Yn1);
+
+      /* The result is converted to 1.31; Yn1 is reused */
+      Yn1 = arm_biquad_fast_q31_shl(acc, shift);
+
+      /* Store the output in the destination buffer. */
+      *pOut++ = Yn1;
+
+      /* Read the third input; Xn1 is reused */
+      Xn1 = *pIn++;
+
+      /* acc =  b0 * x[n] */
+      acc = (q31_t) (((q63_t) b0 * (Xn1)) >> 32);
+      /* acc +=  b1 * x[n-1] */
+      acc = arm_biquad_fast_q31_mac(acc, b1, Xn2);
+      /* acc +=  b2 * x[n-2] */
+      acc = arm_biquad_fast_q31_mac(acc, b2, Xn);
+      /* acc +=  a1 * y[n-1] */
+      acc = arm_biquad_fast_q31_mac(acc, a1, Yn1);
+      /* acc +=  a2 * y[n-2] */
+      acc = arm_biquad_fast_q31_mac(acc, a2, Yn2);
+
+      /* The result is converted to 1.31; Yn2 is reused */
+      Yn2 = arm_biquad_fast_q31_shl(acc, shift);
+
+      /* Store the output in the destination buffer. */
+      *pOut++ = Yn2;
+
+      /* Read the fourth input; Xn is reused */
+      Xn = *pIn++;
+
+      /* acc =  b0 * x[n] */
+      acc = (q31_t) (((q63_t) b0 * (Xn)) >> 32);
+      /* acc +=  b1 * x[n-1] */
+      acc = arm_biquad_fast_q31_mac(acc, b1, Xn1);
+      /* acc +=  b2 * x[n-2] */
+      acc = arm_biquad_fast_q31_mac(acc, b2, Xn2);
+      /* acc +=  a1 * y[n-1] */
+      acc = arm_biquad_fast_q31_mac(acc, a1, Yn2);
+      /* acc +=  a2 * y[n-2] */
+      acc = arm_biquad_fast_q31_mac(acc, a2, Yn1);
+
+      /* The result is converted to 1.31; Yn1 is reused */
+      Yn1 = arm_biquad_fast_q31_shl(acc, shift);
+
+      /* Realign the input history so that Xn1 = x[n-1] and Xn2 = x[n-2]
+       * for the next pass.  Yn1 / Yn2 already hold the newest and the
+       * second newest output. */
+      Xn2 = Xn1;
+      Xn1 = Xn;
+
+      /* Store the output in the destination buffer. */
+      *pOut++ = Yn1;
+
+      /* decrement the loop counter */
+      sample--;
+    }
+
+    /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
+     ** No loop unrolling is used. */
+    sample = (blockSize & 0x3u);
+
+    while(sample > 0u)
+    {
+      /* Read the input */
+      Xn = *pIn++;
+
+      /* acc =  b0 * x[n] */
+      acc = (q31_t) (((q63_t) b0 * (Xn)) >> 32);
+      /* acc +=  b1 * x[n-1] */
+      acc = arm_biquad_fast_q31_mac(acc, b1, Xn1);
+      /* acc +=  b2 * x[n-2] */
+      acc = arm_biquad_fast_q31_mac(acc, b2, Xn2);
+      /* acc +=  a1 * y[n-1] */
+      acc = arm_biquad_fast_q31_mac(acc, a1, Yn1);
+      /* acc +=  a2 * y[n-2] */
+      acc = arm_biquad_fast_q31_mac(acc, a2, Yn2);
+      /* The result is converted to 1.31  */
+      acc = arm_biquad_fast_q31_shl(acc, shift);
+
+      /* Age the history by one sample: */
+      /* Xn2 = Xn1    */
+      /* Xn1 = Xn     */
+      /* Yn2 = Yn1    */
+      /* Yn1 = acc    */
+      Xn2 = Xn1;
+      Xn1 = Xn;
+      Yn2 = Yn1;
+      Yn1 = acc;
+
+      /* Store the output in the destination buffer. */
+      *pOut++ = acc;
+
+      /* decrement the loop counter */
+      sample--;
+    }
+
+    /*  The first stage goes from the input buffer to the output buffer. */
+    /*  Subsequent stages occur in-place in the output buffer */
+    pIn = pDst;
+
+    /* Reset to destination pointer */
+    pOut = pDst;
+
+    /*  Store the updated state variables back into the pState array */
+    *pState++ = Xn1;
+    *pState++ = Xn2;
+    *pState++ = Yn1;
+    *pState++ = Yn2;
+
+  } while(--stage);
+}
+ 
+/**  
+  * @} end of BiquadCascadeDF1 group  
+  */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FilteringFunctions/arm_biquad_cascade_df1_init_f32.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,101 @@
+/*-----------------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:        arm_biquad_cascade_df1_init_f32.c  
+*  
+* Description:  floating-point Biquad cascade DirectFormI(DF1) filter initialization function.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.5  2010/04/26   
+* 	 incorporated review comments and updated with latest CMSIS layer  
+*  
+* Version 0.0.3  2010/03/10   
+*    Initial version  
+* ---------------------------------------------------------------------------*/ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupFilters  
+ */ 
+ 
+/**  
+ * @addtogroup BiquadCascadeDF1  
+ * @{  
+ */ 
+ 
+/**  
+ * @details  
+ * @brief  Initialization function for the floating-point Biquad cascade filter.  
+ * @param[in,out] *S           points to an instance of the floating-point Biquad cascade structure.  
+ * @param[in]     numStages    number of 2nd order stages in the filter.  
+ * @param[in]     *pCoeffs     points to the filter coefficients array.  
+ * @param[in]     *pState      points to the state array.  
+ * @return        none  
+ *  
+ *  
+ * <b>Coefficient and State Ordering:</b>  
+ *  
+ * \par  
+ * The coefficients are stored in the array <code>pCoeffs</code> in the following order:  
+ * <pre>  
+ *     {b10, b11, b12, a11, a12, b20, b21, b22, a21, a22, ...}  
+ * </pre>  
+ *  
+ * \par  
+ * where <code>b1x</code> and <code>a1x</code> are the coefficients for the first stage,  
+ * <code>b2x</code> and <code>a2x</code> are the coefficients for the second stage,  
+ * and so on.  The <code>pCoeffs</code> array contains a total of <code>5*numStages</code> values.  
+ *  
+ * \par  
+ * The <code>pState</code> is a pointer to state array.  
+ * Each Biquad stage has 4 state variables <code>x[n-1], x[n-2], y[n-1],</code> and <code>y[n-2]</code>.  
+ * The state variables are arranged in the <code>pState</code> array as:  
+ * <pre>  
+ *     {x[n-1], x[n-2], y[n-1], y[n-2]}  
+ * </pre>  
+ * The 4 state variables for stage 1 are first, then the 4 state variables for stage 2, and so on.  
+ * The state array has a total length of <code>4*numStages</code> values.  
+ * The state variables are updated after each block of data is processed; the coefficients are untouched.  
+ *  
+ */ 
+ 
+void arm_biquad_cascade_df1_init_f32( 
+  arm_biquad_casd_df1_inst_f32 * S, 
+  uint8_t numStages, 
+  float32_t * pCoeffs, 
+  float32_t * pState) 
+{ 
+  /* Every stage keeps x[n-1], x[n-2], y[n-1], y[n-2]: four state values */ 
+  const uint32_t stateCount = 4u * (uint32_t) numStages; 
+ 
+  /* Bind the stage count and the coefficient table to the instance */ 
+  S->numStages = numStages; 
+  S->pCoeffs = pCoeffs; 
+ 
+  /* Zero the caller's state buffer so filtering starts with no history */ 
+  memset(pState, 0, stateCount * sizeof(*pState)); 
+  /* Bind the cleared state buffer to the instance */ 
+  S->pState = pState; 
+} 
+ 
+/**  
+ * @} end of BiquadCascadeDF1 group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FilteringFunctions/arm_biquad_cascade_df1_init_q15.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,103 @@
+/*-----------------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:        arm_biquad_cascade_df1_init_q15.c  
+*  
+* Description:  Q15 Biquad cascade DirectFormI(DF1) filter initialization function.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.5  2010/04/26   
+* 	 incorporated review comments and updated with latest CMSIS layer  
+*  
+* Version 0.0.3  2010/03/10   
+*    Initial version  
+* ---------------------------------------------------------------------------*/ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupFilters  
+ */ 
+ 
+/**  
+ * @addtogroup BiquadCascadeDF1  
+ * @{  
+ */ 
+ 
+/**  
+ * @details  
+ * @brief  Initialization function for the Q15 Biquad cascade filter.  
+ * @param[in,out] *S           points to an instance of the Q15 Biquad cascade structure.  
+ * @param[in]     numStages    number of 2nd order stages in the filter.  
+ * @param[in]     *pCoeffs     points to the filter coefficients.  
+ * @param[in]     *pState      points to the state buffer.  
+ * @param[in]     postShift    Shift to be applied to the accumulator result. Varies according to the coefficients format  
+ * @return        none  
+ *  
+ * <b>Coefficient and State Ordering:</b>  
+ *  
+ * \par  
+ * The coefficients are stored in the array <code>pCoeffs</code> in the following order:  
+ * <pre>  
+ *     {b10, 0, b11, b12, a11, a12, b20, 0, b21, b22, a21, a22, ...}  
+ * </pre>  
+ * where <code>b1x</code> and <code>a1x</code> are the coefficients for the first stage,  
+ * <code>b2x</code> and <code>a2x</code> are the coefficients for the second stage,  
+ * and so on.  The <code>pCoeffs</code> array contains a total of <code>6*numStages</code> values.  
+ * The zero coefficient that follows each stage's <code>b0</code> term (<code>b10</code>, <code>b20</code>, ...) facilitates use of 16-bit SIMD instructions on the Cortex-M4.  
+ *  
+ * \par  
+ * The state variables are stored in the array <code>pState</code>.  
+ * Each Biquad stage has 4 state variables <code>x[n-1], x[n-2], y[n-1],</code> and <code>y[n-2]</code>.  
+ * The state variables are arranged in the <code>pState</code> array as:  
+ * <pre>  
+ *     {x[n-1], x[n-2], y[n-1], y[n-2]}  
+ * </pre>  
+ * The 4 state variables for stage 1 are first, then the 4 state variables for stage 2, and so on.  
+ * The state array has a total length of <code>4*numStages</code> values.  
+ * The state variables are updated after each block of data is processed; the coefficients are untouched.  
+ */ 
+ 
+void arm_biquad_cascade_df1_init_q15( 
+  arm_biquad_casd_df1_inst_q15 * S, 
+  uint8_t numStages, 
+  q15_t * pCoeffs, 
+  q15_t * pState, 
+  int8_t postShift) 
+{ 
+  /* Every stage keeps x[n-1], x[n-2], y[n-1], y[n-2]: four state values */ 
+  const uint32_t stateCount = 4u * (uint32_t) numStages; 
+ 
+  /* Bind the stage count, output post shift and coefficient table */ 
+  S->numStages = numStages; 
+  S->postShift = postShift; 
+  S->pCoeffs = pCoeffs; 
+ 
+  /* Zero the caller's state buffer so filtering starts with no history; */ 
+  /* its length is always 4 * numStages elements */ 
+  memset(pState, 0, stateCount * sizeof(*pState)); 
+ 
+  /* Bind the cleared state buffer to the instance */ 
+  S->pState = pState; 
+} 
+ 
+/**  
+ * @} end of BiquadCascadeDF1 group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FilteringFunctions/arm_biquad_cascade_df1_init_q31.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,103 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:	    arm_biquad_cascade_df1_init_q31.c  
+*  
+* Description:	Q31 Biquad cascade DirectFormI(DF1) filter initialization function.  
+*  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.5  2010/04/26   
+* 	 incorporated review comments and updated with latest CMSIS layer  
+*  
+* Version 0.0.3  2010/03/10   
+*    Initial version  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupFilters  
+ */ 
+ 
+/**  
+ * @addtogroup BiquadCascadeDF1  
+ * @{  
+ */ 
+ 
+/**  
+ * @details  
+ * @brief  Initialization function for the Q31 Biquad cascade filter.  
+ * @param[in,out] *S           points to an instance of the Q31 Biquad cascade structure.  
+ * @param[in]     numStages    number of 2nd order stages in the filter.  
+ * @param[in]     *pCoeffs     points to the filter coefficients buffer.  
+ * @param[in]     *pState      points to the state buffer.  
+ * @param[in]     postShift    Shift to be applied after the accumulator.  Varies according to the coefficients format  
+ * @return        none  
+ *  
+ * <b>Coefficient and State Ordering:</b>  
+ *  
+ * \par  
+ * The coefficients are stored in the array <code>pCoeffs</code> in the following order:  
+ * <pre>  
+ *     {b10, b11, b12, a11, a12, b20, b21, b22, a21, a22, ...}  
+ * </pre>  
+ * where <code>b1x</code> and <code>a1x</code> are the coefficients for the first stage,  
+ * <code>b2x</code> and <code>a2x</code> are the coefficients for the second stage,  
+ * and so on.  The <code>pCoeffs</code> array contains a total of <code>5*numStages</code> values.  
+ *  
+ * \par  
+ * The <code>pState</code> points to state variables array.  
+ * Each Biquad stage has 4 state variables <code>x[n-1], x[n-2], y[n-1],</code> and <code>y[n-2]</code>.  
+ * The state variables are arranged in the <code>pState</code> array as:  
+ * <pre>  
+ *     {x[n-1], x[n-2], y[n-1], y[n-2]}  
+ * </pre>  
+ * The 4 state variables for stage 1 are first, then the 4 state variables for stage 2, and so on.  
+ * The state array has a total length of <code>4*numStages</code> values.  
+ * The state variables are updated after each block of data is processed; the coefficients are untouched.  
+ */ 
+ 
+void arm_biquad_cascade_df1_init_q31( 
+  arm_biquad_casd_df1_inst_q31 * S, 
+  uint8_t numStages, 
+  q31_t * pCoeffs, 
+  q31_t * pState, 
+  int8_t postShift) 
+{ 
+  /* Every stage keeps x[n-1], x[n-2], y[n-1], y[n-2]: four state values */ 
+  const uint32_t stateCount = 4u * (uint32_t) numStages; 
+ 
+  /* Bind the stage count, output post shift and coefficient table */ 
+  S->numStages = numStages; 
+  S->postShift = postShift; 
+  S->pCoeffs = pCoeffs; 
+ 
+  /* Zero the caller's state buffer so filtering starts with no history; */ 
+  /* its length is always 4 * numStages elements */ 
+  memset(pState, 0, stateCount * sizeof(*pState)); 
+ 
+  /* Bind the cleared state buffer to the instance */ 
+  S->pState = pState; 
+} 
+ 
+/**  
+ * @} end of BiquadCascadeDF1 group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FilteringFunctions/arm_biquad_cascade_df1_q15.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,229 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:	    arm_biquad_cascade_df1_q15.c  
+*  
+* Description:	Processing function for the  
+*				Q15 Biquad cascade DirectFormI(DF1) filter.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.5  2010/04/26   
+* 	 incorporated review comments and updated with latest CMSIS layer  
+*  
+* Version 0.0.3  2010/03/10   
+*    Initial version  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupFilters  
+ */ 
+ 
+/**  
+ * @addtogroup BiquadCascadeDF1  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief Processing function for the Q15 Biquad cascade filter.  
+ * @param[in]  *S points to an instance of the Q15 Biquad cascade structure.  
+ * @param[in]  *pSrc points to the block of input data.  
+ * @param[out] *pDst points to the location where the output result is written.  
+ * @param[in]  blockSize number of samples to process per call.  
+ * @return none.  
+ *  
+ *  
+ * <b>Scaling and Overflow Behavior:</b>  
+ * \par  
+ * The function is implemented using a 64-bit internal accumulator.  
+ * Both coefficients and state variables are represented in 1.15 format and multiplications yield a 2.30 result.  
+ * The 2.30 intermediate results are accumulated in a 64-bit accumulator in 34.30 format.  
+ * There is no risk of internal overflow with this approach and the full precision of intermediate multiplications is preserved.  
+ * The accumulator is then shifted right by <code>15 - postShift</code> bits, discarding the low-order bits, to bring the result to 1.15 format.  
+ * Finally, the result is saturated to 1.15 format.  
+ *  
+ * \par  
+ * Refer to the function <code>arm_biquad_cascade_df1_fast_q15()</code> for a faster but less precise implementation of this filter.  
+ */ 
+ 
+void arm_biquad_cascade_df1_q15( 
+  const arm_biquad_casd_df1_inst_q15 * S, 
+  q15_t * pSrc, 
+  q15_t * pDst, 
+  uint32_t blockSize) 
+{ 
+  q15_t *pIn = pSrc;                             /*  Source pointer                               */ 
+  q15_t *pOut = pDst;                            /*  Destination pointer                          */ 
+  q31_t in;                                      /*  Temporary variable to hold input value       */ 
+  q31_t out;                                     /*  Temporary variable to hold output value      */ 
+  q15_t b0;                                      /*  Temporary variable to hold b0 value          */ 
+  q31_t b1, a1;                                  /*  Packed coefficient pairs {b1, b2} and {a1, a2} */ 
+  q31_t state_in, state_out;                     /*  Packed state pairs {x[n-1], x[n-2]} and {y[n-1], y[n-2]} */ 
+  q63_t acc;                                     /*  Accumulator                                  */ 
+  int32_t shift = (15 - (int32_t) S->postShift); /*  Right shift applied to acc before saturation */ 
+  q15_t *pState = S->pState;                     /*  State pointer                                */ 
+  q15_t *pCoeffs = S->pCoeffs;                   /*  Coefficient pointer                          */ 
+  q31_t *pState_q31;                             /*  32-bit state pointer for SIMD implementation */ 
+  uint32_t sample, stage = (uint32_t) S->numStages;     /*  Sample loop counter, stage loop counter      */ 
+  /* Run the cascade one stage at a time. The do/while body executes before  
+   * the stage counter is tested, so S->numStages is expected to be >= 1  
+   * (a count of 0 would wrap the unsigned counter after the first pass). */ 
+  do 
+  { 
+    /* View this stage's four q15 state values as two 32-bit words */ 
+    pState_q31 = (q31_t *) (pState); 
+ 
+    /* Read the b0 and 0 coefficients using SIMD; only b0 is kept (q15_t) */ 
+    b0 = *__SIMD32(pCoeffs)++; 
+ 
+    /* Read the b1 and b2 coefficients using SIMD */ 
+    b1 = *__SIMD32(pCoeffs)++; 
+ 
+    /* Read the a1 and a2 coefficients using SIMD */ 
+    a1 = *__SIMD32(pCoeffs)++; 
+ 
+    /* Read the input state values from the state buffer:  x[n-1], x[n-2] */ 
+    state_in = (q31_t) (*pState_q31++); 
+ 
+    /* Read the output state values from the state buffer:  y[n-1], y[n-2] */ 
+    state_out = (q31_t) (*pState_q31); 
+ 
+    /* Apply loop unrolling and compute 2 output values per iteration. */ 
+    /*      Each of the two outputs is computed with the same difference equation:  
+     *  
+     *    acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]  
+     *    (evaluated once for each of the two samples read together)  
+     */ 
+    sample = blockSize >> 1u; 
+ 
+    /* First part of the processing with loop unrolling.  Compute 2 outputs at a time.  
+     ** the if-block below computes the remaining 1 sample when blockSize is odd. */ 
+    while(sample > 0u) 
+    { 
+ 
+      /* Read two q15 input samples as one 32-bit word */ 
+      in = *__SIMD32(pIn)++; 
+ 
+      /* out =  b0 * x[n] + 0 * 0   (x[n] is taken from the low half of in) */ 
+      out = (q31_t) b0 * ((q15_t) in); 
+      /* acc =  b1 * x[n-1] +  b2 * x[n-2] + out */ 
+      acc = __SMLALD(b1, state_in, out); 
+      /* acc +=  a1 * y[n-1] +  a2 * y[n-2] */ 
+      acc = __SMLALD(a1, state_out, acc); 
+ 
+      /* Shift the accumulator right by (15 - postShift) and saturate the result to 16 bits */ 
+      out = __SSAT((acc >> shift), 16); 
+ 
+      /* Every time after the output is computed state should be updated. */ 
+      /* The states should be updated as:  */ 
+      /* Xn2 = Xn1    */ 
+      /* Xn1 = Xn     */ 
+      /* Yn2 = Yn1    */ 
+      /* Yn1 = out (the saturated result)   */ 
+      /* x[n-N], x[n-N-1] are packed together to make state_in of type q31 */ 
+      /* y[n-N], y[n-N-1] are packed together to make state_out of type q31 */ 
+      state_in = __PKHBT(in, state_in, 16); 
+      state_out = __PKHBT(out, state_out, 16); 
+ 
+      /* out =  b0 * x[n] + 0 * 0   (second sample, taken from the high half of in) */ 
+      out = (q31_t) b0 * ((q15_t) (in >> 16)); 
+      /* acc =  b1 * x[n-1] +  b2 * x[n-2] + out */ 
+      acc = __SMLALD(b1, state_in, out); 
+      /* acc +=  a1 * y[n-1] + a2 * y[n-2] */ 
+      acc = __SMLALD(a1, state_out, acc); 
+ 
+      /* Shift the accumulator right by (15 - postShift) and saturate the result to 16 bits */ 
+      out = __SSAT((acc >> shift), 16); 
+ 
+      /* Store both outputs at once: first output (held in state_out) and second output (out) */ 
+      *__SIMD32(pOut)++ = __PKHBT(state_out, out, 16); 
+ 
+      /* Every time after the output is computed state should be updated. */ 
+      /* The states should be updated as:  */ 
+      /* Xn2 = Xn1    */ 
+      /* Xn1 = Xn     */ 
+      /* Yn2 = Yn1    */ 
+      /* Yn1 = out (the saturated result)   */ 
+      /* x[n-N], x[n-N-1] are packed together to make state_in of type q31 */ 
+      /* y[n-N], y[n-N-1] are packed together to make state_out of type q31 */ 
+      state_in = __PKHBT(in >> 16, state_in, 16); 
+      state_out = __PKHBT(out, state_out, 16); 
+ 
+      /* Decrement the loop counter */ 
+      sample--; 
+ 
+    } 
+ 
+    /* If the blockSize is not a multiple of 2, compute the one remaining output sample here.  
+     ** No loop unrolling is used. */ 
+ 
+    if((blockSize & 0x1u) != 0u) 
+    { 
+      /* Read a single q15 input sample */ 
+      in = *pIn++; 
+ 
+      /* out =  b0 * x[n] + 0 * 0 */ 
+      out = (q31_t) in *b0; 
+      /* acc =  b1 * x[n-1] + b2 * x[n-2] + out */ 
+      acc = __SMLALD(b1, state_in, out); 
+      /* acc +=  a1 * y[n-1] + a2 * y[n-2] */ 
+      acc = __SMLALD(a1, state_out, acc); 
+ 
+      /* Shift the accumulator right by (15 - postShift) and saturate the result to 16 bits */ 
+      out = __SSAT((acc >> shift), 16); 
+ 
+      /* Store the output in the destination buffer. */ 
+      *pOut++ = (q15_t) out; 
+ 
+      /* Every time after the output is computed state should be updated. */ 
+      /* The states should be updated as:  */ 
+      /* Xn2 = Xn1    */ 
+      /* Xn1 = Xn     */ 
+      /* Yn2 = Yn1    */ 
+      /* Yn1 = out (the saturated result)   */ 
+      /* x[n-N], x[n-N-1] are packed together to make state_in of type q31 */ 
+      /* y[n-N], y[n-N-1] are packed together to make state_out of type q31 */ 
+      state_in = __PKHBT(in, state_in, 16); 
+      state_out = __PKHBT(out, state_out, 16); 
+ 
+    } 
+ 
+    /*  The first stage goes from the input buffer to the output buffer.  */ 
+    /*  Subsequent stages operate in-place on the output buffer  */ 
+    pIn = pDst; 
+ 
+    /* Reset the output pointer */ 
+    pOut = pDst; 
+ 
+    /*  Store the updated state variables back into the state array; pState advances to the next stage */ 
+    *__SIMD32(pState)++ = __PKHBT(state_in, (state_in >> 16), 16); 
+    *__SIMD32(pState)++ = __PKHBT(state_out, (state_out >> 16), 16); 
+ 
+    /* Decrement the stage loop counter */ 
+    stage--; 
+ 
+  } while(stage > 0u); 
+} 
+ 
+ 
+/**  
+ * @} end of BiquadCascadeDF1 group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FilteringFunctions/arm_biquad_cascade_df1_q31.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,274 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:	    arm_biquad_cascade_df1_q31.c  
+*  
+* Description:	Processing function for the  
+*				Q31 Biquad cascade filter  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.5  2010/04/26   
+* 	 incorporated review comments and updated with latest CMSIS layer  
+*  
+* Version 0.0.3  2010/03/10   
+*    Initial version  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupFilters  
+ */ 
+ 
+/**  
+ * @addtogroup BiquadCascadeDF1  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief Processing function for the Q31 Biquad cascade filter.  
+ * @param[in]  *S         points to an instance of the Q31 Biquad cascade structure.  
+ * @param[in]  *pSrc      points to the block of input data.  
+ * @param[out] *pDst      points to the block of output data.  
+ * @param[in]  blockSize  number of samples to process per call.  
+ * @return none.  
+ *  
+ * <b>Scaling and Overflow Behavior:</b>  
+ * \par  
+ * The function is implemented using an internal 64-bit accumulator.  
+ * The accumulator has a 2.62 format and maintains full precision of the intermediate multiplication results but provides only a single guard bit.  
+ * Thus, if the accumulator result overflows it wraps around rather than clipping.  
+ * In order to avoid overflows completely the input signal must be scaled down by 2 bits and lie in the range [-0.25 +0.25).  
+ * After all 5 multiply-accumulates are performed, the 2.62 accumulator is shifted by <code>postShift</code> bits and the result truncated to  
+ * 1.31 format by discarding the low 32 bits.  
+ *  
+ * \par  
+ * Refer to the function <code>arm_biquad_cascade_df1_fast_q31()</code> for a faster but less precise implementation of this filter.  
+ */ 
+ 
+void arm_biquad_cascade_df1_q31( 
+  const arm_biquad_casd_df1_inst_q31 * S, 
+  q31_t * pSrc, 
+  q31_t * pDst, 
+  uint32_t blockSize) 
+{ 
+  q31_t *pIn = pSrc;                             /*  input pointer initialization  */ 
+  q31_t *pOut = pDst;                            /*  output pointer initialization */ 
+  q31_t *pState = S->pState;                     /*  pState pointer initialization */ 
+  q31_t *pCoeffs = S->pCoeffs;                   /*  coeff pointer initialization  */ 
+  q63_t acc;                                     /*  accumulator                   */ 
+  q31_t Xn1, Xn2, Yn1, Yn2;                      /*  Filter state variables        */ 
+  q31_t b0, b1, b2, a1, a2;                      /*  Filter coefficients           */ 
+  q31_t Xn;                                      /*  temporary input               */ 
+  uint32_t shift = 32u - ((uint32_t) S->postShift + 1u);        /*  Right shift applied to the 64-bit accumulator to form each output */ 
+  uint32_t sample, stage = S->numStages;         /*  loop counters                     */ 
+  /* One pass per biquad stage. The body executes before --stage is tested, so 
+   * S->numStages is expected to be >= 1 (0 would wrap the unsigned counter). */ 
+  do 
+  { 
+    /* Reading the five coefficients of this stage; pCoeffs advances to the next stage */ 
+    b0 = *pCoeffs++; 
+    b1 = *pCoeffs++; 
+    b2 = *pCoeffs++; 
+    a1 = *pCoeffs++; 
+    a2 = *pCoeffs++; 
+ 
+    /* Reading the state values */ 
+    Xn1 = pState[0]; 
+    Xn2 = pState[1]; 
+    Yn1 = pState[2]; 
+    Yn2 = pState[3]; 
+ 
+    /* Apply loop unrolling and compute 4 output values per iteration. */ 
+    /*      The variable acc hold output values that are being computed:  
+     *  
+     *    acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]  
+     */ 
+ 
+    sample = blockSize >> 2u; 
+ 
+    /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.  
+     ** a second loop below computes the remaining 1 to 3 samples. */ 
+    while(sample > 0u) 
+    { 
+      /* Read the input */ 
+      Xn = *pIn++; 
+ 
+      /* acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */ 
+ 
+      /* acc =  b0 * x[n] */ 
+      acc = (q63_t) b0 *Xn; 
+      /* acc +=  b1 * x[n-1] */ 
+      acc += (q63_t) b1 *Xn1; 
+      /* acc +=  b[2] * x[n-2] */ 
+      acc += (q63_t) b2 *Xn2; 
+      /* acc +=  a1 * y[n-1] */ 
+      acc += (q63_t) a1 *Yn1; 
+      /* acc +=  a2 * y[n-2] */ 
+      acc += (q63_t) a2 *Yn2; 
+ 
+      /* The result is converted to 1.31 , Yn2 variable is reused */ 
+      Yn2 = (q31_t) (acc >> shift); 
+ 
+      /* Store the output in the destination buffer. */ 
+      *pOut++ = Yn2; 
+ 
+      /* Read the second input; the Xn2 variable is reused to hold it */ 
+      Xn2 = *pIn++; 
+ 
+      /* acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */ 
+ 
+      /* acc =  b0 * x[n] */ 
+      acc = (q63_t) b0 *Xn2; 
+      /* acc +=  b1 * x[n-1] */ 
+      acc += (q63_t) b1 *Xn; 
+      /* acc +=  b[2] * x[n-2] */ 
+      acc += (q63_t) b2 *Xn1; 
+      /* acc +=  a1 * y[n-1] */ 
+      acc += (q63_t) a1 *Yn2; 
+      /* acc +=  a2 * y[n-2] */ 
+      acc += (q63_t) a2 *Yn1; 
+ 
+ 
+      /* The result is converted to 1.31, Yn1 variable is reused  */ 
+      Yn1 = (q31_t) (acc >> shift); 
+ 
+      /* Store the output in the destination buffer. */ 
+      *pOut++ = Yn1; 
+ 
+      /* Read the third input; the Xn1 variable is reused to hold it */ 
+      Xn1 = *pIn++; 
+ 
+      /* acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */ 
+ 
+      /* acc =  b0 * x[n] */ 
+      acc = (q63_t) b0 *Xn1; 
+      /* acc +=  b1 * x[n-1] */ 
+      acc += (q63_t) b1 *Xn2; 
+      /* acc +=  b[2] * x[n-2] */ 
+      acc += (q63_t) b2 *Xn; 
+      /* acc +=  a1 * y[n-1] */ 
+      acc += (q63_t) a1 *Yn1; 
+      /* acc +=  a2 * y[n-2] */ 
+      acc += (q63_t) a2 *Yn2; 
+ 
+      /* The result is converted to 1.31, Yn2 variable is reused  */ 
+      Yn2 = (q31_t) (acc >> shift); 
+ 
+      /* Store the output in the destination buffer. */ 
+      *pOut++ = Yn2; 
+ 
+      /* Read the fourth input; the Xn variable is reused to hold it */ 
+      Xn = *pIn++; 
+ 
+      /* acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */ 
+ 
+      /* acc =  b0 * x[n] */ 
+      acc = (q63_t) b0 *Xn; 
+      /* acc +=  b1 * x[n-1] */ 
+      acc += (q63_t) b1 *Xn1; 
+      /* acc +=  b[2] * x[n-2] */ 
+      acc += (q63_t) b2 *Xn2; 
+      /* acc +=  a1 * y[n-1] */ 
+      acc += (q63_t) a1 *Yn2; 
+      /* acc +=  a2 * y[n-2] */ 
+      acc += (q63_t) a2 *Yn1; 
+ 
+      /* The result is converted to 1.31, Yn1 variable is reused  */ 
+      Yn1 = (q31_t) (acc >> shift); 
+ 
+      /* Update the state for the next group of four samples.          */ 
+      /* After the fourth output the inputs sit in rotated variables:  */ 
+      /* Xn holds x[n] and Xn1 holds x[n-1], so they are copied down.  */ 
+      /* Yn1 and Yn2 already hold the two most recent outputs          */ 
+      /* (Yn1 from the fourth sample, Yn2 from the third sample),      */ 
+      /* so the output state needs no copy here.                       */ 
+      Xn2 = Xn1; 
+      Xn1 = Xn; 
+ 
+      /* Store the output in the destination buffer. */ 
+      *pOut++ = Yn1; 
+ 
+      /* decrement the loop counter */ 
+      sample--; 
+    } 
+ 
+    /* If the blockSize is not a multiple of 4, compute any remaining output samples here.  
+     ** No loop unrolling is used. */ 
+    sample = (blockSize & 0x3u); 
+ 
+    while(sample > 0u) 
+    { 
+      /* Read the input */ 
+      Xn = *pIn++; 
+ 
+      /* acc =  b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */ 
+ 
+      /* acc =  b0 * x[n] */ 
+      acc = (q63_t) b0 *Xn; 
+      /* acc +=  b1 * x[n-1] */ 
+      acc += (q63_t) b1 *Xn1; 
+      /* acc +=  b[2] * x[n-2] */ 
+      acc += (q63_t) b2 *Xn2; 
+      /* acc +=  a1 * y[n-1] */ 
+      acc += (q63_t) a1 *Yn1; 
+      /* acc +=  a2 * y[n-2] */ 
+      acc += (q63_t) a2 *Yn2; 
+ 
+      /* The result is converted to 1.31  */ 
+      acc = acc >> shift; 
+ 
+      /* Every time after the output is computed state should be updated. */ 
+      /* The states should be updated as:  */ 
+      /* Xn2 = Xn1    */ 
+      /* Xn1 = Xn     */ 
+      /* Yn2 = Yn1    */ 
+      /* Yn1 = acc    */ 
+      Xn2 = Xn1; 
+      Xn1 = Xn; 
+      Yn2 = Yn1; 
+      Yn1 = (q31_t) acc; 
+ 
+      /* Store the output in the destination buffer. */ 
+      *pOut++ = (q31_t) acc; 
+ 
+      /* decrement the loop counter */ 
+      sample--; 
+    } 
+ 
+    /*  The first stage goes from the input buffer to the output buffer. */ 
+    /*  Subsequent stages occur in-place in the output buffer */ 
+    pIn = pDst; 
+ 
+    /* Reset to destination pointer */ 
+    pOut = pDst; 
+ 
+    /*  Store the updated state variables back into the pState array; pState advances to the next stage */ 
+    *pState++ = Xn1; 
+    *pState++ = Xn2; 
+    *pState++ = Yn1; 
+    *pState++ = Yn2; 
+ 
+  } while(--stage); 
+} 
+ 
+/**  
+  * @} end of BiquadCascadeDF1 group  
+  */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FilteringFunctions/arm_biquad_cascade_df2T_f32.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,294 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:	    arm_biquad_cascade_df2T_f32.c  
+*  
+* Description:  Processing function for the floating-point transposed  
+*               direct form II Biquad cascade filter. 
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupFilters  
+ */ 
+ 
+/**  
+ * @defgroup BiquadCascadeDF2T Biquad Cascade IIR Filters Using a Direct Form II Transposed Structure  
+ *  
+ * This set of functions implements arbitrary order recursive (IIR) filters using a transposed direct form II structure.  
+ * The filters are implemented as a cascade of second order Biquad sections.  
+ * These functions provide a slight memory savings as compared to the direct form I Biquad filter functions. 
+ * Only floating-point data is supported.  
+ *  
+ * These functions operate on blocks of input and output data and each call to the function  
+ * processes <code>blockSize</code> samples through the filter.  
+ * <code>pSrc</code> points to the array of input data and  
+ * <code>pDst</code> points to the array of output data.  
+ * Both arrays contain <code>blockSize</code> values.  
+ *  
+ * \par Algorithm  
+ * Each Biquad stage implements a second order filter using the difference equation:  
+ * <pre>  
+ *    y[n] = b0 * x[n] + d1  
+ *    d1 = b1 * x[n] + a1 * y[n] + d2  
+ *    d2 = b2 * x[n] + a2 * y[n]  
+ * </pre>  
+ * where d1 and d2 represent the two state values.  
+ *  
+ * \par  
+ * A Biquad filter using a transposed Direct Form II structure is shown below.  
+ * \image html BiquadDF2Transposed.gif "Single transposed Direct Form II Biquad"  
+ * Coefficients <code>b0, b1, and b2 </code> multiply the input signal <code>x[n]</code> and are referred to as the feedforward coefficients.  
+ * Coefficients <code>a1</code> and <code>a2</code> multiply the output signal <code>y[n]</code> and are referred to as the feedback coefficients.  
+ * Pay careful attention to the sign of the feedback coefficients.  
+ * Some design tools flip the sign of the feedback coefficients:  
+ * <pre>  
+ *    y[n] = b0 * x[n] + d1;  
+ *    d1 = b1 * x[n] - a1 * y[n] + d2;  
+ *    d2 = b2 * x[n] - a2 * y[n];  
+ * </pre>  
+ * In this case the feedback coefficients <code>a1</code> and <code>a2</code> must be negated when used with the CMSIS DSP Library.  
+ *  
+ * \par  
+ * Higher order filters are realized as a cascade of second order sections.  
+ * <code>numStages</code> refers to the number of second order stages used.  
+ * For example, an 8th order filter would be realized with <code>numStages=4</code> second order stages.  
+ * A 9th order filter would be realized with <code>numStages=5</code> second order stages with the  
+ * coefficients for one of the stages configured as a first order filter (<code>b2=0</code> and <code>a2=0</code>).  
+ *  
+ * \par  
+ * <code>pState</code> points to the state variable array.  
+ * Each Biquad stage has 2 state variables <code>d1</code> and <code>d2</code>.  
+ * The state variables are arranged in the <code>pState</code> array as:  
+ * <pre>  
+ *     {d11, d12, d21, d22, ...}  
+ * </pre>  
+ * where <code>d1x</code> refers to the state variables for the first Biquad and  
+ * <code>d2x</code> refers to the state variables for the second Biquad.  
+ * The state array has a total length of <code>2*numStages</code> values.  
+ * The state variables are updated after each block of data is processed; the coefficients are untouched.  
+ *  
+ * \par  
+ * The CMSIS library contains Biquad filters in both Direct Form I and transposed Direct Form II.  
+ * The advantage of the Direct Form I structure is that it is numerically more robust for fixed-point data types.  
+ * That is why the Direct Form I structure supports Q15 and Q31 data types.  
+ * The transposed Direct Form II structure, on the other hand, requires a wide dynamic range for the state variables <code>d1</code> and <code>d2</code>.  
+ * Because of this, the CMSIS library only has a floating-point version of the Direct Form II Biquad.  
+ * The advantage of the Direct Form II Biquad is that it requires half the number of state variables, 2 rather than 4, per Biquad stage.  
+ *  
+ * \par Instance Structure  
+ * The coefficients and state variables for a filter are stored together in an instance data structure.  
+ * A separate instance structure must be defined for each filter.  
+ * Coefficient arrays may be shared among several instances while state variable arrays cannot be shared.  
+ *  
+ * \par Init Functions  
+ * There is also an associated initialization function. 
+ * The initialization function performs the following operations:  
+ * - Sets the values of the internal structure fields.  
+ * - Zeros out the values in the state buffer.  
+ *  
+ * \par  
+ * Use of the initialization function is optional.  
+ * However, if the initialization function is used, then the instance structure cannot be placed into a const data section.  
+ * To place an instance structure into a const data section, the instance structure must be manually initialized.  
+ * Set the values in the state buffer to zeros before static initialization.  
+ * For example, to statically initialize the instance structure use  
+ * <pre>  
+ *     arm_biquad_cascade_df2T_instance_f32 S1 = {numStages, pState, pCoeffs};  
+ * </pre>  
+ * where <code>numStages</code> is the number of Biquad stages in the filter, <code>pState</code> is the address of the state buffer,  
+ * and <code>pCoeffs</code> is the address of the coefficient buffer.  
+ *  
+ */ 
+ 
+/**  
+ * @addtogroup BiquadCascadeDF2T  
+ * @{  
+ */ 
+ 
+/** 
+ * @brief Processing function for the floating-point transposed direct form II Biquad cascade filter. 
+ * @param[in]  *S        points to an instance of the filter data structure. 
+ * @param[in]  *pSrc     points to the block of input data. 
+ * @param[out] *pDst     points to the block of output data 
+ * @param[in]  blockSize number of samples to process. 
+ * @return none. 
+ */ 
+ 
+void arm_biquad_cascade_df2T_f32( 
+  const arm_biquad_cascade_df2T_instance_f32 * S, 
+  float32_t * pSrc, 
+  float32_t * pDst, 
+  uint32_t blockSize) 
+{ 
+ 
+  float32_t *pIn = pSrc;                         /*  input of the current stage (pSrc for stage 1, pDst afterwards) */ 
+  float32_t *pOut = pDst;                        /*  output write pointer, rewound to pDst for every stage */ 
+  float32_t *pState = S->pState;                 /*  walks the state array, 2 values per stage */ 
+  float32_t *pCoeffs = S->pCoeffs;               /*  walks the coefficient array, 5 values per stage */ 
+  float32_t acc0;                                /*  output sample y[n] of the current stage */ 
+  float32_t b0, b1, b2, a1, a2;                  /*  coefficients of the current stage */ 
+  float32_t Xn;                                  /*  input sample x[n] */ 
+  float32_t d1, d2;                              /*  state variables of the current stage */ 
+  uint32_t sample, stage = S->numStages;         /*  per-stage sample counter, remaining stages */ 
+ 
+  /* NOTE: the do/while body runs before stage is tested, so S->numStages is expected to be at least 1. */ 
+  do 
+  { 
+    /* Read the five coefficients of this stage in the order b0, b1, b2, a1, a2 */ 
+    b0 = *pCoeffs++; 
+    b1 = *pCoeffs++; 
+    b2 = *pCoeffs++; 
+    a1 = *pCoeffs++; 
+    a2 = *pCoeffs++; 
+ 
+    /* Load the two state values of this stage (pState is advanced when they are written back) */ 
+    d1 = pState[0]; 
+    d2 = pState[1]; 
+ 
+    /* The main loop is unrolled by 4: sample counts groups of four outputs. */ 
+    sample = blockSize >> 2u; 
+ 
+    /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.  
+     ** A second loop below computes the remaining 0 to 3 samples. */ 
+    while(sample > 0u) 
+    { 
+      /* Read the first input */ 
+      Xn = *pIn++; 
+ 
+      /* y[n] = b0 * x[n] + d1 */ 
+      acc0 = (b0 * Xn) + d1; 
+ 
+      /* Store the output sample in the destination buffer. */ 
+      *pOut++ = acc0; 
+ 
+      /* Update the state once the output is known; d1 must be computed before d2 is overwritten. */ 
+      /* d1 = b1 * x[n] + a1 * y[n] + d2 */ 
+      d1 = ((b1 * Xn) + (a1 * acc0)) + d2; 
+ 
+      /* d2 = b2 * x[n] + a2 * y[n] */ 
+      d2 = (b2 * Xn) + (a2 * acc0); 
+ 
+      /* Read the second input */ 
+      Xn = *pIn++; 
+ 
+      /* y[n] = b0 * x[n] + d1 */ 
+      acc0 = (b0 * Xn) + d1; 
+ 
+      /* Store the output sample in the destination buffer. */ 
+      *pOut++ = acc0; 
+ 
+      /* Update the state once the output is known; d1 must be computed before d2 is overwritten. */ 
+      /* d1 = b1 * x[n] + a1 * y[n] + d2 */ 
+      d1 = ((b1 * Xn) + (a1 * acc0)) + d2; 
+ 
+      /* d2 = b2 * x[n] + a2 * y[n] */ 
+      d2 = (b2 * Xn) + (a2 * acc0); 
+ 
+      /* Read the third input */ 
+      Xn = *pIn++; 
+ 
+      /* y[n] = b0 * x[n] + d1 */ 
+      acc0 = (b0 * Xn) + d1; 
+ 
+      /* Store the output sample in the destination buffer. */ 
+      *pOut++ = acc0; 
+ 
+      /* Update the state once the output is known; d1 must be computed before d2 is overwritten. */ 
+      /* d1 = b1 * x[n] + a1 * y[n] + d2 */ 
+      d1 = ((b1 * Xn) + (a1 * acc0)) + d2; 
+ 
+      /* d2 = b2 * x[n] + a2 * y[n] */ 
+      d2 = (b2 * Xn) + (a2 * acc0); 
+ 
+      /* Read the fourth input */ 
+      Xn = *pIn++; 
+ 
+      /* y[n] = b0 * x[n] + d1 */ 
+      acc0 = (b0 * Xn) + d1; 
+ 
+      /* Store the output sample in the destination buffer. */ 
+      *pOut++ = acc0; 
+ 
+      /* Update the state once the output is known; d1 must be computed before d2 is overwritten. */ 
+      /* d1 = b1 * x[n] + a1 * y[n] + d2 */ 
+      d1 = (b1 * Xn) + (a1 * acc0) + d2; 
+ 
+      /* d2 = b2 * x[n] + a2 * y[n] */ 
+      d2 = (b2 * Xn) + (a2 * acc0); 
+ 
+      /* One group of four outputs is done */ 
+      sample--; 
+ 
+    } 
+ 
+    /* If the blockSize is not a multiple of 4, compute any remaining output samples here.  
+     ** No loop unrolling is used. */ 
+    sample = blockSize & 0x3u; 
+ 
+    while(sample > 0u) 
+    { 
+      /* Read the input */ 
+      Xn = *pIn++; 
+ 
+      /* y[n] = b0 * x[n] + d1 */ 
+      acc0 = (b0 * Xn) + d1; 
+ 
+      /* Store the output sample in the destination buffer. */ 
+      *pOut++ = acc0; 
+ 
+      /* Update the state once the output is known; d1 must be computed before d2 is overwritten. */ 
+      /* d1 = b1 * x[n] + a1 * y[n] + d2 */ 
+      d1 = ((b1 * Xn) + (a1 * acc0)) + d2; 
+ 
+      /* d2 = b2 * x[n] + a2 * y[n] */ 
+      d2 = (b2 * Xn) + (a2 * acc0); 
+ 
+      /* decrement the loop counter */ 
+      sample--; 
+    } 
+ 
+    /* Store the updated state variables back and advance pState to the next stage */ 
+    *pState++ = d1; 
+    *pState++ = d2; 
+ 
+    /* The output of this stage is the input of the next stage: later stages run in place in pDst */ 
+    pIn = pDst; 
+ 
+    /* Rewind the output working pointer to the start of pDst */ 
+    pOut = pDst; 
+ 
+    /* One stage is done */ 
+    stage--; 
+ 
+  } while(stage > 0u); 
+ 
+ 
+} 
+ 
+ 
+  /**  
+   * @} end of BiquadCascadeDF2T group  
+   */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FilteringFunctions/arm_biquad_cascade_df2T_init_f32.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,91 @@
+/*-----------------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:        arm_biquad_cascade_df2T_init_f32.c  
+*  
+* Description:  Initialization function for the floating-point transposed 
+*               direct form II Biquad cascade filter. 
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* ---------------------------------------------------------------------------*/ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupFilters  
+ */ 
+ 
+/**  
+ * @addtogroup BiquadCascadeDF2T  
+ * @{  
+ */ 
+ 
+/** 
+ * @brief  Initialization function for the floating-point transposed direct form II Biquad cascade filter. 
+ * @param[in,out] *S           points to an instance of the filter data structure. 
+ * @param[in]     numStages    number of 2nd order stages in the filter. 
+ * @param[in]     *pCoeffs     points to the filter coefficients. 
+ * @param[in,out] *pState      points to the state buffer; it is cleared to zero by this function. 
+ * @return        none 
+ *  
+ * <b>Coefficient and State Ordering:</b>  
+ * \par  
+ * The coefficients are stored in the array <code>pCoeffs</code> in the following order:  
+ * <pre>  
+ *     {b10, b11, b12, a11, a12, b20, b21, b22, a21, a22, ...}  
+ * </pre>  
+ *  
+ * \par  
+ * where <code>b1x</code> and <code>a1x</code> are the coefficients for the first stage,  
+ * <code>b2x</code> and <code>a2x</code> are the coefficients for the second stage,  
+ * and so on.  The <code>pCoeffs</code> array contains a total of <code>5*numStages</code> values.  
+ *  
+ * \par  
+ * The <code>pState</code> is a pointer to state array.  
+ * Each Biquad stage has 2 state variables <code>d1</code> and <code>d2</code>.  
+ * The 2 state variables for stage 1 are first, then the 2 state variables for stage 2, and so on.  
+ * The state array has a total length of <code>2*numStages</code> values.  
+ * The state variables are updated after each block of data is processed; the coefficients are untouched.  
+ */ 
+ 
+void arm_biquad_cascade_df2T_init_f32( 
+  arm_biquad_cascade_df2T_instance_f32 * S, 
+  uint8_t numStages, 
+  float32_t * pCoeffs, 
+  float32_t * pState) 
+{ 
+  /* Every biquad stage owns two state values, d1 and d2 */ 
+  uint32_t numStateValues = 2u * (uint32_t) numStages; 
+ 
+  /* Record the stage count and the coefficient array in the instance */ 
+  S->numStages = numStages; 
+  S->pCoeffs = pCoeffs; 
+ 
+  /* Zero the state buffer so the filter starts with d1 = d2 = 0 in every stage */ 
+  memset(pState, 0, numStateValues * sizeof(float32_t)); 
+ 
+  S->pState = pState;           /* attach the cleared state buffer */ 
+} 
+ 
+/**  
+ * @} end of BiquadCascadeDF2T group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FilteringFunctions/arm_conv_f32.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,584 @@
+/* ----------------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_conv_f32.c  
+*  
+* Description:	Convolution of floating-point sequences.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+*  
+* -------------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupFilters  
+ */ 
+ 
+/**  
+ * @defgroup Conv Convolution  
+ *  
+ * Convolution is a mathematical operation that operates on two finite length vectors to generate a finite length output vector.  
+ * Convolution is similar to correlation and is frequently used in filtering and data analysis.  
+ * The CMSIS DSP library contains functions for convolving Q7, Q15, Q31, and floating-point data types.  
+ * The library also provides fast versions of the Q15 and Q31 functions.  
+ *  
+ * \par Algorithm  
+ * Let <code>a[n]</code> and <code>b[n]</code> be sequences of length <code>srcALen</code> and <code>srcBLen</code> samples respectively.  
+ * Then the convolution  
+ *  
+ * <pre>  
+ *                   c[n] = a[n] * b[n]  
+ * </pre>  
+ *  
+ * \par  
+ * is defined as  
+ * \image html ConvolutionEquation.gif  
+ * \par  
+ * Note that <code>c[n]</code> is of length <code>srcALen + srcBLen - 1</code> and is defined over the interval <code>n=0, 1, 2, ..., srcALen + srcBLen - 2</code>.  
+ * <code>pSrcA</code> points to the first input vector of length <code>srcALen</code> and  
+ * <code>pSrcB</code> points to the second input vector of length <code>srcBLen</code>.  
+ * The output result is written to <code>pDst</code> and the calling function must allocate <code>srcALen+srcBLen-1</code> words for the result.  
+ *  
+ * \par  
+ * Conceptually, when two signals <code>a[n]</code> and <code>b[n]</code> are convolved,  
+ * the signal <code>b[n]</code> slides over <code>a[n]</code>.  
+ * For each offset \c n, the overlapping portions of a[n] and b[n] are multiplied and summed together.  
+ *  
+ * \par  
+ * Note that convolution is a commutative operation:  
+ *  
+ * <pre>  
+ *                   a[n] * b[n] = b[n] * a[n].  
+ * </pre>  
+ *  
+ * \par  
+ * This means that switching the A and B arguments to the convolution functions has no effect.  
+ *  
+ * <b>Fixed-Point Behavior</b>  
+ *  
+ * \par  
+ * Convolution requires summing up a large number of intermediate products.  
+ * As such, the Q7, Q15, and Q31 functions run a risk of overflow and saturation.  
+ * Refer to the function specific documentation below for further details of the particular algorithm used.  
+ */ 
+ 
+/**  
+ * @addtogroup Conv  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief Convolution of floating-point sequences.  
+ * @param[in] *pSrcA points to the first input sequence.  
+ * @param[in] srcALen length of the first input sequence (must be at least 1).  
+ * @param[in] *pSrcB points to the second input sequence.  
+ * @param[in] srcBLen length of the second input sequence (must be at least 1).  
+ * @param[out] *pDst points to the location where the output result is written.  Length srcALen+srcBLen-1.  
+ * @return none.  
+ */ 
+ 
+void arm_conv_f32( 
+  float32_t * pSrcA, 
+  uint32_t srcALen, 
+  float32_t * pSrcB, 
+  uint32_t srcBLen, 
+  float32_t * pDst) 
+{ 
+  float32_t *pIn1;                               /* inputA pointer (the longer sequence) */ 
+  float32_t *pIn2;                               /* inputB pointer (the shorter sequence) */ 
+  float32_t *pOut = pDst;                        /* output pointer */ 
+  float32_t *px;                                 /* Intermediate inputA pointer */ 
+  float32_t *py;                                 /* Intermediate inputB pointer */ 
+  float32_t *pSrc1, *pSrc2;                      /* Intermediate pointers */ 
+  float32_t sum, acc0, acc1, acc2, acc3;         /* Accumulators */ 
+  float32_t x0, x1, x2, x3, c0;                  /* Temporary variables to hold state and coefficient values */ 
+  uint32_t j, k, count, blkCnt, blockSize1, blockSize2, blockSize3;     /* loop counters */ 
+ 
+ 
+  /* The algorithm implementation is based on the lengths of the inputs. */ 
+  /* srcB is always made to slide across srcA. */ 
+  /* So srcBLen is always considered as shorter or equal to srcALen */ 
+  if(srcALen >= srcBLen) 
+  { 
+    /* Initialization of inputA pointer */ 
+    pIn1 = pSrcA; 
+ 
+    /* Initialization of inputB pointer */ 
+    pIn2 = pSrcB; 
+  } 
+  else 
+  { 
+    /* Initialization of inputA pointer */ 
+    pIn1 = pSrcB; 
+ 
+    /* Initialization of inputB pointer */ 
+    pIn2 = pSrcA; 
+ 
+    /* Swap the lengths as well, so that srcBLen is the shorter one from here on */ 
+    j = srcBLen; 
+    srcBLen = srcALen; 
+    srcALen = j; 
+  } 
+ 
+  /* conv(x,y) at n = x[n] * y[0] + x[n-1] * y[1] + x[n-2] * y[2] + ...+ x[n-N+1] * y[N -1] */ 
+  /* The function is internally  
+   * divided into three stages according to the number of multiplications that has to be  
+   * taken place between inputA samples and inputB samples. In the first stage of the  
+   * algorithm, the multiplications increase by one for every iteration.  
+   * In the second stage of the algorithm, srcBLen number of multiplications are done.  
+   * In the third stage of the algorithm, the multiplications decrease by one  
+   * for every iteration. */ 
+ 
+  /* The algorithm is implemented in three stages.  
+     The loop counters of each stage are initialized here (unsigned: srcBLen must not be 0). */ 
+  blockSize1 = srcBLen - 1u; 
+  blockSize2 = srcALen - (srcBLen - 1u); 
+  blockSize3 = blockSize1; 
+ 
+  /* --------------------------  
+   * initializations of stage1  
+   * -------------------------*/ 
+ 
+  /* sum = x[0] * y[0]  
+   * sum = x[0] * y[1] + x[1] * y[0]  
+   * ....  
+   * sum = x[0] * y[srcBLen - 2] + x[1] * y[srcBLen - 3] +...+ x[srcBLen - 2] * y[0]  
+   */ 
+ 
+  /* In this stage the MAC operations are increased by 1 for every iteration.  
+     The count variable holds the number of MAC operations performed */ 
+  count = 1u; 
+ 
+  /* Working pointer of inputA */ 
+  px = pIn1; 
+ 
+  /* Working pointer of inputB */ 
+  py = pIn2; 
+ 
+ 
+  /* ------------------------  
+   * Stage1 process  
+   * ----------------------*/ 
+ 
+  /* The first stage starts here */ 
+  while(blockSize1 > 0u) 
+  { 
+    /* Accumulator is made zero for every iteration */ 
+    sum = 0.0f; 
+ 
+    /* Apply loop unrolling and compute 4 MACs simultaneously. */ 
+    k = count >> 2u; 
+ 
+    /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.  
+     ** a second loop below computes MACs for the remaining 0 to 3 samples. */ 
+    while(k > 0u) 
+    { 
+      /* x[0] * y[count - 1] */ 
+      sum += *px++ * *py--; 
+ 
+      /* x[1] * y[count - 2] */ 
+      sum += *px++ * *py--; 
+ 
+      /* x[2] * y[count - 3] */ 
+      sum += *px++ * *py--; 
+ 
+      /* x[3] * y[count - 4] */ 
+      sum += *px++ * *py--; 
+ 
+      /* Decrement the loop counter */ 
+      k--; 
+    } 
+ 
+    /* If the count is not a multiple of 4, compute any remaining MACs here.  
+     ** No loop unrolling is used. */ 
+    k = count % 0x4u; 
+ 
+    while(k > 0u) 
+    { 
+      /* Perform the multiply-accumulate */ 
+      sum += *px++ * *py--; 
+ 
+      /* Decrement the loop counter */ 
+      k--; 
+    } 
+ 
+    /* Store the result in the accumulator in the destination buffer. */ 
+    *pOut++ = sum; 
+ 
+    /* Next output starts one sample further into inputB and again at the start of inputA */ 
+    py = pIn2 + count; 
+    px = pIn1; 
+ 
+    /* Increment the MAC count */ 
+    count++; 
+ 
+    /* Decrement the loop counter */ 
+    blockSize1--; 
+  } 
+ 
+  /* --------------------------  
+   * Initializations of stage2  
+   * ------------------------*/ 
+ 
+  /* sum = x[0] * y[srcBLen-1] + x[1] * y[srcBLen-2] +...+ x[srcBLen-1] * y[0]  
+   * sum = x[1] * y[srcBLen-1] + x[2] * y[srcBLen-2] +...+ x[srcBLen] * y[0]  
+   * ....  
+   * sum = x[srcALen-srcBLen] * y[srcBLen-1] + x[srcALen-srcBLen+1] * y[srcBLen-2] +...+ x[srcALen-1] * y[0]  
+   */ 
+ 
+  /* Working pointer of inputA */ 
+  px = pIn1; 
+ 
+  /* Working pointer of inputB */ 
+  pSrc2 = pIn2 + (srcBLen - 1u); 
+  py = pSrc2; 
+ 
+  /* count is the number of stage2 outputs produced so far, i.e. the inputA offset of the next output */ 
+  count = 0u; 
+ 
+  /* -------------------  
+   * Stage2 process  
+   * ------------------*/ 
+ 
+  /* Stage2 depends on srcBLen as in this stage srcBLen number of MACS are performed.  
+   * So, to loop unroll over blockSize2,  
+   * srcBLen should be greater than or equal to 4 */ 
+  if(srcBLen >= 4u) 
+  { 
+    /* Loop unroll over blockSize2, by 4 */ 
+    blkCnt = blockSize2 >> 2u; 
+ 
+    while(blkCnt > 0u) 
+    { 
+      /* Set all accumulators to zero */ 
+      acc0 = 0.0f; 
+      acc1 = 0.0f; 
+      acc2 = 0.0f; 
+      acc3 = 0.0f; 
+ 
+      /* read x[0], x[1], x[2] samples (indices relative to the start of this group of 4 outputs) */ 
+      x0 = *(px++); 
+      x1 = *(px++); 
+      x2 = *(px++); 
+ 
+      /* Apply loop unrolling and compute 4 MACs simultaneously. */ 
+      k = srcBLen >> 2u; 
+ 
+      /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.  
+       ** k is at least 1 here because srcBLen >= 4, which is what makes the do/while safe. */ 
+      do 
+      { 
+        /* Read y[srcBLen - 1] sample */ 
+        c0 = *(py--); 
+ 
+        /* Read x[3] sample */ 
+        x3 = *(px++); 
+ 
+        /* Perform the multiply-accumulate */ 
+        /* acc0 +=  x[0] * y[srcBLen - 1] */ 
+        acc0 += x0 * c0; 
+ 
+        /* acc1 +=  x[1] * y[srcBLen - 1] */ 
+        acc1 += x1 * c0; 
+ 
+        /* acc2 +=  x[2] * y[srcBLen - 1] */ 
+        acc2 += x2 * c0; 
+ 
+        /* acc3 +=  x[3] * y[srcBLen - 1] */ 
+        acc3 += x3 * c0; 
+ 
+        /* Read y[srcBLen - 2] sample */ 
+        c0 = *(py--); 
+ 
+        /* Read x[4] sample */ 
+        x0 = *(px++); 
+ 
+        /* Perform the multiply-accumulate */ 
+        /* acc0 +=  x[1] * y[srcBLen - 2] */ 
+        acc0 += x1 * c0; 
+        /* acc1 +=  x[2] * y[srcBLen - 2] */ 
+        acc1 += x2 * c0; 
+        /* acc2 +=  x[3] * y[srcBLen - 2] */ 
+        acc2 += x3 * c0; 
+        /* acc3 +=  x[4] * y[srcBLen - 2] */ 
+        acc3 += x0 * c0; 
+ 
+        /* Read y[srcBLen - 3] sample */ 
+        c0 = *(py--); 
+ 
+        /* Read x[5] sample */ 
+        x1 = *(px++); 
+ 
+        /* Perform the multiply-accumulates */ 
+        /* acc0 +=  x[2] * y[srcBLen - 3] */ 
+        acc0 += x2 * c0; 
+        /* acc1 +=  x[3] * y[srcBLen - 3] */ 
+        acc1 += x3 * c0; 
+        /* acc2 +=  x[4] * y[srcBLen - 3] */ 
+        acc2 += x0 * c0; 
+        /* acc3 +=  x[5] * y[srcBLen - 3] */ 
+        acc3 += x1 * c0; 
+ 
+        /* Read y[srcBLen - 4] sample */ 
+        c0 = *(py--); 
+ 
+        /* Read x[6] sample */ 
+        x2 = *(px++); 
+ 
+        /* Perform the multiply-accumulates */ 
+        /* acc0 +=  x[3] * y[srcBLen - 4] */ 
+        acc0 += x3 * c0; 
+        /* acc1 +=  x[4] * y[srcBLen - 4] */ 
+        acc1 += x0 * c0; 
+        /* acc2 +=  x[5] * y[srcBLen - 4] */ 
+        acc2 += x1 * c0; 
+        /* acc3 +=  x[6] * y[srcBLen - 4] */ 
+        acc3 += x2 * c0; 
+ 
+ 
+      } while(--k); 
+ 
+      /* If the srcBLen is not a multiple of 4, compute any remaining MACs here.  
+       ** No loop unrolling is used. */ 
+      k = srcBLen % 0x4u; 
+ 
+      while(k > 0u) 
+      { 
+        /* Read the next inputB sample (walking towards y[0]) */ 
+        c0 = *(py--); 
+ 
+        /* Read the next inputA sample */ 
+        x3 = *(px++); 
+ 
+        /* Perform the multiply-accumulates */ 
+        /* x0..x3 hold four consecutive inputA samples, one per accumulator */ 
+        acc0 += x0 * c0; 
+        /* acc1 uses the sample after the one used by acc0 */ 
+        acc1 += x1 * c0; 
+        /* acc2 uses the sample after the one used by acc1 */ 
+        acc2 += x2 * c0; 
+        /* acc3 uses the sample that was just read */ 
+        acc3 += x3 * c0; 
+ 
+        /* Reuse the present samples for the next MAC */ 
+        x0 = x1; 
+        x1 = x2; 
+        x2 = x3; 
+ 
+        /* Decrement the loop counter */ 
+        k--; 
+      } 
+ 
+      /* Store the result in the accumulator in the destination buffer. */ 
+      *pOut++ = acc0; 
+      *pOut++ = acc1; 
+      *pOut++ = acc2; 
+      *pOut++ = acc3; 
+ 
+      /* Four more outputs are complete: advance the inputA offset by 4 */ 
+      count += 4u; 
+ 
+      /* Update the inputA and inputB pointers for next MAC calculation */ 
+      px = pIn1 + count; 
+      py = pSrc2; 
+ 
+      /* Decrement the loop counter */ 
+      blkCnt--; 
+    } 
+ 
+    /* If the blockSize2 is not a multiple of 4, compute any remaining output samples here.  
+     ** No loop unrolling is used. count continues from the unrolled loop as a per-sample offset. */ 
+    blkCnt = blockSize2 % 0x4u; 
+ 
+    while(blkCnt > 0u) 
+    { 
+      /* Accumulator is made zero for every iteration */ 
+      sum = 0.0f; 
+ 
+      /* Apply loop unrolling and compute 4 MACs simultaneously. */ 
+      k = srcBLen >> 2u; 
+ 
+      /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.  
+       ** a second loop below computes MACs for the remaining 0 to 3 samples. */ 
+      while(k > 0u) 
+      { 
+        /* Perform the multiply-accumulates */ 
+        sum += *px++ * *py--; 
+        sum += *px++ * *py--; 
+        sum += *px++ * *py--; 
+        sum += *px++ * *py--; 
+ 
+        /* Decrement the loop counter */ 
+        k--; 
+      } 
+ 
+      /* If the srcBLen is not a multiple of 4, compute any remaining MACs here.  
+       ** No loop unrolling is used. */ 
+      k = srcBLen % 0x4u; 
+ 
+      while(k > 0u) 
+      { 
+        /* Perform the multiply-accumulate */ 
+        sum += *px++ * *py--; 
+ 
+        /* Decrement the loop counter */ 
+        k--; 
+      } 
+ 
+      /* Store the result in the accumulator in the destination buffer. */ 
+      *pOut++ = sum; 
+ 
+      /* One more output is complete: advance the inputA offset by 1 */ 
+      count++; 
+ 
+      /* Update the inputA and inputB pointers for next MAC calculation */ 
+      px = pIn1 + count; 
+      py = pSrc2; 
+ 
+      /* Decrement the loop counter */ 
+      blkCnt--; 
+    } 
+  } 
+  else 
+  { 
+    /* If the srcBLen is less than 4,  
+     * the blockSize2 loop is not unrolled by 4 */ 
+    blkCnt = blockSize2; 
+ 
+    while(blkCnt > 0u) 
+    { 
+      /* Accumulator is made zero for every iteration */ 
+      sum = 0.0f; 
+ 
+      /* srcBLen number of MACS should be performed */ 
+      k = srcBLen; 
+ 
+      while(k > 0u) 
+      { 
+        /* Perform the multiply-accumulate */ 
+        sum += *px++ * *py--; 
+ 
+        /* Decrement the loop counter */ 
+        k--; 
+      } 
+ 
+      /* Store the result in the accumulator in the destination buffer. */ 
+      *pOut++ = sum; 
+ 
+      /* One more output is complete: advance the inputA offset by 1 */ 
+      count++; 
+ 
+      /* Update the inputA and inputB pointers for next MAC calculation */ 
+      px = pIn1 + count; 
+      py = pSrc2; 
+ 
+      /* Decrement the loop counter */ 
+      blkCnt--; 
+    } 
+  } 
+ 
+ 
+  /* --------------------------  
+   * Initializations of stage3  
+   * -------------------------*/ 
+ 
+  /* sum += x[srcALen-srcBLen+1] * y[srcBLen-1] + x[srcALen-srcBLen+2] * y[srcBLen-2] +...+ x[srcALen-1] * y[1]  
+   * sum += x[srcALen-srcBLen+2] * y[srcBLen-1] + x[srcALen-srcBLen+3] * y[srcBLen-2] +...+ x[srcALen-1] * y[2]  
+   * ....  
+   * sum +=  x[srcALen-2] * y[srcBLen-1] + x[srcALen-1] * y[srcBLen-2]  
+   * sum +=  x[srcALen-1] * y[srcBLen-1]  
+   */ 
+ 
+  /* In this stage the MAC operations are decreased by 1 for every iteration.  
+     The blockSize3 variable holds the number of MAC operations performed */ 
+ 
+  /* Working pointer of inputA */ 
+  pSrc1 = (pIn1 + srcALen) - (srcBLen - 1u); 
+  px = pSrc1; 
+ 
+  /* Working pointer of inputB */ 
+  pSrc2 = pIn2 + (srcBLen - 1u); 
+  py = pSrc2; 
+ 
+  /* -------------------  
+   * Stage3 process  
+   * ------------------*/ 
+ 
+  while(blockSize3 > 0u) 
+  { 
+    /* Accumulator is made zero for every iteration */ 
+    sum = 0.0f; 
+ 
+    /* Apply loop unrolling and compute 4 MACs simultaneously. */ 
+    k = blockSize3 >> 2u; 
+ 
+    /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.  
+     ** a second loop below computes MACs for the remaining 0 to 3 samples. */ 
+    while(k > 0u) 
+    { 
+      /* sum += x[srcALen - srcBLen + 1] * y[srcBLen - 1] */ 
+      sum += *px++ * *py--; 
+ 
+      /* sum += x[srcALen - srcBLen + 2] * y[srcBLen - 2] */ 
+      sum += *px++ * *py--; 
+ 
+      /* sum += x[srcALen - srcBLen + 3] * y[srcBLen - 3] */ 
+      sum += *px++ * *py--; 
+ 
+      /* sum += x[srcALen - srcBLen + 4] * y[srcBLen - 4] */ 
+      sum += *px++ * *py--; 
+ 
+      /* Decrement the loop counter */ 
+      k--; 
+    } 
+ 
+    /* If the blockSize3 is not a multiple of 4, compute any remaining MACs here.  
+     ** No loop unrolling is used. */ 
+    k = blockSize3 % 0x4u; 
+ 
+    while(k > 0u) 
+    { 
+      /* Perform the multiply-accumulates */ 
+      /* the last MAC of every stage3 output ends on x[srcALen-1] */ 
+      sum += *px++ * *py--; 
+ 
+      /* Decrement the loop counter */ 
+      k--; 
+    } 
+ 
+    /* Store the result in the accumulator in the destination buffer. */ 
+    *pOut++ = sum; 
+ 
+    /* Next output starts one sample later in inputA and again at y[srcBLen-1] */ 
+    px = ++pSrc1; 
+    py = pSrc2; 
+ 
+    /* Decrement the loop counter */ 
+    blockSize3--; 
+  } 
+ 
+} 
+ 
+/**  
+ * @} end of Conv group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FilteringFunctions/arm_conv_fast_q15.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,651 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_conv_fast_q15.c  
+*  
+* Description:	Fast Q15 Convolution.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupFilters  
+ */ 
+ 
+/**  
+ * @addtogroup Conv  
+ * @{  
+ */ 
+ 
+/**
+ * @brief Convolution of Q15 sequences (fast version).
+ * @param[in] *pSrcA points to the first input sequence.
+ * @param[in] srcALen length of the first input sequence.
+ * @param[in] *pSrcB points to the second input sequence.
+ * @param[in] srcBLen length of the second input sequence.
+ * @param[out] *pDst points to the location where the output result is written.  Length srcALen+srcBLen-1.
+ * @return none.
+ *
+ * <b>Scaling and Overflow Behavior:</b>
+ *
+ * \par
+ * This fast version uses a 32-bit accumulator with 2.30 format.
+ * The accumulator maintains full precision of the intermediate multiplication results
+ * but provides only a single guard bit. There is no saturation on intermediate additions.
+ * Thus, if the accumulator overflows it wraps around and distorts the result.
+ * The input signals should be scaled down to avoid intermediate overflows.
+ * Scale down the inputs by log2(min(srcALen, srcBLen)) bits (log2 is the logarithm to the base 2) to avoid overflows,
+ * as at most min(srcALen, srcBLen) additions are carried out internally for one output sample.
+ * The 2.30 accumulator is right shifted by 15 bits and stored as a 1.15 value by a plain cast
+ * (or halfword pack); this function performs no explicit saturation on the final result.
+ *
+ * \par
+ * Both lengths are expected to be non-zero: the stage sizes are derived from srcBLen - 1u in
+ * unsigned arithmetic. If srcALen < srcBLen the two inputs are swapped internally, so the
+ * shorter sequence is always the one that slides across the longer one.
+ * Input and output samples are partly accessed two at a time through 32-bit pointer casts.
+ *
+ * \par
+ * See <code>arm_conv_q15()</code> for a slower implementation of this function which uses 64-bit accumulation to avoid wrap around distortion.
+ */
+
+void arm_conv_fast_q15(
+  q15_t * pSrcA,
+  uint32_t srcALen,
+  q15_t * pSrcB,
+  uint32_t srcBLen,
+  q15_t * pDst)
+{
+  q15_t *pIn1;                                   /* inputA pointer (the longer sequence after the swap below) */
+  q15_t *pIn2;                                   /* inputB pointer (the shorter sequence after the swap below) */
+  q15_t *pOut = pDst;                            /* output pointer */
+  q31_t sum, acc0, acc1, acc2, acc3;             /* Accumulators */
+  q15_t *px;                                     /* Intermediate inputA pointer  */
+  q15_t *py;                                     /* Intermediate inputB pointer  */
+  q15_t *pSrc1, *pSrc2;                          /* Intermediate pointers */
+  q31_t x0, x1, x2, x3, c0;                      /* Temporary variables to hold state and coefficient values */
+  uint32_t blockSize1, blockSize2, blockSize3, j, k, count, blkCnt;     /* stage sizes and loop counters */
+  q31_t *pb;                                     /* 32 bit pointer for inputB buffer */
+
+
+  /* The algorithm implementation is based on the lengths of the inputs. */
+  /* srcB is always made to slide across srcA. */
+  /* So srcBLen is always considered as shorter or equal to srcALen */
+  if(srcALen >= srcBLen)
+  {
+    /* Initialization of inputA pointer */
+    pIn1 = pSrcA;
+
+    /* Initialization of inputB pointer */
+    pIn2 = pSrcB;
+  }
+  else
+  {
+    /* Initialization of inputA pointer */
+    pIn1 = pSrcB;
+
+    /* Initialization of inputB pointer */
+    pIn2 = pSrcA;
+
+    /* Swap the lengths as well, so that srcBLen is the shorter one */
+    j = srcBLen;
+    srcBLen = srcALen;
+    srcALen = j;
+  }
+
+  /* conv(x,y) at n = x[n] * y[0] + x[n-1] * y[1] + x[n-2] * y[2] + ...+ x[n-N+1] * y[N -1] */
+  /* The function is internally
+   * divided into three stages according to the number of multiplications that has to be
+   * taken place between inputA samples and inputB samples. In the first stage of the
+   * algorithm, the multiplications increase by one for every iteration.
+   * In the second stage of the algorithm, srcBLen number of multiplications are done.
+   * In the third stage of the algorithm, the multiplications decrease by one
+   * for every iteration. */
+
+  /* The algorithm is implemented in three stages.
+     The loop counters of each stage are initialized here
+     (number of output samples produced by each stage). */
+  blockSize1 = srcBLen - 1u;
+  blockSize2 = srcALen - (srcBLen - 1u);
+  blockSize3 = blockSize1;
+
+  /* --------------------------
+   * Initializations of stage1
+   * -------------------------*/
+
+  /* sum = x[0] * y[0]
+   * sum = x[0] * y[1] + x[1] * y[0]
+   * ....
+   * sum = x[0] * y[srcBlen - 1] + x[1] * y[srcBlen - 2] +...+ x[srcBLen - 1] * y[0]
+   */
+
+  /* In this stage the MAC operations are increased by 1 for every iteration.
+     The count variable holds the number of MAC operations performed */
+  count = 1u;
+
+  /* Working pointer of inputA */
+  px = pIn1;
+
+  /* Working pointer of inputB */
+  py = pIn2;
+
+
+  /* ------------------------
+   * Stage1 process
+   * ----------------------*/
+
+  /* For loop unrolling by 4, this stage is divided into two. */
+  /* First part of this stage computes the MAC operations less than 4 */
+  /* Second part of this stage computes the MAC operations greater than or equal to 4 */
+
+  /* The first part of the stage starts here */
+  while((count < 4u) && (blockSize1 > 0u))
+  {
+    /* Accumulator is made zero for every iteration */
+    sum = 0;
+
+    /* Loop over number of MAC operations between
+     * inputA samples and inputB samples */
+    k = count;
+
+    while(k > 0u)
+    {
+      /* Perform the multiply-accumulates */
+      /* NOTE(review): each operand is a single q15_t promoted to 32 bits while
+       * __SMLAD is a dual 16-bit MAC; the sign-extended upper halfwords presumably
+       * add 1 to sum whenever both samples are negative - confirm. */
+      sum = __SMLAD(*px++, *py--, sum);
+
+      /* Decrement the loop counter */
+      k--;
+    }
+
+    /* Store the result in the accumulator in the destination buffer. */
+    *pOut++ = (q15_t) (sum >> 15);
+
+    /* Update the inputA and inputB pointers for next MAC calculation */
+    py = pIn2 + count;
+    px = pIn1;
+
+    /* Increment the MAC count */
+    count++;
+
+    /* Decrement the loop counter */
+    blockSize1--;
+  }
+
+  /* The second part of the stage starts here */
+  /* The internal loop, over count, is unrolled by 4 */
+  /* To read two inputB samples at a time using SIMD
+   * (y[count - 1] and y[count - 2]), py is decremented by 1 */
+  py = py - 1;
+
+  while(blockSize1 > 0u)
+  {
+    /* Accumulator is made zero for every iteration */
+    sum = 0;
+
+    /* Apply loop unrolling and compute 4 MACs simultaneously. */
+    k = count >> 2u;
+
+    /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.
+     ** a second loop below computes MACs for the remaining 1 to 3 samples. */
+    while(k > 0u)
+    {
+      /* Perform the multiply-accumulates */
+      /* x[0], x[1] are multiplied with y[count - 1], y[count - 2] respectively */
+      sum = __SMLADX(*__SIMD32(px)++, *__SIMD32(py)--, sum);
+      /* x[2], x[3] are multiplied with y[count - 3], y[count - 4] respectively */
+      sum = __SMLADX(*__SIMD32(px)++, *__SIMD32(py)--, sum);
+
+      /* Decrement the loop counter */
+      k--;
+    }
+
+    /* For the next MAC operations, the pointer py is used without SIMD
+     * So, py is incremented by 1 */
+    py = py + 1u;
+
+    /* If the count is not a multiple of 4, compute any remaining MACs here.
+     ** No loop unrolling is used. */
+    k = count % 0x4u;
+
+    while(k > 0u)
+    {
+      /* Perform the multiply-accumulates */
+      sum = __SMLAD(*px++, *py--, sum);
+
+      /* Decrement the loop counter */
+      k--;
+    }
+
+    /* Store the result in the accumulator in the destination buffer. */
+    *pOut++ = (q15_t) (sum >> 15);
+
+    /* Update the inputA and inputB pointers for next MAC calculation.
+     * count is incremented below, so py again ends up one sample below the
+     * highest inputB sample used, ready for the two-sample SIMD read */
+    py = pIn2 + (count - 1u);
+    px = pIn1;
+
+    /* Increment the MAC count */
+    count++;
+
+    /* Decrement the loop counter */
+    blockSize1--;
+  }
+
+  /* --------------------------
+   * Initializations of stage2
+   * ------------------------*/
+
+  /* sum = x[0] * y[srcBLen-1] + x[1] * y[srcBLen-2] +...+ x[srcBLen-1] * y[0]
+   * sum = x[1] * y[srcBLen-1] + x[2] * y[srcBLen-2] +...+ x[srcBLen] * y[0]
+   * ....
+   * sum = x[srcALen-srcBLen] * y[srcBLen-1] + x[srcALen-srcBLen+1] * y[srcBLen-2] +...+ x[srcALen-1] * y[0]
+   */
+
+  /* Working pointer of inputA */
+  px = pIn1;
+
+  /* Working pointer of inputB */
+  pSrc2 = pIn2 + (srcBLen - 1u);
+  py = pSrc2;
+
+  /* Initialize inputB pointer of type q31, one sample below the last inputB
+   * sample so that a 32-bit read covers the last two samples */
+  pb = (q31_t *) (py - 1u);
+
+  /* count is the inputA sample index at which the MACs of the next output start
+   * (the first stage2 output starts at index 0, so the next one starts at 1) */
+  count = 1u;
+
+
+  /* --------------------
+   * Stage2 process
+   * -------------------*/
+
+  /* Stage2 depends on srcBLen as in this stage srcBLen number of MACS are performed.
+   * So, to loop unroll over blockSize2,
+   * srcBLen should be greater than or equal to 4 */
+  if(srcBLen >= 4u)
+  {
+    /* Loop unroll over blockSize2, by 4 */
+    blkCnt = blockSize2 >> 2u;
+
+    while(blkCnt > 0u)
+    {
+      /* Set all accumulators to zero */
+      acc0 = 0;
+      acc1 = 0;
+      acc2 = 0;
+      acc3 = 0;
+
+
+      /* read x[0], x[1] samples */
+      x0 = *(q31_t *) (px++);
+      /* read x[1], x[2] samples (px advances by one q15 sample, so the two reads overlap) */
+      x1 = *(q31_t *) (px++);
+
+
+      /* Apply loop unrolling and compute 4 MACs simultaneously. */
+      k = srcBLen >> 2u;
+
+      /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.
+       ** a second loop below computes MACs for the remaining 1 to 3 samples. */
+      do
+      {
+        /* Read the last two inputB samples using SIMD:
+         * y[srcBLen - 1] and y[srcBLen - 2] */
+        c0 = *(pb--);
+
+        /* acc0 +=  x[0] * y[srcBLen - 1] + x[1] * y[srcBLen - 2] */
+        acc0 = __SMLADX(x0, c0, acc0);
+
+        /* acc1 +=  x[1] * y[srcBLen - 1] + x[2] * y[srcBLen - 2] */
+        acc1 = __SMLADX(x1, c0, acc1);
+
+        /* Read x[2], x[3] */
+        x2 = *(q31_t *) (px++);
+
+        /* Read x[3], x[4] */
+        x3 = *(q31_t *) (px++);
+
+        /* acc2 +=  x[2] * y[srcBLen - 1] + x[3] * y[srcBLen - 2] */
+        acc2 = __SMLADX(x2, c0, acc2);
+
+        /* acc3 +=  x[3] * y[srcBLen - 1] + x[4] * y[srcBLen - 2] */
+        acc3 = __SMLADX(x3, c0, acc3);
+
+        /* Read y[srcBLen - 3] and y[srcBLen - 4] */
+        c0 = *(pb--);
+
+        /* acc0 +=  x[2] * y[srcBLen - 3] + x[3] * y[srcBLen - 4] */
+        acc0 = __SMLADX(x2, c0, acc0);
+
+        /* acc1 +=  x[3] * y[srcBLen - 3] + x[4] * y[srcBLen - 4] */
+        acc1 = __SMLADX(x3, c0, acc1);
+
+        /* Read x[4], x[5] */
+        x0 = *(q31_t *) (px++);
+
+        /* Read x[5], x[6] */
+        x1 = *(q31_t *) (px++);
+
+        /* acc2 +=  x[4] * y[srcBLen - 3] + x[5] * y[srcBLen - 4] */
+        acc2 = __SMLADX(x0, c0, acc2);
+
+        /* acc3 +=  x[5] * y[srcBLen - 3] + x[6] * y[srcBLen - 4] */
+        acc3 = __SMLADX(x1, c0, acc3);
+
+      } while(--k);
+
+      /* For the next MAC operations, SIMD is not used
+       * So, the 16 bit pointer of inputB, py, is updated.
+       * py now points at the upper sample of the pair pb points at. */
+      py = (q15_t *) pb;
+      py = py + 1;
+
+      /* If the srcBLen is not a multiple of 4, compute any remaining MACs here.
+       ** No loop unrolling is used.
+       ** (Sample indices in the comments below are for the first pass, srcBLen < 8.) */
+      k = srcBLen % 0x4u;
+
+      if(k == 1u)
+      {
+        /* Read y[srcBLen - 5] into the low halfword only.
+         * The mask removes the sign extension of the q15 value: the dual MACs
+         * below multiply both halfwords of c0, so the upper halfword must be 0. */
+        c0 = *(py) & 0x0000FFFF;
+
+        /* Read x[6], x[7] */
+        x3 = *(q31_t *) px++;
+
+        /* Perform the multiply-accumulates */
+        acc0 = __SMLAD(x0, c0, acc0);
+        acc1 = __SMLAD(x1, c0, acc1);
+        acc2 = __SMLADX(x1, c0, acc2);
+        acc3 = __SMLADX(x3, c0, acc3);
+      }
+
+      if(k == 2u)
+      {
+        /* Read y[srcBLen - 5], y[srcBLen - 6] */
+        c0 = *(pb);
+
+        /* Read x[6], x[7] */
+        x3 = *(q31_t *) px++;
+
+        /* Read x[7], x[8] */
+        x2 = *(q31_t *) px++;
+
+        /* Perform the multiply-accumulates */
+        acc0 = __SMLADX(x0, c0, acc0);
+        acc1 = __SMLADX(x1, c0, acc1);
+        acc2 = __SMLADX(x3, c0, acc2);
+        acc3 = __SMLADX(x2, c0, acc3);
+      }
+
+      if(k == 3u)
+      {
+        /* Read y[srcBLen - 5], y[srcBLen - 6] */
+        c0 = *pb;
+
+        /* Read x[6], x[7] */
+        x3 = *(q31_t *) px++;
+
+        /* Read x[7], x[8] */
+        x2 = *(q31_t *) px++;
+
+        /* Perform the multiply-accumulates */
+        acc0 = __SMLADX(x0, c0, acc0);
+        acc1 = __SMLADX(x1, c0, acc1);
+        acc2 = __SMLADX(x3, c0, acc2);
+        acc3 = __SMLADX(x2, c0, acc3);
+
+        /* Read y[srcBLen - 7], the sample just below the pair read above.
+         * py points at the upper sample of that pair, hence py - 2.
+         * A 16-bit read is used so that nothing below the first inputB sample is
+         * touched, and the mask keeps the upper halfword of c0 at 0 for the
+         * dual MACs below. */
+        c0 = *(py - 2) & 0x0000FFFF;
+
+        /* Read x[8], x[9] */
+        x3 = *(q31_t *) px++;
+
+        /* Perform the multiply-accumulates */
+        acc0 = __SMLADX(x1, c0, acc0);
+        acc1 = __SMLAD(x2, c0, acc1);
+        acc2 = __SMLADX(x2, c0, acc2);
+        acc3 = __SMLADX(x3, c0, acc3);
+      }
+
+      /* Store the results in the accumulators in the destination buffer,
+       * two q15 outputs per 32-bit write. */
+      *__SIMD32(pOut)++ = __PKHBT((acc0 >> 15), (acc1 >> 15), 16);
+      *__SIMD32(pOut)++ = __PKHBT((acc2 >> 15), (acc3 >> 15), 16);
+
+      /* Update the inputA and inputB pointers for next MAC calculation.
+       * count is (start index of this block + 1), so the next block of
+       * four outputs starts at count + 3. */
+      px = pIn1 + (count + 3u);
+      py = pSrc2;
+      pb = (q31_t *) (py - 1);
+
+      /* Advance count by the four outputs just produced, so that the
+       * single-output loop below continues from the correct inputA sample */
+      count += 4u;
+
+      /* Decrement the loop counter */
+      blkCnt--;
+    }
+
+    /* If the blockSize2 is not a multiple of 4, compute any remaining output samples here.
+     ** No loop unrolling is used. */
+    blkCnt = blockSize2 % 0x4u;
+
+    while(blkCnt > 0u)
+    {
+      /* Accumulator is made zero for every iteration */
+      sum = 0;
+
+      /* Apply loop unrolling and compute 4 MACs simultaneously. */
+      k = srcBLen >> 2u;
+
+      /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.
+       ** a second loop below computes MACs for the remaining 1 to 3 samples. */
+      while(k > 0u)
+      {
+        /* Perform the multiply-accumulates */
+        sum += ((q31_t) * px++ * *py--);
+        sum += ((q31_t) * px++ * *py--);
+        sum += ((q31_t) * px++ * *py--);
+        sum += ((q31_t) * px++ * *py--);
+
+        /* Decrement the loop counter */
+        k--;
+      }
+
+      /* If the srcBLen is not a multiple of 4, compute any remaining MACs here.
+       ** No loop unrolling is used. */
+      k = srcBLen % 0x4u;
+
+      while(k > 0u)
+      {
+        /* Perform the multiply-accumulates */
+        sum += ((q31_t) * px++ * *py--);
+
+        /* Decrement the loop counter */
+        k--;
+      }
+
+      /* Store the result in the accumulator in the destination buffer. */
+      *pOut++ = (q15_t) (sum >> 15);
+
+      /* Update the inputA and inputB pointers for next MAC calculation */
+      px = pIn1 + count;
+      py = pSrc2;
+
+      /* Increment the pointer pIn1 index, count by 1 */
+      count++;
+
+      /* Decrement the loop counter */
+      blkCnt--;
+    }
+  }
+  else
+  {
+    /* If the srcBLen is less than 4,
+     * the blockSize2 loop is not unrolled by 4 */
+    blkCnt = blockSize2;
+
+    while(blkCnt > 0u)
+    {
+      /* Accumulator is made zero for every iteration */
+      sum = 0;
+
+      /* srcBLen number of MACS should be performed */
+      k = srcBLen;
+
+      while(k > 0u)
+      {
+        /* Perform the multiply-accumulate */
+        sum += ((q31_t) * px++ * *py--);
+
+        /* Decrement the loop counter */
+        k--;
+      }
+
+      /* Store the result in the accumulator in the destination buffer. */
+      *pOut++ = (q15_t) (sum >> 15);
+
+      /* Update the inputA and inputB pointers for next MAC calculation */
+      px = pIn1 + count;
+      py = pSrc2;
+
+      /* Increment the pointer pIn1 index, count by 1 */
+      count++;
+
+      /* Decrement the loop counter */
+      blkCnt--;
+    }
+  }
+
+
+  /* --------------------------
+   * Initializations of stage3
+   * -------------------------*/
+
+  /* sum += x[srcALen-srcBLen+1] * y[srcBLen-1] + x[srcALen-srcBLen+2] * y[srcBLen-2] +...+ x[srcALen-1] * y[1]
+   * sum += x[srcALen-srcBLen+2] * y[srcBLen-1] + x[srcALen-srcBLen+3] * y[srcBLen-2] +...+ x[srcALen-1] * y[2]
+   * ....
+   * sum +=  x[srcALen-2] * y[srcBLen-1] + x[srcALen-1] * y[srcBLen-2]
+   * sum +=  x[srcALen-1] * y[srcBLen-1]
+   */
+
+  /* In this stage the MAC operations are decreased by 1 for every iteration.
+     The blockSize3 variable holds the number of MAC operations performed */
+
+  /* Working pointer of inputA */
+  pSrc1 = (pIn1 + srcALen) - (srcBLen - 1u);
+  px = pSrc1;
+
+  /* Working pointer of inputB.
+   * pSrc2 points at the last inputB sample; pIn2 is reused to point one sample
+   * below it, which is where the two-sample SIMD reads start. */
+  pSrc2 = pIn2 + (srcBLen - 1u);
+  pIn2 = pSrc2 - 1u;
+  py = pIn2;
+
+  /* -------------------
+   * Stage3 process
+   * ------------------*/
+
+  /* For loop unrolling by 4, this stage is divided into two. */
+  /* The first (blockSize3 >> 2) outputs are computed with SIMD reads, 4 MACs at a time */
+  /* The remaining outputs are computed one MAC at a time */
+
+  /* The first part of the stage starts here */
+  j = blockSize3 >> 2u;
+
+  while((j > 0u) && (blockSize3 > 0u))
+  {
+    /* Accumulator is made zero for every iteration */
+    sum = 0;
+
+    /* Apply loop unrolling and compute 4 MACs simultaneously. */
+    k = blockSize3 >> 2u;
+
+    /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.
+     ** a second loop below computes MACs for the remaining 1 to 3 samples. */
+    while(k > 0u)
+    {
+      /* x[srcALen - srcBLen + 1], x[srcALen - srcBLen + 2] are multiplied
+       * with y[srcBLen - 1], y[srcBLen - 2] respectively */
+      sum = __SMLADX(*__SIMD32(px)++, *__SIMD32(py)--, sum);
+      /* x[srcALen - srcBLen + 3], x[srcALen - srcBLen + 4] are multiplied
+       * with y[srcBLen - 3], y[srcBLen - 4] respectively */
+      sum = __SMLADX(*__SIMD32(px)++, *__SIMD32(py)--, sum);
+
+      /* Decrement the loop counter */
+      k--;
+    }
+
+    /* For the next MAC operations, the pointer py is used without SIMD
+     * So, py is incremented by 1 */
+    py = py + 1u;
+
+    /* If the blockSize3 is not a multiple of 4, compute any remaining MACs here.
+     ** No loop unrolling is used. */
+    k = blockSize3 % 0x4u;
+
+    while(k > 0u)
+    {
+      /* sum += x[srcALen - srcBLen + 5] * y[srcBLen - 5] */
+      sum = __SMLAD(*px++, *py--, sum);
+
+      /* Decrement the loop counter */
+      k--;
+    }
+
+    /* Store the result in the accumulator in the destination buffer. */
+    *pOut++ = (q15_t) (sum >> 15);
+
+    /* Update the inputA and inputB pointers for next MAC calculation */
+    px = ++pSrc1;
+    py = pIn2;
+
+    /* Decrement the loop counter */
+    blockSize3--;
+
+    j--;
+  }
+
+  /* The second part of the stage starts here */
+  /* SIMD is not used for the next MAC operations,
+   * so pointer py is updated to read only one sample at a time */
+  py = py + 1u;
+
+  while(blockSize3 > 0u)
+  {
+    /* Accumulator is made zero for every iteration */
+    sum = 0;
+
+    /* blockSize3 MACs are performed for this output, one at a time */
+    k = blockSize3;
+
+    while(k > 0u)
+    {
+      /* Perform the multiply-accumulates */
+      /* sum +=  x[srcALen-1] * y[srcBLen-1] */
+      sum = __SMLAD(*px++, *py--, sum);
+
+      /* Decrement the loop counter */
+      k--;
+    }
+
+    /* Store the result in the accumulator in the destination buffer. */
+    *pOut++ = (q15_t) (sum >> 15);
+
+    /* Update the inputA and inputB pointers for next MAC calculation */
+    px = ++pSrc1;
+    py = pSrc2;
+
+    /* Decrement the loop counter */
+    blockSize3--;
+  }
+
+}
+ 
+/**  
+ * @} end of Conv group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FilteringFunctions/arm_conv_fast_q31.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,564 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_conv_fast_q31.c  
+*  
+* Description:	Q31 Convolution (fast version).  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupFilters  
+ */ 
+ 
+/**  
+ * @addtogroup Conv  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief Convolution of Q31 sequences (fast version).  
+ * @param[in] *pSrcA points to the first input sequence.  
+ * @param[in] srcALen length of the first input sequence.  
+ * @param[in] *pSrcB points to the second input sequence.  
+ * @param[in] srcBLen length of the second input sequence.  
+ * @param[out] *pDst points to the location where the output result is written.  Length srcALen+srcBLen-1.  
+ * @return none.  
+ *  
+ * @details  
+ * <b>Scaling and Overflow Behavior:</b>  
+ *  
+ * \par  
+ * This function is optimized for speed at the expense of fixed-point precision and overflow protection.  
+ * The result of each 1.31 x 1.31 multiplication is truncated to 2.30 format.  
+ * These intermediate results are accumulated in a 32-bit register in 2.30 format.  
+ * Finally, the accumulator is saturated and converted to a 1.31 result.  
+ *  
+ * \par  
+ * The fast version has the same overflow behavior as the standard version but provides less precision since it discards the low 32 bits of each multiplication result.  
+ * In order to avoid overflows completely the input signals must be scaled down.  
+ * Scale down the inputs by log2(min(srcALen, srcBLen)) (log2 is read as log to the base 2) times to avoid overflows,  
+ * as maximum of min(srcALen, srcBLen) number of additions are carried internally.  
+ *  
+ * \par  
+ * See <code>arm_conv_q31()</code> for a slower implementation of this function which uses 64-bit accumulation to provide higher precision.  
+ */ 
+ 
+void arm_conv_fast_q31( 
+  q31_t * pSrcA, 
+  uint32_t srcALen, 
+  q31_t * pSrcB, 
+  uint32_t srcBLen, 
+  q31_t * pDst) 
+{ 
+  q31_t *pIn1;                                   /* inputA pointer */ 
+  q31_t *pIn2;                                   /* inputB pointer */ 
+  q31_t *pOut = pDst;                            /* output pointer */ 
+  q31_t *px;                                     /* Intermediate inputA pointer  */ 
+  q31_t *py;                                     /* Intermediate inputB pointer  */ 
+  q31_t *pSrc1, *pSrc2;                          /* Intermediate pointers */ 
+  q31_t sum, acc0, acc1, acc2, acc3;             /* Accumulator */ 
+  q31_t x0, x1, x2, x3, c0;                      /* Temporary variables to hold state and coefficient values */ 
+  uint32_t j, k, count, blkCnt, blockSize1, blockSize2, blockSize3;     /* loop counter */ 
+ 
+ 
+  /* The algorithm implementation is based on the lengths of the inputs. */ 
+  /* srcB is always made to slide across srcA. */ 
+  /* So srcBLen is always considered as shorter or equal to srcALen */ 
+  if(srcALen >= srcBLen) 
+  { 
+    /* Initialization of inputA pointer */ 
+    pIn1 = pSrcA; 
+ 
+    /* Initialization of inputB pointer */ 
+    pIn2 = pSrcB; 
+  } 
+  else 
+  { 
+    /* Initialization of inputA pointer */ 
+    pIn1 = pSrcB; 
+ 
+    /* Initialization of inputB pointer */ 
+    pIn2 = pSrcA; 
+ 
+    /* srcBLen is always considered as shorter or equal to srcALen */ 
+    j = srcBLen; 
+    srcBLen = srcALen; 
+    srcALen = j; 
+  } 
+ 
+  /* conv(x,y) at n = x[n] * y[0] + x[n-1] * y[1] + x[n-2] * y[2] + ...+ x[n-N+1] * y[N -1] */ 
+  /* The function is internally  
+   * divided into three stages according to the number of multiplications that has to be  
+   * taken place between inputA samples and inputB samples. In the first stage of the  
+   * algorithm, the multiplications increase by one for every iteration.  
+   * In the second stage of the algorithm, srcBLen number of multiplications are done.  
+   * In the third stage of the algorithm, the multiplications decrease by one  
+   * for every iteration. */ 
+ 
+  /* The algorithm is implemented in three stages.  
+     The loop counters of each stage is initiated here. */ 
+  blockSize1 = srcBLen - 1u; 
+  blockSize2 = srcALen - (srcBLen - 1u); 
+  blockSize3 = blockSize1; 
+ 
+  /* --------------------------  
+   * Initializations of stage1  
+   * -------------------------*/ 
+ 
+  /* sum = x[0] * y[0]  
+   * sum = x[0] * y[1] + x[1] * y[0]  
+   * ....  
+   * sum = x[0] * y[srcBlen - 1] + x[1] * y[srcBlen - 2] +...+ x[srcBLen - 1] * y[0]  
+   */ 
+ 
+  /* In this stage the MAC operations are increased by 1 for every iteration.  
+     The count variable holds the number of MAC operations performed */ 
+  count = 1u; 
+ 
+  /* Working pointer of inputA */ 
+  px = pIn1; 
+ 
+  /* Working pointer of inputB */ 
+  py = pIn2; 
+ 
+ 
+  /* ------------------------  
+   * Stage1 process  
+   * ----------------------*/ 
+ 
+  /* The first stage starts here */ 
+  while(blockSize1 > 0u) 
+  { 
+    /* Accumulator is made zero for every iteration */ 
+    sum = 0; 
+ 
+    /* Apply loop unrolling and compute 4 MACs simultaneously. */ 
+    k = count >> 2u; 
+ 
+    /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.  
+     ** a second loop below computes MACs for the remaining 1 to 3 samples. */ 
+    while(k > 0u) 
+    { 
+      /* x[0] * y[srcBLen - 1] */ 
+      sum = (q31_t) ((((q63_t) sum << 32) +  
+		    ((q63_t) * px++ * (*py--))) >> 32); 
+ 
+      /* x[1] * y[srcBLen - 2] */ 
+      sum = (q31_t) ((((q63_t) sum << 32) +  
+		    ((q63_t) * px++ * (*py--))) >> 32); 
+ 
+      /* x[2] * y[srcBLen - 3] */ 
+      sum = (q31_t) ((((q63_t) sum << 32) +  
+		    ((q63_t) * px++ * (*py--))) >> 32); 
+ 
+      /* x[3] * y[srcBLen - 4] */ 
+      sum = (q31_t) ((((q63_t) sum << 32) +  
+		    ((q63_t) * px++ * (*py--))) >> 32); 
+ 
+      /* Decrement the loop counter */ 
+      k--; 
+    } 
+ 
+    /* If the count is not a multiple of 4, compute any remaining MACs here.  
+     ** No loop unrolling is used. */ 
+    k = count % 0x4u; 
+ 
+    while(k > 0u) 
+    { 
+      /* Perform the multiply-accumulate */ 
+      sum = (q31_t) ((((q63_t) sum << 32) + 
+		    ((q63_t) * px++ * (*py--))) >> 32); 
+ 
+      /* Decrement the loop counter */ 
+      k--; 
+    } 
+ 
+    /* Store the result in the accumulator in the destination buffer. */ 
+    *pOut++ = sum << 1; 
+ 
+    /* Update the inputA and inputB pointers for next MAC calculation */ 
+    py = pIn2 + count; 
+    px = pIn1; 
+ 
+    /* Increment the MAC count */ 
+    count++; 
+ 
+    /* Decrement the loop counter */ 
+    blockSize1--; 
+  } 
+ 
+  /* --------------------------  
+   * Initializations of stage2  
+   * ------------------------*/ 
+ 
+  /* sum = x[0] * y[srcBLen-1] + x[1] * y[srcBLen-2] +...+ x[srcBLen-1] * y[0]  
+   * sum = x[1] * y[srcBLen-1] + x[2] * y[srcBLen-2] +...+ x[srcBLen] * y[0]  
+   * ....  
+   * sum = x[srcALen-srcBLen-2] * y[srcBLen-1] + x[srcALen] * y[srcBLen-2] +...+ x[srcALen-1] * y[0]  
+   */ 
+ 
+  /* Working pointer of inputA */ 
+  px = pIn1; 
+ 
+  /* Working pointer of inputB */ 
+  pSrc2 = pIn2 + (srcBLen - 1u); 
+  py = pSrc2; 
+ 
+  /* count is index by which the pointer pIn1 to be incremented */ 
+  count = 1u; 
+ 
+  /* -------------------  
+   * Stage2 process  
+   * ------------------*/ 
+ 
+  /* Stage2 depends on srcBLen as in this stage srcBLen number of MACS are performed.  
+   * So, to loop unroll over blockSize2,  
+   * srcBLen should be greater than or equal to 4 */ 
+  if(srcBLen >= 4u) 
+  { 
+    /* Loop unroll over blockSize2, by 4 */ 
+    blkCnt = blockSize2 >> 2u; 
+ 
+    while(blkCnt > 0u) 
+    { 
+      /* Set all accumulators to zero */ 
+      acc0 = 0; 
+      acc1 = 0; 
+      acc2 = 0; 
+      acc3 = 0; 
+ 
+      /* read x[0], x[1], x[2] samples */ 
+      x0 = *(px++); 
+      x1 = *(px++); 
+      x2 = *(px++); 
+ 
+      /* Apply loop unrolling and compute 4 MACs simultaneously. */ 
+      k = srcBLen >> 2u; 
+ 
+      /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.  
+       ** a second loop below computes MACs for the remaining 1 to 3 samples. */ 
+      do 
+      { 
+        /* Read y[srcBLen - 1] sample */ 
+        c0 = *(py--); 
+ 
+        /* Read x[3] sample */ 
+        x3 = *(px++); 
+ 
+        /* Perform the multiply-accumulates */ 
+        /* acc0 +=  x[0] * y[srcBLen - 1] */ 
+        acc0 = (q31_t) ((((q63_t) acc0 << 32) + ((q63_t) x0 * c0)) >> 32); 
+ 
+        /* acc1 +=  x[1] * y[srcBLen - 1] */ 
+        acc1 = (q31_t) ((((q63_t) acc1 << 32) + ((q63_t) x1 * c0)) >> 32); 
+ 
+        /* acc2 +=  x[2] * y[srcBLen - 1] */ 
+        acc2 = (q31_t) ((((q63_t) acc2 << 32) + ((q63_t) x2 * c0)) >> 32); 
+ 
+        /* acc3 +=  x[3] * y[srcBLen - 1] */ 
+        acc3 = (q31_t) ((((q63_t) acc3 << 32) + ((q63_t) x3 * c0)) >> 32); 
+ 
+        /* Read y[srcBLen - 2] sample */ 
+        c0 = *(py--); 
+ 
+        /* Read x[4] sample */ 
+        x0 = *(px++); 
+ 
+        /* Perform the multiply-accumulate */ 
+        /* acc0 +=  x[1] * y[srcBLen - 2] */ 
+        acc0 = (q31_t) ((((q63_t) acc0 << 32) + ((q63_t) x1 * c0)) >> 32); 
+        /* acc1 +=  x[2] * y[srcBLen - 2] */ 
+        acc1 = (q31_t) ((((q63_t) acc1 << 32) + ((q63_t) x2 * c0)) >> 32); 
+        /* acc2 +=  x[3] * y[srcBLen - 2] */ 
+        acc2 = (q31_t) ((((q63_t) acc2 << 32) + ((q63_t) x3 * c0)) >> 32); 
+        /* acc3 +=  x[4] * y[srcBLen - 2] */ 
+        acc3 = (q31_t) ((((q63_t) acc3 << 32) + ((q63_t) x0 * c0)) >> 32); 
+ 
+        /* Read y[srcBLen - 3] sample */ 
+        c0 = *(py--); 
+ 
+        /* Read x[5] sample */ 
+        x1 = *(px++); 
+ 
+        /* Perform the multiply-accumulates */ 
+        /* acc0 +=  x[2] * y[srcBLen - 3] */ 
+        acc0 = (q31_t) ((((q63_t) acc0 << 32) + ((q63_t) x2 * c0)) >> 32); 
+        /* acc1 +=  x[3] * y[srcBLen - 2] */ 
+        acc1 = (q31_t) ((((q63_t) acc1 << 32) + ((q63_t) x3 * c0)) >> 32); 
+        /* acc2 +=  x[4] * y[srcBLen - 2] */ 
+        acc2 = (q31_t) ((((q63_t) acc2 << 32) + ((q63_t) x0 * c0)) >> 32); 
+        /* acc3 +=  x[5] * y[srcBLen - 2] */ 
+        acc3 = (q31_t) ((((q63_t) acc3 << 32) + ((q63_t) x1 * c0)) >> 32); 
+ 
+        /* Read y[srcBLen - 4] sample */ 
+        c0 = *(py--); 
+ 
+        /* Read x[6] sample */ 
+        x2 = *(px++); 
+ 
+        /* Perform the multiply-accumulates */ 
+        /* acc0 +=  x[3] * y[srcBLen - 4] */ 
+        acc0 = (q31_t) ((((q63_t) acc0 << 32) + ((q63_t) x3 * c0)) >> 32); 
+        /* acc1 +=  x[4] * y[srcBLen - 4] */ 
+        acc1 = (q31_t) ((((q63_t) acc1 << 32) + ((q63_t) x0 * c0)) >> 32); 
+        /* acc2 +=  x[5] * y[srcBLen - 4] */ 
+        acc2 = (q31_t) ((((q63_t) acc2 << 32) + ((q63_t) x1 * c0)) >> 32); 
+        /* acc3 +=  x[6] * y[srcBLen - 4] */ 
+        acc3 = (q31_t) ((((q63_t) acc3 << 32) + ((q63_t) x2 * c0)) >> 32); 
+ 
+ 
+      } while(--k); 
+ 
+      /* If the srcBLen is not a multiple of 4, compute any remaining MACs here.  
+       ** No loop unrolling is used. */ 
+      k = srcBLen % 0x4u; 
+ 
+      while(k > 0u) 
+      { 
+        /* Read y[srcBLen - 5] sample */ 
+        c0 = *(py--); 
+ 
+        /* Read x[7] sample */ 
+        x3 = *(px++); 
+ 
+        /* Perform the multiply-accumulates */ 
+        /* acc0 +=  x[4] * y[srcBLen - 5] */ 
+        acc0 = (q31_t) ((((q63_t) acc0 << 32) + ((q63_t) x0 * c0)) >> 32); 
+        /* acc1 +=  x[5] * y[srcBLen - 5] */ 
+        acc1 = (q31_t) ((((q63_t) acc1 << 32) + ((q63_t) x1 * c0)) >> 32); 
+        /* acc2 +=  x[6] * y[srcBLen - 5] */ 
+        acc2 = (q31_t) ((((q63_t) acc2 << 32) + ((q63_t) x2 * c0)) >> 32); 
+        /* acc3 +=  x[7] * y[srcBLen - 5] */ 
+        acc3 = (q31_t) ((((q63_t) acc3 << 32) + ((q63_t) x3 * c0)) >> 32); 
+ 
+        /* Reuse the present samples for the next MAC */ 
+        x0 = x1; 
+        x1 = x2; 
+        x2 = x3; 
+ 
+        /* Decrement the loop counter */ 
+        k--; 
+      } 
+ 
+      /* Store the results in the accumulators in the destination buffer. */ 
+      *pOut++ = (q31_t) (acc0 << 1); 
+      *pOut++ = (q31_t) (acc1 << 1); 
+      *pOut++ = (q31_t) (acc2 << 1); 
+      *pOut++ = (q31_t) (acc3 << 1); 
+ 
+      /* Update the inputA and inputB pointers for next MAC calculation */ 
+      px = pIn1 + (count * 4u); 
+      py = pSrc2; 
+ 
+      /* Increment the pointer pIn1 index, count by 1 */ 
+      count++; 
+ 
+      /* Decrement the loop counter */ 
+      blkCnt--; 
+    } 
+ 
+    /* If the blockSize2 is not a multiple of 4, compute any remaining output samples here.  
+     ** No loop unrolling is used. */ 
+    blkCnt = blockSize2 % 0x4u; 
+ 
+    while(blkCnt > 0u) 
+    { 
+      /* Accumulator is made zero for every iteration */ 
+      sum = 0; 
+ 
+      /* Apply loop unrolling and compute 4 MACs simultaneously. */ 
+      k = srcBLen >> 2u; 
+ 
+      /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.  
+       ** a second loop below computes MACs for the remaining 1 to 3 samples. */ 
+      while(k > 0u) 
+      { 
+        /* Perform the multiply-accumulates */ 
+        sum = (q31_t) ((((q63_t) sum << 32) +  
+			            ((q63_t) * px++ * (*py--))) >> 32); 
+        sum = (q31_t) ((((q63_t) sum << 32) +  
+			            ((q63_t) * px++ * (*py--))) >> 32); 
+        sum = (q31_t) ((((q63_t) sum << 32) +  
+			            ((q63_t) * px++ * (*py--))) >> 32); 
+        sum = (q31_t) ((((q63_t) sum << 32) +  
+			            ((q63_t) * px++ * (*py--))) >> 32); 
+ 
+        /* Decrement the loop counter */ 
+        k--; 
+      } 
+ 
+      /* If the srcBLen is not a multiple of 4, compute any remaining MACs here.  
+       ** No loop unrolling is used. */ 
+      k = srcBLen % 0x4u; 
+ 
+      while(k > 0u) 
+      { 
+        /* Perform the multiply-accumulate */ 
+        sum = (q31_t) ((((q63_t) sum << 32) +  
+			            ((q63_t) * px++ * (*py--))) >> 32); 
+ 
+        /* Decrement the loop counter */ 
+        k--; 
+      } 
+ 
+      /* Store the result in the accumulator in the destination buffer. */ 
+      *pOut++ = sum << 1; 
+ 
+      /* Update the inputA and inputB pointers for next MAC calculation */ 
+      px = pIn1 + count; 
+      py = pSrc2; 
+ 
+      /* Increment the MAC count */ 
+      count++; 
+ 
+      /* Decrement the loop counter */ 
+      blkCnt--; 
+    } 
+  } 
+  else 
+  { 
+    /* If the srcBLen is not a multiple of 4,  
+     * the blockSize2 loop cannot be unrolled by 4 */ 
+    blkCnt = blockSize2; 
+ 
+    while(blkCnt > 0u) 
+    { 
+      /* Accumulator is made zero for every iteration */ 
+      sum = 0; 
+ 
+      /* srcBLen number of MACS should be performed */ 
+      k = srcBLen; 
+ 
+      while(k > 0u) 
+      { 
+        /* Perform the multiply-accumulate */ 
+        sum = (q31_t) ((((q63_t) sum << 32) +  
+			            ((q63_t) * px++ * (*py--))) >> 32); 
+ 
+        /* Decrement the loop counter */ 
+        k--; 
+      } 
+ 
+      /* Store the result in the accumulator in the destination buffer. */ 
+      *pOut++ = sum << 1; 
+ 
+      /* Update the inputA and inputB pointers for next MAC calculation */ 
+      px = pIn1 + count; 
+      py = pSrc2; 
+ 
+      /* Increment the MAC count */ 
+      count++; 
+ 
+      /* Decrement the loop counter */ 
+      blkCnt--; 
+    } 
+  } 
+ 
+ 
+  /* --------------------------  
+   * Initializations of stage3  
+   * -------------------------*/ 
+ 
+  /* sum += x[srcALen-srcBLen+1] * y[srcBLen-1] + x[srcALen-srcBLen+2] * y[srcBLen-2] +...+ x[srcALen-1] * y[1]  
+   * sum += x[srcALen-srcBLen+2] * y[srcBLen-1] + x[srcALen-srcBLen+3] * y[srcBLen-2] +...+ x[srcALen-1] * y[2]  
+   * ....  
+   * sum +=  x[srcALen-2] * y[srcBLen-1] + x[srcALen-1] * y[srcBLen-2]  
+   * sum +=  x[srcALen-1] * y[srcBLen-1]  
+   */ 
+ 
+  /* In this stage the MAC operations are decreased by 1 for every iteration.  
+     The blockSize3 variable holds the number of MAC operations performed */ 
+ 
+  /* Working pointer of inputA */ 
+  pSrc1 = (pIn1 + srcALen) - (srcBLen - 1u); 
+  px = pSrc1; 
+ 
+  /* Working pointer of inputB */ 
+  pSrc2 = pIn2 + (srcBLen - 1u); 
+  py = pSrc2; 
+ 
+  /* -------------------  
+   * Stage3 process  
+   * ------------------*/ 
+ 
+  while(blockSize3 > 0u) 
+  { 
+    /* Accumulator is made zero for every iteration */ 
+    sum = 0; 
+ 
+    /* Apply loop unrolling and compute 4 MACs simultaneously. */ 
+    k = blockSize3 >> 2u; 
+ 
+    /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.  
+     ** a second loop below computes MACs for the remaining 1 to 3 samples. */ 
+    while(k > 0u) 
+    { 
+      /* sum += x[srcALen - srcBLen + 1] * y[srcBLen - 1] */ 
+      sum = (q31_t) ((((q63_t) sum << 32) +  
+		              ((q63_t) * px++ * (*py--))) >> 32); 
+ 
+      /* sum += x[srcALen - srcBLen + 2] * y[srcBLen - 2] */ 
+      sum = (q31_t) ((((q63_t) sum << 32) +  
+		              ((q63_t) * px++ * (*py--))) >> 32); 
+ 
+      /* sum += x[srcALen - srcBLen + 3] * y[srcBLen - 3] */ 
+      sum = (q31_t) ((((q63_t) sum << 32) + 
+		              ((q63_t) * px++ * (*py--))) >> 32); 
+ 
+      /* sum += x[srcALen - srcBLen + 4] * y[srcBLen - 4] */ 
+      sum = (q31_t) ((((q63_t) sum << 32) +  
+		              ((q63_t) * px++ * (*py--))) >> 32); 
+ 
+      /* Decrement the loop counter */ 
+      k--; 
+    } 
+ 
+    /* If the blockSize3 is not a multiple of 4, compute any remaining MACs here.  
+     ** No loop unrolling is used. */ 
+    k = blockSize3 % 0x4u; 
+ 
+    while(k > 0u) 
+    { 
+      /* Perform the multiply-accumulate */ 
+      sum = (q31_t) ((((q63_t) sum << 32) +  
+		              ((q63_t) * px++ * (*py--))) >> 32); 
+ 
+      /* Decrement the loop counter */ 
+      k--; 
+    } 
+ 
+    /* Store the result in the accumulator in the destination buffer. */ 
+    *pOut++ = sum << 1; 
+ 
+    /* Update the inputA and inputB pointers for next MAC calculation */ 
+    px = ++pSrc1; 
+    py = pSrc2; 
+ 
+    /* Decrement the loop counter */ 
+    blockSize3--; 
+  } 
+ 
+} 
+ 
+/**  
+ * @} end of Conv group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FilteringFunctions/arm_conv_partial_f32.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,588 @@
+/* ----------------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_conv_partial_f32.c  
+*  
+* Description:	Partial Convolution of floating-point sequences  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+*  
+* -------------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupFilters  
+ */ 
+ 
+/**  
+ * @defgroup PartialConv Partial Convolution  
+ *  
+ * Partial Convolution is equivalent to Convolution except that a subset of the output samples is generated.  
+ * Each function has two additional arguments.  
+ * <code>firstIndex</code> specifies the starting index of the subset of output samples.  
+ * <code>numPoints</code> is the number of output samples to compute.  
+ * The function computes the output in the range  
+ * <code>[firstIndex, ..., firstIndex+numPoints-1]</code>.  
+ * The output array <code>pDst</code> contains <code>numPoints</code> values.  
+ *  
+ * The allowable range of output indices is [0 srcALen+srcBLen-2].  
+ * If the requested subset does not fall in this range then the functions return ARM_MATH_ARGUMENT_ERROR.  
+ * Otherwise the functions return ARM_MATH_SUCCESS.  
+ * \note Refer to arm_conv_f32() for details on fixed point behavior.
+ */ 
+ 
+/**  
+ * @addtogroup PartialConv  
+ * @{  
+ */ 
+ 
+/**
+ * @brief Partial convolution of floating-point sequences.
+ * @param[in]       *pSrcA points to the first input sequence.
+ * @param[in]       srcALen length of the first input sequence.
+ * @param[in]       *pSrcB points to the second input sequence.
+ * @param[in]       srcBLen length of the second input sequence.
+ * @param[out]      *pDst points to the location where the output result is written.
+ *                  Convolution sample n is stored at pDst[n], i.e. writing starts at pDst[firstIndex].
+ * @param[in]       firstIndex is the first output sample to start with.
+ * @param[in]       numPoints is the number of output points to be computed.
+ * @return  Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR
+ *          if the requested subset is not in the range [0 srcALen+srcBLen-2] or an input sequence is empty.
+ */
+
+arm_status arm_conv_partial_f32(
+  float32_t * pSrcA,
+  uint32_t srcALen,
+  float32_t * pSrcB,
+  uint32_t srcBLen,
+  float32_t * pDst,
+  uint32_t firstIndex,
+  uint32_t numPoints)
+{
+  float32_t *pIn1 = pSrcA;                       /* inputA pointer (longer sequence after the swap) */
+  float32_t *pIn2 = pSrcB;                       /* inputB pointer (shorter sequence after the swap) */
+  float32_t *pOut = pDst;                        /* output pointer */
+  float32_t *px;                                 /* Intermediate inputA pointer */
+  float32_t *py;                                 /* Intermediate inputB pointer */
+  float32_t *pSrc1, *pSrc2;                      /* Intermediate pointers */
+  float32_t sum, acc0, acc1, acc2, acc3;         /* Accumulators */
+  float32_t x0, x1, x2, x3, c0;                  /* Temporary variables to hold state and coefficient values */
+  uint32_t j, k, count = 0u, blkCnt, check;
+  uint32_t outLen;                               /* length of the full convolution */
+  int32_t blockSize1, blockSize2, blockSize3;    /* number of outputs produced by each stage */
+  arm_status status;                             /* status of Partial convolution */
+
+
+  /* Length of the complete convolution result (valid when both lengths are non-zero) */
+  outLen = srcALen + (srcBLen - 1u);
+
+  /* Check for range of output samples to be calculated.
+   * The comparison is arranged so that firstIndex + numPoints cannot wrap around,
+   * and empty inputs are rejected because (srcBLen - 1u) would wrap for them. */
+  if((srcALen == 0u) || (srcBLen == 0u) ||
+     (numPoints > outLen) || (firstIndex > (outLen - numPoints)))
+  {
+    /* Set status as ARM_MATH_ARGUMENT_ERROR */
+    status = ARM_MATH_ARGUMENT_ERROR;
+  }
+  else
+  {
+
+    /* The algorithm implementation is based on the lengths of the inputs. */
+    /* srcB is always made to slide across srcA. */
+    /* So srcBLen is always considered as shorter or equal to srcALen */
+    if(srcALen < srcBLen)
+    {
+      /* Exchange the roles of the two sequences */
+      pIn1 = pSrcB;
+      pIn2 = pSrcA;
+
+      j = srcBLen;
+      srcBLen = srcALen;
+      srcALen = j;
+    }
+
+    /* conv(x,y) at n = x[n] * y[0] + x[n-1] * y[1] + x[n-2] * y[2] + ...+ x[n-N+1] * y[N -1] */
+    /* The function is internally divided into three stages according to the number of
+     * multiplications between inputA samples and inputB samples:
+     *   stage1: outputs [0, srcBLen-2],               MACs grow by one per output;
+     *   stage2: outputs [srcBLen-1, srcALen-1],       srcBLen MACs per output;
+     *   stage3: outputs [srcALen, srcALen+srcBLen-2], MACs shrink by one per output.
+     * blockSizeN is the number of requested outputs that fall into stage N. */
+    check = firstIndex + numPoints;
+
+    if(firstIndex > srcALen)
+    {
+      /* The whole request lies inside stage3 */
+      blockSize3 = (int32_t) numPoints;
+    }
+    else
+    {
+      blockSize3 = (int32_t) check - (int32_t) srcALen;
+      blockSize3 = (blockSize3 > 0) ? blockSize3 : 0;
+    }
+
+    blockSize1 = ((int32_t) srcBLen - 1) - (int32_t) firstIndex;
+    blockSize1 = (blockSize1 > 0) ? ((check > (srcBLen - 1u)) ? blockSize1 :
+                                    (int32_t) numPoints) : 0;
+
+    blockSize2 = ((int32_t) check - blockSize3) -
+                 (blockSize1 + (int32_t) firstIndex);
+    blockSize2 = (blockSize2 > 0) ? blockSize2 : 0;
+
+    /* Set the output pointer to point to the firstIndex
+     * of the output sample to be calculated. */
+    pOut = pDst + firstIndex;
+
+    /* --------------------------
+     * Initializations of stage1
+     * -------------------------*/
+
+    /* For output index n (starting at n = firstIndex):
+     * sum = x[0] * y[n] + x[1] * y[n-1] + ... + x[n] * y[0]
+     */
+
+    /* In this stage the MAC operations are increased by 1 for every iteration.
+       The count variable holds the number of MAC operations performed.
+       Since the partial convolution starts from firstIndex
+       the number of MACs to be performed is firstIndex + 1 */
+    count = 1u + firstIndex;
+
+    /* Working pointer of inputA */
+    px = pIn1;
+
+    /* Working pointer of inputB.
+     * Only offset it when stage1 actually runs, so that no pointer
+     * beyond the end of inputB is ever formed. */
+    pSrc1 = (blockSize1 > 0) ? (pIn2 + firstIndex) : pIn2;
+    py = pSrc1;
+
+    /* ------------------------
+     * Stage1 process
+     * ----------------------*/
+
+    while(blockSize1 > 0)
+    {
+      /* Accumulator is made zero for every iteration */
+      sum = 0.0f;
+
+      /* Apply loop unrolling and compute 4 MACs simultaneously. */
+      k = count >> 2u;
+
+      /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.
+       ** a second loop below computes MACs for the remaining 1 to 3 samples. */
+      while(k > 0u)
+      {
+        sum += *px++ * *py--;
+        sum += *px++ * *py--;
+        sum += *px++ * *py--;
+        sum += *px++ * *py--;
+
+        /* Decrement the loop counter */
+        k--;
+      }
+
+      /* If the count is not a multiple of 4, compute any remaining MACs here.
+       ** No loop unrolling is used. */
+      k = count % 0x4u;
+
+      while(k > 0u)
+      {
+        /* Perform the multiply-accumulate */
+        sum += *px++ * *py--;
+
+        /* Decrement the loop counter */
+        k--;
+      }
+
+      /* Store the result in the accumulator in the destination buffer. */
+      *pOut++ = sum;
+
+      /* Update the inputA and inputB pointers for next MAC calculation */
+      py = ++pSrc1;
+      px = pIn1;
+
+      /* Increment the MAC count */
+      count++;
+
+      /* Decrement the loop counter */
+      blockSize1--;
+    }
+
+    /* --------------------------
+     * Initializations of stage2
+     * ------------------------*/
+
+    /* For output index n:
+     * sum = x[n-srcBLen+1] * y[srcBLen-1] + x[n-srcBLen+2] * y[srcBLen-2] +...+ x[n] * y[0]
+     */
+
+    /* Working pointer of inputA.
+     * The first stage2 output has index max(firstIndex, srcBLen-1), so the
+     * window into inputA must be advanced when the request starts later. */
+    if(firstIndex > (srcBLen - 1u))
+    {
+      pSrc1 = pIn1 + (firstIndex - (srcBLen - 1u));
+    }
+    else
+    {
+      pSrc1 = pIn1;
+    }
+    px = pSrc1;
+
+    /* Working pointer of inputB */
+    pSrc2 = pIn2 + (srcBLen - 1u);
+    py = pSrc2;
+
+    /* count is the number of stage2 outputs produced so far,
+     * i.e. the offset from pSrc1 of the next inputA window */
+    count = 0u;
+
+    /* -------------------
+     * Stage2 process
+     * ------------------*/
+
+    /* Stage2 depends on srcBLen as in this stage srcBLen number of MACS are performed.
+     * So, to loop unroll over blockSize2,
+     * srcBLen should be greater than or equal to 4 */
+    if(srcBLen >= 4u)
+    {
+      /* Loop unroll over blockSize2, by 4 */
+      blkCnt = ((uint32_t) blockSize2 >> 2u);
+
+      while(blkCnt > 0u)
+      {
+        /* Set all accumulators to zero */
+        acc0 = 0.0f;
+        acc1 = 0.0f;
+        acc2 = 0.0f;
+        acc3 = 0.0f;
+
+        /* read x[0], x[1], x[2] samples (indices relative to the current window) */
+        x0 = *(px++);
+        x1 = *(px++);
+        x2 = *(px++);
+
+        /* Apply loop unrolling and compute 4 MACs simultaneously. */
+        k = srcBLen >> 2u;
+
+        /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.
+         ** a second loop below computes MACs for the remaining 1 to 3 samples.
+         ** k is at least 1 here because srcBLen >= 4. */
+        do
+        {
+          /* Read y[srcBLen - 1] sample */
+          c0 = *(py--);
+
+          /* Read x[3] sample */
+          x3 = *(px++);
+
+          /* accN +=  x[N] * y[srcBLen - 1] */
+          acc0 += x0 * c0;
+          acc1 += x1 * c0;
+          acc2 += x2 * c0;
+          acc3 += x3 * c0;
+
+          /* Read y[srcBLen - 2] sample */
+          c0 = *(py--);
+
+          /* Read x[4] sample */
+          x0 = *(px++);
+
+          /* accN +=  x[N + 1] * y[srcBLen - 2] */
+          acc0 += x1 * c0;
+          acc1 += x2 * c0;
+          acc2 += x3 * c0;
+          acc3 += x0 * c0;
+
+          /* Read y[srcBLen - 3] sample */
+          c0 = *(py--);
+
+          /* Read x[5] sample */
+          x1 = *(px++);
+
+          /* accN +=  x[N + 2] * y[srcBLen - 3] */
+          acc0 += x2 * c0;
+          acc1 += x3 * c0;
+          acc2 += x0 * c0;
+          acc3 += x1 * c0;
+
+          /* Read y[srcBLen - 4] sample */
+          c0 = *(py--);
+
+          /* Read x[6] sample */
+          x2 = *(px++);
+
+          /* accN +=  x[N + 3] * y[srcBLen - 4] */
+          acc0 += x3 * c0;
+          acc1 += x0 * c0;
+          acc2 += x1 * c0;
+          acc3 += x2 * c0;
+
+        } while(--k);
+
+        /* If the srcBLen is not a multiple of 4, compute any remaining MACs here.
+         ** No loop unrolling is used. */
+        k = srcBLen % 0x4u;
+
+        while(k > 0u)
+        {
+          /* Read the next inputB sample */
+          c0 = *(py--);
+
+          /* Read the next inputA sample */
+          x3 = *(px++);
+
+          /* Perform the multiply-accumulates */
+          acc0 += x0 * c0;
+          acc1 += x1 * c0;
+          acc2 += x2 * c0;
+          acc3 += x3 * c0;
+
+          /* Reuse the present samples for the next MAC */
+          x0 = x1;
+          x1 = x2;
+          x2 = x3;
+
+          /* Decrement the loop counter */
+          k--;
+        }
+
+        /* Store the results in the accumulators in the destination buffer. */
+        *pOut++ = acc0;
+        *pOut++ = acc1;
+        *pOut++ = acc2;
+        *pOut++ = acc3;
+
+        /* Four more outputs have been produced */
+        count += 4u;
+
+        /* Update the inputA and inputB pointers for next MAC calculation */
+        px = pSrc1 + count;
+        py = pSrc2;
+
+        /* Decrement the loop counter */
+        blkCnt--;
+      }
+
+      /* If the blockSize2 is not a multiple of 4, compute any remaining output samples here.
+       ** No loop unrolling is used. */
+      blkCnt = (uint32_t) blockSize2 % 0x4u;
+
+      while(blkCnt > 0u)
+      {
+        /* Accumulator is made zero for every iteration */
+        sum = 0.0f;
+
+        /* Apply loop unrolling and compute 4 MACs simultaneously. */
+        k = srcBLen >> 2u;
+
+        /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.
+         ** a second loop below computes MACs for the remaining 1 to 3 samples. */
+        while(k > 0u)
+        {
+          /* Perform the multiply-accumulates */
+          sum += *px++ * *py--;
+          sum += *px++ * *py--;
+          sum += *px++ * *py--;
+          sum += *px++ * *py--;
+
+          /* Decrement the loop counter */
+          k--;
+        }
+
+        /* If the srcBLen is not a multiple of 4, compute any remaining MACs here.
+         ** No loop unrolling is used. */
+        k = srcBLen % 0x4u;
+
+        while(k > 0u)
+        {
+          /* Perform the multiply-accumulate */
+          sum += *px++ * *py--;
+
+          /* Decrement the loop counter */
+          k--;
+        }
+
+        /* Store the result in the accumulator in the destination buffer. */
+        *pOut++ = sum;
+
+        /* One more output has been produced */
+        count++;
+
+        /* Update the inputA and inputB pointers for next MAC calculation */
+        px = pSrc1 + count;
+        py = pSrc2;
+
+        /* Decrement the loop counter */
+        blkCnt--;
+      }
+    }
+    else
+    {
+      /* If the srcBLen is less than 4,
+       * the blockSize2 loop cannot be unrolled by 4 */
+      blkCnt = (uint32_t) blockSize2;
+
+      while(blkCnt > 0u)
+      {
+        /* Accumulator is made zero for every iteration */
+        sum = 0.0f;
+
+        /* srcBLen number of MACS should be performed */
+        k = srcBLen;
+
+        while(k > 0u)
+        {
+          /* Perform the multiply-accumulate */
+          sum += *px++ * *py--;
+
+          /* Decrement the loop counter */
+          k--;
+        }
+
+        /* Store the result in the accumulator in the destination buffer. */
+        *pOut++ = sum;
+
+        /* One more output has been produced */
+        count++;
+
+        /* Update the inputA and inputB pointers for next MAC calculation */
+        px = pSrc1 + count;
+        py = pSrc2;
+
+        /* Decrement the loop counter */
+        blkCnt--;
+      }
+    }
+
+
+    /* --------------------------
+     * Initializations of stage3
+     * -------------------------*/
+
+    /* For output index n:
+     * sum = x[n-srcBLen+1] * y[srcBLen-1] + x[n-srcBLen+2] * y[srcBLen-2] +...+ x[srcALen-1] * y[n-srcALen+1]
+     * ....
+     * sum = x[srcALen-2] * y[srcBLen-1] + x[srcALen-1] * y[srcBLen-2]
+     * sum = x[srcALen-1] * y[srcBLen-1]
+     */
+
+    /* In this stage the MAC operations are decreased by 1 for every iteration.
+       The count variable holds the number of MAC operations performed.
+       The first stage3 output has index max(firstIndex, srcALen); it needs
+       (srcALen + srcBLen - 1) - index MACs and its inputA window starts at
+       x[index - (srcBLen - 1)]. */
+    if(firstIndex > srcALen)
+    {
+      count = (srcBLen - 1u) - (firstIndex - srcALen);
+
+      /* Working pointer of inputA */
+      pSrc1 = pIn1 + (firstIndex - (srcBLen - 1u));
+    }
+    else
+    {
+      count = srcBLen - 1u;
+
+      /* Working pointer of inputA */
+      pSrc1 = (pIn1 + srcALen) - (srcBLen - 1u);
+    }
+    px = pSrc1;
+
+    /* Working pointer of inputB */
+    pSrc2 = pIn2 + (srcBLen - 1u);
+    py = pSrc2;
+
+    /* -------------------
+     * Stage3 process
+     * ------------------*/
+
+    while(blockSize3 > 0)
+    {
+      /* Accumulator is made zero for every iteration */
+      sum = 0.0f;
+
+      /* Apply loop unrolling and compute 4 MACs simultaneously. */
+      k = count >> 2u;
+
+      /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.
+       ** a second loop below computes MACs for the remaining 1 to 3 samples. */
+      while(k > 0u)
+      {
+        sum += *px++ * *py--;
+        sum += *px++ * *py--;
+        sum += *px++ * *py--;
+        sum += *px++ * *py--;
+
+        /* Decrement the loop counter */
+        k--;
+      }
+
+      /* If the count is not a multiple of 4, compute any remaining MACs here.
+       ** No loop unrolling is used. */
+      k = count % 0x4u;
+
+      while(k > 0u)
+      {
+        /* Perform the multiply-accumulate */
+        sum += *px++ * *py--;
+
+        /* Decrement the loop counter */
+        k--;
+      }
+
+      /* Store the result in the accumulator in the destination buffer. */
+      *pOut++ = sum;
+
+      /* Update the inputA and inputB pointers for next MAC calculation */
+      px = ++pSrc1;
+      py = pSrc2;
+
+      /* Decrement the MAC count */
+      count--;
+
+      /* Decrement the loop counter */
+      blockSize3--;
+
+    }
+
+    /* set status as ARM_MATH_SUCCESS */
+    status = ARM_MATH_SUCCESS;
+  }
+
+  /* Return to application */
+  return (status);
+
+}
+ 
+/**  
+ * @} end of PartialConv group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FilteringFunctions/arm_conv_partial_fast_q15.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,679 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_conv_partial_fast_q15.c  
+*  
+* Description:	Fast Q15 Partial convolution.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupFilters  
+ */ 
+ 
+/**  
+ * @addtogroup PartialConv  
+ * @{  
+ */ 
+ 
+/**
+ * @brief Partial convolution of Q15 sequences (fast version).
+ * @param[in]       *pSrcA points to the first input sequence.
+ * @param[in]       srcALen length of the first input sequence.
+ * @param[in]       *pSrcB points to the second input sequence.
+ * @param[in]       srcBLen length of the second input sequence.
+ * @param[out]      *pDst points to the location where the output result is written; the first computed sample is stored at pDst[firstIndex].
+ * @param[in]       firstIndex is the first output sample to start with.
+ * @param[in]       numPoints is the number of output points to be computed.
+ * @return Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2].
+ *
+ * \par
+ * Products are accumulated in a q31_t and every result is narrowed to Q15 by a
+ * 15-bit right shift; this function itself applies no saturation.
+ *
+ * \par
+ * Pairs of samples are fetched and stored through 32-bit accesses that are not
+ * always word aligned, and the halfword pairing matches a little-endian layout.
+ *
+ * See <code>arm_conv_partial_q15()</code> for a slower implementation of this function which uses a 64-bit accumulator to avoid wrap around distortion.
+ */
+
+
+arm_status arm_conv_partial_fast_q15(
+  q15_t * pSrcA,
+  uint32_t srcALen,
+  q15_t * pSrcB,
+  uint32_t srcBLen,
+  q15_t * pDst,
+  uint32_t firstIndex,
+  uint32_t numPoints)
+{
+  q15_t *pIn1;                                   /* longer input sequence, "x"        */
+  q15_t *pIn2;                                   /* shorter input sequence, "y"       */
+  q15_t *pOut = pDst;                            /* output pointer                    */
+  q31_t sum, acc0, acc1, acc2, acc3;             /* accumulators                      */
+  q15_t *px;                                     /* working pointer into x            */
+  q15_t *py;                                     /* working pointer into y            */
+  q15_t *pSrc1, *pSrc2;                          /* per-stage base pointers (x, y)    */
+  q31_t x0, x1, x2, x3, c0;                      /* packed sample / coefficient pairs */
+  uint32_t j, k, count, check, blkCnt;
+  uint32_t skip;                                 /* outputs of a stage lying before firstIndex */
+  int32_t blockSize1, blockSize2, blockSize3;    /* outputs produced by stage 1, 2, 3 */
+  arm_status status;                             /* status of Partial convolution     */
+  q31_t *pb;                                     /* 32 bit pointer for inputB buffer  */
+
+  /* Check for range of output samples to be calculated */
+  if((firstIndex + numPoints) > ((srcALen + (srcBLen - 1u))))
+  {
+    /* Set status as ARM_MATH_ARGUMENT_ERROR */
+    status = ARM_MATH_ARGUMENT_ERROR;
+  }
+  else
+  {
+
+    /* The shorter sequence always slides across the longer one, so the
+     * operands are swapped when needed; from here on srcBLen <= srcALen. */
+    if(srcALen >= srcBLen)
+    {
+      /* Initialization of inputA pointer */
+      pIn1 = pSrcA;
+
+      /* Initialization of inputB pointer */
+      pIn2 = pSrcB;
+    }
+    else
+    {
+      /* Initialization of inputA pointer */
+      pIn1 = pSrcB;
+
+      /* Initialization of inputB pointer */
+      pIn2 = pSrcA;
+
+      /* Swap the lengths together with the pointers */
+      j = srcBLen;
+      srcBLen = srcALen;
+      srcALen = j;
+    }
+
+    /* Split the requested window [firstIndex, check) over the three stages:
+     *   stage 1 covers output indices [0, srcBLen - 2]
+     *   stage 2 covers output indices [srcBLen - 1, srcALen - 1]
+     *   stage 3 covers output indices [srcALen, srcALen + srcBLen - 2] */
+    check = firstIndex + numPoints;
+    blockSize3 = ((int32_t) check - (int32_t) srcALen);
+    blockSize3 = (blockSize3 > 0) ? blockSize3 : 0;
+
+    /* When the window starts inside stage 3 (firstIndex > srcALen) the
+     * difference above also counts the samples that are skipped, so it is
+     * limited to the number of samples actually requested. */
+    if(blockSize3 > (int32_t) numPoints)
+    {
+      blockSize3 = (int32_t) numPoints;
+    }
+
+    blockSize1 = (((int32_t) srcBLen - 1) - (int32_t) firstIndex);
+    blockSize1 = (blockSize1 > 0) ? ((check > (srcBLen - 1u)) ? blockSize1 :
+                                                               (int32_t) numPoints) : 0;
+    blockSize2 = (int32_t) check - ((blockSize3 + blockSize1) +
+                 (int32_t) firstIndex);
+    blockSize2 = (blockSize2 > 0) ? blockSize2 : 0;
+
+    /* conv(x,y) at n = x[n] * y[0] + x[n-1] * y[1] + x[n-2] * y[2] + ...+ x[n-N+1] * y[N -1] */
+    /* The function is internally divided into three stages according to the
+     * number of multiplications that take place between inputA samples and
+     * inputB samples. In the first stage the number of multiplications grows
+     * by one for every output. In the second stage srcBLen multiplications
+     * are done for every output. In the third stage the number of
+     * multiplications shrinks by one for every output. */
+
+    /* Set the output pointer to point to the firstIndex
+     * of the output sample to be calculated. */
+    pOut = pDst + firstIndex;
+
+    /* --------------------------
+     * Initializations of stage1
+     * -------------------------*/
+
+    /* sum = x[0] * y[0]
+     * sum = x[0] * y[1] + x[1] * y[0]
+     * ....
+     * sum = x[0] * y[srcBLen - 2] + x[1] * y[srcBLen - 3] +...+ x[srcBLen - 2] * y[0]
+     */
+
+    /* In this stage the MAC operations are increased by 1 for every iteration.
+       The count variable holds the number of MAC operations performed.
+       Since the partial convolution starts from firstIndex
+       Number of Macs to be performed is firstIndex + 1 */
+    count = 1u + firstIndex;
+
+    /* Working pointer of inputA */
+    px = pIn1;
+
+    /* Working pointer of inputB: y is walked backwards from y[firstIndex] */
+    pSrc2 = pIn2 + firstIndex;
+    py = pSrc2;
+
+    /* ------------------------
+     * Stage1 process
+     * ----------------------*/
+
+    /* For loop unrolling by 4, this stage is divided into two. */
+    /* First part of this stage computes the outputs needing fewer than 4 MACs */
+    /* Second part of this stage computes the outputs needing 4 or more MACs */
+
+    /* The first part of the stage starts here */
+    while((count < 4u) && (blockSize1 > 0))
+    {
+      /* Accumulator is made zero for every iteration */
+      sum = 0;
+
+      /* Loop over number of MAC operations between
+       * inputA samples and inputB samples */
+      k = count;
+
+      while(k > 0u)
+      {
+        /* Perform the multiply-accumulates */
+        /* NOTE(review): both operands are single sign-extended q15 values, so
+         * the upper-halfword product of the dual MAC looks like it adds 1 when
+         * both samples are negative - confirm this is acceptable here. */
+        sum = __SMLAD(*px++, *py--, sum);
+
+        /* Decrement the loop counter */
+        k--;
+      }
+
+      /* Store the result in the accumulator in the destination buffer. */
+      *pOut++ = (q15_t) (sum >> 15);
+
+      /* Update the inputA and inputB pointers for next MAC calculation */
+      py = ++pSrc2;
+      px = pIn1;
+
+      /* Increment the MAC count */
+      count++;
+
+      /* Decrement the loop counter */
+      blockSize1--;
+    }
+
+    /* The second part of the stage starts here */
+    /* The internal loop, over count, is unrolled by 4 */
+    /* To read the two inputB samples ending at the current one with a single
+     * 32-bit access, py is moved back by one sample */
+    py = py - 1;
+
+    while(blockSize1 > 0)
+    {
+      /* Accumulator is made zero for every iteration */
+      sum = 0;
+
+      /* Apply loop unrolling and compute 4 MACs simultaneously. */
+      k = count >> 2u;
+
+      /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.
+       ** a second loop below computes MACs for the remaining 1 to 3 samples. */
+      while(k > 0u)
+      {
+        /* Perform the multiply-accumulates */
+        /* two x samples (ascending) against two y samples (descending) */
+        sum = __SMLADX(*__SIMD32(px)++, *__SIMD32(py)--, sum);
+        /* the next two x samples against the next two y samples */
+        sum = __SMLADX(*__SIMD32(px)++, *__SIMD32(py)--, sum);
+
+        /* Decrement the loop counter */
+        k--;
+      }
+
+      /* For the next MAC operations, the pointer py is used without SIMD
+       * So, py is incremented by 1 */
+      py = py + 1u;
+
+      /* If the count is not a multiple of 4, compute any remaining MACs here.
+       ** No loop unrolling is used. */
+      k = count % 0x4u;
+
+      while(k > 0u)
+      {
+        /* Perform the multiply-accumulates */
+        sum = __SMLAD(*px++, *py--, sum);
+
+        /* Decrement the loop counter */
+        k--;
+      }
+
+      /* Store the result in the accumulator in the destination buffer. */
+      *pOut++ = (q15_t) (sum >> 15);
+
+      /* Update the inputA and inputB pointers for next MAC calculation */
+      py = ++pSrc2 - 1u;
+      px = pIn1;
+
+      /* Increment the MAC count */
+      count++;
+
+      /* Decrement the loop counter */
+      blockSize1--;
+    }
+
+    /* --------------------------
+     * Initializations of stage2
+     * ------------------------*/
+
+    /* sum = x[0] * y[srcBLen-1] + x[1] * y[srcBLen-2] +...+ x[srcBLen-1] * y[0]
+     * sum = x[1] * y[srcBLen-1] + x[2] * y[srcBLen-2] +...+ x[srcBLen] * y[0]
+     * ....
+     * sum = x[srcALen-srcBLen] * y[srcBLen-1] + x[srcALen-srcBLen+1] * y[srcBLen-2] +...+ x[srcALen-1] * y[0]
+     */
+
+    /* Working pointer of inputA.
+     * Output index n of this stage reads x starting at x[n - (srcBLen - 1)].
+     * When the requested window begins after the first stage-2 output, the
+     * leading outputs are skipped so that the sample stored at
+     * pDst[firstIndex] really is output firstIndex. */
+    skip = (firstIndex > (srcBLen - 1u)) ? (firstIndex - (srcBLen - 1u)) : 0u;
+    pSrc1 = pIn1 + skip;
+    px = pSrc1;
+
+    /* Working pointer of inputB */
+    pSrc2 = pIn2 + (srcBLen - 1u);
+    py = pSrc2;
+
+    /* Initialize inputB pointer of type q31 */
+    pb = (q31_t *) (py - 1u);
+
+    /* count is the number of stage-2 outputs already produced, i.e. the
+     * offset from pSrc1 at which the next output starts reading x */
+    count = 0u;
+
+
+    /* --------------------
+     * Stage2 process
+     * -------------------*/
+
+    /* Stage2 depends on srcBLen as in this stage srcBLen number of MACS are performed.
+     * So, to loop unroll over blockSize2,
+     * srcBLen should be greater than or equal to 4 */
+    if(srcBLen >= 4u)
+    {
+      /* Loop unroll over blockSize2, by 4 */
+      blkCnt = ((uint32_t) blockSize2 >> 2u);
+
+      while(blkCnt > 0u)
+      {
+        /* Set all accumulators to zero */
+        acc0 = 0;
+        acc1 = 0;
+        acc2 = 0;
+        acc3 = 0;
+
+
+        /* read x[0], x[1] samples */
+        x0 = *(q31_t *) (px++);
+        /* read x[1], x[2] samples (px advances by one sample only, so this
+         * 32-bit read overlaps the previous one) */
+        x1 = *(q31_t *) (px++);
+
+
+        /* Apply loop unrolling and compute 4 MACs simultaneously. */
+        k = srcBLen >> 2u;
+
+        /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.
+         ** the tail code below handles the remaining 1 to 3 coefficients. */
+        do
+        {
+          /* Read the last two inputB samples using SIMD:
+           * y[srcBLen - 1] and y[srcBLen - 2] */
+          c0 = *(pb--);
+
+          /* acc0 +=  x[0] * y[srcBLen - 1] + x[1] * y[srcBLen - 2] */
+          acc0 = __SMLADX(x0, c0, acc0);
+
+          /* acc1 +=  x[1] * y[srcBLen - 1] + x[2] * y[srcBLen - 2] */
+          acc1 = __SMLADX(x1, c0, acc1);
+
+          /* Read x[2], x[3] */
+          x2 = *(q31_t *) (px++);
+
+          /* Read x[3], x[4] */
+          x3 = *(q31_t *) (px++);
+
+          /* acc2 +=  x[2] * y[srcBLen - 1] + x[3] * y[srcBLen - 2] */
+          acc2 = __SMLADX(x2, c0, acc2);
+
+          /* acc3 +=  x[3] * y[srcBLen - 1] + x[4] * y[srcBLen - 2] */
+          acc3 = __SMLADX(x3, c0, acc3);
+
+          /* Read y[srcBLen - 3] and y[srcBLen - 4] */
+          c0 = *(pb--);
+
+          /* acc0 +=  x[2] * y[srcBLen - 3] + x[3] * y[srcBLen - 4] */
+          acc0 = __SMLADX(x2, c0, acc0);
+
+          /* acc1 +=  x[3] * y[srcBLen - 3] + x[4] * y[srcBLen - 4] */
+          acc1 = __SMLADX(x3, c0, acc1);
+
+          /* Read x[4], x[5] */
+          x0 = *(q31_t *) (px++);
+
+          /* Read x[5], x[6] */
+          x1 = *(q31_t *) (px++);
+
+          /* acc2 +=  x[4] * y[srcBLen - 3] + x[5] * y[srcBLen - 4] */
+          acc2 = __SMLADX(x0, c0, acc2);
+
+          /* acc3 +=  x[5] * y[srcBLen - 3] + x[6] * y[srcBLen - 4] */
+          acc3 = __SMLADX(x1, c0, acc3);
+
+        } while(--k);
+
+        /* For the next MAC operations, SIMD is not used
+         * So, the 16 bit pointer of inputB, py, is updated */
+        py = (q15_t *) pb;
+        py = py + 1;
+
+        /* If the srcBLen is not a multiple of 4, compute any remaining MACs here.
+         ** No loop unrolling is used. */
+        k = srcBLen % 0x4u;
+
+        if(k == 1u)
+        {
+          /* Read the last coefficient y[0] into the low halfword only.
+           * The upper halfword must be zero: a sign-extended negative
+           * coefficient would otherwise multiply the neighbouring x sample
+           * by -1 and add it to the sum. */
+          c0 = (q31_t) (*py) & 0x0000FFFF;
+
+          /* Read the next pair of x samples */
+          x3 = *(q31_t *) px++;
+
+          /* Perform the multiply-accumulates */
+          acc0 = __SMLAD(x0, c0, acc0);
+          acc1 = __SMLAD(x1, c0, acc1);
+          acc2 = __SMLADX(x1, c0, acc2);
+          acc3 = __SMLADX(x3, c0, acc3);
+        }
+
+        if(k == 2u)
+        {
+          /* Read the last two coefficients y[0], y[1] */
+          c0 = *(pb);
+
+          /* Read the next pair of x samples */
+          x3 = *(q31_t *) px++;
+
+          /* Read the pair of x samples one position further on */
+          x2 = *(q31_t *) px++;
+
+          /* Perform the multiply-accumulates */
+          acc0 = __SMLADX(x0, c0, acc0);
+          acc1 = __SMLADX(x1, c0, acc1);
+          acc2 = __SMLADX(x3, c0, acc2);
+          acc3 = __SMLADX(x2, c0, acc3);
+        }
+
+        if(k == 3u)
+        {
+          /* Read coefficients y[1], y[2]. pb is left where it is: it is
+           * re-initialised at the end of this iteration, and stepping it
+           * back would make it start one sample before y[0]. */
+          c0 = *pb;
+
+          /* Read the next pair of x samples */
+          x3 = *(q31_t *) px++;
+
+          /* Read the pair of x samples one position further on */
+          x2 = *(q31_t *) px++;
+
+          /* Perform the multiply-accumulates */
+          acc0 = __SMLADX(x0, c0, acc0);
+          acc1 = __SMLADX(x1, c0, acc1);
+          acc2 = __SMLADX(x3, c0, acc2);
+          acc3 = __SMLADX(x2, c0, acc3);
+
+          /* Read the last coefficient y[0] (the sample just below pb) with a
+           * 16-bit access, so nothing in front of the coefficient buffer is
+           * touched, and keep the upper halfword zero for the dual MACs. */
+          c0 = (q31_t) (*((q15_t *) pb - 1)) & 0x0000FFFF;
+
+          /* Read the next pair of x samples */
+          x3 = *(q31_t *) px++;
+
+          /* Perform the multiply-accumulates */
+          acc0 = __SMLADX(x1, c0, acc0);
+          acc1 = __SMLAD(x2, c0, acc1);
+          acc2 = __SMLADX(x2, c0, acc2);
+          acc3 = __SMLADX(x3, c0, acc3);
+        }
+
+        /* Store the results in the accumulators in the destination buffer. */
+        *__SIMD32(pOut)++ = __PKHBT(acc0 >> 15, acc1 >> 15, 16);
+        *__SIMD32(pOut)++ = __PKHBT(acc2 >> 15, acc3 >> 15, 16);
+
+        /* Four more outputs are done */
+        count += 4u;
+
+        /* Update the inputA and inputB pointers for next MAC calculation */
+        px = pSrc1 + count;
+        py = pSrc2;
+        pb = (q31_t *) (py - 1);
+
+        /* Decrement the loop counter */
+        blkCnt--;
+      }
+
+      /* If the blockSize2 is not a multiple of 4, compute any remaining output samples here.
+       ** No loop unrolling over the outputs is used. */
+      blkCnt = (uint32_t) blockSize2 % 0x4u;
+
+      while(blkCnt > 0u)
+      {
+        /* Accumulator is made zero for every iteration */
+        sum = 0;
+
+        /* Apply loop unrolling and compute 4 MACs simultaneously. */
+        k = srcBLen >> 2u;
+
+        /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.
+         ** a second loop below computes MACs for the remaining 1 to 3 samples. */
+        while(k > 0u)
+        {
+          /* Perform the multiply-accumulates */
+          sum += ((q31_t) * px++ * *py--);
+          sum += ((q31_t) * px++ * *py--);
+          sum += ((q31_t) * px++ * *py--);
+          sum += ((q31_t) * px++ * *py--);
+
+          /* Decrement the loop counter */
+          k--;
+        }
+
+        /* If the srcBLen is not a multiple of 4, compute any remaining MACs here.
+         ** No loop unrolling is used. */
+        k = srcBLen % 0x4u;
+
+        while(k > 0u)
+        {
+          /* Perform the multiply-accumulates */
+          sum += ((q31_t) * px++ * *py--);
+
+          /* Decrement the loop counter */
+          k--;
+        }
+
+        /* Store the result in the accumulator in the destination buffer. */
+        *pOut++ = (q15_t) (sum >> 15);
+
+        /* One more output is done */
+        count++;
+
+        /* Update the inputA and inputB pointers for next MAC calculation.
+         * count includes the outputs of the unrolled loop above, so the
+         * window continues right after them. */
+        px = pSrc1 + count;
+        py = pSrc2;
+
+        /* Decrement the loop counter */
+        blkCnt--;
+      }
+    }
+    else
+    {
+      /* srcBLen is smaller than 4, so the blockSize2 loop
+       * is not unrolled by 4 */
+      blkCnt = (uint32_t) blockSize2;
+
+      while(blkCnt > 0u)
+      {
+        /* Accumulator is made zero for every iteration */
+        sum = 0;
+
+        /* srcBLen number of MACS should be performed */
+        k = srcBLen;
+
+        while(k > 0u)
+        {
+          /* Perform the multiply-accumulate */
+          sum += ((q31_t) * px++ * *py--);
+
+          /* Decrement the loop counter */
+          k--;
+        }
+
+        /* Store the result in the accumulator in the destination buffer. */
+        *pOut++ = (q15_t) (sum >> 15);
+
+        /* One more output is done */
+        count++;
+
+        /* Update the inputA and inputB pointers for next MAC calculation */
+        px = pSrc1 + count;
+        py = pSrc2;
+
+        /* Decrement the loop counter */
+        blkCnt--;
+      }
+    }
+
+
+    /* --------------------------
+     * Initializations of stage3
+     * -------------------------*/
+
+    /* sum += x[srcALen-srcBLen+1] * y[srcBLen-1] + x[srcALen-srcBLen+2] * y[srcBLen-2] +...+ x[srcALen-1] * y[1]
+     * sum += x[srcALen-srcBLen+2] * y[srcBLen-1] + x[srcALen-srcBLen+3] * y[srcBLen-2] +...+ x[srcALen-1] * y[2]
+     * ....
+     * sum +=  x[srcALen-2] * y[srcBLen-1] + x[srcALen-1] * y[srcBLen-2]
+     * sum +=  x[srcALen-1] * y[srcBLen-1]
+     */
+
+    /* In this stage the MAC operations are decreased by 1 for every iteration.
+       The count variable holds the number of MAC operations performed */
+    count = srcBLen - 1u;
+
+    /* Working pointer of inputA */
+    pSrc1 = (pIn1 + srcALen) - (srcBLen - 1u);
+
+    /* When the requested window begins inside this stage, skip the outputs
+     * in front of it: every skipped output removes one MAC and moves the
+     * start of the x window one sample forward. The range check at the top
+     * guarantees skip <= srcBLen - 1, so count cannot wrap. */
+    if(firstIndex > srcALen)
+    {
+      skip = firstIndex - srcALen;
+      count -= skip;
+      pSrc1 += skip;
+    }
+    px = pSrc1;
+
+    /* Working pointer of inputB: pSrc2 is the last coefficient, pIn2 the
+     * position used for paired 32-bit reads ending at that coefficient */
+    pSrc2 = pIn2 + (srcBLen - 1u);
+    pIn2 = pSrc2 - 1u;
+    py = pIn2;
+
+    /* -------------------
+     * Stage3 process
+     * ------------------*/
+
+    /* For loop unrolling by 4, this stage is divided into two. */
+    /* The first part runs (count >> 2) outputs with paired 32-bit reads */
+    /* The second part computes all remaining outputs one MAC at a time */
+
+    /* The first part of the stage starts here */
+    j = count >> 2u;
+
+    while((j > 0u) && (blockSize3 > 0))
+    {
+      /* Accumulator is made zero for every iteration */
+      sum = 0;
+
+      /* Apply loop unrolling and compute 4 MACs simultaneously. */
+      k = count >> 2u;
+
+      /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.
+       ** a second loop below computes MACs for the remaining 1 to 3 samples. */
+      while(k > 0u)
+      {
+        /* two x samples (ascending) are multiplied
+         * with y[srcBLen - 1], y[srcBLen - 2] respectively */
+        sum = __SMLADX(*__SIMD32(px)++, *__SIMD32(py)--, sum);
+        /* the next two x samples are multiplied
+         * with y[srcBLen - 3], y[srcBLen - 4] respectively */
+        sum = __SMLADX(*__SIMD32(px)++, *__SIMD32(py)--, sum);
+
+        /* Decrement the loop counter */
+        k--;
+      }
+
+      /* For the next MAC operations, the pointer py is used without SIMD
+       * So, py is incremented by 1 */
+      py = py + 1u;
+
+      /* If the count is not a multiple of 4, compute any remaining MACs here.
+       ** No loop unrolling is used. */
+      k = count % 0x4u;
+
+      while(k > 0u)
+      {
+        /* one x sample against one y sample */
+        sum = __SMLAD(*px++, *py--, sum);
+
+        /* Decrement the loop counter */
+        k--;
+      }
+
+      /* Store the result in the accumulator in the destination buffer. */
+      *pOut++ = (q15_t) (sum >> 15);
+
+      /* Update the inputA and inputB pointers for next MAC calculation */
+      px = ++pSrc1;
+      py = pIn2;
+
+      /* Decrement the MAC count */
+      count--;
+
+      /* Decrement the loop counter */
+      blockSize3--;
+
+      j--;
+    }
+
+    /* The second part of the stage starts here */
+    /* SIMD is not used for the next MAC operations,
+     * so pointer py is updated to read only one sample at a time */
+    py = py + 1u;
+
+    while(blockSize3 > 0)
+    {
+      /* Accumulator is made zero for every iteration */
+      sum = 0;
+
+      /* All remaining MACs of this output are done one at a time */
+      k = count;
+
+      while(k > 0u)
+      {
+        /* Perform the multiply-accumulates */
+        /* one x sample against one y sample, y walked backwards */
+        sum = __SMLAD(*px++, *py--, sum);
+
+        /* Decrement the loop counter */
+        k--;
+      }
+
+      /* Store the result in the accumulator in the destination buffer. */
+      *pOut++ = (q15_t) (sum >> 15);
+
+      /* Update the inputA and inputB pointers for next MAC calculation */
+      px = ++pSrc1;
+      py = pSrc2;
+
+      /* Decrement the MAC count */
+      count--;
+
+      /* Decrement the loop counter */
+      blockSize3--;
+    }
+
+    /* set status as ARM_MATH_SUCCESS */
+    status = ARM_MATH_SUCCESS;
+  }
+
+  /* Return to application */
+  return (status);
+
+}
+ 
+/**  
+ * @} end of PartialConv group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FilteringFunctions/arm_conv_partial_fast_q31.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,590 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_conv_partial_fast_q31.c  
+*  
+* Description:	Fast Q31 Partial convolution.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupFilters  
+ */ 
+ 
+/**  
+ * @addtogroup PartialConv  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief Partial convolution of Q31 sequences (fast version).  
+ * @param[in]       *pSrcA points to the first input sequence.  
+ * @param[in]       srcALen length of the first input sequence.  
+ * @param[in]       *pSrcB points to the second input sequence.  
+ * @param[in]       srcBLen length of the second input sequence.  
+ * @param[out]      *pDst points to the location where the output result is written.  
+ * @param[in]       firstIndex is the first output sample to start with.  
+ * @param[in]       numPoints is the number of output points to be computed.  
+ * @return Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2].  
+ *  
+ * \par  
+ * See <code>arm_conv_partial_q31()</code> for a slower implementation of this function which uses a 64-bit accumulator to provide higher precision.  
+ */ 
+ 
+arm_status arm_conv_partial_fast_q31( 
+  q31_t * pSrcA, 
+  uint32_t srcALen, 
+  q31_t * pSrcB, 
+  uint32_t srcBLen, 
+  q31_t * pDst, 
+  uint32_t firstIndex, 
+  uint32_t numPoints) 
+{ 
+  q31_t *pIn1;                                   /* inputA pointer               */ 
+  q31_t *pIn2;                                   /* inputB pointer               */ 
+  q31_t *pOut = pDst;                            /* output pointer               */ 
+  q31_t *px;                                     /* Intermediate inputA pointer  */ 
+  q31_t *py;                                     /* Intermediate inputB pointer  */ 
+  q31_t *pSrc1, *pSrc2;                          /* Intermediate pointers        */ 
+  q31_t sum, acc0, acc1, acc2, acc3;             /* Accumulators                  */ 
+  q31_t x0, x1, x2, x3, c0; 
+  uint32_t j, k, count, check, blkCnt; 
+  int32_t blockSize1, blockSize2, blockSize3;    /* loop counters                 */ 
+  arm_status status;                             /* status of Partial convolution */ 
+ 
+ 
+  /* Check for range of output samples to be calculated */ 
+  if((firstIndex + numPoints) > ((srcALen + (srcBLen - 1u)))) 
+  { 
+    /* Set status as ARM_MATH_ARGUMENT_ERROR */ 
+    status = ARM_MATH_ARGUMENT_ERROR; 
+  } 
+  else 
+  { 
+ 
+    /* The algorithm implementation is based on the lengths of the inputs. */ 
+    /* srcB is always made to slide across srcA. */ 
+    /* So srcBLen is always considered as shorter or equal to srcALen */ 
+    if(srcALen >= srcBLen) 
+    { 
+      /* Initialization of inputA pointer */ 
+      pIn1 = pSrcA; 
+ 
+      /* Initialization of inputB pointer */ 
+      pIn2 = pSrcB; 
+    } 
+    else 
+    { 
+      /* Initialization of inputA pointer */ 
+      pIn1 = pSrcB; 
+ 
+      /* Initialization of inputB pointer */ 
+      pIn2 = pSrcA; 
+ 
+      /* srcBLen is always considered as shorter or equal to srcALen */ 
+      j = srcBLen; 
+      srcBLen = srcALen; 
+      srcALen = j; 
+    } 
+ 
+    /* Conditions to check which loopCounter holds  
+     * the first and last indices of the output samples to be calculated. */ 
+    check = firstIndex + numPoints; 
+    blockSize3 = ((int32_t) check - (int32_t) srcALen); 
+    blockSize3 = (blockSize3 > 0) ? blockSize3 : 0; 
+    blockSize1 = (((int32_t) srcBLen - 1) - (int32_t) firstIndex); 
+    blockSize1 = (blockSize1 > 0) ? ((check > (srcBLen - 1u)) ? blockSize1 : 
+	                                (int32_t) numPoints) : 0; 
+    blockSize2 = (int32_t) check - ((blockSize3 + blockSize1) +  
+		                            (int32_t) firstIndex); 
+    blockSize2 = (blockSize2 > 0) ? blockSize2 : 0; 
+ 
+    /* conv(x,y) at n = x[n] * y[0] + x[n-1] * y[1] + x[n-2] * y[2] + ...+ x[n-N+1] * y[N -1] */ 
+    /* The function is internally  
+     * divided into three stages according to the number of multiplications that has to be  
+     * taken place between inputA samples and inputB samples. In the first stage of the  
+     * algorithm, the multiplications increase by one for every iteration.  
+     * In the second stage of the algorithm, srcBLen number of multiplications are done.  
+     * In the third stage of the algorithm, the multiplications decrease by one  
+     * for every iteration. */ 
+ 
+    /* Set the output pointer to point to the firstIndex  
+     * of the output sample to be calculated. */ 
+    pOut = pDst + firstIndex; 
+ 
+    /* --------------------------  
+     * Initializations of stage1  
+     * -------------------------*/ 
+ 
+    /* sum = x[0] * y[0]  
+     * sum = x[0] * y[1] + x[1] * y[0]  
+     * ....  
+     * sum = x[0] * y[srcBlen - 1] + x[1] * y[srcBlen - 2] +...+ x[srcBLen - 1] * y[0]  
+     */ 
+ 
+    /* In this stage the MAC operations are increased by 1 for every iteration.  
+       The count variable holds the number of MAC operations performed.  
+       Since the partial convolution starts from firstIndex  
+       Number of Macs to be performed is firstIndex + 1 */ 
+    count = 1u + firstIndex; 
+ 
+    /* Working pointer of inputA */ 
+    px = pIn1; 
+ 
+    /* Working pointer of inputB */ 
+    pSrc2 = pIn2 + firstIndex; 
+    py = pSrc2; 
+ 
+    /* ------------------------  
+     * Stage1 process  
+     * ----------------------*/ 
+ 
+    /* The first loop starts here */ 
+    while(blockSize1 > 0) 
+    { 
+      /* Accumulator is made zero for every iteration */ 
+      sum = 0; 
+ 
+      /* Apply loop unrolling and compute 4 MACs simultaneously. */ 
+      k = count >> 2u; 
+ 
+      /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.  
+       ** a second loop below computes MACs for the remaining 1 to 3 samples. */ 
+      while(k > 0u) 
+      { 
+        /* x[0] * y[srcBLen - 1] */ 
+        sum = (q31_t) ((((q63_t) sum << 32) +  
+			            ((q63_t) * px++ * (*py--))) >> 32); 
+ 
+        /* x[1] * y[srcBLen - 2] */ 
+        sum = (q31_t) ((((q63_t) sum << 32) +  
+			            ((q63_t) * px++ * (*py--))) >> 32); 
+ 
+        /* x[2] * y[srcBLen - 3] */ 
+        sum = (q31_t) ((((q63_t) sum << 32) +  
+			            ((q63_t) * px++ * (*py--))) >> 32); 
+ 
+        /* x[3] * y[srcBLen - 4] */ 
+        sum = (q31_t) ((((q63_t) sum << 32) +  
+			            ((q63_t) * px++ * (*py--))) >> 32); 
+ 
+        /* Decrement the loop counter */ 
+        k--; 
+      } 
+ 
+      /* If the count is not a multiple of 4, compute any remaining MACs here.  
+       ** No loop unrolling is used. */ 
+      k = count % 0x4u; 
+ 
+      while(k > 0u) 
+      { 
+        /* Perform the multiply-accumulates */ 
+        sum = (q31_t) ((((q63_t) sum << 32) +  
+			            ((q63_t) * px++ * (*py--))) >> 32); 
+ 
+        /* Decrement the loop counter */ 
+        k--; 
+      } 
+ 
+      /* Store the result in the accumulator in the destination buffer. */ 
+      *pOut++ = sum << 1; 
+ 
+      /* Update the inputA and inputB pointers for next MAC calculation */ 
+      py = ++pSrc2; 
+      px = pIn1; 
+ 
+      /* Increment the MAC count */ 
+      count++; 
+ 
+      /* Decrement the loop counter */ 
+      blockSize1--; 
+    } 
+ 
+    /* --------------------------  
+     * Initializations of stage2  
+     * ------------------------*/ 
+ 
+    /* sum = x[0] * y[srcBLen-1] + x[1] * y[srcBLen-2] +...+ x[srcBLen-1] * y[0]  
+     * sum = x[1] * y[srcBLen-1] + x[2] * y[srcBLen-2] +...+ x[srcBLen] * y[0]  
+     * ....  
+     * sum = x[srcALen-srcBLen-2] * y[srcBLen-1] + x[srcALen] * y[srcBLen-2] +...+ x[srcALen-1] * y[0]  
+     */ 
+ 
+    /* Working pointer of inputA */ 
+    px = pIn1; 
+ 
+    /* Working pointer of inputB */ 
+    pSrc2 = pIn2 + (srcBLen - 1u); 
+    py = pSrc2; 
+ 
+    /* count is index by which the pointer pIn1 to be incremented */ 
+    count = 1u; 
+ 
+    /* -------------------  
+     * Stage2 process  
+     * ------------------*/ 
+ 
+    /* Stage2 depends on srcBLen as in this stage srcBLen number of MACS are performed.  
+     * So, to loop unroll over blockSize2,  
+     * srcBLen should be greater than or equal to 4 */ 
+    if(srcBLen >= 4u) 
+    { 
+      /* Loop unroll over blockSize2 */ 
+      blkCnt = ((uint32_t) blockSize2 >> 2u); 
+ 
+      while(blkCnt > 0u) 
+      { 
+        /* Set all accumulators to zero */ 
+        acc0 = 0; 
+        acc1 = 0; 
+        acc2 = 0; 
+        acc3 = 0; 
+ 
+        /* read x[0], x[1], x[2] samples */ 
+        x0 = *(px++); 
+        x1 = *(px++); 
+        x2 = *(px++); 
+ 
+        /* Apply loop unrolling and compute 4 MACs simultaneously. */ 
+        k = srcBLen >> 2u; 
+ 
+        /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.  
+         ** a second loop below computes MACs for the remaining 1 to 3 samples. */ 
+        do 
+        { 
+          /* Read y[srcBLen - 1] sample */ 
+          c0 = *(py--); 
+ 
+          /* Read x[3] sample */ 
+          x3 = *(px++); 
+ 
+          /* Perform the multiply-accumulate */ 
+          /* acc0 +=  x[0] * y[srcBLen - 1] */ 
+          acc0 = (q31_t) ((((q63_t) acc0 << 32) + ((q63_t) x0 * c0)) >> 32); 
+ 
+          /* acc1 +=  x[1] * y[srcBLen - 1] */ 
+          acc1 = (q31_t) ((((q63_t) acc1 << 32) + ((q63_t) x1 * c0)) >> 32); 
+ 
+          /* acc2 +=  x[2] * y[srcBLen - 1] */ 
+          acc2 = (q31_t) ((((q63_t) acc2 << 32) + ((q63_t) x2 * c0)) >> 32); 
+ 
+          /* acc3 +=  x[3] * y[srcBLen - 1] */ 
+          acc3 = (q31_t) ((((q63_t) acc3 << 32) + ((q63_t) x3 * c0)) >> 32); 
+ 
+          /* Read y[srcBLen - 2] sample */ 
+          c0 = *(py--); 
+ 
+          /* Read x[4] sample */ 
+          x0 = *(px++); 
+ 
+          /* Perform the multiply-accumulate */ 
+          /* acc0 +=  x[1] * y[srcBLen - 2] */ 
+          acc0 = (q31_t) ((((q63_t) acc0 << 32) + ((q63_t) x1 * c0)) >> 32); 
+          /* acc1 +=  x[2] * y[srcBLen - 2] */ 
+          acc1 = (q31_t) ((((q63_t) acc1 << 32) + ((q63_t) x2 * c0)) >> 32); 
+          /* acc2 +=  x[3] * y[srcBLen - 2] */ 
+          acc2 = (q31_t) ((((q63_t) acc2 << 32) + ((q63_t) x3 * c0)) >> 32); 
+          /* acc3 +=  x[4] * y[srcBLen - 2] */ 
+          acc3 = (q31_t) ((((q63_t) acc3 << 32) + ((q63_t) x0 * c0)) >> 32); 
+ 
+          /* Read y[srcBLen - 3] sample */ 
+          c0 = *(py--); 
+ 
+          /* Read x[5] sample */ 
+          x1 = *(px++); 
+ 
+          /* Perform the multiply-accumulates */ 
+          /* acc0 +=  x[2] * y[srcBLen - 3] */ 
+          acc0 = (q31_t) ((((q63_t) acc0 << 32) + ((q63_t) x2 * c0)) >> 32); 
+          /* acc1 +=  x[3] * y[srcBLen - 2] */ 
+          acc1 = (q31_t) ((((q63_t) acc1 << 32) + ((q63_t) x3 * c0)) >> 32); 
+          /* acc2 +=  x[4] * y[srcBLen - 2] */ 
+          acc2 = (q31_t) ((((q63_t) acc2 << 32) + ((q63_t) x0 * c0)) >> 32); 
+          /* acc3 +=  x[5] * y[srcBLen - 2] */ 
+          acc3 = (q31_t) ((((q63_t) acc3 << 32) + ((q63_t) x1 * c0)) >> 32); 
+ 
+          /* Read y[srcBLen - 4] sample */ 
+          c0 = *(py--); 
+ 
+          /* Read x[6] sample */ 
+          x2 = *(px++); 
+ 
+          /* Perform the multiply-accumulates */ 
+          /* acc0 +=  x[3] * y[srcBLen - 4] */ 
+          acc0 = (q31_t) ((((q63_t) acc0 << 32) + ((q63_t) x3 * c0)) >> 32); 
+          /* acc1 +=  x[4] * y[srcBLen - 4] */ 
+          acc1 = (q31_t) ((((q63_t) acc1 << 32) + ((q63_t) x0 * c0)) >> 32); 
+          /* acc2 +=  x[5] * y[srcBLen - 4] */ 
+          acc2 = (q31_t) ((((q63_t) acc2 << 32) + ((q63_t) x1 * c0)) >> 32); 
+          /* acc3 +=  x[6] * y[srcBLen - 4] */ 
+          acc3 = (q31_t) ((((q63_t) acc3 << 32) + ((q63_t) x2 * c0)) >> 32); 
+ 
+ 
+        } while(--k); 
+ 
+        /* If the srcBLen is not a multiple of 4, compute any remaining MACs here.  
+         ** No loop unrolling is used. */ 
+        k = srcBLen % 0x4u; 
+ 
+        while(k > 0u) 
+        { 
+          /* Read y[srcBLen - 5] sample */ 
+          c0 = *(py--); 
+ 
+          /* Read x[7] sample */ 
+          x3 = *(px++); 
+ 
+          /* Perform the multiply-accumulates */ 
+          /* acc0 +=  x[4] * y[srcBLen - 5] */ 
+          acc0 = (q31_t) ((((q63_t) acc0 << 32) + ((q63_t) x0 * c0)) >> 32); 
+          /* acc1 +=  x[5] * y[srcBLen - 5] */ 
+          acc1 = (q31_t) ((((q63_t) acc1 << 32) + ((q63_t) x1 * c0)) >> 32); 
+          /* acc2 +=  x[6] * y[srcBLen - 5] */ 
+          acc2 = (q31_t) ((((q63_t) acc2 << 32) + ((q63_t) x2 * c0)) >> 32); 
+          /* acc3 +=  x[7] * y[srcBLen - 5] */ 
+          acc3 = (q31_t) ((((q63_t) acc3 << 32) + ((q63_t) x3 * c0)) >> 32); 
+ 
+          /* Reuse the present samples for the next MAC */ 
+          x0 = x1; 
+          x1 = x2; 
+          x2 = x3; 
+ 
+          /* Decrement the loop counter */ 
+          k--; 
+        } 
+ 
+        /* Store the result in the accumulator in the destination buffer. */ 
+        *pOut++ = (q31_t) (acc0 << 1); 
+        *pOut++ = (q31_t) (acc1 << 1); 
+        *pOut++ = (q31_t) (acc2 << 1); 
+        *pOut++ = (q31_t) (acc3 << 1); 
+ 
+        /* Update the inputA and inputB pointers for next MAC calculation */ 
+        px = pIn1 + (count * 4u); 
+        py = pSrc2; 
+ 
+        /* Increment the pointer pIn1 index, count by 1 */ 
+        count++; 
+ 
+        /* Decrement the loop counter */ 
+        blkCnt--; 
+      } 
+ 
+      /* If the blockSize2 is not a multiple of 4, compute any remaining output samples here.  
+       ** No loop unrolling is used. */ 
+      blkCnt = (uint32_t) blockSize2 % 0x4u; 
+ 
+      while(blkCnt > 0u) 
+      { 
+        /* Accumulator is made zero for every iteration */ 
+        sum = 0; 
+ 
+        /* Apply loop unrolling and compute 4 MACs simultaneously. */ 
+        k = srcBLen >> 2u; 
+ 
+        /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.  
+         ** a second loop below computes MACs for the remaining 1 to 3 samples. */ 
+        while(k > 0u) 
+        { 
+          /* Perform the multiply-accumulates */ 
+          sum = (q31_t) ((((q63_t) sum << 32) + 
+                          ((q63_t) * px++ * (*py--))) >> 32); 
+          sum = (q31_t) ((((q63_t) sum << 32) + 
+                          ((q63_t) * px++ * (*py--))) >> 32); 
+          sum = (q31_t) ((((q63_t) sum << 32) + 
+                          ((q63_t) * px++ * (*py--))) >> 32); 
+          sum = (q31_t) ((((q63_t) sum << 32) + 
+                          ((q63_t) * px++ * (*py--))) >> 32); 
+ 
+          /* Decrement the loop counter */ 
+          k--; 
+        } 
+ 
+        /* If the srcBLen is not a multiple of 4, compute any remaining MACs here.  
+         ** No loop unrolling is used. */ 
+        k = srcBLen % 0x4u; 
+ 
+        while(k > 0u) 
+        { 
+          /* Perform the multiply-accumulate */ 
+          sum = (q31_t) ((((q63_t) sum << 32) + 
+                          ((q63_t) * px++ * (*py--))) >> 32); 
+ 
+          /* Decrement the loop counter */ 
+          k--; 
+        } 
+ 
+        /* Store the result in the accumulator in the destination buffer. */ 
+        *pOut++ = sum << 1; 
+ 
+        /* Update the inputA and inputB pointers for next MAC calculation */ 
+        px = pIn1 + count; 
+        py = pSrc2; 
+ 
+        /* Increment the MAC count */ 
+        count++; 
+ 
+        /* Decrement the loop counter */ 
+        blkCnt--; 
+      } 
+    } 
+    else 
+    { 
+      /* If the srcBLen is not a multiple of 4,  
+       * the blockSize2 loop cannot be unrolled by 4 */ 
+      blkCnt = (uint32_t) blockSize2; 
+ 
+      while(blkCnt > 0u) 
+      { 
+        /* Accumulator is made zero for every iteration */ 
+        sum = 0; 
+ 
+        /* srcBLen number of MACS should be performed */ 
+        k = srcBLen; 
+ 
+        while(k > 0u) 
+        { 
+          /* Perform the multiply-accumulate */ 
+          sum = (q31_t) ((((q63_t) sum << 32) + 
+                          ((q63_t) * px++ * (*py--))) >> 32); 
+ 
+          /* Decrement the loop counter */ 
+          k--; 
+        } 
+ 
+        /* Store the result in the accumulator in the destination buffer. */ 
+        *pOut++ = sum << 1; 
+ 
+        /* Update the inputA and inputB pointers for next MAC calculation */ 
+        px = pIn1 + count; 
+        py = pSrc2; 
+ 
+        /* Increment the MAC count */ 
+        count++; 
+ 
+        /* Decrement the loop counter */ 
+        blkCnt--; 
+      } 
+    } 
+ 
+ 
+    /* --------------------------  
+     * Initializations of stage3  
+     * -------------------------*/ 
+ 
+    /* sum += x[srcALen-srcBLen+1] * y[srcBLen-1] + x[srcALen-srcBLen+2] * y[srcBLen-2] +...+ x[srcALen-1] * y[1]  
+     * sum += x[srcALen-srcBLen+2] * y[srcBLen-1] + x[srcALen-srcBLen+3] * y[srcBLen-2] +...+ x[srcALen-1] * y[2]  
+     * ....  
+     * sum +=  x[srcALen-2] * y[srcBLen-1] + x[srcALen-1] * y[srcBLen-2]  
+     * sum +=  x[srcALen-1] * y[srcBLen-1]  
+     */ 
+ 
+    /* In this stage the MAC operations are decreased by 1 for every iteration.  
+       The count variable holds the number of MAC operations performed */ 
+    count = srcBLen - 1u; 
+ 
+    /* Working pointer of inputA */ 
+    pSrc1 = (pIn1 + srcALen) - (srcBLen - 1u); 
+    px = pSrc1; 
+ 
+    /* Working pointer of inputB */ 
+    pSrc2 = pIn2 + (srcBLen - 1u); 
+    py = pSrc2; 
+ 
+    /* -------------------  
+     * Stage3 process  
+     * ------------------*/ 
+ 
+    while(blockSize3 > 0) 
+    { 
+      /* Accumulator is made zero for every iteration */ 
+      sum = 0; 
+ 
+      /* Apply loop unrolling and compute 4 MACs simultaneously. */ 
+      k = count >> 2u; 
+ 
+      /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.  
+       ** a second loop below computes MACs for the remaining 1 to 3 samples. */ 
+      while(k > 0u) 
+      { 
+        /* sum += x[srcALen - srcBLen + 1] * y[srcBLen - 1] */ 
+        sum = (q31_t) ((((q63_t) sum << 32) +  
+			            ((q63_t) * px++ * (*py--))) >> 32); 
+ 
+        /* sum += x[srcALen - srcBLen + 2] * y[srcBLen - 2] */ 
+        sum = (q31_t) ((((q63_t) sum << 32) +  
+			            ((q63_t) * px++ * (*py--))) >> 32); 
+ 
+        /* sum += x[srcALen - srcBLen + 3] * y[srcBLen - 3] */ 
+        sum = (q31_t) ((((q63_t) sum << 32) +  
+			            ((q63_t) * px++ * (*py--))) >> 32); 
+ 
+        /* sum += x[srcALen - srcBLen + 4] * y[srcBLen - 4] */ 
+        sum = (q31_t) ((((q63_t) sum << 32) +  
+			            ((q63_t) * px++ * (*py--))) >> 32); 
+ 
+        /* Decrement the loop counter */ 
+        k--; 
+      } 
+ 
+      /* If the count is not a multiple of 4, compute any remaining MACs here.  
+       ** No loop unrolling is used. */ 
+      k = count % 0x4u; 
+ 
+      while(k > 0u) 
+      { 
+        /* Perform the multiply-accumulates */ 
+        /* sum +=  x[srcALen-1] * y[srcBLen-1] */ 
+        sum = (q31_t) ((((q63_t) sum << 32) +  
+			            ((q63_t) * px++ * (*py--))) >> 32); 
+ 
+        /* Decrement the loop counter */ 
+        k--; 
+      } 
+ 
+      /* Store the result in the accumulator in the destination buffer. */ 
+      *pOut++ = sum << 1; 
+ 
+      /* Update the inputA and inputB pointers for next MAC calculation */ 
+      px = ++pSrc1; 
+      py = pSrc2; 
+ 
+      /* Decrement the MAC count */ 
+      count--; 
+ 
+      /* Decrement the loop counter */ 
+      blockSize3--; 
+ 
+    } 
+ 
+    /* set status as ARM_MATH_SUCCESS */ 
+    status = ARM_MATH_SUCCESS; 
+  } 
+ 
+  /* Return to application */ 
+  return (status); 
+ 
+} 
+ 
+/**  
+ * @} end of PartialConv group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FilteringFunctions/arm_conv_partial_q15.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,685 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_conv_partial_q15.c  
+*  
+* Description:	Q15 Partial convolution.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+*  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupFilters  
+ */ 
+ 
+/**  
+ * @addtogroup PartialConv  
+ * @{  
+ */ 
+ 
+/**
+ * @brief Partial convolution of Q15 sequences.
+ * @param[in]       *pSrcA points to the first input sequence.
+ * @param[in]       srcALen length of the first input sequence.
+ * @param[in]       *pSrcB points to the second input sequence.
+ * @param[in]       srcBLen length of the second input sequence.
+ * @param[out]      *pDst points to the location where the output result is written.
+ * @param[in]       firstIndex is the first output sample to start with.
+ * @param[in]       numPoints is the number of output points to be computed.
+ * @return  Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2].
+ *
+ * Output sample n, for firstIndex <= n < firstIndex + numPoints, is stored in
+ * pDst[n]; no other element of pDst is written.
+ * Products are summed in a 64-bit accumulator; every result is shifted right
+ * by 15 bits and saturated to 16 bits before it is stored.
+ *
+ * NOTE(review): the packed 32-bit loads and stores performed on q15_t buffers
+ * assume a little-endian target that tolerates unaligned word accesses -
+ * confirm for the target core and configuration.
+ *
+ * Refer to <code>arm_conv_partial_fast_q15()</code> for a faster but less precise version of this function.
+ */
+
+
+arm_status arm_conv_partial_q15(
+  q15_t * pSrcA,
+  uint32_t srcALen,
+  q15_t * pSrcB,
+  uint32_t srcBLen,
+  q15_t * pDst,
+  uint32_t firstIndex,
+  uint32_t numPoints)
+{
+  q15_t *pIn1;                                   /* inputA pointer (longer sequence)  */
+  q15_t *pIn2;                                   /* inputB pointer (shorter sequence) */
+  q15_t *pOut = pDst;                            /* output pointer               */
+  q63_t sum, acc0, acc1, acc2, acc3;             /* Accumulators                 */
+  q15_t *px;                                     /* Intermediate inputA pointer  */
+  q15_t *py;                                     /* Intermediate inputB pointer  */
+  q15_t *pSrc1, *pSrc2;                          /* Intermediate pointers        */
+  q31_t x0, x1, x2, x3, c0;                      /* Temporary input variables    */
+  uint32_t j, k, count, check, blkCnt;
+  int32_t blockSize1, blockSize2, blockSize3;    /* outputs owned by each stage  */
+  arm_status status;                             /* status of Partial convolution */
+  q31_t *pb;                                     /* 32 bit pointer for inputB buffer */
+
+  /* Check for range of output samples to be calculated */
+  if((firstIndex + numPoints) > ((srcALen + (srcBLen - 1u))))
+  {
+    /* Set status as ARM_MATH_ARGUMENT_ERROR */
+    status = ARM_MATH_ARGUMENT_ERROR;
+  }
+  else
+  {
+
+    /* The algorithm implementation is based on the lengths of the inputs. */
+    /* srcB is always made to slide across srcA. */
+    /* So srcBLen is always considered as shorter or equal to srcALen */
+    if(srcALen >= srcBLen)
+    {
+      /* Initialization of inputA pointer */
+      pIn1 = pSrcA;
+
+      /* Initialization of inputB pointer */
+      pIn2 = pSrcB;
+    }
+    else
+    {
+      /* Initialization of inputA pointer */
+      pIn1 = pSrcB;
+
+      /* Initialization of inputB pointer */
+      pIn2 = pSrcA;
+
+      /* Swap the lengths so that srcBLen is the shorter one */
+      j = srcBLen;
+      srcBLen = srcALen;
+      srcALen = j;
+    }
+
+    /* conv(x,y) at n = x[n] * y[0] + x[n-1] * y[1] + x[n-2] * y[2] + ...+ x[n-N+1] * y[N -1] */
+    /* The function is internally divided into three stages according to the
+     * number of multiplications between inputA and inputB samples:
+     *   stage 1: outputs [0, srcBLen-2], MACs grow by one per output,
+     *   stage 2: outputs [srcBLen-1, srcALen-1], srcBLen MACs per output,
+     *   stage 3: outputs [srcALen, srcALen+srcBLen-2], MACs shrink by one
+     *            per output.
+     * blockSizeN is the number of requested outputs that fall in stage N. */
+    check = firstIndex + numPoints;
+
+    blockSize3 = ((int32_t) check - (int32_t) srcALen);
+    blockSize3 = (blockSize3 > 0) ? blockSize3 : 0;
+
+    /* When the request starts inside stage 3, only numPoints outputs belong
+     * to it, not every stage-3 output below `check`. */
+    if(firstIndex > srcALen)
+    {
+      blockSize3 = (int32_t) numPoints;
+    }
+
+    blockSize1 = (((int32_t) srcBLen - 1) - (int32_t) firstIndex);
+    blockSize1 = (blockSize1 > 0) ? ((check > (srcBLen - 1u)) ? blockSize1 :
+                                     (int32_t) numPoints) : 0;
+    blockSize2 = (int32_t) check - ((blockSize3 + blockSize1) +
+                                    (int32_t) firstIndex);
+    blockSize2 = (blockSize2 > 0) ? blockSize2 : 0;
+
+    /* Set the output pointer to point to the firstIndex
+     * of the output sample to be calculated. */
+    pOut = pDst + firstIndex;
+
+    /* --------------------------
+     * Initializations of stage1
+     * -------------------------*/
+
+    /* sum = x[0] * y[0]
+     * sum = x[0] * y[1] + x[1] * y[0]
+     * ....
+     * sum = x[0] * y[srcBlen - 2] + x[1] * y[srcBlen - 3] +...+ x[srcBLen - 2] * y[0]
+     */
+
+    /* In this stage the MAC operations are increased by 1 for every iteration.
+       The count variable holds the number of MAC operations performed.
+       Since the partial convolution starts from firstIndex
+       Number of Macs to be performed is firstIndex + 1 */
+    count = 1u + firstIndex;
+
+    /* Working pointer of inputA */
+    px = pIn1;
+
+    /* Working pointer of inputB: y[firstIndex], walked downwards */
+    pSrc2 = pIn2 + firstIndex;
+    py = pSrc2;
+
+    /* ------------------------
+     * Stage1 process
+     * ----------------------*/
+
+    /* For loop unrolling by 4, this stage is divided into two. */
+    /* First part of this stage computes the outputs that need fewer than 4 MACs */
+    /* Second part of this stage computes the outputs that need 4 or more MACs */
+
+    /* The first part of the stage starts here */
+    while((count < 4u) && (blockSize1 > 0))
+    {
+      /* Accumulator is made zero for every iteration */
+      sum = 0;
+
+      /* Loop over number of MAC operations between
+       * inputA samples and inputB samples */
+      k = count;
+
+      while(k > 0u)
+      {
+        /* Single multiply-accumulate. A plain widened multiply is used
+         * instead of the dual-MAC intrinsic: with two sign-extended 16-bit
+         * operands the intrinsic would also multiply the two upper
+         * halfwords, which are both -1 when both samples are negative. */
+        sum += (q63_t) ((q31_t) (*px++) * (q31_t) (*py--));
+
+        /* Decrement the loop counter */
+        k--;
+      }
+
+      /* Store the result in the accumulator in the destination buffer. */
+      *pOut++ = (q15_t) (__SSAT((sum >> 15), 16));
+
+      /* Update the inputA and inputB pointers for next MAC calculation */
+      py = ++pSrc2;
+      px = pIn1;
+
+      /* Increment the MAC count */
+      count++;
+
+      /* Decrement the loop counter */
+      blockSize1--;
+    }
+
+    /* The second part of the stage starts here */
+    /* The internal loop, over count, is unrolled by 4 */
+    /* To read two inputB samples at once with a 32-bit access (the sample py
+     * points to and the one below it), py is decremented by 1 */
+    py = py - 1;
+
+    while(blockSize1 > 0)
+    {
+      /* Accumulator is made zero for every iteration */
+      sum = 0;
+
+      /* Apply loop unrolling and compute 4 MACs simultaneously. */
+      k = count >> 2u;
+
+      /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.
+       ** a second loop below computes MACs for the remaining 1 to 3 samples. */
+      while(k > 0u)
+      {
+        /* Two ascending x samples against two descending y samples */
+        sum = __SMLALDX(*__SIMD32(px)++, *__SIMD32(py)--, sum);
+        /* Next pair of x samples against the next lower pair of y samples */
+        sum = __SMLALDX(*__SIMD32(px)++, *__SIMD32(py)--, sum);
+
+        /* Decrement the loop counter */
+        k--;
+      }
+
+      /* For the next MAC operations, the pointer py is used without SIMD
+       * So, py is incremented by 1 */
+      py = py + 1u;
+
+      /* If the count is not a multiple of 4, compute any remaining MACs here.
+       ** No loop unrolling is used. */
+      k = count % 0x4u;
+
+      while(k > 0u)
+      {
+        /* Single multiply-accumulate (see the note in the first part) */
+        sum += (q63_t) ((q31_t) (*px++) * (q31_t) (*py--));
+
+        /* Decrement the loop counter */
+        k--;
+      }
+
+      /* Store the result in the accumulator in the destination buffer. */
+      *pOut++ = (q15_t) (__SSAT((sum >> 15), 16));
+
+      /* Update the inputA and inputB pointers for next MAC calculation */
+      py = ++pSrc2 - 1u;
+      px = pIn1;
+
+      /* Increment the MAC count */
+      count++;
+
+      /* Decrement the loop counter */
+      blockSize1--;
+    }
+
+    /* --------------------------
+     * Initializations of stage2
+     * ------------------------*/
+
+    /* sum = x[0] * y[srcBLen-1] + x[1] * y[srcBLen-2] +...+ x[srcBLen-1] * y[0]
+     * sum = x[1] * y[srcBLen-1] + x[2] * y[srcBLen-2] +...+ x[srcBLen] * y[0]
+     * ....
+     * sum = x[srcALen-srcBLen] * y[srcBLen-1] + x[srcALen-srcBLen+1] * y[srcBLen-2] +...+ x[srcALen-1] * y[0]
+     */
+
+    /* Working pointer of inputA. Output n of this stage starts at
+     * x[n - (srcBLen - 1)], so a request that begins inside stage 2 (or
+     * later) must not start from x[0]. */
+    if(firstIndex > (srcBLen - 1u))
+    {
+      pSrc1 = pIn1 + (firstIndex - (srcBLen - 1u));
+    }
+    else
+    {
+      pSrc1 = pIn1;
+    }
+    px = pSrc1;
+
+    /* Working pointer of inputB: y[srcBLen - 1], walked downwards */
+    pSrc2 = pIn2 + (srcBLen - 1u);
+    py = pSrc2;
+
+    /* count is the offset, in samples, of the current output's first inputA
+     * sample relative to pSrc1. The same unit is used by the unrolled loop
+     * (+4 per pass) and by the remainder loops (+1 per pass). */
+    count = 0u;
+
+
+    /* --------------------
+     * Stage2 process
+     * -------------------*/
+
+    /* Stage2 depends on srcBLen as in this stage srcBLen number of MACS are performed.
+     * So, to loop unroll over blockSize2,
+     * srcBLen should be greater than or equal to 4 */
+    if(srcBLen >= 4u)
+    {
+      /* 32-bit view of inputB covering y[srcBLen - 2] and y[srcBLen - 1] */
+      pb = (q31_t *) (pSrc2 - 1u);
+
+      /* Loop unroll over blockSize2, by 4 */
+      blkCnt = ((uint32_t) blockSize2 >> 2u);
+
+      while(blkCnt > 0u)
+      {
+        /* Set all accumulators to zero */
+        acc0 = 0;
+        acc1 = 0;
+        acc2 = 0;
+        acc3 = 0;
+
+
+        /* read x[0], x[1] samples */
+        x0 = *(q31_t *) (px++);
+        /* read x[1], x[2] samples */
+        x1 = *(q31_t *) (px++);
+
+
+        /* Apply loop unrolling and compute 4 MACs simultaneously. */
+        k = srcBLen >> 2u;
+
+        /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.
+         ** the code below the loop handles the remaining 1 to 3 samples. */
+        do
+        {
+          /* Read the last two inputB samples using SIMD:
+           * y[srcBLen - 1] and y[srcBLen - 2] */
+          c0 = *(pb--);
+
+          /* acc0 +=  x[0] * y[srcBLen - 1] + x[1] * y[srcBLen - 2] */
+          acc0 = __SMLALDX(x0, c0, acc0);
+
+          /* acc1 +=  x[1] * y[srcBLen - 1] + x[2] * y[srcBLen - 2] */
+          acc1 = __SMLALDX(x1, c0, acc1);
+
+          /* Read x[2], x[3] */
+          x2 = *(q31_t *) (px++);
+
+          /* Read x[3], x[4] */
+          x3 = *(q31_t *) (px++);
+
+          /* acc2 +=  x[2] * y[srcBLen - 1] + x[3] * y[srcBLen - 2] */
+          acc2 = __SMLALDX(x2, c0, acc2);
+
+          /* acc3 +=  x[3] * y[srcBLen - 1] + x[4] * y[srcBLen - 2] */
+          acc3 = __SMLALDX(x3, c0, acc3);
+
+          /* Read y[srcBLen - 3] and y[srcBLen - 4] */
+          c0 = *(pb--);
+
+          /* acc0 +=  x[2] * y[srcBLen - 3] + x[3] * y[srcBLen - 4] */
+          acc0 = __SMLALDX(x2, c0, acc0);
+
+          /* acc1 +=  x[3] * y[srcBLen - 3] + x[4] * y[srcBLen - 4] */
+          acc1 = __SMLALDX(x3, c0, acc1);
+
+          /* Read x[4], x[5] */
+          x0 = *(q31_t *) (px++);
+
+          /* Read x[5], x[6] */
+          x1 = *(q31_t *) (px++);
+
+          /* acc2 +=  x[4] * y[srcBLen - 3] + x[5] * y[srcBLen - 4] */
+          acc2 = __SMLALDX(x0, c0, acc2);
+
+          /* acc3 +=  x[5] * y[srcBLen - 3] + x[6] * y[srcBLen - 4] */
+          acc3 = __SMLALDX(x1, c0, acc3);
+
+        } while(--k);
+
+        /* If the srcBLen is not a multiple of 4, the 1 to 3 lowest inputB
+         * samples are still pending. They are handled with straight-line
+         * code; below, x[m] denotes the sample m positions after the first
+         * sample still held in x0. */
+        k = srcBLen % 0x4u;
+
+        if(k == 1u)
+        {
+          /* Pending: y[0]. Keep it in the low halfword only; the upper
+           * halfword must be zero so that the dual MACs below add exactly
+           * one product (a sign-extended negative value would put -1 in
+           * the upper halfword and subtract a neighbouring x sample). */
+          c0 = ((q31_t) (*pIn2)) & 0x0000FFFF;
+
+          /* Read x[2], x[3] */
+          x3 = *(q31_t *) px++;
+
+          /* acc0 += x[0] * y[0] ... acc3 += x[3] * y[0] */
+          acc0 = __SMLALD(x0, c0, acc0);
+          acc1 = __SMLALD(x1, c0, acc1);
+          acc2 = __SMLALDX(x1, c0, acc2);
+          acc3 = __SMLALDX(x3, c0, acc3);
+        }
+
+        if(k == 2u)
+        {
+          /* Pending: y[1], y[0]; pb already points at that pair */
+          c0 = *(pb);
+
+          /* Read x[2], x[3] */
+          x3 = *(q31_t *) px++;
+
+          /* Read x[3], x[4] */
+          x2 = *(q31_t *) px++;
+
+          /* accN += x[N] * y[1] + x[N + 1] * y[0] */
+          acc0 = __SMLALDX(x0, c0, acc0);
+          acc1 = __SMLALDX(x1, c0, acc1);
+          acc2 = __SMLALDX(x3, c0, acc2);
+          acc3 = __SMLALDX(x2, c0, acc3);
+        }
+
+        if(k == 3u)
+        {
+          /* Pending: y[2], y[1], y[0]; pb points at the pair y[1], y[2] */
+          c0 = *pb;
+
+          /* Read x[2], x[3] */
+          x3 = *(q31_t *) px++;
+
+          /* Read x[3], x[4] */
+          x2 = *(q31_t *) px++;
+
+          /* accN += x[N] * y[2] + x[N + 1] * y[1] */
+          acc0 = __SMLALDX(x0, c0, acc0);
+          acc1 = __SMLALDX(x1, c0, acc1);
+          acc2 = __SMLALDX(x3, c0, acc2);
+          acc3 = __SMLALDX(x2, c0, acc3);
+
+          /* Last pending sample y[0]: read it as a 16-bit value (a 32-bit
+           * read ending at y[0] would start before the inputB buffer) and
+           * keep the upper halfword zero, as in the k == 1 case. */
+          c0 = ((q31_t) (*pIn2)) & 0x0000FFFF;
+
+          /* Read x[4], x[5] */
+          x3 = *(q31_t *) px++;
+
+          /* accN += x[N + 2] * y[0] */
+          acc0 = __SMLALDX(x1, c0, acc0);
+          acc1 = __SMLALD(x2, c0, acc1);
+          acc2 = __SMLALDX(x2, c0, acc2);
+          acc3 = __SMLALDX(x3, c0, acc3);
+        }
+
+        /* Store the results in the accumulators in the destination buffer. */
+        *__SIMD32(pOut)++ =
+          __PKHBT(__SSAT((acc0 >> 15), 16), __SSAT((acc1 >> 15), 16), 16);
+        *__SIMD32(pOut)++ =
+          __PKHBT(__SSAT((acc2 >> 15), 16), __SSAT((acc3 >> 15), 16), 16);
+
+        /* Four outputs were produced: advance the inputA offset by 4 */
+        count += 4u;
+
+        /* Update the inputA and inputB pointers for next MAC calculation */
+        px = pSrc1 + count;
+        pb = (q31_t *) (pSrc2 - 1u);
+
+        /* Decrement the loop counter */
+        blkCnt--;
+      }
+
+      /* If the blockSize2 is not a multiple of 4, compute any remaining output samples here.
+       ** One output per pass. px and py are already positioned for the
+       ** first of them. */
+      blkCnt = (uint32_t) blockSize2 % 0x4u;
+
+      while(blkCnt > 0u)
+      {
+        /* Accumulator is made zero for every iteration */
+        sum = 0;
+
+        /* Apply loop unrolling and compute 4 MACs simultaneously. */
+        k = srcBLen >> 2u;
+
+        /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.
+         ** a second loop below computes MACs for the remaining 1 to 3 samples. */
+        while(k > 0u)
+        {
+          /* Perform the multiply-accumulates */
+          sum += (q63_t) ((q31_t) (*px++) * (q31_t) (*py--));
+          sum += (q63_t) ((q31_t) (*px++) * (q31_t) (*py--));
+          sum += (q63_t) ((q31_t) (*px++) * (q31_t) (*py--));
+          sum += (q63_t) ((q31_t) (*px++) * (q31_t) (*py--));
+
+          /* Decrement the loop counter */
+          k--;
+        }
+
+        /* If the srcBLen is not a multiple of 4, compute any remaining MACs here.
+         ** No loop unrolling is used. */
+        k = srcBLen % 0x4u;
+
+        while(k > 0u)
+        {
+          /* Perform the multiply-accumulates */
+          sum += (q63_t) ((q31_t) (*px++) * (q31_t) (*py--));
+
+          /* Decrement the loop counter */
+          k--;
+        }
+
+        /* Store the result in the accumulator in the destination buffer. */
+        *pOut++ = (q15_t) (__SSAT(sum >> 15, 16));
+
+        /* One output was produced: advance the inputA offset by 1 */
+        count++;
+
+        /* Update the inputA and inputB pointers for next MAC calculation */
+        px = pSrc1 + count;
+        py = pSrc2;
+
+        /* Decrement the loop counter */
+        blkCnt--;
+      }
+    }
+    else
+    {
+      /* srcBLen is less than 4, so neither the MAC loop nor the
+       * blockSize2 loop is unrolled */
+      blkCnt = (uint32_t) blockSize2;
+
+      while(blkCnt > 0u)
+      {
+        /* Accumulator is made zero for every iteration */
+        sum = 0;
+
+        /* srcBLen number of MACS should be performed */
+        k = srcBLen;
+
+        while(k > 0u)
+        {
+          /* Perform the multiply-accumulate */
+          sum += (q63_t) ((q31_t) (*px++) * (q31_t) (*py--));
+
+          /* Decrement the loop counter */
+          k--;
+        }
+
+        /* Store the result in the accumulator in the destination buffer. */
+        *pOut++ = (q15_t) (__SSAT(sum >> 15, 16));
+
+        /* One output was produced: advance the inputA offset by 1 */
+        count++;
+
+        /* Update the inputA and inputB pointers for next MAC calculation */
+        px = pSrc1 + count;
+        py = pSrc2;
+
+        /* Decrement the loop counter */
+        blkCnt--;
+      }
+    }
+
+
+    /* --------------------------
+     * Initializations of stage3
+     * -------------------------*/
+
+    /* sum += x[srcALen-srcBLen+1] * y[srcBLen-1] + x[srcALen-srcBLen+2] * y[srcBLen-2] +...+ x[srcALen-1] * y[1]
+     * sum += x[srcALen-srcBLen+2] * y[srcBLen-1] + x[srcALen-srcBLen+3] * y[srcBLen-2] +...+ x[srcALen-1] * y[2]
+     * ....
+     * sum +=  x[srcALen-2] * y[srcBLen-1] + x[srcALen-1] * y[srcBLen-2]
+     * sum +=  x[srcALen-1] * y[srcBLen-1]
+     */
+
+    /* In this stage the MAC operations are decreased by 1 for every iteration.
+       The count variable holds the number of MAC operations performed.
+       Output n of this stage uses x[n - (srcBLen - 1)] ... x[srcALen - 1],
+       i.e. (srcALen + srcBLen - 1) - n MACs, so a request that begins after
+       the first stage-3 output starts with fewer MACs and a later inputA
+       sample. */
+    if(firstIndex > srcALen)
+    {
+      count = (srcALen + (srcBLen - 1u)) - firstIndex;
+
+      /* Working pointer of inputA */
+      pSrc1 = (pIn1 + firstIndex) - (srcBLen - 1u);
+    }
+    else
+    {
+      count = srcBLen - 1u;
+
+      /* Working pointer of inputA */
+      pSrc1 = (pIn1 + srcALen) - (srcBLen - 1u);
+    }
+    px = pSrc1;
+
+    /* Working pointer of inputB. pSrc2 addresses y[srcBLen - 1]; pIn2 is
+     * reused to hold the position one sample lower, from which a 32-bit
+     * access covers y[srcBLen - 2] and y[srcBLen - 1]. */
+    pSrc2 = pIn2 + (srcBLen - 1u);
+    pIn2 = pSrc2 - 1u;
+    py = pIn2;
+
+    /* -------------------
+     * Stage3 process
+     * ------------------*/
+
+    /* For loop unrolling by 4, this stage is divided into two. */
+    /* First part of this stage uses SIMD reads; it produces at most
+     * (count >> 2) outputs, each of which needs at least 4 MACs */
+    /* Second part of this stage computes the remaining outputs one MAC at a time */
+
+    /* The first part of the stage starts here */
+    j = count >> 2u;
+
+    while((j > 0u) && (blockSize3 > 0))
+    {
+      /* Accumulator is made zero for every iteration */
+      sum = 0;
+
+      /* Apply loop unrolling and compute 4 MACs simultaneously. */
+      k = count >> 2u;
+
+      /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.
+       ** a second loop below computes MACs for the remaining 1 to 3 samples. */
+      while(k > 0u)
+      {
+        /* Two ascending x samples are multiplied
+         * with y[srcBLen - 1], y[srcBLen - 2] respectively */
+        sum = __SMLALDX(*__SIMD32(px)++, *__SIMD32(py)--, sum);
+        /* The next two x samples are multiplied
+         * with y[srcBLen - 3], y[srcBLen - 4] respectively */
+        sum = __SMLALDX(*__SIMD32(px)++, *__SIMD32(py)--, sum);
+
+        /* Decrement the loop counter */
+        k--;
+      }
+
+      /* For the next MAC operations, the pointer py is used without SIMD
+       * So, py is incremented by 1 */
+      py = py + 1u;
+
+      /* If the count is not a multiple of 4, compute any remaining MACs here.
+       ** No loop unrolling is used. */
+      k = count % 0x4u;
+
+      while(k > 0u)
+      {
+        /* Single multiply-accumulate (see the note in stage 1) */
+        sum += (q63_t) ((q31_t) (*px++) * (q31_t) (*py--));
+
+        /* Decrement the loop counter */
+        k--;
+      }
+
+      /* Store the result in the accumulator in the destination buffer. */
+      *pOut++ = (q15_t) (__SSAT((sum >> 15), 16));
+
+      /* Update the inputA and inputB pointers for next MAC calculation */
+      px = ++pSrc1;
+      py = pIn2;
+
+      /* Decrement the MAC count */
+      count--;
+
+      /* Decrement the loop counter */
+      blockSize3--;
+
+      j--;
+    }
+
+    /* The second part of the stage starts here */
+    /* SIMD is not used for the next MAC operations,
+     * so py is set to y[srcBLen - 1] to read only one sample at a time */
+    py = pSrc2;
+
+    while(blockSize3 > 0)
+    {
+      /* Accumulator is made zero for every iteration */
+      sum = 0;
+
+      /* count MACs are performed, one at a time */
+      k = count;
+
+      while(k > 0u)
+      {
+        /* Single multiply-accumulate (see the note in stage 1) */
+        sum += (q63_t) ((q31_t) (*px++) * (q31_t) (*py--));
+
+        /* Decrement the loop counter */
+        k--;
+      }
+
+      /* Store the result in the accumulator in the destination buffer. */
+      *pOut++ = (q15_t) (__SSAT((sum >> 15), 16));
+
+      /* Update the inputA and inputB pointers for next MAC calculation */
+      px = ++pSrc1;
+      py = pSrc2;
+
+      /* Decrement the MAC count */
+      count--;
+
+      /* Decrement the loop counter */
+      blockSize3--;
+    }
+
+    /* set status as ARM_MATH_SUCCESS */
+    status = ARM_MATH_SUCCESS;
+  }
+
+  /* Return to application */
+  return (status);
+
+}
+ 
+/**  
+ * @} end of PartialConv group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FilteringFunctions/arm_conv_partial_q31.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,562 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_conv_partial_q31.c  
+*  
+* Description:	Q31 Partial convolution.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+*  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupFilters  
+ */ 
+ 
+/**  
+ * @addtogroup PartialConv  
+ * @{  
+ */ 
+ 
+/**
+ * @brief Partial convolution of Q31 sequences.
+ * @param[in]       *pSrcA points to the first input sequence.
+ * @param[in]       srcALen length of the first input sequence.
+ * @param[in]       *pSrcB points to the second input sequence.
+ * @param[in]       srcBLen length of the second input sequence.
+ * @param[out]      *pDst points to the location where the output result is written; the first computed sample is stored at pDst[firstIndex].
+ * @param[in]       firstIndex is the first output sample to start with.
+ * @param[in]       numPoints is the number of output points to be computed.
+ * @return Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2].
+ * Products are accumulated in 64 bits; each output is (accumulator >> 31) truncated to Q31, without saturation.
+ * See <code>arm_conv_partial_fast_q31()</code> for a faster but less precise implementation of this function.
+ */
+
+arm_status arm_conv_partial_q31(
+  q31_t * pSrcA,
+  uint32_t srcALen,
+  q31_t * pSrcB,
+  uint32_t srcBLen,
+  q31_t * pDst,
+  uint32_t firstIndex,
+  uint32_t numPoints)
+{
+  q31_t *pIn1;                                   /* inputA pointer (longer sequence)  */
+  q31_t *pIn2;                                   /* inputB pointer (shorter sequence) */
+  q31_t *pOut = pDst;                            /* output pointer               */
+  q31_t *px;                                     /* Intermediate inputA pointer  */
+  q31_t *py;                                     /* Intermediate inputB pointer  */
+  q31_t *pSrc1, *pSrc2;                          /* Intermediate pointers        */
+  q63_t sum, acc0, acc1, acc2, acc3;             /* Accumulator                  */
+  q31_t x0, x1, x2, x3, c0;
+  uint32_t j, k, count, check, blkCnt, start3;   /* start3: first output index handled by stage 3 */
+  int32_t blockSize1, blockSize2, blockSize3;    /* loop counter                 */
+  arm_status status;                             /* status of Partial convolution */
+
+
+  /* Check for range of output samples to be calculated */
+  if((firstIndex + numPoints) > ((srcALen + (srcBLen - 1u))))
+  {
+    /* Set status as ARM_MATH_ARGUMENT_ERROR */
+    status = ARM_MATH_ARGUMENT_ERROR;
+  }
+  else
+  {
+
+    /* The algorithm implementation is based on the lengths of the inputs. */
+    /* srcB is always made to slide across srcA. */
+    /* So srcBLen is always considered as shorter or equal to srcALen */
+    if(srcALen >= srcBLen)
+    {
+      /* Initialization of inputA pointer */
+      pIn1 = pSrcA;
+
+      /* Initialization of inputB pointer */
+      pIn2 = pSrcB;
+    }
+    else
+    {
+      /* Initialization of inputA pointer */
+      pIn1 = pSrcB;
+
+      /* Initialization of inputB pointer */
+      pIn2 = pSrcA;
+
+      /* srcBLen is always considered as shorter or equal to srcALen */
+      j = srcBLen;
+      srcBLen = srcALen;
+      srcALen = j;
+    }
+
+    /* Split the requested output range [firstIndex, check) between the three
+     * stages; stage 3 begins at srcALen, or at firstIndex when that is later. */
+    check = firstIndex + numPoints;
+    start3 = (firstIndex > srcALen) ? firstIndex : srcALen;
+    blockSize3 = ((int32_t) check - (int32_t) start3);
+    blockSize3 = (blockSize3 > 0) ? blockSize3 : 0;
+    blockSize1 = (((int32_t) srcBLen - 1) - (int32_t) firstIndex);
+    blockSize1 = (blockSize1 > 0) ? ((check > (srcBLen - 1u)) ? blockSize1 :
+                                    (int32_t) numPoints) : 0;
+    blockSize2 = (int32_t) check - ((blockSize3 + blockSize1) + (int32_t) firstIndex);
+    blockSize2 = (blockSize2 > 0) ? blockSize2 : 0;
+
+    /* conv(x,y) at n = x[n] * y[0] + x[n-1] * y[1] + x[n-2] * y[2] + ...+ x[n-N+1] * y[N -1] */
+    /* The function is internally
+     * divided into three stages according to the number of multiplications that has to be
+     * taken place between inputA samples and inputB samples. In the first stage of the
+     * algorithm, the multiplications increase by one for every iteration.
+     * In the second stage of the algorithm, srcBLen number of multiplications are done.
+     * In the third stage of the algorithm, the multiplications decrease by one
+     * for every iteration. */
+
+    /* Set the output pointer to point to the firstIndex
+     * of the output sample to be calculated. */
+    pOut = pDst + firstIndex;
+
+    /* --------------------------
+     * Initializations of stage1
+     * -------------------------*/
+
+    /* sum = x[0] * y[0]
+     * sum = x[0] * y[1] + x[1] * y[0]
+     * ....
+     * sum = x[0] * y[srcBlen - 1] + x[1] * y[srcBlen - 2] +...+ x[srcBLen - 1] * y[0]
+     */
+
+    /* In this stage the MAC operations are increased by 1 for every iteration.
+       The count variable holds the number of MAC operations performed.
+       Since the partial convolution starts from firstIndex
+       Number of Macs to be performed is firstIndex + 1 */
+    count = 1u + firstIndex;
+
+    /* Working pointer of inputA */
+    px = pIn1;
+
+    /* Working pointer of inputB */
+    pSrc2 = pIn2 + firstIndex;
+    py = pSrc2;
+
+    /* ------------------------
+     * Stage1 process
+     * ----------------------*/
+
+    /* The first loop starts here */
+    while(blockSize1 > 0)
+    {
+      /* Accumulator is made zero for every iteration */
+      sum = 0;
+
+      /* Apply loop unrolling and compute 4 MACs simultaneously. */
+      k = count >> 2u;
+
+      /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.
+       ** a second loop below computes MACs for the remaining 1 to 3 samples. */
+      while(k > 0u)
+      {
+        /* x[0] * y[n], where n is the index of the current output sample */
+        sum += (q63_t) * px++ * (*py--);
+        /* x[1] * y[n - 1] */
+        sum += (q63_t) * px++ * (*py--);
+        /* x[2] * y[n - 2] */
+        sum += (q63_t) * px++ * (*py--);
+        /* x[3] * y[n - 3] */
+        sum += (q63_t) * px++ * (*py--);
+
+        /* Decrement the loop counter */
+        k--;
+      }
+
+      /* If the count is not a multiple of 4, compute any remaining MACs here.
+       ** No loop unrolling is used. */
+      k = count % 0x4u;
+
+      while(k > 0u)
+      {
+        /* Perform the multiply-accumulate */
+        sum += (q63_t) * px++ * (*py--);
+
+        /* Decrement the loop counter */
+        k--;
+      }
+
+      /* Store the result in the accumulator in the destination buffer. */
+      *pOut++ = (q31_t) (sum >> 31);
+
+      /* Update the inputA and inputB pointers for next MAC calculation */
+      py = ++pSrc2;
+      px = pIn1;
+
+      /* Increment the MAC count */
+      count++;
+
+      /* Decrement the loop counter */
+      blockSize1--;
+    }
+
+    /* --------------------------
+     * Initializations of stage2
+     * ------------------------*/
+
+    /* sum = x[0] * y[srcBLen-1] + x[1] * y[srcBLen-2] +...+ x[srcBLen-1] * y[0]
+     * sum = x[1] * y[srcBLen-1] + x[2] * y[srcBLen-2] +...+ x[srcBLen] * y[0]
+     * ....
+     * sum = x[srcALen-srcBLen] * y[srcBLen-1] + x[srcALen-srcBLen+1] * y[srcBLen-2] +...+ x[srcALen-1] * y[0]
+     */
+
+    /* Working pointer of inputA: when firstIndex lies past stage 1, skip the samples of the outputs not requested */
+    pSrc1 = pIn1 + ((firstIndex > (srcBLen - 1u)) ? (firstIndex - (srcBLen - 1u)) : 0u);
+    px = pSrc1;
+
+    /* Working pointer of inputB */
+    pSrc2 = pIn2 + (srcBLen - 1u);
+    py = pSrc2;
+
+    /* count is the number of stage-2 output samples produced so far (offset from pSrc1) */
+    count = 0u;
+
+    /* -------------------
+     * Stage2 process
+     * ------------------*/
+
+    /* Stage2 depends on srcBLen as in this stage srcBLen number of MACS are performed.
+     * So, to loop unroll over blockSize2,
+     * srcBLen should be greater than or equal to 4 */
+    if(srcBLen >= 4u)
+    {
+      /* Loop unroll over blockSize2 */
+      blkCnt = ((uint32_t) blockSize2 >> 2u);
+
+      while(blkCnt > 0u)
+      {
+        /* Set all accumulators to zero */
+        acc0 = 0;
+        acc1 = 0;
+        acc2 = 0;
+        acc3 = 0;
+
+        /* read x[0], x[1], x[2] samples */
+        x0 = *(px++);
+        x1 = *(px++);
+        x2 = *(px++);
+
+        /* Apply loop unrolling and compute 4 MACs simultaneously. */
+        k = srcBLen >> 2u;
+
+        /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.
+         ** a second loop below computes MACs for the remaining 1 to 3 samples. */
+        do
+        {
+          /* Read y[srcBLen - 1] sample */
+          c0 = *(py--);
+
+          /* Read x[3] sample */
+          x3 = *(px++);
+
+          /* Perform the multiply-accumulates */
+          /* acc0 +=  x[0] * y[srcBLen - 1] */
+          acc0 += (q63_t) x0 *c0;
+          /* acc1 +=  x[1] * y[srcBLen - 1] */
+          acc1 += (q63_t) x1 *c0;
+          /* acc2 +=  x[2] * y[srcBLen - 1] */
+          acc2 += (q63_t) x2 *c0;
+          /* acc3 +=  x[3] * y[srcBLen - 1] */
+          acc3 += (q63_t) x3 *c0;
+
+          /* Read y[srcBLen - 2] sample */
+          c0 = *(py--);
+
+          /* Read x[4] sample */
+          x0 = *(px++);
+
+          /* Perform the multiply-accumulate */
+          /* acc0 +=  x[1] * y[srcBLen - 2] */
+          acc0 += (q63_t) x1 *c0;
+          /* acc1 +=  x[2] * y[srcBLen - 2] */
+          acc1 += (q63_t) x2 *c0;
+          /* acc2 +=  x[3] * y[srcBLen - 2] */
+          acc2 += (q63_t) x3 *c0;
+          /* acc3 +=  x[4] * y[srcBLen - 2] */
+          acc3 += (q63_t) x0 *c0;
+
+          /* Read y[srcBLen - 3] sample */
+          c0 = *(py--);
+
+          /* Read x[5] sample */
+          x1 = *(px++);
+
+          /* Perform the multiply-accumulates */
+          /* acc0 +=  x[2] * y[srcBLen - 3] */
+          acc0 += (q63_t) x2 *c0;
+          /* acc1 +=  x[3] * y[srcBLen - 3] */
+          acc1 += (q63_t) x3 *c0;
+          /* acc2 +=  x[4] * y[srcBLen - 3] */
+          acc2 += (q63_t) x0 *c0;
+          /* acc3 +=  x[5] * y[srcBLen - 3] */
+          acc3 += (q63_t) x1 *c0;
+
+          /* Read y[srcBLen - 4] sample */
+          c0 = *(py--);
+
+          /* Read x[6] sample */
+          x2 = *(px++);
+
+          /* Perform the multiply-accumulates */
+          /* acc0 +=  x[3] * y[srcBLen - 4] */
+          acc0 += (q63_t) x3 *c0;
+          /* acc1 +=  x[4] * y[srcBLen - 4] */
+          acc1 += (q63_t) x0 *c0;
+          /* acc2 +=  x[5] * y[srcBLen - 4] */
+          acc2 += (q63_t) x1 *c0;
+          /* acc3 +=  x[6] * y[srcBLen - 4] */
+          acc3 += (q63_t) x2 *c0;
+
+        } while(--k);
+
+        /* If the srcBLen is not a multiple of 4, compute any remaining MACs here.
+         ** No loop unrolling is used. */
+        k = srcBLen % 0x4u;
+
+        while(k > 0u)
+        {
+          /* Read y[srcBLen - 5] sample */
+          c0 = *(py--);
+
+          /* Read x[7] sample */
+          x3 = *(px++);
+
+          /* Perform the multiply-accumulates */
+          /* acc0 +=  x[4] * y[srcBLen - 5] */
+          acc0 += (q63_t) x0 *c0;
+          /* acc1 +=  x[5] * y[srcBLen - 5] */
+          acc1 += (q63_t) x1 *c0;
+          /* acc2 +=  x[6] * y[srcBLen - 5] */
+          acc2 += (q63_t) x2 *c0;
+          /* acc3 +=  x[7] * y[srcBLen - 5] */
+          acc3 += (q63_t) x3 *c0;
+
+          /* Reuse the present samples for the next MAC */
+          x0 = x1;
+          x1 = x2;
+          x2 = x3;
+
+          /* Decrement the loop counter */
+          k--;
+        }
+
+        /* Store the result in the accumulator in the destination buffer. */
+        *pOut++ = (q31_t) (acc0 >> 31);
+        *pOut++ = (q31_t) (acc1 >> 31);
+        *pOut++ = (q31_t) (acc2 >> 31);
+        *pOut++ = (q31_t) (acc3 >> 31);
+
+        /* Four more output samples are done: advance the sample offset by 4 */
+        count += 4u;
+
+        /* Update the inputA and inputB pointers for next MAC calculation */
+        px = pSrc1 + count;
+        py = pSrc2;
+
+        /* Decrement the loop counter */
+        blkCnt--;
+      }
+
+      /* If the blockSize2 is not a multiple of 4, compute any remaining output samples here.
+       ** No loop unrolling is used. */
+      blkCnt = (uint32_t) blockSize2 % 0x4u;
+
+      while(blkCnt > 0u)
+      {
+        /* Accumulator is made zero for every iteration */
+        sum = 0;
+
+        /* Apply loop unrolling and compute 4 MACs simultaneously. */
+        k = srcBLen >> 2u;
+
+        /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.
+         ** a second loop below computes MACs for the remaining 1 to 3 samples. */
+        while(k > 0u)
+        {
+          /* Perform the multiply-accumulates */
+          sum += (q63_t) * px++ * (*py--);
+          sum += (q63_t) * px++ * (*py--);
+          sum += (q63_t) * px++ * (*py--);
+          sum += (q63_t) * px++ * (*py--);
+
+          /* Decrement the loop counter */
+          k--;
+        }
+
+        /* If the srcBLen is not a multiple of 4, compute any remaining MACs here.
+         ** No loop unrolling is used. */
+        k = srcBLen % 0x4u;
+
+        while(k > 0u)
+        {
+          /* Perform the multiply-accumulate */
+          sum += (q63_t) * px++ * (*py--);
+
+          /* Decrement the loop counter */
+          k--;
+        }
+
+        /* Store the result in the accumulator in the destination buffer. */
+        *pOut++ = (q31_t) (sum >> 31);
+
+        /* One more output sample is done: advance the sample offset by 1 */
+        count++;
+
+        /* Update the inputA and inputB pointers for next MAC calculation */
+        px = pSrc1 + count;
+        py = pSrc2;
+
+        /* Decrement the loop counter */
+        blkCnt--;
+      }
+    }
+    else
+    {
+      /* If the srcBLen is less than 4,
+       * the blockSize2 loop cannot be unrolled by 4 */
+      blkCnt = (uint32_t) blockSize2;
+
+      while(blkCnt > 0u)
+      {
+        /* Accumulator is made zero for every iteration */
+        sum = 0;
+
+        /* srcBLen number of MACS should be performed */
+        k = srcBLen;
+
+        while(k > 0u)
+        {
+          /* Perform the multiply-accumulate */
+          sum += (q63_t) * px++ * (*py--);
+
+          /* Decrement the loop counter */
+          k--;
+        }
+
+        /* Store the result in the accumulator in the destination buffer. */
+        *pOut++ = (q31_t) (sum >> 31);
+
+        /* One more output sample is done: advance the sample offset by 1 */
+        count++;
+
+        /* Update the inputA and inputB pointers for next MAC calculation */
+        px = pSrc1 + count;
+        py = pSrc2;
+
+        /* Decrement the loop counter */
+        blkCnt--;
+      }
+    }
+
+    /* --------------------------
+     * Initializations of stage3
+     * -------------------------*/
+
+    /* sum += x[srcALen-srcBLen+1] * y[srcBLen-1] + x[srcALen-srcBLen+2] * y[srcBLen-2] +...+ x[srcALen-1] * y[1]
+     * sum += x[srcALen-srcBLen+2] * y[srcBLen-1] + x[srcALen-srcBLen+3] * y[srcBLen-2] +...+ x[srcALen-1] * y[2]
+     * ....
+     * sum +=  x[srcALen-2] * y[srcBLen-1] + x[srcALen-1] * y[srcBLen-2]
+     * sum +=  x[srcALen-1] * y[srcBLen-1]
+     */
+
+    /* In this stage the MAC operations are decreased by 1 for every iteration.
+       count holds the MACs of the next output: srcBLen - 1 at index srcALen, fewer if starting later */
+    count = (srcALen + (srcBLen - 1u)) - start3;
+
+    /* Working pointer of inputA: first sample contributing to output index start3 */
+    pSrc1 = pIn1 + (start3 - (srcBLen - 1u));
+    px = pSrc1;
+
+    /* Working pointer of inputB */
+    pSrc2 = pIn2 + (srcBLen - 1u);
+    py = pSrc2;
+
+    /* -------------------
+     * Stage3 process
+     * ------------------*/
+
+    while(blockSize3 > 0)
+    {
+      /* Accumulator is made zero for every iteration */
+      sum = 0;
+
+      /* Apply loop unrolling and compute 4 MACs simultaneously. */
+      k = count >> 2u;
+
+      /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.
+       ** a second loop below computes MACs for the remaining 1 to 3 samples. */
+      while(k > 0u)
+      {
+        sum += (q63_t) * px++ * (*py--);
+        sum += (q63_t) * px++ * (*py--);
+        sum += (q63_t) * px++ * (*py--);
+        sum += (q63_t) * px++ * (*py--);
+
+        /* Decrement the loop counter */
+        k--;
+      }
+
+      /* If the count is not a multiple of 4, compute any remaining MACs here.
+       ** No loop unrolling is used. */
+      k = count % 0x4u;
+
+      while(k > 0u)
+      {
+        /* Perform the multiply-accumulate */
+        sum += (q63_t) * px++ * (*py--);
+
+        /* Decrement the loop counter */
+        k--;
+      }
+
+      /* Store the result in the accumulator in the destination buffer. */
+      *pOut++ = (q31_t) (sum >> 31);
+
+      /* Update the inputA and inputB pointers for next MAC calculation */
+      px = ++pSrc1;
+      py = pSrc2;
+
+      /* Decrement the MAC count */
+      count--;
+
+      /* Decrement the loop counter */
+      blockSize3--;
+
+    }
+
+    /* set status as ARM_MATH_SUCCESS */
+    status = ARM_MATH_SUCCESS;
+  }
+
+  /* Return to application */
+  return (status);
+
+}
+ 
+/**  
+ * @} end of PartialConv group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FilteringFunctions/arm_conv_partial_q7.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,669 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_conv_partial_q7.c  
+*  
+* Description:	Q7 Partial convolution.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+*  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupFilters  
+ */ 
+ 
+/**  
+ * @addtogroup PartialConv  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief Partial convolution of Q7 sequences  
+ * @param[in]       *pSrcA points to the first input sequence.  
+ * @param[in]       srcALen length of the first input sequence.  
+ * @param[in]       *pSrcB points to the second input sequence.  
+ * @param[in]       srcBLen length of the second input sequence.  
+ * @param[out]      *pDst points to the location where the output result is written.  
+ * @param[in]       firstIndex is the first output sample to start with.  
+ * @param[in]       numPoints is the number of output points to be computed.  
+ * @return  Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2].  
+ *  
+ */ 
+ 
+arm_status arm_conv_partial_q7( 
+  q7_t * pSrcA, 
+  uint32_t srcALen, 
+  q7_t * pSrcB, 
+  uint32_t srcBLen, 
+  q7_t * pDst, 
+  uint32_t firstIndex, 
+  uint32_t numPoints) 
+{ 
+  q7_t *pIn1;                                    /* inputA pointer */ 
+  q7_t *pIn2;                                    /* inputB pointer */ 
+  q7_t *pOut = pDst;                             /* output pointer */ 
+  q7_t *px;                                      /* Intermediate inputA pointer */ 
+  q7_t *py;                                      /* Intermediate inputB pointer */ 
+  q7_t *pSrc1, *pSrc2;                           /* Intermediate pointers */ 
+  q31_t sum, acc0, acc1, acc2, acc3;             /* Accumulator */ 
+  q31_t input1, input2; 
+  q15_t in1, in2; 
+  q7_t x0, x1, x2, x3, c0, c1; 
+  uint32_t j, k, count, check, blkCnt; 
+  int32_t blockSize1, blockSize2, blockSize3;    /* loop counter */ 
+  arm_status status; 
+ 
+ 
+  /* Check for range of output samples to be calculated */ 
+  if((firstIndex + numPoints) > ((srcALen + (srcBLen - 1u)))) 
+  { 
+    /* Set status as ARM_MATH_ARGUMENT_ERROR */ 
+    status = ARM_MATH_ARGUMENT_ERROR; 
+  } 
+  else 
+  { 
+ 
+    /* The algorithm implementation is based on the lengths of the inputs. */ 
+    /* srcB is always made to slide across srcA. */ 
+    /* So srcBLen is always considered as shorter or equal to srcALen */ 
+    if(srcALen >= srcBLen) 
+    { 
+      /* Initialization of inputA pointer */ 
+      pIn1 = pSrcA; 
+ 
+      /* Initialization of inputB pointer */ 
+      pIn2 = pSrcB; 
+    } 
+    else 
+    { 
+      /* Initialization of inputA pointer */ 
+      pIn1 = pSrcB; 
+ 
+      /* Initialization of inputB pointer */ 
+      pIn2 = pSrcA; 
+ 
+      /* srcBLen is always considered as shorter or equal to srcALen */ 
+      j = srcBLen; 
+      srcBLen = srcALen; 
+      srcALen = j; 
+    } 
+ 
+    /* Conditions to check which loopCounter holds  
+     * the first and last indices of the output samples to be calculated. */ 
+    check = firstIndex + numPoints; 
+    blockSize3 = ((int32_t) check - (int32_t) srcALen); 
+    blockSize3 = (blockSize3 > 0) ? blockSize3 : 0; 
+    blockSize1 = (((int32_t) srcBLen - 1) - (int32_t) firstIndex); 
+    blockSize1 = (blockSize1 > 0) ? ((check > (srcBLen - 1u)) ? blockSize1 : 
+	                                (int32_t) numPoints) : 0; 
+    blockSize2 = (int32_t) check - ((blockSize3 + blockSize1) +  
+		                            (int32_t) firstIndex); 
+    blockSize2 = (blockSize2 > 0) ? blockSize2 : 0; 
+ 
+    /* conv(x,y) at n = x[n] * y[0] + x[n-1] * y[1] + x[n-2] * y[2] + ...+ x[n-N+1] * y[N -1] */ 
+    /* The function is internally  
+     * divided into three stages according to the number of multiplications that has to be  
+     * taken place between inputA samples and inputB samples. In the first stage of the  
+     * algorithm, the multiplications increase by one for every iteration.  
+     * In the second stage of the algorithm, srcBLen number of multiplications are done.  
+     * In the third stage of the algorithm, the multiplications decrease by one  
+     * for every iteration. */ 
+ 
+    /* Set the output pointer to point to the firstIndex  
+     * of the output sample to be calculated. */ 
+    pOut = pDst + firstIndex; 
+ 
+    /* --------------------------  
+     * Initializations of stage1  
+     * -------------------------*/ 
+ 
+    /* sum = x[0] * y[0]  
+     * sum = x[0] * y[1] + x[1] * y[0]  
+     * ....  
+     * sum = x[0] * y[srcBlen - 1] + x[1] * y[srcBlen - 2] +...+ x[srcBLen - 1] * y[0]  
+     */ 
+ 
+    /* In this stage the MAC operations are increased by 1 for every iteration.  
+       The count variable holds the number of MAC operations performed.  
+       Since the partial convolution starts from firstIndex  
+       Number of Macs to be performed is firstIndex + 1 */ 
+    count = 1u + firstIndex; 
+ 
+    /* Working pointer of inputA */ 
+    px = pIn1; 
+ 
+    /* Working pointer of inputB */ 
+    pSrc2 = pIn2 + firstIndex; 
+    py = pSrc2; 
+ 
+    /* ------------------------  
+     * Stage1 process  
+     * ----------------------*/ 
+ 
+    /* The first stage starts here */ 
+    while(blockSize1 > 0) 
+    { 
+      /* Accumulator is made zero for every iteration */ 
+      sum = 0; 
+ 
+      /* Apply loop unrolling and compute 4 MACs simultaneously. */ 
+      k = count >> 2u; 
+ 
+      /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.  
+       ** a second loop below computes MACs for the remaining 1 to 3 samples. */ 
+      while(k > 0u) 
+      { 
+        /* x[0] , x[1] */ 
+        in1 = (q15_t) * px++; 
+        in2 = (q15_t) * px++; 
+        input1 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16); 
+ 
+        /* y[srcBLen - 1] , y[srcBLen - 2] */ 
+        in1 = (q15_t) * py--; 
+        in2 = (q15_t) * py--; 
+        input2 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16); 
+ 
+        /* x[0] * y[srcBLen - 1] */ 
+        /* x[1] * y[srcBLen - 2] */ 
+        sum = __SMLAD(input1, input2, sum); 
+ 
+        /* x[2] , x[3] */ 
+        in1 = (q15_t) * px++; 
+        in2 = (q15_t) * px++; 
+        input1 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16); 
+ 
+        /* y[srcBLen - 3] , y[srcBLen - 4] */ 
+        in1 = (q15_t) * py--; 
+        in2 = (q15_t) * py--; 
+        input2 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16); 
+ 
+        /* x[2] * y[srcBLen - 3] */ 
+        /* x[3] * y[srcBLen - 4] */ 
+        sum = __SMLAD(input1, input2, sum); 
+ 
+        /* Decrement the loop counter */ 
+        k--; 
+      } 
+ 
+      /* If the count is not a multiple of 4, compute any remaining MACs here.  
+       ** No loop unrolling is used. */ 
+      k = count % 0x4u; 
+ 
+      while(k > 0u) 
+      { 
+        /* Perform the multiply-accumulates */ 
+        sum += ((q31_t) * px++ * *py--); 
+ 
+        /* Decrement the loop counter */ 
+        k--; 
+      } 
+ 
+      /* Store the result in the accumulator in the destination buffer. */ 
+      *pOut++ = (q7_t) (__SSAT(sum >> 7, 8)); 
+ 
+      /* Update the inputA and inputB pointers for next MAC calculation */ 
+      py = ++pSrc2; 
+      px = pIn1; 
+ 
+      /* Increment the MAC count */ 
+      count++; 
+ 
+      /* Decrement the loop counter */ 
+      blockSize1--; 
+    } 
+ 
+    /* --------------------------  
+     * Initializations of stage2  
+     * ------------------------*/ 
+ 
+    /* sum = x[0] * y[srcBLen-1] + x[1] * y[srcBLen-2] +...+ x[srcBLen-1] * y[0]  
+     * sum = x[1] * y[srcBLen-1] + x[2] * y[srcBLen-2] +...+ x[srcBLen] * y[0]  
+     * ....  
+     * sum = x[srcALen-srcBLen-2] * y[srcBLen-1] + x[srcALen] * y[srcBLen-2] +...+ x[srcALen-1] * y[0]  
+     */ 
+ 
+    /* Working pointer of inputA */ 
+    px = pIn1; 
+ 
+    /* Working pointer of inputB */ 
+    pSrc2 = pIn2 + (srcBLen - 1u); 
+    py = pSrc2; 
+ 
+    /* count is index by which the pointer pIn1 to be incremented */ 
+    count = 1u; 
+ 
+    /* -------------------  
+     * Stage2 process  
+     * ------------------*/ 
+ 
+    /* Stage2 depends on srcBLen as in this stage srcBLen number of MACS are performed.  
+     * So, to loop unroll over blockSize2,  
+     * srcBLen should be greater than or equal to 4 */ 
+    if(srcBLen >= 4u) 
+    { 
+      /* Loop unroll over blockSize2, by 4 */ 
+      blkCnt = ((uint32_t) blockSize2 >> 2u); 
+ 
+      while(blkCnt > 0u) 
+      { 
+        /* Set all accumulators to zero */ 
+        acc0 = 0; 
+        acc1 = 0; 
+        acc2 = 0; 
+        acc3 = 0; 
+ 
+        /* read x[0], x[1], x[2] samples */ 
+        x0 = *(px++); 
+        x1 = *(px++); 
+        x2 = *(px++); 
+ 
+        /* Apply loop unrolling and compute 4 MACs simultaneously. */ 
+        k = srcBLen >> 2u; 
+ 
+        /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.  
+         ** a second loop below computes MACs for the remaining 1 to 3 samples. */ 
+        do 
+        { 
+          /* Read y[srcBLen - 1] sample */ 
+          c0 = *(py--); 
+          /* Read y[srcBLen - 2] sample */ 
+          c1 = *(py--); 
+ 
+          /* Read x[3] sample */ 
+          x3 = *(px++); 
+ 
+          /* x[0] and x[1] are packed */ 
+          in1 = (q15_t) x0; 
+          in2 = (q15_t) x1; 
+ 
+          input1 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16); 
+ 
+          /* y[srcBLen - 1]   and y[srcBLen - 2] are packed */ 
+          in1 = (q15_t) c0; 
+          in2 = (q15_t) c1; 
+ 
+          input2 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16); 
+ 
+          /* acc0 += x[0] * y[srcBLen - 1] + x[1] * y[srcBLen - 2]  */ 
+          acc0 = __SMLAD(input1, input2, acc0); 
+ 
+          /* x[1] and x[2] are packed */ 
+          in1 = (q15_t) x1; 
+          in2 = (q15_t) x2; 
+ 
+          input1 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16); 
+ 
+          /* acc1 += x[1] * y[srcBLen - 1] + x[2] * y[srcBLen - 2]  */ 
+          acc1 = __SMLAD(input1, input2, acc1); 
+ 
+          /* x[2] and x[3] are packed */ 
+          in1 = (q15_t) x2; 
+          in2 = (q15_t) x3; 
+ 
+          input1 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16); 
+ 
+          /* acc2 += x[2] * y[srcBLen - 1] + x[3] * y[srcBLen - 2]  */ 
+          acc2 = __SMLAD(input1, input2, acc2); 
+ 
+          /* Read x[4] sample */ 
+          x0 = *(px++); 
+ 
+          /* x[3] and x[4] are packed */ 
+          in1 = (q15_t) x3; 
+          in2 = (q15_t) x0; 
+ 
+          input1 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16); 
+ 
+          /* acc3 += x[3] * y[srcBLen - 1] + x[4] * y[srcBLen - 2]  */ 
+          acc3 = __SMLAD(input1, input2, acc3); 
+ 
+          /* Read y[srcBLen - 3] sample */ 
+          c0 = *(py--); 
+          /* Read y[srcBLen - 4] sample */ 
+          c1 = *(py--); 
+ 
+          /* Read x[5] sample */ 
+          x1 = *(px++); 
+ 
+          /* x[2] and x[3] are packed */ 
+          in1 = (q15_t) x2; 
+          in2 = (q15_t) x3; 
+ 
+          input1 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16); 
+ 
+          /* y[srcBLen - 3] and y[srcBLen - 4] are packed */ 
+          in1 = (q15_t) c0; 
+          in2 = (q15_t) c1; 
+ 
+          input2 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16); 
+ 
+          /* acc0 += x[2] * y[srcBLen - 3] + x[3] * y[srcBLen - 4]  */ 
+          acc0 = __SMLAD(input1, input2, acc0); 
+ 
+          /* x[3] and x[4] are packed */ 
+          in1 = (q15_t) x3; 
+          in2 = (q15_t) x0; 
+ 
+          input1 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16); 
+ 
+          /* acc1 += x[3] * y[srcBLen - 3] + x[4] * y[srcBLen - 4]  */ 
+          acc1 = __SMLAD(input1, input2, acc1); 
+ 
+          /* x[4] and x[5] are packed */ 
+          in1 = (q15_t) x0; 
+          in2 = (q15_t) x1; 
+ 
+          input1 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16); 
+ 
+          /* acc2 += x[4] * y[srcBLen - 3] + x[5] * y[srcBLen - 4]  */ 
+          acc2 = __SMLAD(input1, input2, acc2); 
+ 
+          /* Read x[6] sample */ 
+          x2 = *(px++); 
+ 
+          /* x[5] and x[6] are packed */ 
+          in1 = (q15_t) x1; 
+          in2 = (q15_t) x2; 
+ 
+          input1 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16); 
+ 
+          /* acc3 += x[5] * y[srcBLen - 3] + x[6] * y[srcBLen - 4]  */ 
+          acc3 = __SMLAD(input1, input2, acc3); 
+ 
+        } while(--k); 
+ 
+        /* If the srcBLen is not a multiple of 4, compute any remaining MACs here.  
+         ** No loop unrolling is used. */ 
+        k = srcBLen % 0x4u; 
+ 
+        while(k > 0u) 
+        { 
+          /* Read y[srcBLen - 5] sample */ 
+          c0 = *(py--); 
+ 
+          /* Read x[7] sample */ 
+          x3 = *(px++); 
+ 
+          /* Perform the multiply-accumulates */ 
+          /* acc0 +=  x[4] * y[srcBLen - 5] */ 
+          acc0 += ((q31_t) x0 * c0); 
+          /* acc1 +=  x[5] * y[srcBLen - 5] */ 
+          acc1 += ((q31_t) x1 * c0); 
+          /* acc2 +=  x[6] * y[srcBLen - 5] */ 
+          acc2 += ((q31_t) x2 * c0); 
+          /* acc3 +=  x[7] * y[srcBLen - 5] */ 
+          acc3 += ((q31_t) x3 * c0); 
+ 
+          /* Reuse the present samples for the next MAC */ 
+          x0 = x1; 
+          x1 = x2; 
+          x2 = x3; 
+ 
+          /* Decrement the loop counter */ 
+          k--; 
+        } 
+ 
+        /* Store the result in the accumulator in the destination buffer. */ 
+        *pOut++ = (q7_t) (__SSAT(acc0 >> 7, 8)); 
+        *pOut++ = (q7_t) (__SSAT(acc1 >> 7, 8)); 
+        *pOut++ = (q7_t) (__SSAT(acc2 >> 7, 8)); 
+        *pOut++ = (q7_t) (__SSAT(acc3 >> 7, 8)); 
+ 
+        /* Update the inputA and inputB pointers for next MAC calculation */ 
+        px = pIn1 + count * 4u; 
+        py = pSrc2; 
+ 
+        /* Increment the pointer pIn1 index, count by 1 */ 
+        count++; 
+ 
+        /* Decrement the loop counter */ 
+        blkCnt--; 
+      } 
+ 
+      /* If the blockSize2 is not a multiple of 4, compute any remaining output samples here.  
+       ** No loop unrolling is used. */ 
+      blkCnt = (uint32_t) blockSize2 % 0x4u; 
+ 
+      while(blkCnt > 0u) 
+      { 
+        /* Accumulator is made zero for every iteration */ 
+        sum = 0; 
+ 
+        /* Apply loop unrolling and compute 4 MACs simultaneously. */ 
+        k = srcBLen >> 2u; 
+ 
+        /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.  
+         ** a second loop below computes MACs for the remaining 1 to 3 samples. */ 
+        while(k > 0u) 
+        { 
+ 
+          /* Reading two inputs of SrcA buffer and packing */ 
+          in1 = (q15_t) * px++; 
+          in2 = (q15_t) * px++; 
+          input1 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16); 
+ 
+          /* Reading two inputs of SrcB buffer and packing */ 
+          in1 = (q15_t) * py--; 
+          in2 = (q15_t) * py--; 
+          input2 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16); 
+ 
+          /* Perform the multiply-accumulates */ 
+          sum = __SMLAD(input1, input2, sum); 
+ 
+          /* Reading two inputs of SrcA buffer and packing */ 
+          in1 = (q15_t) * px++; 
+          in2 = (q15_t) * px++; 
+          input1 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16); 
+ 
+          /* Reading two inputs of SrcB buffer and packing */ 
+          in1 = (q15_t) * py--; 
+          in2 = (q15_t) * py--; 
+          input2 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16); 
+ 
+          /* Perform the multiply-accumulates */ 
+          sum = __SMLAD(input1, input2, sum); 
+ 
+          /* Decrement the loop counter */ 
+          k--; 
+        } 
+ 
+        /* If the srcBLen is not a multiple of 4, compute any remaining MACs here.  
+         ** No loop unrolling is used. */ 
+        k = srcBLen % 0x4u; 
+ 
+        while(k > 0u) 
+        { 
+          /* Perform the multiply-accumulates */ 
+          sum += ((q31_t) * px++ * *py--); 
+ 
+          /* Decrement the loop counter */ 
+          k--; 
+        } 
+ 
+        /* Store the result in the accumulator in the destination buffer. */ 
+        *pOut++ = (q7_t) (__SSAT(sum >> 7, 8)); 
+ 
+        /* Update the inputA and inputB pointers for next MAC calculation */ 
+        px = pIn1 + count; 
+        py = pSrc2; 
+ 
+        /* Increment the pointer pIn1 index, count by 1 */ 
+        count++; 
+ 
+        /* Decrement the loop counter */ 
+        blkCnt--; 
+      } 
+    } 
+    else 
+    { 
+      /* If the srcBLen is not a multiple of 4,  
+       * the blockSize2 loop cannot be unrolled by 4 */ 
+      blkCnt = (uint32_t) blockSize2; 
+ 
+      while(blkCnt > 0u) 
+      { 
+        /* Accumulator is made zero for every iteration */ 
+        sum = 0; 
+ 
+        /* srcBLen number of MACS should be performed */ 
+        k = srcBLen; 
+ 
+        while(k > 0u) 
+        { 
+          /* Perform the multiply-accumulate */ 
+          sum += ((q31_t) * px++ * *py--); 
+ 
+          /* Decrement the loop counter */ 
+          k--; 
+        } 
+ 
+        /* Store the result in the accumulator in the destination buffer. */ 
+        *pOut++ = (q7_t) (__SSAT(sum >> 7, 8)); 
+ 
+        /* Update the inputA and inputB pointers for next MAC calculation */ 
+        px = pIn1 + count; 
+        py = pSrc2; 
+ 
+        /* Increment the MAC count */ 
+        count++; 
+ 
+        /* Decrement the loop counter */ 
+        blkCnt--; 
+      } 
+    } 
+ 
+ 
+    /* --------------------------  
+     * Initializations of stage3  
+     * -------------------------*/ 
+ 
+    /* sum += x[srcALen-srcBLen+1] * y[srcBLen-1] + x[srcALen-srcBLen+2] * y[srcBLen-2] +...+ x[srcALen-1] * y[1]  
+     * sum += x[srcALen-srcBLen+2] * y[srcBLen-1] + x[srcALen-srcBLen+3] * y[srcBLen-2] +...+ x[srcALen-1] * y[2]  
+     * ....  
+     * sum +=  x[srcALen-2] * y[srcBLen-1] + x[srcALen-1] * y[srcBLen-2]  
+     * sum +=  x[srcALen-1] * y[srcBLen-1]  
+     */ 
+ 
+    /* In this stage the MAC operations are decreased by 1 for every iteration.  
+       The count variable holds the number of MAC operations performed */ 
+    count = srcBLen - 1u; 
+ 
+    /* Working pointer of inputA */ 
+    pSrc1 = (pIn1 + srcALen) - (srcBLen - 1u); 
+    px = pSrc1; 
+ 
+    /* Working pointer of inputB */ 
+    pSrc2 = pIn2 + (srcBLen - 1u); 
+    py = pSrc2; 
+ 
+    /* -------------------  
+     * Stage3 process  
+     * ------------------*/ 
+ 
+    while(blockSize3 > 0) 
+    { 
+      /* Accumulator is made zero for every iteration */ 
+      sum = 0; 
+ 
+      /* Apply loop unrolling and compute 4 MACs simultaneously. */ 
+      k = count >> 2u; 
+ 
+      /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.  
+       ** a second loop below computes MACs for the remaining 1 to 3 samples. */ 
+      while(k > 0u) 
+      { 
+        /* Reading two inputs, x[srcALen - srcBLen + 1] and x[srcALen - srcBLen + 2] of SrcA buffer and packing */ 
+        in1 = (q15_t) * px++; 
+        in2 = (q15_t) * px++; 
+        input1 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16); 
+ 
+        /* Reading two inputs, y[srcBLen - 1] and y[srcBLen - 2] of SrcB buffer and packing */ 
+        in1 = (q15_t) * py--; 
+        in2 = (q15_t) * py--; 
+        input2 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16); 
+ 
+        /* sum += x[srcALen - srcBLen + 1] * y[srcBLen - 1] */ 
+        /* sum += x[srcALen - srcBLen + 2] * y[srcBLen - 2] */ 
+        sum = __SMLAD(input1, input2, sum); 
+ 
+        /* Reading two inputs, x[srcALen - srcBLen + 3] and x[srcALen - srcBLen + 4] of SrcA buffer and packing */ 
+        in1 = (q15_t) * px++; 
+        in2 = (q15_t) * px++; 
+        input1 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16); 
+ 
+        /* Reading two inputs, y[srcBLen - 3] and y[srcBLen - 4] of SrcB buffer and packing */ 
+        in1 = (q15_t) * py--; 
+        in2 = (q15_t) * py--; 
+        input2 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16); 
+ 
+        /* sum += x[srcALen - srcBLen + 3] * y[srcBLen - 3] */ 
+        /* sum += x[srcALen - srcBLen + 4] * y[srcBLen - 4] */ 
+        sum = __SMLAD(input1, input2, sum); 
+ 
+        /* Decrement the loop counter */ 
+        k--; 
+      } 
+ 
+      /* If the count is not a multiple of 4, compute any remaining MACs here.  
+       ** No loop unrolling is used. */ 
+      k = count % 0x4u; 
+ 
+      while(k > 0u) 
+      { 
+        /* Perform the multiply-accumulates */ 
+        /* sum +=  x[srcALen-1] * y[srcBLen-1] */ 
+        sum += ((q31_t) * px++ * *py--); 
+ 
+        /* Decrement the loop counter */ 
+        k--; 
+      } 
+ 
+      /* Store the result in the accumulator in the destination buffer. */ 
+      *pOut++ = (q7_t) (__SSAT(sum >> 7, 8)); 
+ 
+      /* Update the inputA and inputB pointers for next MAC calculation */ 
+      px = ++pSrc1; 
+      py = pSrc2; 
+ 
+      /* Decrement the MAC count */ 
+      count--; 
+ 
+      /* Decrement the loop counter */ 
+      blockSize3--; 
+ 
+    } 
+ 
+    /* set status as ARM_MATH_SUCCESS */ 
+    status = ARM_MATH_SUCCESS; 
+  } 
+ 
+  /* Return to application */ 
+  return (status); 
+ 
+} 
+ 
+/**  
+ * @} end of PartialConv group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FilteringFunctions/arm_conv_q15.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,656 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_conv_q15.c  
+*  
+* Description:	Q15 Convolution.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+*  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupFilters  
+ */ 
+ 
+/**  
+ * @addtogroup Conv  
+ * @{  
+ */ 
+ 
+/**
+ * @brief Convolution of Q15 sequences.
+ * @param[in] *pSrcA points to the first input sequence.
+ * @param[in] srcALen length of the first input sequence.
+ * @param[in] *pSrcB points to the second input sequence.
+ * @param[in] srcBLen length of the second input sequence.
+ * @param[out] *pDst points to the location where the output result is written.  Length srcALen+srcBLen-1.
+ * @return none.
+ *
+ * @details
+ * <b>Scaling and Overflow Behavior:</b>
+ *
+ * \par
+ * The function is implemented using a 64-bit internal accumulator.
+ * Both inputs are in 1.15 format and multiplications yield a 2.30 result.
+ * The 2.30 intermediate results are accumulated in a 64-bit accumulator in 34.30 format.
+ * This approach provides 33 guard bits and there is no risk of overflow.
+ * The 34.30 result is then truncated to 34.15 format by discarding the low 15 bits and then saturated to 1.15 format.
+ *
+ * \par
+ * Refer to <code>arm_conv_fast_q15()</code> for a faster but less precise version of this function.
+ *
+ * \par
+ * If either input length is zero the function returns without writing to <code>pDst</code>.
+ *
+ * \par Implementation notes
+ * Pairs of adjacent q15 samples are read as one 32-bit word and fed to the dual
+ * multiply-accumulate intrinsics.  The half-word selection used below (sample at the
+ * lower address in the low half-word) corresponds to a little-endian memory layout.
+ * Several of these 32-bit reads are performed through pointers that are only
+ * guaranteed to be 16-bit aligned.
+ */
+
+void arm_conv_q15(
+  q15_t * pSrcA,
+  uint32_t srcALen,
+  q15_t * pSrcB,
+  uint32_t srcBLen,
+  q15_t * pDst)
+{
+  q15_t *pIn1;                                   /* inputA pointer (the longer sequence, "x") */
+  q15_t *pIn2;                                   /* inputB pointer (the shorter sequence, "y") */
+  q15_t *pOut = pDst;                            /* output pointer */
+  q63_t sum, acc0, acc1, acc2, acc3;             /* Accumulators (34.30) */
+  q15_t *px;                                     /* Intermediate inputA pointer  */
+  q15_t *py;                                     /* Intermediate inputB pointer  */
+  q15_t *pSrc1, *pSrc2;                          /* Intermediate pointers */
+  q31_t x0, x1, x2, x3, c0;                      /* Packed sample pairs of inputA and coefficient pair of inputB */
+  uint32_t blockSize1, blockSize2, blockSize3, j, k, count, blkCnt;     /* loop counters */
+  q31_t *pb;                                     /* 32 bit pointer for inputB buffer */
+
+
+  /* The algorithm implementation is based on the lengths of the inputs. */
+  /* srcB is always made to slide across srcA. */
+  /* So srcBLen is always considered as shorter or equal to srcALen */
+  if(srcALen >= srcBLen)
+  {
+    /* Initialization of inputA pointer */
+    pIn1 = pSrcA;
+
+    /* Initialization of inputB pointer */
+    pIn2 = pSrcB;
+  }
+  else
+  {
+    /* Initialization of inputA pointer */
+    pIn1 = pSrcB;
+
+    /* Initialization of inputB pointer */
+    pIn2 = pSrcA;
+
+    /* Swap the lengths so that srcBLen is the shorter one */
+    j = srcBLen;
+    srcBLen = srcALen;
+    srcALen = j;
+  }
+
+  /* After the swap srcBLen is the smaller length.  With an empty sequence
+   * there is nothing to compute, and (srcBLen - 1u) below would wrap around
+   * and drive the loops far outside both buffers. */
+  if(srcBLen == 0u)
+  {
+    return;
+  }
+
+  /* conv(x,y) at n = x[n] * y[0] + x[n-1] * y[1] + x[n-2] * y[2] + ...+ x[n-N+1] * y[N -1] */
+  /* The function is internally
+   * divided into three stages according to the number of multiplications that
+   * take place between inputA samples and inputB samples. In the first stage of the
+   * algorithm, the multiplications increase by one for every iteration.
+   * In the second stage of the algorithm, srcBLen number of multiplications are done.
+   * In the third stage of the algorithm, the multiplications decrease by one
+   * for every iteration. */
+
+  /* The loop counters of stage 1 and stage 2 are initialised here;
+   * blockSize3 is initialised just before stage 3. */
+  blockSize1 = srcBLen - 1u;
+  blockSize2 = srcALen - (srcBLen - 1u);
+
+  /* --------------------------
+   * Initializations of stage1
+   * -------------------------*/
+
+  /* sum = x[0] * y[0]
+   * sum = x[0] * y[1] + x[1] * y[0]
+   * ....
+   * sum = x[0] * y[srcBlen - 2] + x[1] * y[srcBlen - 3] +...+ x[srcBLen - 2] * y[0]
+   */
+
+  /* In this stage the MAC operations are increased by 1 for every iteration.
+     The count variable holds the number of MAC operations performed */
+  count = 1u;
+
+  /* Working pointer of inputA */
+  px = pIn1;
+
+  /* Working pointer of inputB */
+  py = pIn2;
+
+
+  /* ------------------------
+   * Stage1 process
+   * ----------------------*/
+
+  /* For loop unrolling by 4, this stage is divided into two. */
+  /* First part of this stage computes the outputs that need fewer than 4 MACs */
+  /* Second part of this stage computes the outputs that need 4 or more MACs */
+
+  /* The first part of the stage starts here */
+  while((count < 4u) && (blockSize1 > 0u))
+  {
+    /* Accumulator is made zero for every iteration */
+    sum = 0;
+
+    /* Loop over number of MAC operations between
+     * inputA samples and inputB samples */
+    k = count;
+
+    while(k > 0u)
+    {
+      /* Perform the multiply-accumulates */
+      sum = __SMLALD(*px++, *py--, sum);
+
+      /* Decrement the loop counter */
+      k--;
+    }
+
+    /* Store the result in the accumulator in the destination buffer. */
+    *pOut++ = (q15_t) (__SSAT((sum >> 15), 16));
+
+    /* Update the inputA and inputB pointers for next MAC calculation */
+    py = pIn2 + count;
+    px = pIn1;
+
+    /* Increment the MAC count */
+    count++;
+
+    /* Decrement the loop counter */
+    blockSize1--;
+  }
+
+  /* The second part of the stage starts here */
+  /* The internal loop, over count, is unrolled by 4 */
+  /* To read the two inputB samples y[count-2] and y[count-1] as one 32-bit word,
+   * py is moved back by one sample */
+  py = py - 1;
+
+  while(blockSize1 > 0u)
+  {
+    /* Accumulator is made zero for every iteration */
+    sum = 0;
+
+    /* Apply loop unrolling and compute 4 MACs simultaneously. */
+    k = count >> 2u;
+
+    /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.
+     ** a second loop below computes MACs for the remaining 1 to 3 samples. */
+    while(k > 0u)
+    {
+      /* Perform the multiply-accumulates */
+      /* x[0], x[1] are multiplied with y[count - 1], y[count - 2] respectively */
+      sum = __SMLALDX(*__SIMD32(px)++, *__SIMD32(py)--, sum);
+      /* x[2], x[3] are multiplied with y[count - 3], y[count - 4] respectively */
+      sum = __SMLALDX(*__SIMD32(px)++, *__SIMD32(py)--, sum);
+
+      /* Decrement the loop counter */
+      k--;
+    }
+
+    /* For the next MAC operations, the pointer py is used without SIMD
+     * So, py is incremented by 1 */
+    py = py + 1u;
+
+    /* If the count is not a multiple of 4, compute any remaining MACs here.
+     ** No loop unrolling is used. */
+    k = count % 0x4u;
+
+    while(k > 0u)
+    {
+      /* Perform the multiply-accumulates */
+      sum = __SMLALD(*px++, *py--, sum);
+
+      /* Decrement the loop counter */
+      k--;
+    }
+
+    /* Store the result in the accumulator in the destination buffer. */
+    *pOut++ = (q15_t) (__SSAT((sum >> 15), 16));
+
+    /* Update the inputA and inputB pointers for next MAC calculation.
+     * py already accounts for the one-sample offset needed by the SIMD read. */
+    py = pIn2 + (count - 1u);
+    px = pIn1;
+
+    /* Increment the MAC count */
+    count++;
+
+    /* Decrement the loop counter */
+    blockSize1--;
+  }
+
+  /* --------------------------
+   * Initializations of stage2
+   * ------------------------*/
+
+  /* sum = x[0] * y[srcBLen-1] + x[1] * y[srcBLen-2] +...+ x[srcBLen-1] * y[0]
+   * sum = x[1] * y[srcBLen-1] + x[2] * y[srcBLen-2] +...+ x[srcBLen] * y[0]
+   * ....
+   * sum = x[srcALen-srcBLen] * y[srcBLen-1] + x[srcALen-srcBLen+1] * y[srcBLen-2] +...+ x[srcALen-1] * y[0]
+   */
+
+  /* Working pointer of inputA */
+  px = pIn1;
+
+  /* Working pointer of inputB */
+  pSrc2 = pIn2 + (srcBLen - 1u);
+  py = pSrc2;
+
+  /* 32-bit view of the last two inputB samples, y[srcBLen - 2] and y[srcBLen - 1] */
+  pb = (q31_t *) (py - 1u);
+
+  /* count is the index by which the pointer pIn1 to be incremented */
+  count = 1u;
+
+
+  /* --------------------
+   * Stage2 process
+   * -------------------*/
+
+  /* Stage2 depends on srcBLen as in this stage srcBLen number of MACS are performed.
+   * So, to loop unroll over blockSize2,
+   * srcBLen should be greater than or equal to 4 */
+  if(srcBLen >= 4u)
+  {
+    /* Loop unroll over blockSize2, by 4: four outputs are produced per pass */
+    blkCnt = blockSize2 >> 2u;
+
+    while(blkCnt > 0u)
+    {
+      /* Set all accumulators to zero */
+      acc0 = 0;
+      acc1 = 0;
+      acc2 = 0;
+      acc3 = 0;
+
+
+      /* read x[0], x[1] samples */
+      x0 = *(q31_t *) (px++);
+      /* read x[1], x[2] samples */
+      x1 = *(q31_t *) (px++);
+
+
+      /* Apply loop unrolling and compute 4 MACs simultaneously. */
+      k = srcBLen >> 2u;
+
+      /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.
+       ** the code below the loop computes MACs for the remaining 1 to 3 samples. */
+      do
+      {
+        /* Read the last two inputB samples using SIMD:
+         * y[srcBLen - 1] and y[srcBLen - 2] */
+        c0 = *(pb--);
+
+        /* acc0 +=  x[0] * y[srcBLen - 1] + x[1] * y[srcBLen - 2] */
+        acc0 = __SMLALDX(x0, c0, acc0);
+
+        /* acc1 +=  x[1] * y[srcBLen - 1] + x[2] * y[srcBLen - 2] */
+        acc1 = __SMLALDX(x1, c0, acc1);
+
+        /* Read x[2], x[3] */
+        x2 = *(q31_t *) (px++);
+
+        /* Read x[3], x[4] */
+        x3 = *(q31_t *) (px++);
+
+        /* acc2 +=  x[2] * y[srcBLen - 1] + x[3] * y[srcBLen - 2] */
+        acc2 = __SMLALDX(x2, c0, acc2);
+
+        /* acc3 +=  x[3] * y[srcBLen - 1] + x[4] * y[srcBLen - 2] */
+        acc3 = __SMLALDX(x3, c0, acc3);
+
+        /* Read y[srcBLen - 3] and y[srcBLen - 4] */
+        c0 = *(pb--);
+
+        /* acc0 +=  x[2] * y[srcBLen - 3] + x[3] * y[srcBLen - 4] */
+        acc0 = __SMLALDX(x2, c0, acc0);
+
+        /* acc1 +=  x[3] * y[srcBLen - 3] + x[4] * y[srcBLen - 4] */
+        acc1 = __SMLALDX(x3, c0, acc1);
+
+        /* Read x[4], x[5] */
+        x0 = *(q31_t *) (px++);
+
+        /* Read x[5], x[6] */
+        x1 = *(q31_t *) (px++);
+
+        /* acc2 +=  x[4] * y[srcBLen - 3] + x[5] * y[srcBLen - 4] */
+        acc2 = __SMLALDX(x0, c0, acc2);
+
+        /* acc3 +=  x[5] * y[srcBLen - 3] + x[6] * y[srcBLen - 4] */
+        acc3 = __SMLALDX(x1, c0, acc3);
+
+      } while(--k);
+
+      /* For the next MAC operations, SIMD is not used
+       * So, the 16 bit pointer of inputB, py is updated */
+      py = (q15_t *) pb;
+      py = py + 1;
+
+      /* If the srcBLen is not a multiple of 4, compute any remaining MACs here.
+       ** No loop unrolling is used. */
+      k = srcBLen % 0x4u;
+
+      if(k == 1u)
+      {
+        /* Only y[0] is left.  Keep it in the low half-word and force the high
+         * half-word to zero: the dual-MAC intrinsics multiply both half-words,
+         * so a sign-extended negative coefficient would otherwise contribute
+         * an extra (-1 * x) term to every accumulator. */
+        c0 = (q31_t) *py & 0x0000FFFF;
+
+        /* Read the next pair of inputA samples */
+        x3 = *(q31_t *) px++;
+
+        /* Perform the multiply-accumulates */
+        acc0 = __SMLALD(x0, c0, acc0);
+        acc1 = __SMLALD(x1, c0, acc1);
+        acc2 = __SMLALDX(x1, c0, acc2);
+        acc3 = __SMLALDX(x3, c0, acc3);
+      }
+
+      if(k == 2u)
+      {
+        /* Two coefficients are left: read y[0], y[1] as a pair */
+        c0 = *(pb);
+
+        /* Read the next two overlapping pairs of inputA samples */
+        x3 = *(q31_t *) px++;
+
+        x2 = *(q31_t *) px++;
+
+        /* Perform the multiply-accumulates */
+        acc0 = __SMLALDX(x0, c0, acc0);
+        acc1 = __SMLALDX(x1, c0, acc1);
+        acc2 = __SMLALDX(x3, c0, acc2);
+        acc3 = __SMLALDX(x2, c0, acc3);
+      }
+
+      if(k == 3u)
+      {
+        /* Three coefficients are left: first consume the pair y[1], y[2] */
+        c0 = *pb;
+
+        /* Read the next two overlapping pairs of inputA samples */
+        x3 = *(q31_t *) px++;
+
+        x2 = *(q31_t *) px++;
+
+        /* Perform the multiply-accumulates */
+        acc0 = __SMLALDX(x0, c0, acc0);
+        acc1 = __SMLALDX(x1, c0, acc1);
+        acc2 = __SMLALDX(x3, c0, acc2);
+        acc3 = __SMLALDX(x2, c0, acc3);
+
+        /* Then consume y[0].  It is read as a single 16-bit sample straight
+         * from the start of inputB (a 32-bit read of the pair ending at y[0]
+         * would touch memory in front of the buffer) and zero-extended so the
+         * unused half-word of the dual MAC contributes nothing. */
+        c0 = (q31_t) *pIn2 & 0x0000FFFF;
+
+        /* Read the next pair of inputA samples */
+        x3 = *(q31_t *) px++;
+
+        /* Perform the multiply-accumulates */
+        acc0 = __SMLALDX(x1, c0, acc0);
+        acc1 = __SMLALD(x2, c0, acc1);
+        acc2 = __SMLALDX(x2, c0, acc2);
+        acc3 = __SMLALDX(x3, c0, acc3);
+      }
+
+      /* Store the results in the accumulators in the destination buffer,
+       * two saturated q15 outputs per 32-bit write. */
+      *__SIMD32(pOut)++ =
+        __PKHBT(__SSAT((acc0 >> 15), 16), __SSAT((acc1 >> 15), 16), 16);
+      *__SIMD32(pOut)++ =
+        __PKHBT(__SSAT((acc2 >> 15), 16), __SSAT((acc3 >> 15), 16), 16);
+
+      /* Update the inputA and inputB pointers for next MAC calculation */
+      px = pIn1 + (count * 4u);
+      py = pSrc2;
+      pb = (q31_t *) (py - 1);
+
+      /* Increment the pointer pIn1 index, count by 1 */
+      count++;
+
+      /* Decrement the loop counter */
+      blkCnt--;
+    }
+
+    /* If the blockSize2 is not a multiple of 4, compute any remaining output samples here.
+     ** One output per pass; count keeps counting from the unrolled loop above. */
+    blkCnt = blockSize2 % 0x4u;
+
+    while(blkCnt > 0u)
+    {
+      /* Accumulator is made zero for every iteration */
+      sum = 0;
+
+      /* Apply loop unrolling and compute 4 MACs simultaneously. */
+      k = srcBLen >> 2u;
+
+      /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.
+       ** a second loop below computes MACs for the remaining 1 to 3 samples. */
+      while(k > 0u)
+      {
+        /* Perform the multiply-accumulates */
+        sum += (q63_t) ((q31_t) * px++ * *py--);
+        sum += (q63_t) ((q31_t) * px++ * *py--);
+        sum += (q63_t) ((q31_t) * px++ * *py--);
+        sum += (q63_t) ((q31_t) * px++ * *py--);
+
+        /* Decrement the loop counter */
+        k--;
+      }
+
+      /* If the srcBLen is not a multiple of 4, compute any remaining MACs here.
+       ** No loop unrolling is used. */
+      k = srcBLen % 0x4u;
+
+      while(k > 0u)
+      {
+        /* Perform the multiply-accumulates */
+        sum += (q63_t) ((q31_t) * px++ * *py--);
+
+        /* Decrement the loop counter */
+        k--;
+      }
+
+      /* Store the result in the accumulator in the destination buffer. */
+      *pOut++ = (q15_t) (__SSAT(sum >> 15, 16));
+
+      /* Update the inputA and inputB pointers for next MAC calculation */
+      px = pIn1 + count;
+      py = pSrc2;
+
+      /* Increment the pointer pIn1 index, count by 1 */
+      count++;
+
+      /* Decrement the loop counter */
+      blkCnt--;
+    }
+  }
+  else
+  {
+    /* If the srcBLen is less than 4,
+     * the blockSize2 loop is not unrolled by 4 */
+    blkCnt = blockSize2;
+
+    while(blkCnt > 0u)
+    {
+      /* Accumulator is made zero for every iteration */
+      sum = 0;
+
+      /* srcBLen number of MACS should be performed */
+      k = srcBLen;
+
+      while(k > 0u)
+      {
+        /* Perform the multiply-accumulate */
+        sum += (q63_t) ((q31_t) * px++ * *py--);
+
+        /* Decrement the loop counter */
+        k--;
+      }
+
+      /* Store the result in the accumulator in the destination buffer. */
+      *pOut++ = (q15_t) (__SSAT(sum >> 15, 16));
+
+      /* Update the inputA and inputB pointers for next MAC calculation */
+      px = pIn1 + count;
+      py = pSrc2;
+
+      /* Increment the MAC count */
+      count++;
+
+      /* Decrement the loop counter */
+      blkCnt--;
+    }
+  }
+
+
+  /* --------------------------
+   * Initializations of stage3
+   * -------------------------*/
+
+  /* sum += x[srcALen-srcBLen+1] * y[srcBLen-1] + x[srcALen-srcBLen+2] * y[srcBLen-2] +...+ x[srcALen-1] * y[1]
+   * sum += x[srcALen-srcBLen+2] * y[srcBLen-1] + x[srcALen-srcBLen+3] * y[srcBLen-2] +...+ x[srcALen-1] * y[2]
+   * ....
+   * sum +=  x[srcALen-2] * y[srcBLen-1] + x[srcALen-1] * y[srcBLen-2]
+   * sum +=  x[srcALen-1] * y[srcBLen-1]
+   */
+
+  /* In this stage the MAC operations are decreased by 1 for every iteration.
+     The blockSize3 variable holds the number of MAC operations performed */
+
+  blockSize3 = srcBLen - 1u;
+
+  /* Working pointer of inputA */
+  pSrc1 = (pIn1 + srcALen) - (srcBLen - 1u);
+  px = pSrc1;
+
+  /* Working pointer of inputB.  From here on pIn2 no longer points at y[0]:
+   * it is re-used as the one-sample-back start address for the SIMD reads. */
+  pSrc2 = pIn2 + (srcBLen - 1u);
+  pIn2 = pSrc2 - 1u;
+  py = pIn2;
+
+  /* -------------------
+   * Stage3 process
+   * ------------------*/
+
+  /* For loop unrolling by 4, this stage is divided into two. */
+  /* The first part uses SIMD reads and runs for (blockSize3 >> 2) outputs */
+  /* The second part computes the remaining outputs one MAC at a time */
+
+  /* The first part of the stage starts here */
+  j = blockSize3 >> 2u;
+
+  while((j > 0u) && (blockSize3 > 0u))
+  {
+    /* Accumulator is made zero for every iteration */
+    sum = 0;
+
+    /* Apply loop unrolling and compute 4 MACs simultaneously. */
+    k = blockSize3 >> 2u;
+
+    /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.
+     ** a second loop below computes MACs for the remaining 1 to 3 samples. */
+    while(k > 0u)
+    {
+      /* x[srcALen - srcBLen + 1], x[srcALen - srcBLen + 2] are multiplied
+       * with y[srcBLen - 1], y[srcBLen - 2] respectively */
+      sum = __SMLALDX(*__SIMD32(px)++, *__SIMD32(py)--, sum);
+      /* x[srcALen - srcBLen + 3], x[srcALen - srcBLen + 4] are multiplied
+       * with y[srcBLen - 3], y[srcBLen - 4] respectively */
+      sum = __SMLALDX(*__SIMD32(px)++, *__SIMD32(py)--, sum);
+
+      /* Decrement the loop counter */
+      k--;
+    }
+
+    /* For the next MAC operations, the pointer py is used without SIMD
+     * So, py is incremented by 1 */
+    py = py + 1u;
+
+    /* If the blockSize3 is not a multiple of 4, compute any remaining MACs here.
+     ** No loop unrolling is used. */
+    k = blockSize3 % 0x4u;
+
+    while(k > 0u)
+    {
+      /* sum += x[srcALen - srcBLen + 5] * y[srcBLen - 5] */
+      sum = __SMLALD(*px++, *py--, sum);
+
+      /* Decrement the loop counter */
+      k--;
+    }
+
+    /* Store the result in the accumulator in the destination buffer. */
+    *pOut++ = (q15_t) (__SSAT((sum >> 15), 16));
+
+    /* Update the inputA and inputB pointers for next MAC calculation */
+    px = ++pSrc1;
+    py = pIn2;
+
+    /* Decrement the loop counter */
+    blockSize3--;
+
+    j--;
+  }
+
+  /* The second part of the stage starts here */
+  /* SIMD is not used for the next MAC operations,
+   * so pointer py is moved forward to y[srcBLen - 1] to read one sample at a time */
+  py = py + 1u;
+
+  while(blockSize3 > 0u)
+  {
+    /* Accumulator is made zero for every iteration */
+    sum = 0;
+
+    /* blockSize3 MACs are performed, one at a time */
+    k = blockSize3;
+
+    while(k > 0u)
+    {
+      /* Perform the multiply-accumulates */
+      /* sum +=  x[srcALen-1] * y[srcBLen-1] */
+      sum = __SMLALD(*px++, *py--, sum);
+
+      /* Decrement the loop counter */
+      k--;
+    }
+
+    /* Store the result in the accumulator in the destination buffer. */
+    *pOut++ = (q15_t) (__SSAT((sum >> 15), 16));
+
+    /* Update the inputA and inputB pointers for next MAC calculation */
+    px = ++pSrc1;
+    py = pSrc2;
+
+    /* Decrement the loop counter */
+    blockSize3--;
+  }
+
+}
+ 
+/**  
+ * @} end of Conv group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FilteringFunctions/arm_conv_q31.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,542 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_conv_q31.c  
+*  
+* Description:	Q31 Convolution.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+*  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupFilters  
+ */ 
+ 
+/**  
+ * @addtogroup Conv  
+ * @{  
+ */ 
+ 
+/**
+ * @brief Convolution of Q31 sequences.
+ * @param[in] *pSrcA points to the first input sequence.
+ * @param[in] srcALen length of the first input sequence.
+ * @param[in] *pSrcB points to the second input sequence.
+ * @param[in] srcBLen length of the second input sequence.
+ * @param[out] *pDst points to the location where the output result is written.  Length srcALen+srcBLen-1.
+ * @return none.  If either length is zero the function returns without writing to pDst.
+ *
+ * @details
+ * <b>Scaling and Overflow Behavior:</b>
+ *
+ * \par
+ * The function is implemented using an internal 64-bit accumulator.
+ * The accumulator has a 2.62 format and maintains full precision of the intermediate multiplication results but provides only a single guard bit.
+ * There is no saturation on intermediate additions.
+ * Thus, if the accumulator overflows it wraps around and distorts the result.
+ * The input signals should be scaled down to avoid intermediate overflows.
+ * Scale down the inputs by log2(min(srcALen, srcBLen)) bits (log2 is the logarithm to base 2) to avoid overflows,
+ * as at most min(srcALen, srcBLen) additions are carried out internally.
+ * The 2.62 accumulator is right shifted by 31 bits and cast to 1.31 format to yield the final result; the code applies no explicit saturation.
+ *
+ * \par
+ * See <code>arm_conv_fast_q31()</code> for a faster but less precise implementation of this function.
+ */
+
+void arm_conv_q31(
+  q31_t * pSrcA,
+  uint32_t srcALen,
+  q31_t * pSrcB,
+  uint32_t srcBLen,
+  q31_t * pDst)
+{
+  q31_t *pIn1;                                   /* inputA pointer */
+  q31_t *pIn2;                                   /* inputB pointer */
+  q31_t *pOut = pDst;                            /* output pointer */
+  q31_t *px;                                     /* Intermediate inputA pointer  */
+  q31_t *py;                                     /* Intermediate inputB pointer  */
+  q31_t *pSrc1, *pSrc2;                          /* Intermediate pointers */
+  q63_t sum;                                     /* Accumulator */
+  q63_t acc0, acc1, acc2, acc3;                  /* Accumulator */
+  q31_t x0, x1, x2, x3, c0;                      /* Temporary variables to hold state and coefficient values */
+  uint32_t j, k, count, blkCnt, blockSize1, blockSize2, blockSize3;     /* loop counter */
+
+  /* An empty sequence yields no output; returning early also keeps (srcBLen - 1u) below from wrapping around. */
+  if((srcALen == 0u) || (srcBLen == 0u))
+  {
+    return;
+  }
+
+  /* The algorithm implementation is based on the lengths of the inputs. */
+  /* srcB is always made to slide across srcA. */
+  /* So srcBLen is always considered as shorter or equal to srcALen */
+  if(srcALen >= srcBLen)
+  {
+    /* Initialization of inputA pointer */
+    pIn1 = pSrcA;
+
+    /* Initialization of inputB pointer */
+    pIn2 = pSrcB;
+  }
+  else
+  {
+    /* Initialization of inputA pointer */
+    pIn1 = (q31_t *) pSrcB;
+
+    /* Initialization of inputB pointer */
+    pIn2 = (q31_t *) pSrcA;
+
+    /* Swap the lengths so that srcBLen is the shorter one */
+    j = srcBLen;
+    srcBLen = srcALen;
+    srcALen = j;
+  }
+
+  /* conv(x,y) at n = x[n] * y[0] + x[n-1] * y[1] + x[n-2] * y[2] + ...+ x[n-N+1] * y[N -1] */
+  /* The function is internally
+   * divided into three stages according to the number of multiplications that has to be
+   * taken place between inputA samples and inputB samples. In the first stage of the
+   * algorithm, the multiplications increase by one for every iteration.
+   * In the second stage of the algorithm, srcBLen number of multiplications are done.
+   * In the third stage of the algorithm, the multiplications decrease by one
+   * for every iteration. */
+
+  /* The loop counters (number of output samples) of the three stages are initialized here. */
+  blockSize1 = srcBLen - 1u;
+  blockSize2 = srcALen - (srcBLen - 1u);
+  blockSize3 = blockSize1;
+
+  /* --------------------------
+   * Initializations of stage1
+   * -------------------------*/
+
+  /* sum = x[0] * y[0]
+   * sum = x[0] * y[1] + x[1] * y[0]
+   * ....
+   * sum = x[0] * y[srcBlen - 1] + x[1] * y[srcBlen - 2] +...+ x[srcBLen - 1] * y[0]
+   */
+
+  /* In stage1 the MAC count grows by 1 per output; count holds the number of MACs for the current output */
+  count = 1u;
+
+  /* Working pointer of inputA */
+  px = pIn1;
+
+  /* Working pointer of inputB */
+  py = pIn2;
+
+  /* ------------------------
+   * Stage1 process
+   * ----------------------*/
+
+  /* The first stage starts here */
+  while(blockSize1 > 0u)
+  {
+    /* Accumulator is made zero for every iteration */
+    sum = 0;
+
+    /* Number of groups of 4 MACs handled by the unrolled loop */
+    k = count >> 2u;
+
+    /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.
+     ** a second loop below computes MACs for the remaining 1 to 3 samples. */
+    while(k > 0u)
+    {
+      /* x[0] * y[count - 1] */
+      sum += (q63_t) *px++ * (*py--);
+      /* x[1] * y[count - 2] */
+      sum += (q63_t) *px++ * (*py--);
+      /* x[2] * y[count - 3] */
+      sum += (q63_t) *px++ * (*py--);
+      /* x[3] * y[count - 4] */
+      sum += (q63_t) *px++ * (*py--);
+
+      /* Decrement the loop counter */
+      k--;
+    }
+
+    /* If the count is not a multiple of 4, compute any remaining MACs here.
+     ** No loop unrolling is used. */
+    k = count % 0x4u;
+
+    while(k > 0u)
+    {
+      /* Perform the multiply-accumulate */
+      sum += (q63_t) *px++ * (*py--);
+
+      /* Decrement the loop counter */
+      k--;
+    }
+
+    /* Store the result in the accumulator in the destination buffer. */
+    *pOut++ = (q31_t) (sum >> 31);
+
+    /* Next output starts one sample further into inputB and again at the start of inputA */
+    py = pIn2 + count;
+    px = pIn1;
+
+    /* Increment the MAC count */
+    count++;
+
+    /* Decrement the loop counter */
+    blockSize1--;
+  }
+
+  /* --------------------------
+   * Initializations of stage2
+   * ------------------------*/
+
+  /* sum = x[0] * y[srcBLen-1] + x[1] * y[srcBLen-2] +...+ x[srcBLen-1] * y[0]
+   * sum = x[1] * y[srcBLen-1] + x[2] * y[srcBLen-2] +...+ x[srcBLen] * y[0]
+   * ....
+   * sum = x[srcALen-srcBLen] * y[srcBLen-1] + x[srcALen-srcBLen+1] * y[srcBLen-2] +...+ x[srcALen-1] * y[0]
+   */
+
+  /* Working pointer of inputA */
+  px = pIn1;
+
+  /* Working pointer of inputB */
+  pSrc2 = pIn2 + (srcBLen - 1u);
+  py = pSrc2;
+
+  /* count is the inputA sample index at which the next stage2 output starts */
+  count = 0u;
+
+  /* -------------------
+   * Stage2 process
+   * ------------------*/
+
+  /* Stage2 depends on srcBLen as in this stage srcBLen number of MACS are performed.
+   * So, to loop unroll over blockSize2,
+   * srcBLen should be greater than or equal to 4 */
+  if(srcBLen >= 4u)
+  {
+    /* Loop unroll over blockSize2, by 4 */
+    blkCnt = blockSize2 >> 2u;
+
+    while(blkCnt > 0u)
+    {
+      /* Set all accumulators to zero */
+      acc0 = 0;
+      acc1 = 0;
+      acc2 = 0;
+      acc3 = 0;
+
+      /* read x[0], x[1], x[2] samples */
+      x0 = *(px++);
+      x1 = *(px++);
+      x2 = *(px++);
+
+      /* Number of groups of 4 coefficients handled by the unrolled loop (at least 1 here) */
+      k = srcBLen >> 2u;
+
+      /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.
+       ** a second loop below computes MACs for the remaining 1 to 3 samples. */
+      do
+      {
+        /* Read y[srcBLen - 1] sample */
+        c0 = *(py--);
+
+        /* Read x[3] sample */
+        x3 = *(px++);
+
+        /* Perform the multiply-accumulates */
+        /* acc0 +=  x[0] * y[srcBLen - 1] */
+        acc0 += ((q63_t) x0 * c0);
+        /* acc1 +=  x[1] * y[srcBLen - 1] */
+        acc1 += ((q63_t) x1 * c0);
+        /* acc2 +=  x[2] * y[srcBLen - 1] */
+        acc2 += ((q63_t) x2 * c0);
+        /* acc3 +=  x[3] * y[srcBLen - 1] */
+        acc3 += ((q63_t) x3 * c0);
+
+        /* Read y[srcBLen - 2] sample */
+        c0 = *(py--);
+
+        /* Read x[4] sample */
+        x0 = *(px++);
+
+        /* Perform the multiply-accumulate */
+        /* acc0 +=  x[1] * y[srcBLen - 2] */
+        acc0 += ((q63_t) x1 * c0);
+        /* acc1 +=  x[2] * y[srcBLen - 2] */
+        acc1 += ((q63_t) x2 * c0);
+        /* acc2 +=  x[3] * y[srcBLen - 2] */
+        acc2 += ((q63_t) x3 * c0);
+        /* acc3 +=  x[4] * y[srcBLen - 2] */
+        acc3 += ((q63_t) x0 * c0);
+
+        /* Read y[srcBLen - 3] sample */
+        c0 = *(py--);
+
+        /* Read x[5] sample */
+        x1 = *(px++);
+
+        /* Perform the multiply-accumulates */
+        /* acc0 +=  x[2] * y[srcBLen - 3] */
+        acc0 += ((q63_t) x2 * c0);
+        /* acc1 +=  x[3] * y[srcBLen - 3] */
+        acc1 += ((q63_t) x3 * c0);
+        /* acc2 +=  x[4] * y[srcBLen - 3] */
+        acc2 += ((q63_t) x0 * c0);
+        /* acc3 +=  x[5] * y[srcBLen - 3] */
+        acc3 += ((q63_t) x1 * c0);
+
+        /* Read y[srcBLen - 4] sample */
+        c0 = *(py--);
+
+        /* Read x[6] sample */
+        x2 = *(px++);
+
+        /* Perform the multiply-accumulates */
+        /* acc0 +=  x[3] * y[srcBLen - 4] */
+        acc0 += ((q63_t) x3 * c0);
+        /* acc1 +=  x[4] * y[srcBLen - 4] */
+        acc1 += ((q63_t) x0 * c0);
+        /* acc2 +=  x[5] * y[srcBLen - 4] */
+        acc2 += ((q63_t) x1 * c0);
+        /* acc3 +=  x[6] * y[srcBLen - 4] */
+        acc3 += ((q63_t) x2 * c0);
+
+      } while(--k);
+
+      /* If the srcBLen is not a multiple of 4, compute any remaining MACs here.
+       ** No loop unrolling is used. */
+      k = srcBLen % 0x4u;
+
+      while(k > 0u)
+      {
+        /* Read y[srcBLen - 5] sample */
+        c0 = *(py--);
+
+        /* Read x[7] sample */
+        x3 = *(px++);
+
+        /* Perform the multiply-accumulates */
+        /* acc0 +=  x[4] * y[srcBLen - 5] */
+        acc0 += ((q63_t) x0 * c0);
+        /* acc1 +=  x[5] * y[srcBLen - 5] */
+        acc1 += ((q63_t) x1 * c0);
+        /* acc2 +=  x[6] * y[srcBLen - 5] */
+        acc2 += ((q63_t) x2 * c0);
+        /* acc3 +=  x[7] * y[srcBLen - 5] */
+        acc3 += ((q63_t) x3 * c0);
+
+        /* Reuse the present samples for the next MAC */
+        x0 = x1;
+        x1 = x2;
+        x2 = x3;
+
+        /* Decrement the loop counter */
+        k--;
+      }
+
+      /* Store the results in the accumulators in the destination buffer. */
+      *pOut++ = (q31_t) (acc0 >> 31);
+      *pOut++ = (q31_t) (acc1 >> 31);
+      *pOut++ = (q31_t) (acc2 >> 31);
+      *pOut++ = (q31_t) (acc3 >> 31);
+
+      /* Four output samples were produced, so advance the inputA start index by 4 */
+      count += 4u;
+
+      /* Update the inputA and inputB pointers for next MAC calculation */
+      px = pIn1 + count;
+      py = pSrc2;
+
+      /* Decrement the loop counter */
+      blkCnt--;
+    }
+
+    /* If the blockSize2 is not a multiple of 4, compute any remaining output samples here.
+     ** No loop unrolling is used. */
+    blkCnt = blockSize2 % 0x4u;
+
+    while(blkCnt > 0u)
+    {
+      /* Accumulator is made zero for every iteration */
+      sum = 0;
+
+      /* Number of groups of 4 MACs handled by the unrolled loop */
+      k = srcBLen >> 2u;
+
+      /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.
+       ** a second loop below computes MACs for the remaining 1 to 3 samples. */
+      while(k > 0u)
+      {
+        /* Perform the multiply-accumulates */
+        sum += (q63_t) *px++ * (*py--);
+        sum += (q63_t) *px++ * (*py--);
+        sum += (q63_t) *px++ * (*py--);
+        sum += (q63_t) *px++ * (*py--);
+
+        /* Decrement the loop counter */
+        k--;
+      }
+
+      /* If the srcBLen is not a multiple of 4, compute any remaining MACs here.
+       ** No loop unrolling is used. */
+      k = srcBLen % 0x4u;
+
+      while(k > 0u)
+      {
+        /* Perform the multiply-accumulate */
+        sum += (q63_t) *px++ * (*py--);
+
+        /* Decrement the loop counter */
+        k--;
+      }
+
+      /* Store the result in the accumulator in the destination buffer. */
+      *pOut++ = (q31_t) (sum >> 31);
+
+      /* One output sample was produced, so advance the inputA start index by 1 */
+      count++;
+
+      /* Update the inputA and inputB pointers for next MAC calculation */
+      px = pIn1 + count;
+      py = pSrc2;
+
+      /* Decrement the loop counter */
+      blkCnt--;
+    }
+  }
+  else
+  {
+    /* If the srcBLen is less than 4,
+     * the blockSize2 loop cannot be unrolled by 4 */
+    blkCnt = blockSize2;
+
+    while(blkCnt > 0u)
+    {
+      /* Accumulator is made zero for every iteration */
+      sum = 0;
+
+      /* srcBLen number of MACS should be performed */
+      k = srcBLen;
+
+      while(k > 0u)
+      {
+        /* Perform the multiply-accumulate */
+        sum += (q63_t) *px++ * (*py--);
+
+        /* Decrement the loop counter */
+        k--;
+      }
+
+      /* Store the result in the accumulator in the destination buffer. */
+      *pOut++ = (q31_t) (sum >> 31);
+
+      /* One output sample was produced, so advance the inputA start index by 1 */
+      count++;
+
+      /* Update the inputA and inputB pointers for next MAC calculation */
+      px = pIn1 + count;
+      py = pSrc2;
+
+      /* Decrement the loop counter */
+      blkCnt--;
+    }
+  }
+
+  /* --------------------------
+   * Initializations of stage3
+   * -------------------------*/
+
+  /* sum += x[srcALen-srcBLen+1] * y[srcBLen-1] + x[srcALen-srcBLen+2] * y[srcBLen-2] +...+ x[srcALen-1] * y[1]
+   * sum += x[srcALen-srcBLen+2] * y[srcBLen-1] + x[srcALen-srcBLen+3] * y[srcBLen-2] +...+ x[srcALen-1] * y[2]
+   * ....
+   * sum +=  x[srcALen-2] * y[srcBLen-1] + x[srcALen-1] * y[srcBLen-2]
+   * sum +=  x[srcALen-1] * y[srcBLen-1]
+   */
+
+  /* In this stage the MAC operations are decreased by 1 for every iteration.
+     The blockSize3 variable holds the number of MAC operations performed */
+
+  /* Working pointer of inputA */
+  pSrc1 = (pIn1 + srcALen) - (srcBLen - 1u);
+  px = pSrc1;
+
+  /* Working pointer of inputB */
+  pSrc2 = pIn2 + (srcBLen - 1u);
+  py = pSrc2;
+
+  /* -------------------
+   * Stage3 process
+   * ------------------*/
+
+  while(blockSize3 > 0u)
+  {
+    /* Accumulator is made zero for every iteration */
+    sum = 0;
+
+    /* Number of groups of 4 MACs handled by the unrolled loop */
+    k = blockSize3 >> 2u;
+
+    /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.
+     ** a second loop below computes MACs for the remaining 1 to 3 samples. */
+    while(k > 0u)
+    {
+      /* sum += x[srcALen - srcBLen + 1] * y[srcBLen - 1] */
+      sum += (q63_t) *px++ * (*py--);
+      /* sum += x[srcALen - srcBLen + 2] * y[srcBLen - 2] */
+      sum += (q63_t) *px++ * (*py--);
+      /* sum += x[srcALen - srcBLen + 3] * y[srcBLen - 3] */
+      sum += (q63_t) *px++ * (*py--);
+      /* sum += x[srcALen - srcBLen + 4] * y[srcBLen - 4] */
+      sum += (q63_t) *px++ * (*py--);
+
+      /* Decrement the loop counter */
+      k--;
+    }
+
+    /* If the blockSize3 is not a multiple of 4, compute any remaining MACs here.
+     ** No loop unrolling is used. */
+    k = blockSize3 % 0x4u;
+
+    while(k > 0u)
+    {
+      /* Perform the multiply-accumulate */
+      sum += (q63_t) *px++ * (*py--);
+
+      /* Decrement the loop counter */
+      k--;
+    }
+
+    /* Store the result in the accumulator in the destination buffer. */
+    *pOut++ = (q31_t) (sum >> 31);
+
+    /* Next output starts one sample further into inputA and again at the end of inputB */
+    px = ++pSrc1;
+    py = pSrc2;
+
+    /* Decrement the loop counter */
+    blockSize3--;
+  }
+}
+ 
+/**  
+ * @} end of Conv group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FilteringFunctions/arm_conv_q7.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,639 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_conv_q7.c  
+*  
+* Description:	Q7 Convolution.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+*  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupFilters  
+ */ 
+ 
+/**  
+ * @addtogroup Conv  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief Convolution of Q7 sequences.  
+ * @param[in] *pSrcA points to the first input sequence.  
+ * @param[in] srcALen length of the first input sequence.  
+ * @param[in] *pSrcB points to the second input sequence.  
+ * @param[in] srcBLen length of the second input sequence.  
+ * @param[out] *pDst points to the location where the output result is written.  Length srcALen+srcBLen-1.  
+ * @return none.  
+ *  
+ * @details  
+ * <b>Scaling and Overflow Behavior:</b>  
+ *  
+ * \par  
+ * The function is implemented using a 32-bit internal accumulator.  
+ * Both the inputs are represented in 1.7 format and multiplications yield a 2.14 result.  
+ * The 2.14 intermediate results are accumulated in a 32-bit accumulator in 18.14 format.  
+ * This approach provides 17 guard bits and there is no risk of overflow as long as <code>max(srcALen, srcBLen)<131072</code>.  
+ * The 18.14 result is then truncated to 18.7 format by discarding the low 7 bits and then saturated to 1.7 format.  
+ */ 
+ 
+void arm_conv_q7( 
+  q7_t * pSrcA, 
+  uint32_t srcALen, 
+  q7_t * pSrcB, 
+  uint32_t srcBLen, 
+  q7_t * pDst) 
+{ 
+  q7_t *pIn1;                                    /* inputA pointer */ 
+  q7_t *pIn2;                                    /* inputB pointer */ 
+  q7_t *pOut = pDst;                             /* output pointer */ 
+  q7_t *px;                                      /* Intermediate inputA pointer */ 
+  q7_t *py;                                      /* Intermediate inputB pointer */ 
+  q7_t *pSrc1, *pSrc2;                           /* Intermediate pointers */ 
+  q7_t x0, x1, x2, x3, c0, c1;                   /* Temporary variables to hold state and coefficient values */ 
+  q31_t sum, acc0, acc1, acc2, acc3;             /* Accumulator */ 
+  q31_t input1, input2;                          /* Temporary input variables */ 
+  q15_t in1, in2;                                /* Temporary input variables */ 
+  uint32_t j, k, count, blkCnt, blockSize1, blockSize2, blockSize3;     /* loop counter */ 
+ 
+ 
+  /* The algorithm implementation is based on the lengths of the inputs. */ 
+  /* srcB is always made to slide across srcA. */ 
+  /* So srcBLen is always considered as shorter or equal to srcALen */ 
+  if(srcALen >= srcBLen) 
+  { 
+    /* Initialization of inputA pointer */ 
+    pIn1 = pSrcA; 
+ 
+    /* Initialization of inputB pointer */ 
+    pIn2 = pSrcB; 
+  } 
+  else 
+  { 
+    /* Initialization of inputA pointer */ 
+    pIn1 = pSrcB; 
+ 
+    /* Initialization of inputB pointer */ 
+    pIn2 = pSrcA; 
+ 
+    /* srcBLen is always considered as shorter or equal to srcALen */ 
+    j = srcBLen; 
+    srcBLen = srcALen; 
+    srcALen = j; 
+  } 
+ 
+  /* conv(x,y) at n = x[n] * y[0] + x[n-1] * y[1] + x[n-2] * y[2] + ...+ x[n-N+1] * y[N -1] */ 
+  /* The function is internally  
+   * divided into three stages according to the number of multiplications that has to be  
+   * taken place between inputA samples and inputB samples. In the first stage of the  
+   * algorithm, the multiplications increase by one for every iteration.  
+   * In the second stage of the algorithm, srcBLen number of multiplications are done.  
+   * In the third stage of the algorithm, the multiplications decrease by one  
+   * for every iteration. */ 
+ 
+  /* The algorithm is implemented in three stages.  
+     The loop counters of each stage is initiated here. */ 
+  blockSize1 = srcBLen - 1u; 
+  blockSize2 = (srcALen - srcBLen) + 1u; 
+  blockSize3 = blockSize1; 
+ 
+  /* --------------------------  
+   * Initializations of stage1  
+   * -------------------------*/ 
+ 
+  /* sum = x[0] * y[0]  
+   * sum = x[0] * y[1] + x[1] * y[0]  
+   * ....  
+   * sum = x[0] * y[srcBlen - 1] + x[1] * y[srcBlen - 2] +...+ x[srcBLen - 1] * y[0]  
+   */ 
+ 
+  /* In this stage the MAC operations are increased by 1 for every iteration.  
+     The count variable holds the number of MAC operations performed */ 
+  count = 1u; 
+ 
+  /* Working pointer of inputA */ 
+  px = pIn1; 
+ 
+  /* Working pointer of inputB */ 
+  py = pIn2; 
+ 
+ 
+  /* ------------------------  
+   * Stage1 process  
+   * ----------------------*/ 
+ 
+  /* The first stage starts here */ 
+  while(blockSize1 > 0u) 
+  { 
+    /* Accumulator is made zero for every iteration */ 
+    sum = 0; 
+ 
+    /* Apply loop unrolling and compute 4 MACs simultaneously. */ 
+    k = count >> 2u; 
+ 
+    /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.  
+     ** a second loop below computes MACs for the remaining 1 to 3 samples. */ 
+    while(k > 0u) 
+    { 
+      /* x[0] , x[1] */ 
+      in1 = (q15_t) * px++; 
+      in2 = (q15_t) * px++; 
+      input1 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16u); 
+ 
+      /* y[srcBLen - 1] , y[srcBLen - 2] */ 
+      in1 = (q15_t) * py--; 
+      in2 = (q15_t) * py--; 
+      input2 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16u); 
+ 
+      /* x[0] * y[srcBLen - 1] */ 
+      /* x[1] * y[srcBLen - 2] */ 
+      sum = __SMLAD(input1, input2, sum); 
+ 
+      /* x[2] , x[3] */ 
+      in1 = (q15_t) * px++; 
+      in2 = (q15_t) * px++; 
+      input1 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16u); 
+ 
+      /* y[srcBLen - 3] , y[srcBLen - 4] */ 
+      in1 = (q15_t) * py--; 
+      in2 = (q15_t) * py--; 
+      input2 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16u); 
+ 
+      /* x[2] * y[srcBLen - 3] */ 
+      /* x[3] * y[srcBLen - 4] */ 
+      sum = __SMLAD(input1, input2, sum); 
+ 
+      /* Decrement the loop counter */ 
+      k--; 
+    } 
+ 
+    /* If the count is not a multiple of 4, compute any remaining MACs here.  
+     ** No loop unrolling is used. */ 
+    k = count % 0x4u; 
+ 
+    while(k > 0u) 
+    { 
+      /* Perform the multiply-accumulates */ 
+      sum += ((q15_t) * px++ * *py--); 
+ 
+      /* Decrement the loop counter */ 
+      k--; 
+    } 
+ 
+    /* Store the result in the accumulator in the destination buffer. */ 
+    *pOut++ = (q7_t) (__SSAT(sum >> 7u, 8)); 
+ 
+    /* Update the inputA and inputB pointers for next MAC calculation */ 
+    py = pIn2 + count; 
+    px = pIn1; 
+ 
+    /* Increment the MAC count */ 
+    count++; 
+ 
+    /* Decrement the loop counter */ 
+    blockSize1--; 
+  } 
+ 
+  /* --------------------------  
+   * Initializations of stage2  
+   * ------------------------*/ 
+ 
+  /* sum = x[0] * y[srcBLen-1] + x[1] * y[srcBLen-2] +...+ x[srcBLen-1] * y[0]  
+   * sum = x[1] * y[srcBLen-1] + x[2] * y[srcBLen-2] +...+ x[srcBLen] * y[0]  
+   * ....  
+   * sum = x[srcALen-srcBLen-2] * y[srcBLen-1] + x[srcALen] * y[srcBLen-2] +...+ x[srcALen-1] * y[0]  
+   */ 
+ 
+  /* Working pointer of inputA */ 
+  px = pIn1; 
+ 
+  /* Working pointer of inputB */ 
+  pSrc2 = pIn2 + (srcBLen - 1u); 
+  py = pSrc2; 
+ 
+  /* count is index by which the pointer pIn1 to be incremented */ 
+  count = 1u; 
+ 
+  /* -------------------  
+   * Stage2 process  
+   * ------------------*/ 
+ 
+  /* Stage2 depends on srcBLen as in this stage srcBLen number of MACS are performed.  
+   * So, to loop unroll over blockSize2,  
+   * srcBLen should be greater than or equal to 4 */ 
+  if(srcBLen >= 4u) 
+  { 
+    /* Loop unroll over blockSize2, by 4 */ 
+    blkCnt = blockSize2 >> 2u; 
+ 
+    while(blkCnt > 0u) 
+    { 
+      /* Set all accumulators to zero */ 
+      acc0 = 0; 
+      acc1 = 0; 
+      acc2 = 0; 
+      acc3 = 0; 
+ 
+      /* read x[0], x[1], x[2] samples */ 
+      x0 = *(px++); 
+      x1 = *(px++); 
+      x2 = *(px++); 
+ 
+      /* Apply loop unrolling and compute 4 MACs simultaneously. */ 
+      k = srcBLen >> 2u; 
+ 
+      /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.  
+       ** a second loop below computes MACs for the remaining 1 to 3 samples. */ 
+      do 
+      { 
+        /* Read y[srcBLen - 1] sample */ 
+        c0 = *(py--); 
+        /* Read y[srcBLen - 2] sample */ 
+        c1 = *(py--); 
+ 
+        /* Read x[3] sample */ 
+        x3 = *(px++); 
+ 
+        /* x[0] and x[1] are packed */ 
+        in1 = (q15_t) x0; 
+        in2 = (q15_t) x1; 
+ 
+        input1 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16u); 
+ 
+        /* y[srcBLen - 1]   and y[srcBLen - 2] are packed */ 
+        in1 = (q15_t) c0; 
+        in2 = (q15_t) c1; 
+ 
+        input2 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16u); 
+ 
+        /* acc0 += x[0] * y[srcBLen - 1] + x[1] * y[srcBLen - 2]  */ 
+        acc0 = __SMLAD(input1, input2, acc0); 
+ 
+        /* x[1] and x[2] are packed */ 
+        in1 = (q15_t) x1; 
+        in2 = (q15_t) x2; 
+ 
+        input1 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16u); 
+ 
+        /* acc1 += x[1] * y[srcBLen - 1] + x[2] * y[srcBLen - 2]  */ 
+        acc1 = __SMLAD(input1, input2, acc1); 
+ 
+        /* x[2] and x[3] are packed */ 
+        in1 = (q15_t) x2; 
+        in2 = (q15_t) x3; 
+ 
+        input1 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16u); 
+ 
+        /* acc2 += x[2] * y[srcBLen - 1] + x[3] * y[srcBLen - 2]  */ 
+        acc2 = __SMLAD(input1, input2, acc2); 
+ 
+        /* Read x[4] sample */ 
+        x0 = *(px++); 
+ 
+        /* x[3] and x[4] are packed */ 
+        in1 = (q15_t) x3; 
+        in2 = (q15_t) x0; 
+ 
+        input1 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16u); 
+ 
+        /* acc3 += x[3] * y[srcBLen - 1] + x[4] * y[srcBLen - 2]  */ 
+        acc3 = __SMLAD(input1, input2, acc3); 
+ 
+        /* Read y[srcBLen - 3] sample */ 
+        c0 = *(py--); 
+        /* Read y[srcBLen - 4] sample */ 
+        c1 = *(py--); 
+ 
+        /* Read x[5] sample */ 
+        x1 = *(px++); 
+ 
+        /* x[2] and x[3] are packed */ 
+        in1 = (q15_t) x2; 
+        in2 = (q15_t) x3; 
+ 
+        input1 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16u); 
+ 
+        /* y[srcBLen - 3] and y[srcBLen - 4] are packed */ 
+        in1 = (q15_t) c0; 
+        in2 = (q15_t) c1; 
+ 
+        input2 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16u); 
+ 
+        /* acc0 += x[2] * y[srcBLen - 3] + x[3] * y[srcBLen - 4]  */ 
+        acc0 = __SMLAD(input1, input2, acc0); 
+ 
+        /* x[3] and x[4] are packed */ 
+        in1 = (q15_t) x3; 
+        in2 = (q15_t) x0; 
+ 
+        input1 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16u); 
+ 
+        /* acc1 += x[3] * y[srcBLen - 3] + x[4] * y[srcBLen - 4]  */ 
+        acc1 = __SMLAD(input1, input2, acc1); 
+ 
+        /* x[4] and x[5] are packed */ 
+        in1 = (q15_t) x0; 
+        in2 = (q15_t) x1; 
+ 
+        input1 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16u); 
+ 
+        /* acc2 += x[4] * y[srcBLen - 3] + x[5] * y[srcBLen - 4]  */ 
+        acc2 = __SMLAD(input1, input2, acc2); 
+ 
+        /* Read x[6] sample */ 
+        x2 = *(px++); 
+ 
+        /* x[5] and x[6] are packed */ 
+        in1 = (q15_t) x1; 
+        in2 = (q15_t) x2; 
+ 
+        input1 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16u); 
+ 
+        /* acc3 += x[5] * y[srcBLen - 3] + x[6] * y[srcBLen - 4]  */ 
+        acc3 = __SMLAD(input1, input2, acc3); 
+ 
+      } while(--k); 
+ 
+      /* If the srcBLen is not a multiple of 4, compute any remaining MACs here.  
+       ** No loop unrolling is used. */ 
+      k = srcBLen % 0x4u; 
+ 
+      while(k > 0u) 
+      { 
+        /* Read y[srcBLen - 5] sample */ 
+        c0 = *(py--); 
+ 
+        /* Read x[7] sample */ 
+        x3 = *(px++); 
+ 
+        /* Perform the multiply-accumulates */ 
+        /* acc0 +=  x[4] * y[srcBLen - 5] */ 
+        acc0 += ((q15_t) x0 * c0); 
+        /* acc1 +=  x[5] * y[srcBLen - 5] */ 
+        acc1 += ((q15_t) x1 * c0); 
+        /* acc2 +=  x[6] * y[srcBLen - 5] */ 
+        acc2 += ((q15_t) x2 * c0); 
+        /* acc3 +=  x[7] * y[srcBLen - 5] */ 
+        acc3 += ((q15_t) x3 * c0); 
+ 
+        /* Reuse the present samples for the next MAC */ 
+        x0 = x1; 
+        x1 = x2; 
+        x2 = x3; 
+ 
+        /* Decrement the loop counter */ 
+        k--; 
+      } 
+ 
+ 
+      /* Store the result in the accumulator in the destination buffer. */ 
+      *pOut++ = (q7_t) (__SSAT(acc0 >> 7u, 8)); 
+      *pOut++ = (q7_t) (__SSAT(acc1 >> 7u, 8)); 
+      *pOut++ = (q7_t) (__SSAT(acc2 >> 7u, 8)); 
+      *pOut++ = (q7_t) (__SSAT(acc3 >> 7u, 8)); 
+ 
+      /* Update the inputA and inputB pointers for next MAC calculation */ 
+      px = pIn1 + (count * 4u); 
+      py = pSrc2; 
+ 
+      /* Increment the pointer pIn1 index, count by 1 */ 
+      count++; 
+ 
+      /* Decrement the loop counter */ 
+      blkCnt--; 
+    } 
+ 
+    /* If the blockSize2 is not a multiple of 4, compute any remaining output samples here.  
+     ** No loop unrolling is used. */ 
+    blkCnt = blockSize2 % 0x4u; 
+ 
+    while(blkCnt > 0u) 
+    { 
+      /* Accumulator is made zero for every iteration */ 
+      sum = 0; 
+ 
+      /* Apply loop unrolling and compute 4 MACs simultaneously. */ 
+      k = srcBLen >> 2u; 
+ 
+      /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.  
+       ** a second loop below computes MACs for the remaining 1 to 3 samples. */ 
+      while(k > 0u) 
+      { 
+ 
+        /* Reading two inputs of SrcA buffer and packing */ 
+        in1 = (q15_t) * px++; 
+        in2 = (q15_t) * px++; 
+        input1 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16u); 
+ 
+        /* Reading two inputs of SrcB buffer and packing */ 
+        in1 = (q15_t) * py--; 
+        in2 = (q15_t) * py--; 
+        input2 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16u); 
+ 
+        /* Perform the multiply-accumulates */ 
+        sum = __SMLAD(input1, input2, sum); 
+ 
+        /* Reading two inputs of SrcA buffer and packing */ 
+        in1 = (q15_t) * px++; 
+        in2 = (q15_t) * px++; 
+        input1 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16u); 
+ 
+        /* Reading two inputs of SrcB buffer and packing */ 
+        in1 = (q15_t) * py--; 
+        in2 = (q15_t) * py--; 
+        input2 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16u); 
+ 
+        /* Perform the multiply-accumulates */ 
+        sum = __SMLAD(input1, input2, sum); 
+ 
+        /* Decrement the loop counter */ 
+        k--; 
+      } 
+ 
+      /* If the srcBLen is not a multiple of 4, compute any remaining MACs here.  
+       ** No loop unrolling is used. */ 
+      k = srcBLen % 0x4u; 
+ 
+      while(k > 0u) 
+      { 
+        /* Perform the multiply-accumulates */ 
+        sum += ((q15_t) * px++ * *py--); 
+ 
+        /* Decrement the loop counter */ 
+        k--; 
+      } 
+ 
+      /* Store the result in the accumulator in the destination buffer. */ 
+      *pOut++ = (q7_t) (__SSAT(sum >> 7u, 8)); 
+ 
+      /* Update the inputA and inputB pointers for next MAC calculation */ 
+      px = pIn1 + count; 
+      py = pSrc2; 
+ 
+      /* Increment the pointer pIn1 index, count by 1 */ 
+      count++; 
+ 
+      /* Decrement the loop counter */ 
+      blkCnt--; 
+    } 
+  } 
+  else 
+  { 
+    /* If the srcBLen is not a multiple of 4,  
+     * the blockSize2 loop cannot be unrolled by 4 */ 
+    blkCnt = blockSize2; 
+ 
+    while(blkCnt > 0u) 
+    { 
+      /* Accumulator is made zero for every iteration */ 
+      sum = 0; 
+ 
+      /* srcBLen number of MACS should be performed */ 
+      k = srcBLen; 
+ 
+      while(k > 0u) 
+      { 
+        /* Perform the multiply-accumulate */ 
+        sum += ((q15_t) * px++ * *py--); 
+ 
+        /* Decrement the loop counter */ 
+        k--; 
+      } 
+ 
+      /* Store the result in the accumulator in the destination buffer. */ 
+      *pOut++ = (q7_t) (__SSAT(sum >> 7u, 8)); 
+ 
+      /* Update the inputA and inputB pointers for next MAC calculation */ 
+      px = pIn1 + count; 
+      py = pSrc2; 
+ 
+      /* Increment the MAC count */ 
+      count++; 
+ 
+      /* Decrement the loop counter */ 
+      blkCnt--; 
+    } 
+  } 
+ 
+ 
+  /* --------------------------  
+   * Initializations of stage3  
+   * -------------------------*/ 
+ 
+  /* sum += x[srcALen-srcBLen+1] * y[srcBLen-1] + x[srcALen-srcBLen+2] * y[srcBLen-2] +...+ x[srcALen-1] * y[1]  
+   * sum += x[srcALen-srcBLen+2] * y[srcBLen-1] + x[srcALen-srcBLen+3] * y[srcBLen-2] +...+ x[srcALen-1] * y[2]  
+   * ....  
+   * sum +=  x[srcALen-2] * y[srcBLen-1] + x[srcALen-1] * y[srcBLen-2]  
+   * sum +=  x[srcALen-1] * y[srcBLen-1]  
+   */ 
+ 
+  /* In this stage the MAC operations are decreased by 1 for every iteration.  
+     The blockSize3 variable holds the number of MAC operations performed */ 
+ 
+  /* Working pointer of inputA */ 
+  pSrc1 = pIn1 + (srcALen - (srcBLen - 1u)); 
+  px = pSrc1; 
+ 
+  /* Working pointer of inputB */ 
+  pSrc2 = pIn2 + (srcBLen - 1u); 
+  py = pSrc2; 
+ 
+  /* -------------------  
+   * Stage3 process  
+   * ------------------*/ 
+ 
+  while(blockSize3 > 0u) 
+  { 
+    /* Accumulator is made zero for every iteration */ 
+    sum = 0; 
+ 
+    /* Apply loop unrolling and compute 4 MACs simultaneously. */ 
+    k = blockSize3 >> 2u; 
+ 
+    /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.  
+     ** a second loop below computes MACs for the remaining 1 to 3 samples. */ 
+    while(k > 0u) 
+    { 
+      /* Reading two inputs, x[srcALen - srcBLen + 1] and x[srcALen - srcBLen + 2] of SrcA buffer and packing */ 
+      in1 = (q15_t) * px++; 
+      in2 = (q15_t) * px++; 
+      input1 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16u); 
+ 
+      /* Reading two inputs, y[srcBLen - 1] and y[srcBLen - 2] of SrcB buffer and packing */ 
+      in1 = (q15_t) * py--; 
+      in2 = (q15_t) * py--; 
+      input2 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16u); 
+ 
+      /* sum += x[srcALen - srcBLen + 1] * y[srcBLen - 1] */ 
+      /* sum += x[srcALen - srcBLen + 2] * y[srcBLen - 2] */ 
+      sum = __SMLAD(input1, input2, sum); 
+ 
+      /* Reading two inputs, x[srcALen - srcBLen + 3] and x[srcALen - srcBLen + 4] of SrcA buffer and packing */ 
+      in1 = (q15_t) * px++; 
+      in2 = (q15_t) * px++; 
+      input1 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16u); 
+ 
+      /* Reading two inputs, y[srcBLen - 3] and y[srcBLen - 4] of SrcB buffer and packing */ 
+      in1 = (q15_t) * py--; 
+      in2 = (q15_t) * py--; 
+      input2 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16u); 
+ 
+      /* sum += x[srcALen - srcBLen + 3] * y[srcBLen - 3] */ 
+      /* sum += x[srcALen - srcBLen + 4] * y[srcBLen - 4] */ 
+      sum = __SMLAD(input1, input2, sum); 
+ 
+      /* Decrement the loop counter */ 
+      k--; 
+    } 
+ 
+    /* If the blockSize3 is not a multiple of 4, compute any remaining MACs here.  
+     ** No loop unrolling is used. */ 
+    k = blockSize3 % 0x4u; 
+ 
+    while(k > 0u) 
+    { 
+      /* Perform the multiply-accumulates */ 
+      sum += ((q15_t) * px++ * *py--); 
+ 
+      /* Decrement the loop counter */ 
+      k--; 
+    } 
+ 
+    /* Store the result in the accumulator in the destination buffer. */ 
+    *pOut++ = (q7_t) (__SSAT(sum >> 7u, 8)); 
+ 
+    /* Update the inputA and inputB pointers for next MAC calculation */ 
+    px = ++pSrc1; 
+    py = pSrc2; 
+ 
+    /* Decrement the loop counter */ 
+    blockSize3--; 
+  } 
+ 
+} 
+ 
+/**  
+ * @} end of Conv group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FilteringFunctions/arm_correlate_f32.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,617 @@
+/* ----------------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_correlate_f32.c  
+*  
+* Description:	 Correlation for floating-point sequences.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+*  
+* -------------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupFilters  
+ */ 
+ 
+/**  
+ * @defgroup Corr Correlation  
+ *  
+ * Correlation is a mathematical operation that is similar to convolution.  
+ * As with convolution, correlation uses two signals to produce a third signal.  
+ * The underlying algorithms in correlation and convolution are identical except that one of the inputs is flipped in convolution.  
+ * Correlation is commonly used to measure the similarity between two signals.  
+ * It has applications in pattern recognition, cryptanalysis, and searching.  
+ * The CMSIS library provides correlation functions for Q7, Q15, Q31 and floating-point data types.  
+ * Fast versions of the Q15 and Q31 functions are also provided.  
+ *  
+ * \par Algorithm  
+ * Let <code>a[n]</code> and <code>b[n]</code> be sequences of length <code>srcALen</code> and <code>srcBLen</code> samples respectively.  
+ * The convolution of the two signals is denoted by  
+ * <pre>  
+ *                   c[n] = a[n] * b[n]  
+ * </pre>  
+ * In correlation, one of the signals is flipped in time  
+ * <pre>  
+ *                   c[n] = a[n] * b[-n]  
+ * </pre>  
+ *  
+ * \par  
+ * and this is mathematically defined as  
+ * \image html CorrelateEquation.gif  
+ * \par  
+ * The <code>pSrcA</code> points to the first input vector of length <code>srcALen</code> and <code>pSrcB</code> points to the second input vector of length <code>srcBLen</code>.  
+ * The result <code>c[n]</code> is of length <code>2 * max(srcALen, srcBLen) - 1</code> and is defined over the interval <code>n=0, 1, 2, ..., (2 * max(srcALen, srcBLen) - 2)</code>.  
+ * The output result is written to <code>pDst</code> and the calling function must allocate <code>2 * max(srcALen, srcBLen) - 1</code> words for the result.  
+ *  
+ * <b>Fixed-Point Behavior</b>  
+ * \par  
+ * Correlation requires summing up a large number of intermediate products.  
+ * As such, the Q7, Q15, and Q31 functions run a risk of overflow and saturation.  
+ * Refer to the function specific documentation below for further details of the particular algorithm used.  
+ */ 
+ 
+/**  
+ * @addtogroup Corr  
+ * @{  
+ */ 
+/**
+ * @brief Correlation of floating-point sequences
+ * @param[in]  *pSrcA points to the first input sequence.
+ * @param[in]  srcALen length of the first input sequence.
+ * @param[in]  *pSrcB points to the second input sequence.
+ * @param[in]  srcBLen length of the second input sequence.
+ * @param[out] *pDst points to the location where the output result is written.  Length 2 * max(srcALen, srcBLen) - 1.
+ * @return none.
+ *
+ * \par
+ * The shorter sequence is slid across the longer one.  When srcALen > srcBLen the
+ * first (srcALen - srcBLen) output samples are written as zeros.  When
+ * srcALen < srcBLen the inputs are swapped internally and the result is written
+ * in reverse order starting at pDst[srcALen + srcBLen - 2]; in that case the last
+ * (srcBLen - srcALen) samples of the output buffer are NOT written by this
+ * function, so the caller should clear pDst beforehand if those samples are read.
+ *
+ * \par
+ * If either sequence is empty the function returns without writing to pDst.
+ */
+
+void arm_correlate_f32(
+  float32_t * pSrcA,
+  uint32_t srcALen,
+  float32_t * pSrcB,
+  uint32_t srcBLen,
+  float32_t * pDst)
+{
+  float32_t *pIn1;                               /* inputA pointer (the longer sequence, x) */
+  float32_t *pIn2;                               /* inputB pointer (the shorter sequence, y) */
+  float32_t *pOut = pDst;                        /* output pointer */
+  float32_t *px;                                 /* Intermediate inputA pointer */
+  float32_t *py;                                 /* Intermediate inputB pointer */
+  float32_t *pSrc1;                              /* Intermediate pointers */
+  float32_t sum, acc0, acc1, acc2, acc3;         /* Accumulators */
+  float32_t x0, x1, x2, x3, c0;                  /* temporary variables for holding input and coefficient values */
+  uint32_t j, k, count, blkCnt, outBlockSize, blockSize1, blockSize2, blockSize3;  /* loop counters */
+  int32_t inc = 1;                               /* Destination address modifier */
+
+
+  /* With an empty input, (srcBLen - 1u) below would wrap around and the stage
+   * loops would run far outside the buffers, so there is nothing safe to compute. */
+  if((srcALen == 0u) || (srcBLen == 0u))
+  {
+    return;
+  }
+
+  /* srcB is always made to slide across srcA, so internally srcBLen is always
+   * the shorter (or equal) length.
+   * CORR(x, y) is the reverse of CORR(y, x): when srcBLen > srcALen the inputs
+   * are swapped, the output pointer starts at the last computed sample and the
+   * destination address modifier, inc, is set to -1.
+   * Instead of zero padding the shorter input, zeros are placed in the output. */
+  if(srcALen >= srcBLen)
+  {
+    /* Initialization of inputA and inputB pointers */
+    pIn1 = pSrcA;
+    pIn2 = pSrcB;
+
+    /* Number of output samples is calculated */
+    outBlockSize = (2u * srcALen) - 1u;
+
+    /* (outBlockSize - (srcALen + srcBLen - 1)) = (srcALen - srcBLen)
+     * leading output samples are made zero */
+    j = outBlockSize - (srcALen + (srcBLen - 1u));
+
+    while(j > 0u)
+    {
+      /* Zero is stored in the destination buffer */
+      *pOut++ = 0.0f;
+
+      j--;
+    }
+
+  }
+  else
+  {
+    /* Swap the roles of the inputs so that pIn1 is the longer sequence */
+    pIn1 = pSrcB;
+    pIn2 = pSrcA;
+
+    /* Swap the lengths accordingly */
+    j = srcBLen;
+    srcBLen = srcALen;
+    srcALen = j;
+
+    /* CORR(x, y) = Reverse order(CORR(y, x)) */
+    /* Hence set the destination pointer to point to the last computed output sample.
+     * The samples after it are left untouched (see the function documentation). */
+    pOut = pDst + ((srcALen + srcBLen) - 2u);
+
+    /* Destination address modifier is set to -1 */
+    inc = -1;
+
+  }
+
+  /* The computation is divided into three stages according to the number of
+   * multiplications per output sample:
+   *   stage 1: the number of MACs grows from 1 to srcBLen - 1,
+   *   stage 2: exactly srcBLen MACs per output,
+   *   stage 3: the number of MACs shrinks from srcBLen - 1 to 1.
+   * The loop counters of each stage are initialized here. */
+  blockSize1 = srcBLen - 1u;
+  blockSize2 = srcALen - (srcBLen - 1u);
+  blockSize3 = blockSize1;
+
+  /* --------------------------
+   * Initializations of stage1
+   * -------------------------*/
+
+  /* sum = x[0] * y[srcBLen - 1]
+   * sum = x[0] * y[srcBLen - 2] + x[1] * y[srcBLen - 1]
+   * ....
+   * sum = x[0] * y[1] + x[1] * y[2] +...+ x[srcBLen - 2] * y[srcBLen - 1]
+   */
+
+  /* The count variable holds the number of MAC operations of the current output */
+  count = 1u;
+
+  /* Working pointer of inputA */
+  px = pIn1;
+
+  /* Working pointer of inputB, starting at its last sample */
+  pSrc1 = pIn2 + (srcBLen - 1u);
+  py = pSrc1;
+
+  /* ------------------------
+   * Stage1 process
+   * ----------------------*/
+
+  while(blockSize1 > 0u)
+  {
+    /* Accumulator is made zero for every iteration */
+    sum = 0.0f;
+
+    /* Compute 4 MACs per pass; the loop below handles the remaining 1 to 3 */
+    k = count >> 2u;
+
+    while(k > 0u)
+    {
+      sum += *px++ * *py++;
+      sum += *px++ * *py++;
+      sum += *px++ * *py++;
+      sum += *px++ * *py++;
+
+      k--;
+    }
+
+    /* Remaining MACs when count is not a multiple of 4 */
+    k = count % 0x4u;
+
+    while(k > 0u)
+    {
+      sum += *px++ * *py++;
+
+      k--;
+    }
+
+    /* Store the result and step the destination pointer by inc */
+    *pOut = sum;
+    pOut += inc;
+
+    /* The next output starts one sample earlier in inputB and at x[0] again */
+    py = pSrc1 - count;
+    px = pIn1;
+
+    /* Increment the MAC count */
+    count++;
+
+    blockSize1--;
+  }
+
+  /* --------------------------
+   * Initializations of stage2
+   * ------------------------*/
+
+  /* sum = x[0] * y[0] + x[1] * y[1] +...+ x[srcBLen-1] * y[srcBLen-1]
+   * sum = x[1] * y[0] + x[2] * y[1] +...+ x[srcBLen] * y[srcBLen-1]
+   * ....
+   * sum = x[srcALen-srcBLen] * y[0] + x[srcALen-srcBLen+1] * y[1] +...+ x[srcALen-1] * y[srcBLen-1]
+   */
+
+  /* Working pointer of inputA */
+  px = pIn1;
+
+  /* Working pointer of inputB */
+  py = pIn2;
+
+  /* count is the index into inputA of the first sample used by the current output.
+   * It advances by 4 for every unrolled block of outputs and by 1 for every
+   * single output, so that the leftover loop continues exactly where the
+   * unrolled loop stopped. */
+  count = 0u;
+
+  /* -------------------
+   * Stage2 process
+   * ------------------*/
+
+  /* The 4-outputs-at-a-time path runs the inner MAC loop at least once as a
+   * do/while over (srcBLen >> 2), so it requires srcBLen >= 4. */
+  if(srcBLen >= 4u)
+  {
+    /* Loop unroll over blockSize2, by 4 */
+    blkCnt = blockSize2 >> 2u;
+
+    while(blkCnt > 0u)
+    {
+      /* Set all accumulators to zero */
+      acc0 = 0.0f;
+      acc1 = 0.0f;
+      acc2 = 0.0f;
+      acc3 = 0.0f;
+
+      /* Read the first three inputA samples of this block */
+      x0 = *(px++);
+      x1 = *(px++);
+      x2 = *(px++);
+
+      /* Consume 4 inputB samples per pass */
+      k = srcBLen >> 2u;
+
+      do
+      {
+        /* Next inputB sample and next inputA sample */
+        c0 = *(py++);
+        x3 = *(px++);
+
+        /* acc0..acc3 += x[n] * y[m], x[n+1] * y[m], x[n+2] * y[m], x[n+3] * y[m] */
+        acc0 += x0 * c0;
+        acc1 += x1 * c0;
+        acc2 += x2 * c0;
+        acc3 += x3 * c0;
+
+        c0 = *(py++);
+        x0 = *(px++);
+
+        /* Same pattern, shifted by one sample; x0 now holds the newest sample */
+        acc0 += x1 * c0;
+        acc1 += x2 * c0;
+        acc2 += x3 * c0;
+        acc3 += x0 * c0;
+
+        c0 = *(py++);
+        x1 = *(px++);
+
+        acc0 += x2 * c0;
+        acc1 += x3 * c0;
+        acc2 += x0 * c0;
+        acc3 += x1 * c0;
+
+        c0 = *(py++);
+        x2 = *(px++);
+
+        /* After this step x0, x1, x2 again hold the three oldest live samples */
+        acc0 += x3 * c0;
+        acc1 += x0 * c0;
+        acc2 += x1 * c0;
+        acc3 += x2 * c0;
+
+
+      } while(--k);
+
+      /* Remaining inputB samples when srcBLen is not a multiple of 4 */
+      k = srcBLen % 0x4u;
+
+      while(k > 0u)
+      {
+        c0 = *(py++);
+        x3 = *(px++);
+
+        acc0 += x0 * c0;
+        acc1 += x1 * c0;
+        acc2 += x2 * c0;
+        acc3 += x3 * c0;
+
+        /* Reuse the present samples for the next MAC */
+        x0 = x1;
+        x1 = x2;
+        x2 = x3;
+
+        k--;
+      }
+
+      /* Store the four results, stepping the destination pointer by inc */
+      *pOut = acc0;
+      pOut += inc;
+
+      *pOut = acc1;
+      pOut += inc;
+
+      *pOut = acc2;
+      pOut += inc;
+
+      *pOut = acc3;
+      pOut += inc;
+
+      /* Four outputs were produced: advance the inputA start index by 4 */
+      count += 4u;
+
+      /* Update the inputA and inputB pointers for next MAC calculation */
+      px = pIn1 + count;
+      py = pIn2;
+
+      blkCnt--;
+    }
+
+    /* If the blockSize2 is not a multiple of 4, compute any remaining output samples here.
+     ** No loop unrolling over the outputs is used. */
+    blkCnt = blockSize2 % 0x4u;
+
+    while(blkCnt > 0u)
+    {
+      /* Accumulator is made zero for every iteration */
+      sum = 0.0f;
+
+      /* Compute 4 MACs per pass; the loop below handles the remaining 1 to 3 */
+      k = srcBLen >> 2u;
+
+      while(k > 0u)
+      {
+        sum += *px++ * *py++;
+        sum += *px++ * *py++;
+        sum += *px++ * *py++;
+        sum += *px++ * *py++;
+
+        k--;
+      }
+
+      /* Remaining MACs when srcBLen is not a multiple of 4 */
+      k = srcBLen % 0x4u;
+
+      while(k > 0u)
+      {
+        sum += *px++ * *py++;
+
+        k--;
+      }
+
+      /* Store the result and step the destination pointer by inc */
+      *pOut = sum;
+      pOut += inc;
+
+      /* One output was produced: advance the inputA start index by 1 */
+      count++;
+
+      /* Update the inputA and inputB pointers for next MAC calculation */
+      px = pIn1 + count;
+      py = pIn2;
+
+      blkCnt--;
+    }
+  }
+  else
+  {
+    /* srcBLen is less than 4,
+     * so the blockSize2 loop cannot be unrolled by 4 */
+    blkCnt = blockSize2;
+
+    while(blkCnt > 0u)
+    {
+      /* Accumulator is made zero for every iteration */
+      sum = 0.0f;
+
+      /* Loop over srcBLen */
+      k = srcBLen;
+
+      while(k > 0u)
+      {
+        sum += *px++ * *py++;
+
+        k--;
+      }
+
+      /* Store the result and step the destination pointer by inc */
+      *pOut = sum;
+      pOut += inc;
+
+      /* One output was produced: advance the inputA start index by 1 */
+      count++;
+
+      /* Update the inputA and inputB pointers for next MAC calculation */
+      px = pIn1 + count;
+      py = pIn2;
+
+      blkCnt--;
+    }
+  }
+
+  /* --------------------------
+   * Initializations of stage3
+   * -------------------------*/
+
+  /* sum = x[srcALen-srcBLen+1] * y[0] + x[srcALen-srcBLen+2] * y[1] +...+ x[srcALen-1] * y[srcBLen-2]
+   * sum = x[srcALen-srcBLen+2] * y[0] + x[srcALen-srcBLen+3] * y[1] +...+ x[srcALen-1] * y[srcBLen-3]
+   * ....
+   * sum = x[srcALen-2] * y[0] + x[srcALen-1] * y[1]
+   * sum = x[srcALen-1] * y[0]
+   */
+
+  /* In this stage the MAC operations are decreased by 1 for every iteration.
+     The count variable holds the number of MAC operations performed */
+  count = srcBLen - 1u;
+
+  /* Working pointer of inputA */
+  pSrc1 = pIn1 + (srcALen - (srcBLen - 1u));
+  px = pSrc1;
+
+  /* Working pointer of inputB */
+  py = pIn2;
+
+  /* -------------------
+   * Stage3 process
+   * ------------------*/
+
+  while(blockSize3 > 0u)
+  {
+    /* Accumulator is made zero for every iteration */
+    sum = 0.0f;
+
+    /* Compute 4 MACs per pass; the loop below handles the remaining 1 to 3 */
+    k = count >> 2u;
+
+    while(k > 0u)
+    {
+      sum += *px++ * *py++;
+      sum += *px++ * *py++;
+      sum += *px++ * *py++;
+      sum += *px++ * *py++;
+
+      k--;
+    }
+
+    /* Remaining MACs when count is not a multiple of 4 */
+    k = count % 0x4u;
+
+    while(k > 0u)
+    {
+      sum += *px++ * *py++;
+
+      k--;
+    }
+
+    /* Store the result and step the destination pointer by inc */
+    *pOut = sum;
+    pOut += inc;
+
+    /* The next output starts one sample later in inputA and at y[0] again */
+    px = ++pSrc1;
+    py = pIn2;
+
+    /* Decrement the MAC count */
+    count--;
+
+    blockSize3--;
+  }
+
+}
+ 
+/**  
+ * @} end of Corr group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FilteringFunctions/arm_correlate_fast_q15.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,610 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_correlate_fast_q15.c  
+*  
+* Description:	Fast Q15 Correlation.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupFilters  
+ */ 
+ 
+/**  
+ * @addtogroup Corr  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief Correlation of Q15 sequences (fast version).  
+ * @param[in] *pSrcA points to the first input sequence.  
+ * @param[in] srcALen length of the first input sequence.  
+ * @param[in] *pSrcB points to the second input sequence.  
+ * @param[in] srcBLen length of the second input sequence.  
+ * @param[out] *pDst points to the location where the output result is written.  Length 2 * max(srcALen, srcBLen) - 1.  
+ * @return none.  
+ *  
+ * <b>Scaling and Overflow Behavior:</b>  
+ *  
+ * \par  
+ * This fast version uses a 32-bit accumulator with 2.30 format.  
+ * The accumulator maintains full precision of the intermediate multiplication results but provides only a single guard bit.  
+ * There is no saturation on intermediate additions.  
+ * Thus, if the accumulator overflows it wraps around and distorts the result.  
+ * The input signals should be scaled down to avoid intermediate overflows.  
+ * Scale down one of the inputs by 1/min(srcALen, srcBLen) to avoid overflow since a  
+ * maximum of min(srcALen, srcBLen) number of additions is carried internally.  
+ * The 2.30 accumulator is right shifted by 15 bits and then cast (truncated, not saturated) to 1.15 format to yield the final result.  
+ *  
+ * \par  
+ * See <code>arm_correlate_q15()</code> for a slower implementation of this function which uses a 64-bit accumulator to avoid wrap around distortion.  
+ */ 
+ 
+void arm_correlate_fast_q15( 
+  q15_t * pSrcA, 
+  uint32_t srcALen, 
+  q15_t * pSrcB, 
+  uint32_t srcBLen, 
+  q15_t * pDst) 
+{ 
+  q15_t *pIn1;                                   /* inputA pointer               */ 
+  q15_t *pIn2;                                   /* inputB pointer               */ 
+  q15_t *pOut = pDst;                            /* output pointer               */ 
+  q31_t sum, acc0, acc1, acc2, acc3;             /* Accumulators                  */ 
+  q15_t *px;                                     /* Intermediate inputA pointer  */ 
+  q15_t *py;                                     /* Intermediate inputB pointer  */ 
+  q15_t *pSrc1;                                  /* Intermediate pointers        */ 
+  q31_t x0, x1, x2, x3, c0;                      /* temporary variables for holding input and coefficient values */ 
+  uint32_t j, k = 0u, count, blkCnt, outBlockSize, blockSize1, blockSize2, blockSize3;  /* loop counter                 */ 
+  int32_t inc = 1;                               /* Destination address modifier */ 
+  q31_t *pb;                                     /* 32 bit pointer for inputB buffer */ 
+  /* Correlates pSrcA with pSrcB into pDst using 32-bit accumulators; every result */ 
+  /* is shifted right by 15 bits and cast to q15_t (no saturation) before the store. */ 
+  /* The algorithm implementation is based on the lengths of the inputs. */ 
+  /* srcB is always made to slide across srcA. */ 
+  /* So srcBLen is always considered as shorter or equal to srcALen */ 
+  /* But CORR(x, y) is reverse of CORR(y, x) */ 
+  /* So, when srcBLen > srcALen, output pointer is made to point to the end of the output buffer */ 
+  /* and the destination pointer modifier, inc is set to -1 */ 
+  /* If srcALen > srcBLen, zero pad has to be done to srcB to make the two inputs of same length */ 
+  /* But to improve the performance,  
+   * we include zeroes in the output instead of zero padding either of the inputs */ 
+  /* If srcALen > srcBLen,  
+   * (srcALen - srcBLen) zeroes are written at the start of the output buffer */ 
+  /* If srcALen < srcBLen, the inputs are swapped and the output is written backwards;  
+   * no zeroes are written in that case, only (srcALen + srcBLen - 1) output samples */ 
+  if(srcALen >= srcBLen) 
+  { 
+    /* Initialization of inputA pointer */ 
+    pIn1 = (pSrcA); 
+ 
+    /* Initialization of inputB pointer */ 
+    pIn2 = (pSrcB); 
+ 
+    /* Number of output samples is calculated */ 
+    outBlockSize = (2u * srcALen) - 1u; 
+ 
+    /* When srcALen > srcBLen, zero padding is done to srcB  
+     * to make their lengths equal.  
+     * Instead, (outBlockSize - (srcALen + srcBLen - 1))  
+     * number of output samples are made zero */ 
+    j = outBlockSize - (srcALen + (srcBLen - 1u)); 
+ 
+    while(j > 0u) 
+    { 
+      /* Zero is stored in the destination buffer */ 
+      *pOut++ = 0; 
+ 
+      /* Decrement the loop counter */ 
+      j--; 
+    } 
+ 
+  } 
+  else 
+  { 
+    /* The longer sequence (pSrcB) takes the role of inputA */ 
+    pIn1 = (pSrcB); 
+ 
+    /* The shorter sequence (pSrcA) takes the role of inputB */ 
+    pIn2 = (pSrcA); 
+ 
+    /* Swap the lengths so that srcBLen is the shorter or equal one */ 
+    j = srcBLen; 
+    srcBLen = srcALen; 
+    srcALen = j; 
+ 
+    /* CORR(x, y) = Reverse order(CORR(y, x)) */ 
+    /* Hence set the destination pointer to point to the last output sample */ 
+    pOut = pDst + ((srcALen + srcBLen) - 2u); 
+ 
+    /* Destination address modifier is set to -1 */ 
+    inc = -1; 
+ 
+  } 
+ 
+  /* The function is internally  
+   * divided into three parts according to the number of multiplications that has to be  
+   * taken place between inputA samples and inputB samples. In the first part of the  
+   * algorithm, the multiplications increase by one for every iteration.  
+   * In the second part of the algorithm, srcBLen number of multiplications are done.  
+   * In the third part of the algorithm, the multiplications decrease by one  
+   * for every iteration.*/ 
+  /* The algorithm is implemented in three stages.  
+   * The loop counters of each stage are initialised here. */ 
+  blockSize1 = srcBLen - 1u; 
+  blockSize2 = srcALen - (srcBLen - 1u); 
+  blockSize3 = blockSize1; 
+ 
+  /* --------------------------  
+   * Initializations of stage1  
+   * -------------------------*/ 
+ 
+  /* sum = x[0] * y[srcBlen - 1]  
+   * sum = x[0] * y[srcBlen - 2] + x[1] * y[srcBlen - 1]  
+   * ....  
+   * sum = x[0] * y[0] + x[1] * y[1] +...+ x[srcBLen - 1] * y[srcBLen - 1]  
+   */ 
+ 
+  /* In this stage the MAC operations are increased by 1 for every iteration.  
+     The count variable holds the number of MAC operations performed */ 
+  count = 1u; 
+ 
+  /* Working pointer of inputA */ 
+  px = pIn1; 
+ 
+  /* Working pointer of inputB */ 
+  pSrc1 = pIn2 + (srcBLen - 1u); 
+  py = pSrc1; 
+ 
+  /* ------------------------  
+   * Stage1 process  
+   * ----------------------*/ 
+ 
+  /* The first loop starts here */ 
+  while(blockSize1 > 0u) 
+  { 
+    /* Accumulator is made zero for every iteration */ 
+    sum = 0; 
+ 
+    /* Apply loop unrolling and compute 4 MACs simultaneously. */ 
+    k = count >> 2; 
+ 
+    /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.  
+     ** a second loop below computes MACs for the remaining 1 to 3 samples. */ 
+    while(k > 0u) 
+    { 
+      /* x[0] * y[srcBLen - 4] , x[1] * y[srcBLen - 3] */ 
+      sum = __SMLAD(*__SIMD32(px)++, *__SIMD32(py)++, sum); 
+      /* x[3] * y[srcBLen - 1] , x[2] * y[srcBLen - 2] */ 
+      sum = __SMLAD(*__SIMD32(px)++, *__SIMD32(py)++, sum); 
+ 
+      /* Decrement the loop counter */ 
+      k--; 
+    } 
+ 
+    /* If the count is not a multiple of 4, compute any remaining MACs here.  
+     ** No loop unrolling is used. */ 
+    k = count % 0x4u; 
+ 
+    while(k > 0u) 
+    { 
+      /* One sample pair per step; each operand is a single promoted q15 value */ 
+      /* x[0] * y[srcBLen - 1] */ 
+      sum = __SMLAD(*px++, *py++, sum); 
+ 
+      /* Decrement the loop counter */ 
+      k--; 
+    } 
+ 
+    /* Store the result in the accumulator in the destination buffer. */ 
+    *pOut = (q15_t) (sum >> 15); 
+    /* Destination pointer is updated according to the address modifier, inc */ 
+    pOut += inc; 
+ 
+    /* Next output starts one inputB sample earlier and at the start of inputA */ 
+    py = pSrc1 - count; 
+    px = pIn1; 
+ 
+    /* Increment the MAC count */ 
+    count++; 
+ 
+    /* Decrement the loop counter */ 
+    blockSize1--; 
+  } 
+ 
+  /* --------------------------  
+   * Initializations of stage2  
+   * ------------------------*/ 
+ 
+  /* sum = x[0] * y[0] + x[1] * y[1] +...+ x[srcBLen-1] * y[srcBLen-1]  
+   * sum = x[1] * y[0] + x[2] * y[1] +...+ x[srcBLen] * y[srcBLen-1]  
+   * ....  
+   * sum = x[srcALen-srcBLen-2] * y[0] + x[srcALen-srcBLen-1] * y[1] +...+ x[srcALen-1] * y[srcBLen-1]  
+   */ 
+ 
+  /* Working pointer of inputA */ 
+  px = pIn1; 
+ 
+  /* Working pointer of inputB */ 
+  py = pIn2; 
+ 
+  /* Initialize inputB pointer of type q31 */ 
+  pb = (q31_t *) (py); 
+ 
+  /* count is index by which the pointer pIn1 to be incremented */ 
+  count = 0u; 
+ 
+  /* -------------------  
+   * Stage2 process  
+   * ------------------*/ 
+ 
+  /* Stage2 depends on srcBLen as in this stage srcBLen number of MACS are performed.  
+   * So, to loop unroll over blockSize2,  
+   * srcBLen should be greater than or equal to 4, to loop unroll the srcBLen loop */ 
+  if(srcBLen >= 4u) 
+  { 
+    /* Loop unroll over blockSize2, by 4 */ 
+    blkCnt = blockSize2 >> 2u; 
+ 
+    while(blkCnt > 0u) 
+    { 
+      /* Set all accumulators to zero */ 
+      acc0 = 0; 
+      acc1 = 0; 
+      acc2 = 0; 
+      acc3 = 0; 
+ 
+      /* read x[0], x[1] samples (px is a q15 pointer, so it advances by one sample) */ 
+      x0 = *(q31_t *) (px++); 
+      /* read x[1], x[2] samples */ 
+      x1 = *(q31_t *) (px++); 
+ 
+      /* Apply loop unrolling and compute 4 MACs simultaneously. */ 
+      k = srcBLen >> 2u; 
+ 
+      /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.  
+       ** the tail code below computes MACs for the remaining 1 to 3 samples. */ 
+      do 
+      { 
+        /* Read the first two inputB samples using SIMD:  
+         * y[0] and y[1] */ 
+        c0 = *(pb++); 
+ 
+        /* acc0 +=  x[0] * y[0] + x[1] * y[1] */ 
+        acc0 = __SMLAD(x0, c0, acc0); 
+ 
+        /* acc1 +=  x[1] * y[0] + x[2] * y[1] */ 
+        acc1 = __SMLAD(x1, c0, acc1); 
+ 
+        /* Read x[2], x[3] */ 
+        x2 = *(q31_t *) (px++); 
+ 
+        /* Read x[3], x[4] */ 
+        x3 = *(q31_t *) (px++); 
+ 
+        /* acc2 +=  x[2] * y[0] + x[3] * y[1] */ 
+        acc2 = __SMLAD(x2, c0, acc2); 
+ 
+        /* acc3 +=  x[3] * y[0] + x[4] * y[1] */ 
+        acc3 = __SMLAD(x3, c0, acc3); 
+ 
+        /* Read y[2] and y[3] */ 
+        c0 = *(pb++); 
+ 
+        /* acc0 +=  x[2] * y[2] + x[3] * y[3] */ 
+        acc0 = __SMLAD(x2, c0, acc0); 
+ 
+        /* acc1 +=  x[3] * y[2] + x[4] * y[3] */ 
+        acc1 = __SMLAD(x3, c0, acc1); 
+ 
+        /* Read x[4], x[5] */ 
+        x0 = *(q31_t *) (px++); 
+ 
+        /* Read x[5], x[6] */ 
+        x1 = *(q31_t *) (px++); 
+ 
+        /* acc2 +=  x[4] * y[2] + x[5] * y[3] */ 
+        acc2 = __SMLAD(x0, c0, acc2); 
+ 
+        /* acc3 +=  x[5] * y[2] + x[6] * y[3] */ 
+        acc3 = __SMLAD(x1, c0, acc3); 
+ 
+      } while(--k); 
+ 
+      /* For the next MAC operations, SIMD is not used  
+       * So, the 16 bit pointer of inputB, py is updated */ 
+      py = (q15_t *) (pb); 
+ 
+      /* If the srcBLen is not a multiple of 4, compute any remaining MACs here.  
+       ** No loop unrolling is used. */ 
+      k = srcBLen % 0x4u; 
+ 
+      if(k == 1u) 
+      { 
+        /* Read the one remaining inputB sample into the low halfword of c0 */ 
+        c0 = *py; 
+        c0 = c0 & 0x0000FFFF; 
+ 
+        /* Read the next two inputA samples */ 
+        x3 = *(q31_t *) px++; 
+ 
+        /* Perform the multiply-accumulates */ 
+        acc0 = __SMLAD(x0, c0, acc0); 
+        acc1 = __SMLAD(x1, c0, acc1); 
+        acc2 = __SMLADX(x1, c0, acc2); 
+        acc3 = __SMLADX(x3, c0, acc3); 
+      } 
+ 
+      if(k == 2u) 
+      { 
+        /* Read the two remaining inputB samples */ 
+        c0 = *(pb); 
+ 
+        /* Read the next two inputA samples */ 
+        x3 = *(q31_t *) px++; 
+ 
+        /* Read the following overlapping pair of inputA samples */ 
+        x2 = *(q31_t *) px++; 
+ 
+        /* Perform the multiply-accumulates */ 
+        acc0 = __SMLAD(x0, c0, acc0); 
+        acc1 = __SMLAD(x1, c0, acc1); 
+        acc2 = __SMLAD(x3, c0, acc2); 
+        acc3 = __SMLAD(x2, c0, acc3); 
+      } 
+ 
+      if(k == 3u) 
+      { 
+        /* Read the first two of the three remaining inputB samples */ 
+        c0 = *pb++; 
+ 
+        /* Read the next two inputA samples */ 
+        x3 = *(q31_t *) px++; 
+ 
+        /* Read the following overlapping pair of inputA samples */ 
+        x2 = *(q31_t *) px++; 
+ 
+        /* Perform the multiply-accumulates */ 
+        acc0 = __SMLAD(x0, c0, acc0); 
+        acc1 = __SMLAD(x1, c0, acc1); 
+        acc2 = __SMLAD(x3, c0, acc2); 
+        acc3 = __SMLAD(x2, c0, acc3); 
+ 
+        /* Read the last inputB sample with a 16-bit load so nothing beyond inputB is read */ 
+        c0 = *(q15_t *) (pb); 
+        c0 = c0 & 0x0000FFFF; 
+ 
+        /* Read the next two inputA samples */ 
+        x3 = *(q31_t *) px++; 
+ 
+        /* Perform the multiply-accumulates */ 
+        acc0 = __SMLADX(x1, c0, acc0); 
+        acc1 = __SMLAD(x2, c0, acc1); 
+        acc2 = __SMLADX(x2, c0, acc2); 
+        acc3 = __SMLADX(x3, c0, acc3); 
+      } 
+ 
+      /* Store the result in the accumulator in the destination buffer. */ 
+      *pOut = (q15_t) (acc0 >> 15); 
+      /* Destination pointer is updated according to the address modifier, inc */ 
+      pOut += inc; 
+ 
+      *pOut = (q15_t) (acc1 >> 15); 
+      pOut += inc; 
+ 
+      *pOut = (q15_t) (acc2 >> 15); 
+      pOut += inc; 
+ 
+      *pOut = (q15_t) (acc3 >> 15); 
+      pOut += inc; 
+ 
+      /* Four outputs were produced, so advance the pIn1 index, count, by 4 */ 
+      count += 4u; 
+ 
+      /* Update the inputA and inputB pointers for next MAC calculation */ 
+      px = pIn1 + count; 
+      py = pIn2; 
+      pb = (q31_t *) (py); 
+ 
+ 
+      /* Decrement the loop counter */ 
+      blkCnt--; 
+    } 
+ 
+    /* If the blockSize2 is not a multiple of 4, compute any remaining output samples here.  
+     ** No loop unrolling is used. */ 
+    blkCnt = blockSize2 % 0x4u; 
+ 
+    while(blkCnt > 0u) 
+    { 
+      /* Accumulator is made zero for every iteration */ 
+      sum = 0; 
+ 
+      /* Apply loop unrolling and compute 4 MACs simultaneously. */ 
+      k = srcBLen >> 2u; 
+ 
+      /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.  
+       ** a second loop below computes MACs for the remaining 1 to 3 samples. */ 
+      while(k > 0u) 
+      { 
+        /* Perform the multiply-accumulates */ 
+        sum += ((q31_t) * px++ * *py++); 
+        sum += ((q31_t) * px++ * *py++); 
+        sum += ((q31_t) * px++ * *py++); 
+        sum += ((q31_t) * px++ * *py++); 
+ 
+        /* Decrement the loop counter */ 
+        k--; 
+      } 
+ 
+      /* If the srcBLen is not a multiple of 4, compute any remaining MACs here.  
+       ** No loop unrolling is used. */ 
+      k = srcBLen % 0x4u; 
+ 
+      while(k > 0u) 
+      { 
+        /* Perform the multiply-accumulates */ 
+        sum += ((q31_t) * px++ * *py++); 
+ 
+        /* Decrement the loop counter */ 
+        k--; 
+      } 
+ 
+      /* Store the result in the accumulator in the destination buffer. */ 
+      *pOut = (q15_t) (sum >> 15); 
+      /* Destination pointer is updated according to the address modifier, inc */ 
+      pOut += inc; 
+ 
+      /* Increment the pointer pIn1 index, count by 1 */ 
+      count++; 
+ 
+      /* Update the inputA and inputB pointers for next MAC calculation */ 
+      px = pIn1 + count; 
+      py = pIn2; 
+ 
+      /* Decrement the loop counter */ 
+      blkCnt--; 
+    } 
+  } 
+  else 
+  { 
+    /* srcBLen is less than 4 here,  
+     * so neither the blockSize2 loop nor the srcBLen loop is unrolled */ 
+    blkCnt = blockSize2; 
+ 
+    while(blkCnt > 0u) 
+    { 
+      /* Accumulator is made zero for every iteration */ 
+      sum = 0; 
+ 
+      /* Loop over srcBLen */ 
+      k = srcBLen; 
+ 
+      while(k > 0u) 
+      { 
+        /* Perform the multiply-accumulate */ 
+        sum += ((q31_t) * px++ * *py++); 
+ 
+        /* Decrement the loop counter */ 
+        k--; 
+      } 
+ 
+      /* Store the result in the accumulator in the destination buffer. */ 
+      *pOut = (q15_t) (sum >> 15); 
+      /* Destination pointer is updated according to the address modifier, inc */ 
+      pOut += inc; 
+ 
+      /* Increment the pointer pIn1 index, count by 1 */ 
+      count++; 
+ 
+      /* Update the inputA and inputB pointers for next MAC calculation */ 
+      px = pIn1 + count; 
+      py = pIn2; 
+ 
+      /* Decrement the loop counter */ 
+      blkCnt--; 
+    } 
+  } 
+ 
+  /* --------------------------  
+   * Initializations of stage3  
+   * -------------------------*/ 
+ 
+  /* sum += x[srcALen-srcBLen+1] * y[0] + x[srcALen-srcBLen+2] * y[1] +...+ x[srcALen-1] * y[srcBLen-1]  
+   * sum += x[srcALen-srcBLen+2] * y[0] + x[srcALen-srcBLen+3] * y[1] +...+ x[srcALen-1] * y[srcBLen-1]  
+   * ....  
+   * sum +=  x[srcALen-2] * y[0] + x[srcALen-1] * y[1]  
+   * sum +=  x[srcALen-1] * y[0]  
+   */ 
+ 
+  /* In this stage the MAC operations are decreased by 1 for every iteration.  
+     The count variable holds the number of MAC operations performed */ 
+  count = srcBLen - 1u; 
+ 
+  /* Working pointer of inputA */ 
+  pSrc1 = (pIn1 + srcALen) - (srcBLen - 1u); 
+  px = pSrc1; 
+ 
+  /* Working pointer of inputB */ 
+  py = pIn2; 
+ 
+  /* -------------------  
+   * Stage3 process  
+   * ------------------*/ 
+ 
+  while(blockSize3 > 0u) 
+  { 
+    /* Accumulator is made zero for every iteration */ 
+    sum = 0; 
+ 
+    /* Apply loop unrolling and compute 4 MACs simultaneously. */ 
+    k = count >> 2u; 
+ 
+    /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.  
+     ** a second loop below computes MACs for the remaining 1 to 3 samples. */ 
+    while(k > 0u) 
+    { 
+      /* Perform the multiply-accumulates */ 
+      /* sum += x[srcALen - srcBLen + 4] * y[3] , sum += x[srcALen - srcBLen + 3] * y[2] */ 
+      sum = __SMLAD(*__SIMD32(px)++, *__SIMD32(py)++, sum); 
+      /* sum += x[srcALen - srcBLen + 2] * y[1] , sum += x[srcALen - srcBLen + 1] * y[0] */ 
+      sum = __SMLAD(*__SIMD32(px)++, *__SIMD32(py)++, sum); 
+ 
+      /* Decrement the loop counter */ 
+      k--; 
+    } 
+ 
+    /* If the count is not a multiple of 4, compute any remaining MACs here.  
+     ** No loop unrolling is used. */ 
+    k = count % 0x4u; 
+ 
+    while(k > 0u) 
+    { 
+      /* One sample pair per step; each operand is a single promoted q15 value */ 
+      sum = __SMLAD(*px++, *py++, sum); 
+ 
+      /* Decrement the loop counter */ 
+      k--; 
+    } 
+ 
+    /* Store the result in the accumulator in the destination buffer. */ 
+    *pOut = (q15_t) (sum >> 15); 
+    /* Destination pointer is updated according to the address modifier, inc */ 
+    pOut += inc; 
+ 
+    /* Next output starts one inputA sample later and at the start of inputB */ 
+    px = ++pSrc1; 
+    py = pIn2; 
+ 
+    /* Decrement the MAC count */ 
+    count--; 
+ 
+    /* Decrement the loop counter */ 
+    blockSize3--; 
+  } 
+ 
+} 
+ 
+/**  
+ * @} end of Corr group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FilteringFunctions/arm_correlate_fast_q31.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,602 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_correlate_fast_q31.c  
+*  
+* Description:	Fast Q31 Correlation.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupFilters  
+ */ 
+ 
+/**  
+ * @addtogroup Corr  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief Correlate Q31 sequences (fast version)  
+ * @param[in] *pSrcA points to the first input sequence.  
+ * @param[in] srcALen length of the first input sequence.  
+ * @param[in] *pSrcB points to the second input sequence.  
+ * @param[in] srcBLen length of the second input sequence.  
+ * @param[out] *pDst points to the location where the output result is written.  Length 2 * max(srcALen, srcBLen) - 1.  
+ * @return none.  
+ *  
+ * @details  
+ * <b>Scaling and Overflow Behavior:</b>  
+ *  
+ * \par  
+ * This function is optimized for speed at the expense of fixed-point precision and overflow protection.  
+ * The result of each 1.31 x 1.31 multiplication is truncated to 2.30 format.  
+ * These intermediate results are accumulated in a 32-bit register in 2.30 format.  
+ * Finally, the accumulator is saturated and converted to a 1.31 result.  
+ *  
+ * \par  
+ * The fast version has the same overflow behavior as the standard version but provides less precision since it discards the low 32 bits of each multiplication result.  
+ * In order to avoid overflows completely the input signals must be scaled down.  
+ * The input signals should be scaled down to avoid intermediate overflows.  
+ * Scale down one of the inputs by 1/min(srcALen, srcBLen) to avoid overflows since a  
+ * maximum of min(srcALen, srcBLen) number of additions is carried internally.  
+ *  
+ * \par  
+ * See <code>arm_correlate_q31()</code> for a slower implementation of this function which uses 64-bit accumulation to provide higher precision.  
+ */ 
+ 
+void arm_correlate_fast_q31( 
+  q31_t * pSrcA, 
+  uint32_t srcALen, 
+  q31_t * pSrcB, 
+  uint32_t srcBLen, 
+  q31_t * pDst) 
+{ 
+  q31_t *pIn1;                                   /* inputA pointer               */ 
+  q31_t *pIn2;                                   /* inputB pointer               */ 
+  q31_t *pOut = pDst;                            /* output pointer               */ 
+  q31_t *px;                                     /* Intermediate inputA pointer  */ 
+  q31_t *py;                                     /* Intermediate inputB pointer  */ 
+  q31_t *pSrc1;                                  /* Intermediate pointers        */ 
+  q31_t sum, acc0, acc1, acc2, acc3;             /* Accumulators                  */ 
+  q31_t x0, x1, x2, x3, c0;                      /* temporary variables for holding input and coefficient values */ 
+  uint32_t j, k = 0u, count, blkCnt, outBlockSize, blockSize1, blockSize2, blockSize3;  /* loop counter                 */ 
+  int32_t inc = 1;                               /* Destination address modifier */ 
+ 
+ 
+  /* The algorithm implementation is based on the lengths of the inputs. */ 
+  /* srcB is always made to slide across srcA. */ 
+  /* So srcBLen is always considered as shorter or equal to srcALen */ 
+  if(srcALen >= srcBLen) 
+  { 
+    /* Initialization of inputA pointer */ 
+    pIn1 = (pSrcA); 
+ 
+    /* Initialization of inputB pointer */ 
+    pIn2 = (pSrcB); 
+ 
+    /* Number of output samples is calculated */ 
+    outBlockSize = (2u * srcALen) - 1u; 
+ 
+    /* When srcALen > srcBLen, zero padding is done to srcB  
+     * to make their lengths equal.  
+     * Instead, (outBlockSize - (srcALen + srcBLen - 1))  
+     * number of output samples are made zero */ 
+    j = outBlockSize - (srcALen + (srcBLen - 1u)); 
+ 
+    while(j > 0u) 
+    { 
+      /* Zero is stored in the destination buffer */ 
+      *pOut++ = 0; 
+ 
+      /* Decrement the loop counter */ 
+      j--; 
+    } 
+ 
+  } 
+  else 
+  { 
+    /* Initialization of inputA pointer */ 
+    pIn1 = (pSrcB); 
+ 
+    /* Initialization of inputB pointer */ 
+    pIn2 = (pSrcA); 
+ 
+    /* srcBLen is always considered as shorter or equal to srcALen */ 
+    j = srcBLen; 
+    srcBLen = srcALen; 
+    srcALen = j; 
+ 
+    /* CORR(x, y) = Reverse order(CORR(y, x)) */ 
+    /* Hence set the destination pointer to point to the last output sample */ 
+    pOut = pDst + ((srcALen + srcBLen) - 2u); 
+ 
+    /* Destination address modifier is set to -1 */ 
+    inc = -1; 
+ 
+  } 
+ 
+  /* The function is internally  
+   * divided into three parts according to the number of multiplications that has to be  
+   * taken place between inputA samples and inputB samples. In the first part of the  
+   * algorithm, the multiplications increase by one for every iteration.  
+   * In the second part of the algorithm, srcBLen number of multiplications are done.  
+   * In the third part of the algorithm, the multiplications decrease by one  
+   * for every iteration.*/ 
+  /* The algorithm is implemented in three stages.  
+   * The loop counters of each stage is initiated here. */ 
+  blockSize1 = srcBLen - 1u; 
+  blockSize2 = srcALen - (srcBLen - 1u); 
+  blockSize3 = blockSize1; 
+ 
+  /* --------------------------  
+   * Initializations of stage1  
+   * -------------------------*/ 
+ 
+  /* sum = x[0] * y[srcBlen - 1]  
+   * sum = x[0] * y[srcBlen - 2] + x[1] * y[srcBlen - 1]  
+   * ....  
+   * sum = x[0] * y[0] + x[1] * y[1] +...+ x[srcBLen - 1] * y[srcBLen - 1]  
+   */ 
+ 
+  /* In this stage the MAC operations are increased by 1 for every iteration.  
+     The count variable holds the number of MAC operations performed */ 
+  count = 1u; 
+ 
+  /* Working pointer of inputA */ 
+  px = pIn1; 
+ 
+  /* Working pointer of inputB */ 
+  pSrc1 = pIn2 + (srcBLen - 1u); 
+  py = pSrc1; 
+ 
+  /* ------------------------  
+   * Stage1 process  
+   * ----------------------*/ 
+ 
+  /* The first stage starts here */ 
+  while(blockSize1 > 0u) 
+  { 
+    /* Accumulator is made zero for every iteration */ 
+    sum = 0; 
+ 
+    /* Apply loop unrolling and compute 4 MACs simultaneously. */ 
+    k = count >> 2; 
+ 
+    /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.  
+     ** a second loop below computes MACs for the remaining 1 to 3 samples. */ 
+    while(k > 0u) 
+    { 
+      /* x[0] * y[srcBLen - 4] */ 
+      sum = (q31_t) ((((q63_t) sum << 32) + 
+                      ((q63_t) * px++ * (*py++))) >> 32); 
+      /* x[1] * y[srcBLen - 3] */ 
+      sum = (q31_t) ((((q63_t) sum << 32) + 
+                      ((q63_t) * px++ * (*py++))) >> 32); 
+      /* x[2] * y[srcBLen - 2] */ 
+      sum = (q31_t) ((((q63_t) sum << 32) + 
+                      ((q63_t) * px++ * (*py++))) >> 32); 
+      /* x[3] * y[srcBLen - 1] */ 
+      sum = (q31_t) ((((q63_t) sum << 32) + 
+                      ((q63_t) * px++ * (*py++))) >> 32); 
+ 
+      /* Decrement the loop counter */ 
+      k--; 
+    } 
+ 
+    /* If the count is not a multiple of 4, compute any remaining MACs here.  
+     ** No loop unrolling is used. */ 
+    k = count % 0x4u; 
+ 
+    while(k > 0u) 
+    { 
+      /* Perform the multiply-accumulates */ 
+      /* x[0] * y[srcBLen - 1] */ 
+      sum = (q31_t) ((((q63_t) sum << 32) + 
+                      ((q63_t) * px++ * (*py++))) >> 32); 
+ 
+      /* Decrement the loop counter */ 
+      k--; 
+    } 
+ 
+    /* Store the result in the accumulator in the destination buffer. */ 
+    *pOut = sum << 1; 
+    /* Destination pointer is updated according to the address modifier, inc */ 
+    pOut += inc; 
+ 
+    /* Update the inputA and inputB pointers for next MAC calculation */ 
+    py = pSrc1 - count; 
+    px = pIn1; 
+ 
+    /* Increment the MAC count */ 
+    count++; 
+ 
+    /* Decrement the loop counter */ 
+    blockSize1--; 
+  } 
+ 
+  /* --------------------------  
+   * Initializations of stage2  
+   * ------------------------*/ 
+ 
+  /* sum = x[0] * y[0] + x[1] * y[1] +...+ x[srcBLen-1] * y[srcBLen-1]  
+   * sum = x[1] * y[0] + x[2] * y[1] +...+ x[srcBLen] * y[srcBLen-1]  
+   * ....  
+   * sum = x[srcALen-srcBLen-2] * y[0] + x[srcALen-srcBLen-1] * y[1] +...+ x[srcALen-1] * y[srcBLen-1]  
+   */ 
+ 
+  /* Working pointer of inputA */ 
+  px = pIn1; 
+ 
+  /* Working pointer of inputB */ 
+  py = pIn2; 
+ 
+  /* count is index by which the pointer pIn1 to be incremented */ 
+  count = 1u; 
+ 
+  /* -------------------  
+   * Stage2 process  
+   * ------------------*/ 
+ 
+  /* Stage2 depends on srcBLen as in this stage srcBLen number of MACS are performed.  
+   * So, to loop unroll over blockSize2,  
+   * srcBLen should be greater than or equal to 4 */ 
+  if(srcBLen >= 4u) 
+  { 
+    /* Loop unroll over blockSize2, by 4 */ 
+    blkCnt = blockSize2 >> 2u; 
+ 
+    while(blkCnt > 0u) 
+    { 
+      /* Set all accumulators to zero */ 
+      acc0 = 0; 
+      acc1 = 0; 
+      acc2 = 0; 
+      acc3 = 0; 
+ 
+      /* read x[0], x[1], x[2] samples */ 
+      x0 = *(px++); 
+      x1 = *(px++); 
+      x2 = *(px++); 
+ 
+      /* Apply loop unrolling and compute 4 MACs simultaneously. */ 
+      k = srcBLen >> 2u; 
+ 
+      /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.  
+       ** a second loop below computes MACs for the remaining 1 to 3 samples. */ 
+      do 
+      { 
+        /* Read y[0] sample */ 
+        c0 = *(py++); 
+ 
+        /* Read x[3] sample */ 
+        x3 = *(px++); 
+ 
+        /* Perform the multiply-accumulate */ 
+        /* acc0 +=  x[0] * y[0] */ 
+        acc0 = (q31_t) ((((q63_t) acc0 << 32) + ((q63_t) x0 * c0)) >> 32); 
+        /* acc1 +=  x[1] * y[0] */ 
+        acc1 = (q31_t) ((((q63_t) acc1 << 32) + ((q63_t) x1 * c0)) >> 32); 
+        /* acc2 +=  x[2] * y[0] */ 
+        acc2 = (q31_t) ((((q63_t) acc2 << 32) + ((q63_t) x2 * c0)) >> 32); 
+        /* acc3 +=  x[3] * y[0] */ 
+        acc3 = (q31_t) ((((q63_t) acc3 << 32) + ((q63_t) x3 * c0)) >> 32); 
+ 
+        /* Read y[1] sample */ 
+        c0 = *(py++); 
+ 
+        /* Read x[4] sample */ 
+        x0 = *(px++); 
+ 
+        /* Perform the multiply-accumulates */ 
+        /* acc0 +=  x[1] * y[1] */ 
+        acc0 = (q31_t) ((((q63_t) acc0 << 32) + ((q63_t) x1 * c0)) >> 32); 
+        /* acc1 +=  x[2] * y[1] */ 
+        acc1 = (q31_t) ((((q63_t) acc1 << 32) + ((q63_t) x2 * c0)) >> 32); 
+        /* acc2 +=  x[3] * y[1] */ 
+        acc2 = (q31_t) ((((q63_t) acc2 << 32) + ((q63_t) x3 * c0)) >> 32); 
+        /* acc3 +=  x[4] * y[1] */ 
+        acc3 = (q31_t) ((((q63_t) acc3 << 32) + ((q63_t) x0 * c0)) >> 32); 
+ 
+        /* Read y[2] sample */ 
+        c0 = *(py++); 
+ 
+        /* Read x[5] sample */ 
+        x1 = *(px++); 
+ 
+        /* Perform the multiply-accumulates */ 
+        /* acc0 +=  x[2] * y[2] */ 
+        acc0 = (q31_t) ((((q63_t) acc0 << 32) + ((q63_t) x2 * c0)) >> 32); 
+        /* acc1 +=  x[3] * y[2] */ 
+        acc1 = (q31_t) ((((q63_t) acc1 << 32) + ((q63_t) x3 * c0)) >> 32); 
+        /* acc2 +=  x[4] * y[2] */ 
+        acc2 = (q31_t) ((((q63_t) acc2 << 32) + ((q63_t) x0 * c0)) >> 32); 
+        /* acc3 +=  x[5] * y[2] */ 
+        acc3 = (q31_t) ((((q63_t) acc3 << 32) + ((q63_t) x1 * c0)) >> 32); 
+ 
+        /* Read y[3] sample */ 
+        c0 = *(py++); 
+ 
+        /* Read x[6] sample */ 
+        x2 = *(px++); 
+ 
+        /* Perform the multiply-accumulates */ 
+        /* acc0 +=  x[3] * y[3] */ 
+        acc0 = (q31_t) ((((q63_t) acc0 << 32) + ((q63_t) x3 * c0)) >> 32); 
+        /* acc1 +=  x[4] * y[3] */ 
+        acc1 = (q31_t) ((((q63_t) acc1 << 32) + ((q63_t) x0 * c0)) >> 32); 
+        /* acc2 +=  x[5] * y[3] */ 
+        acc2 = (q31_t) ((((q63_t) acc2 << 32) + ((q63_t) x1 * c0)) >> 32); 
+        /* acc3 +=  x[6] * y[3] */ 
+        acc3 = (q31_t) ((((q63_t) acc3 << 32) + ((q63_t) x2 * c0)) >> 32); 
+ 
+ 
+      } while(--k); 
+ 
+      /* If the srcBLen is not a multiple of 4, compute any remaining MACs here.  
+       ** No loop unrolling is used. */ 
+      k = srcBLen % 0x4u; 
+ 
+      while(k > 0u) 
+      { 
+        /* Read y[4] sample */ 
+        c0 = *(py++); 
+ 
+        /* Read x[7] sample */ 
+        x3 = *(px++); 
+ 
+        /* Perform the multiply-accumulates */ 
+        /* acc0 +=  x[4] * y[4] */ 
+        acc0 = (q31_t) ((((q63_t) acc0 << 32) + ((q63_t) x0 * c0)) >> 32); 
+        /* acc1 +=  x[5] * y[4] */ 
+        acc1 = (q31_t) ((((q63_t) acc1 << 32) + ((q63_t) x1 * c0)) >> 32); 
+        /* acc2 +=  x[6] * y[4] */ 
+        acc2 = (q31_t) ((((q63_t) acc2 << 32) + ((q63_t) x2 * c0)) >> 32); 
+        /* acc3 +=  x[7] * y[4] */ 
+        acc3 = (q31_t) ((((q63_t) acc3 << 32) + ((q63_t) x3 * c0)) >> 32); 
+ 
+        /* Reuse the present samples for the next MAC */ 
+        x0 = x1; 
+        x1 = x2; 
+        x2 = x3; 
+ 
+        /* Decrement the loop counter */ 
+        k--; 
+      } 
+ 
+      /* Store the result in the accumulator in the destination buffer. */ 
+      *pOut = (q31_t) (acc0 << 1); 
+      /* Destination pointer is updated according to the address modifier, inc */ 
+      pOut += inc; 
+ 
+      *pOut = (q31_t) (acc1 << 1); 
+      pOut += inc; 
+ 
+      *pOut = (q31_t) (acc2 << 1); 
+      pOut += inc; 
+ 
+      *pOut = (q31_t) (acc3 << 1); 
+      pOut += inc; 
+ 
+      /* Update the inputA and inputB pointers for next MAC calculation */ 
+      px = pIn1 + (count * 4u); 
+      py = pIn2; 
+ 
+      /* Increment the pointer pIn1 index, count by 1 */ 
+      count++; 
+ 
+      /* Decrement the loop counter */ 
+      blkCnt--; 
+    } 
+ 
+    /* If the blockSize2 is not a multiple of 4, compute any remaining output samples here.  
+     ** No loop unrolling is used. */ 
+    blkCnt = blockSize2 % 0x4u; 
+ 
+    while(blkCnt > 0u) 
+    { 
+      /* Accumulator is made zero for every iteration */ 
+      sum = 0; 
+ 
+      /* Apply loop unrolling and compute 4 MACs simultaneously. */ 
+      k = srcBLen >> 2u; 
+ 
+      /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.  
+       ** a second loop below computes MACs for the remaining 1 to 3 samples. */ 
+      while(k > 0u) 
+      { 
+        /* Perform the multiply-accumulates */ 
+        sum = (q31_t) ((((q63_t) sum << 32) + 
+                        ((q63_t) * px++ * (*py++))) >> 32); 
+        sum = (q31_t) ((((q63_t) sum << 32) + 
+                        ((q63_t) * px++ * (*py++))) >> 32); 
+        sum = (q31_t) ((((q63_t) sum << 32) + 
+                        ((q63_t) * px++ * (*py++))) >> 32); 
+        sum = (q31_t) ((((q63_t) sum << 32) + 
+                        ((q63_t) * px++ * (*py++))) >> 32); 
+ 
+        /* Decrement the loop counter */ 
+        k--; 
+      } 
+ 
+      /* If the srcBLen is not a multiple of 4, compute any remaining MACs here.  
+       ** No loop unrolling is used. */ 
+      k = srcBLen % 0x4u; 
+ 
+      while(k > 0u) 
+      { 
+        /* Perform the multiply-accumulate */ 
+        sum = (q31_t) ((((q63_t) sum << 32) + 
+                        ((q63_t) * px++ * (*py++))) >> 32); 
+ 
+        /* Decrement the loop counter */ 
+        k--; 
+      } 
+ 
+      /* Store the result in the accumulator in the destination buffer. */ 
+      *pOut = sum << 1; 
+      /* Destination pointer is updated according to the address modifier, inc */ 
+      pOut += inc; 
+ 
+      /* Update the inputA and inputB pointers for next MAC calculation */ 
+      px = pIn1 + count; 
+      py = pIn2; 
+ 
+      /* Increment the MAC count */ 
+      count++; 
+ 
+      /* Decrement the loop counter */ 
+      blkCnt--; 
+    } 
+  } 
+  else 
+  { 
+    /* If the srcBLen is not a multiple of 4,  
+     * the blockSize2 loop cannot be unrolled by 4 */ 
+    blkCnt = blockSize2; 
+ 
+    while(blkCnt > 0u) 
+    { 
+      /* Accumulator is made zero for every iteration */ 
+      sum = 0; 
+ 
+      /* Loop over srcBLen */ 
+      k = srcBLen; 
+ 
+      while(k > 0u) 
+      { 
+        /* Perform the multiply-accumulate */ 
+        sum = (q31_t) ((((q63_t) sum << 32) + 
+                        ((q63_t) * px++ * (*py++))) >> 32); 
+ 
+        /* Decrement the loop counter */ 
+        k--; 
+      } 
+ 
+      /* Store the result in the accumulator in the destination buffer. */ 
+      *pOut = sum << 1; 
+      /* Destination pointer is updated according to the address modifier, inc */ 
+      pOut += inc; 
+ 
+      /* Update the inputA and inputB pointers for next MAC calculation */ 
+      px = pIn1 + count; 
+      py = pIn2; 
+ 
+      /* Increment the MAC count */ 
+      count++; 
+ 
+      /* Decrement the loop counter */ 
+      blkCnt--; 
+    } 
+  } 
+ 
+  /* --------------------------  
+   * Initializations of stage3  
+   * -------------------------*/ 
+ 
+  /* sum += x[srcALen-srcBLen+1] * y[0] + x[srcALen-srcBLen+2] * y[1] +...+ x[srcALen-1] * y[srcBLen-1]  
+   * sum += x[srcALen-srcBLen+2] * y[0] + x[srcALen-srcBLen+3] * y[1] +...+ x[srcALen-1] * y[srcBLen-1]  
+   * ....  
+   * sum +=  x[srcALen-2] * y[0] + x[srcALen-1] * y[1]  
+   * sum +=  x[srcALen-1] * y[0]  
+   */ 
+ 
+  /* In this stage the MAC operations are decreased by 1 for every iteration.  
+     The count variable holds the number of MAC operations performed */ 
+  count = srcBLen - 1u; 
+ 
+  /* Working pointer of inputA */ 
+  pSrc1 = ((pIn1 + srcALen) - srcBLen) + 1u; 
+  px = pSrc1; 
+ 
+  /* Working pointer of inputB */ 
+  py = pIn2; 
+ 
+  /* -------------------  
+   * Stage3 process  
+   * ------------------*/ 
+ 
+  while(blockSize3 > 0u) 
+  { 
+    /* Accumulator is made zero for every iteration */ 
+    sum = 0; 
+ 
+    /* Apply loop unrolling and compute 4 MACs simultaneously. */ 
+    k = count >> 2u; 
+ 
+    /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.  
+     ** a second loop below computes MACs for the remaining 1 to 3 samples. */ 
+    while(k > 0u) 
+    { 
+      /* Perform the multiply-accumulates */ 
+      /* sum += x[srcALen - srcBLen + 4] * y[3] */ 
+      sum = (q31_t) ((((q63_t) sum << 32) + 
+                      ((q63_t) * px++ * (*py++))) >> 32); 
+      /* sum += x[srcALen - srcBLen + 3] * y[2] */ 
+      sum = (q31_t) ((((q63_t) sum << 32) + 
+                      ((q63_t) * px++ * (*py++))) >> 32); 
+      /* sum += x[srcALen - srcBLen + 2] * y[1] */ 
+      sum = (q31_t) ((((q63_t) sum << 32) + 
+                      ((q63_t) * px++ * (*py++))) >> 32); 
+      /* sum += x[srcALen - srcBLen + 1] * y[0] */ 
+      sum = (q31_t) ((((q63_t) sum << 32) + 
+                      ((q63_t) * px++ * (*py++))) >> 32); 
+ 
+      /* Decrement the loop counter */ 
+      k--; 
+    } 
+ 
+    /* If the count is not a multiple of 4, compute any remaining MACs here.  
+     ** No loop unrolling is used. */ 
+    k = count % 0x4u; 
+ 
+    while(k > 0u) 
+    { 
+      /* Perform the multiply-accumulates */ 
+      sum = (q31_t) ((((q63_t) sum << 32) + 
+                      ((q63_t) * px++ * (*py++))) >> 32); 
+ 
+      /* Decrement the loop counter */ 
+      k--; 
+    } 
+ 
+    /* Store the result in the accumulator in the destination buffer. */ 
+    *pOut = sum << 1; 
+    /* Destination pointer is updated according to the address modifier, inc */ 
+    pOut += inc; 
+ 
+    /* Update the inputA and inputB pointers for next MAC calculation */ 
+    px = ++pSrc1; 
+    py = pIn2; 
+ 
+    /* Decrement the MAC count */ 
+    count--; 
+ 
+    /* Decrement the loop counter */ 
+    blockSize3--; 
+  } 
+ 
+} 
+ 
+/**  
+ * @} end of Corr group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FilteringFunctions/arm_correlate_q15.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,612 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_correlate_q15.c  
+*  
+* Description:	Q15 Correlation.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+*  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupFilters  
+ */ 
+ 
+/**  
+ * @addtogroup Corr  
+ * @{  
+ */ 
+ 
+/**
+ * @brief Correlation of Q15 sequences
+ * @param[in] *pSrcA points to the first input sequence.
+ * @param[in] srcALen length of the first input sequence.  Must be non-zero (stage sizes are computed as unsigned length - 1).
+ * @param[in] *pSrcB points to the second input sequence.
+ * @param[in] srcBLen length of the second input sequence.  Must be non-zero, for the same reason.
+ * @param[out] *pDst points to the location where the output result is written.  Length 2 * max(srcALen, srcBLen) - 1.
+ * @return none.
+ *
+ * @details
+ * <b>Scaling and Overflow Behavior:</b>
+ *
+ * \par
+ * The function is implemented using a 64-bit internal accumulator.
+ * Both inputs are in 1.15 format and multiplications yield a 2.30 result.
+ * The 2.30 intermediate results are accumulated in a 64-bit accumulator in 34.30 format.
+ * This approach provides 33 guard bits and there is no risk of overflow.
+ * The 34.30 result is then truncated to 34.15 format by discarding the low 15 bits and then saturated to 1.15 format.
+ *
+ * \par
+ * Refer to <code>arm_correlate_fast_q15()</code> for a faster but less precise version of this function.
+ */
+
+void arm_correlate_q15(
+  q15_t * pSrcA,
+  uint32_t srcALen,
+  q15_t * pSrcB,
+  uint32_t srcBLen,
+  q15_t * pDst)
+{
+  q15_t *pIn1;                                   /* inputA pointer (the longer sequence) */
+  q15_t *pIn2;                                   /* inputB pointer (the shorter sequence) */
+  q15_t *pOut = pDst;                            /* output pointer               */
+  q63_t sum, acc0, acc1, acc2, acc3;             /* 34.30 accumulators           */
+  q15_t *px;                                     /* Intermediate inputA pointer  */
+  q15_t *py;                                     /* Intermediate inputB pointer  */
+  q15_t *pSrc1;                                  /* Per-stage base pointer used to reset px or py */
+  q31_t x0, x1, x2, x3, c0;                      /* temporary variables holding packed pairs of input and coefficient values */
+  uint32_t j, k = 0u, count, blkCnt, outBlockSize, blockSize1, blockSize2, blockSize3;  /* loop counters                */
+  int32_t inc = 1;                               /* Destination address modifier */
+  q31_t *pb;                                     /* 32 bit pointer for inputB buffer */
+
+
+  /* The algorithm implementation is based on the lengths of the inputs. */
+  /* srcB is always made to slide across srcA. */
+  /* So srcBLen is always considered as shorter or equal to srcALen */
+  /* But CORR(x, y) is reverse of CORR(y, x) */
+  /* So, when srcBLen > srcALen, output pointer is made to point to the end of the output buffer */
+  /* and the destination pointer modifier, inc is set to -1 */
+  /* If srcALen > srcBLen, zero pad has to be done to srcB to make the two inputs of same length */
+  /* But to improve the performance,
+   * we include zeroes in the output instead of zero padding either of the inputs */
+  /* If srcALen > srcBLen,
+   * (srcALen - srcBLen) zeroes are written at the start of the output buffer */
+  /* If srcALen < srcBLen, results are stored backwards starting at pDst[srcALen + srcBLen - 2];
+   * the (srcBLen - srcALen) entries after that index are NOT written by this function */
+  if(srcALen >= srcBLen)
+  {
+    /* Initialization of inputA pointer */
+    pIn1 = (pSrcA);
+
+    /* Initialization of inputB pointer */
+    pIn2 = (pSrcB);
+
+    /* Number of output samples is calculated */
+    outBlockSize = (2u * srcALen) - 1u;
+
+    /* When srcALen > srcBLen, zero padding is done to srcB
+     * to make their lengths equal.
+     * Instead, (outBlockSize - (srcALen + srcBLen - 1)), i.e. (srcALen - srcBLen),
+     * leading output samples are made zero */
+    j = outBlockSize - (srcALen + (srcBLen - 1u));
+
+    while(j > 0u)
+    {
+      /* Zero is stored in the destination buffer */
+      *pOut++ = 0;
+
+      /* Decrement the loop counter */
+      j--;
+    }
+
+  }
+  else
+  {
+    /* Initialization of inputA pointer */
+    pIn1 = (pSrcB);
+
+    /* Initialization of inputB pointer */
+    pIn2 = (pSrcA);
+
+    /* Swap the lengths so that srcBLen is the shorter (or equal) one */
+    j = srcBLen;
+    srcBLen = srcALen;
+    srcALen = j;
+
+    /* CORR(x, y) = Reverse order(CORR(y, x)) */
+    /* Hence set the destination pointer to point to the last output sample */
+    pOut = pDst + ((srcALen + srcBLen) - 2u);
+
+    /* Destination address modifier is set to -1 */
+    inc = -1;
+
+  }
+
+  /* The function is internally
+   * divided into three parts according to the number of multiplications that has to be
+   * taken place between inputA samples and inputB samples. In the first part of the
+   * algorithm, the multiplications increase by one for every iteration.
+   * In the second part of the algorithm, srcBLen number of multiplications are done.
+   * In the third part of the algorithm, the multiplications decrease by one
+   * for every iteration.*/
+  /* The algorithm is implemented in three stages.
+   * The loop counters of each stage are initialized here. */
+  blockSize1 = srcBLen - 1u;
+  blockSize2 = srcALen - (srcBLen - 1u);
+  blockSize3 = blockSize1;
+
+  /* --------------------------
+   * Initializations of stage1
+   * -------------------------*/
+
+  /* sum = x[0] * y[srcBlen - 1]
+   * sum = x[0] * y[srcBlen - 2] + x[1] * y[srcBlen - 1]
+   * ....
+   * sum = x[0] * y[1] + x[1] * y[2] +...+ x[srcBLen - 2] * y[srcBLen - 1]
+   */
+
+  /* In this stage the MAC operations are increased by 1 for every iteration.
+     The count variable holds the number of MAC operations performed */
+  count = 1u;
+
+  /* Working pointer of inputA */
+  px = pIn1;
+
+  /* Working pointer of inputB */
+  pSrc1 = pIn2 + (srcBLen - 1u);
+  py = pSrc1;
+
+  /* ------------------------
+   * Stage1 process
+   * ----------------------*/
+
+  /* The first loop starts here */
+  while(blockSize1 > 0u)
+  {
+    /* Accumulator is made zero for every iteration */
+    sum = 0;
+
+    /* Apply loop unrolling and compute 4 MACs simultaneously. */
+    k = count >> 2;
+
+    /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.
+     ** a second loop below computes MACs for the remaining 1 to 3 samples. */
+    while(k > 0u)
+    {
+      /* x[0] * y[srcBLen - 4] , x[1] * y[srcBLen - 3] */
+      sum = __SMLALD(*__SIMD32(px)++, *__SIMD32(py)++, sum);
+      /* x[2] * y[srcBLen - 2] , x[3] * y[srcBLen - 1] */
+      sum = __SMLALD(*__SIMD32(px)++, *__SIMD32(py)++, sum);
+
+      /* Decrement the loop counter */
+      k--;
+    }
+
+    /* If the count is not a multiple of 4, compute any remaining MACs here.
+     ** No loop unrolling is used. */
+    k = count % 0x4u;
+
+    while(k > 0u)
+    {
+      /* Single MAC with a plain 64-bit multiply.  __SMLALD must not be used on a lone q15 value: */
+      /* its sign-extension halfwords get multiplied too, adding 1 when both samples are negative */
+      sum += ((q63_t) * px++ * *py++);
+
+      /* Decrement the loop counter */
+      k--;
+    }
+
+    /* Store the result in the accumulator in the destination buffer. */
+    *pOut = (q15_t) (__SSAT((sum >> 15), 16));
+    /* Destination pointer is updated according to the address modifier, inc */
+    pOut += inc;
+
+    /* Update the inputA and inputB pointers for next MAC calculation */
+    py = pSrc1 - count;
+    px = pIn1;
+
+    /* Increment the MAC count */
+    count++;
+
+    /* Decrement the loop counter */
+    blockSize1--;
+  }
+
+  /* --------------------------
+   * Initializations of stage2
+   * ------------------------*/
+
+  /* sum = x[0] * y[0] + x[1] * y[1] +...+ x[srcBLen-1] * y[srcBLen-1]
+   * sum = x[1] * y[0] + x[2] * y[1] +...+ x[srcBLen] * y[srcBLen-1]
+   * ....
+   * sum = x[srcALen-srcBLen] * y[0] + x[srcALen-srcBLen+1] * y[1] +...+ x[srcALen-1] * y[srcBLen-1]
+   */
+
+  /* Working pointer of inputA */
+  px = pIn1;
+
+  /* Working pointer of inputB */
+  py = pIn2;
+
+  /* Initialize inputB pointer of type q31 */
+  pb = (q31_t *) (py);
+
+  /* count is the index by which the pointer pIn1 is to be incremented */
+  count = 0u;
+
+  /* -------------------
+   * Stage2 process
+   * ------------------*/
+
+  /* Stage2 depends on srcBLen as in this stage srcBLen number of MACS are performed.
+   * So, to loop unroll over blockSize2,
+   * srcBLen should be greater than or equal to 4, to loop unroll the srcBLen loop */
+  if(srcBLen >= 4u)
+  {
+    /* Loop unroll over blockSize2, by 4 */
+    blkCnt = blockSize2 >> 2u;
+
+    while(blkCnt > 0u)
+    {
+      /* Set all accumulators to zero */
+      acc0 = 0;
+      acc1 = 0;
+      acc2 = 0;
+      acc3 = 0;
+
+      /* read x[0], x[1] samples (px advances one q15 per read, so the 32-bit reads overlap) */
+      x0 = *(q31_t *) (px++);
+      /* read x[1], x[2] samples */
+      x1 = *(q31_t *) (px++);
+
+      /* Apply loop unrolling and compute 4 MACs simultaneously. */
+      k = srcBLen >> 2u;
+
+      /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.
+       ** the code after the loop computes MACs for the remaining 1 to 3 samples. */
+      do
+      {
+        /* Read the first two inputB samples using SIMD:
+         * y[0] and y[1] */
+        c0 = *(pb++);
+
+        /* acc0 +=  x[0] * y[0] + x[1] * y[1] */
+        acc0 = __SMLALD(x0, c0, acc0);
+
+        /* acc1 +=  x[1] * y[0] + x[2] * y[1] */
+        acc1 = __SMLALD(x1, c0, acc1);
+
+        /* Read x[2], x[3] */
+        x2 = *(q31_t *) (px++);
+
+        /* Read x[3], x[4] */
+        x3 = *(q31_t *) (px++);
+
+        /* acc2 +=  x[2] * y[0] + x[3] * y[1] */
+        acc2 = __SMLALD(x2, c0, acc2);
+
+        /* acc3 +=  x[3] * y[0] + x[4] * y[1] */
+        acc3 = __SMLALD(x3, c0, acc3);
+
+        /* Read y[2] and y[3] */
+        c0 = *(pb++);
+
+        /* acc0 +=  x[2] * y[2] + x[3] * y[3] */
+        acc0 = __SMLALD(x2, c0, acc0);
+
+        /* acc1 +=  x[3] * y[2] + x[4] * y[3] */
+        acc1 = __SMLALD(x3, c0, acc1);
+
+        /* Read x[4], x[5] */
+        x0 = *(q31_t *) (px++);
+
+        /* Read x[5], x[6] */
+        x1 = *(q31_t *) (px++);
+
+        /* acc2 +=  x[4] * y[2] + x[5] * y[3] */
+        acc2 = __SMLALD(x0, c0, acc2);
+
+        /* acc3 +=  x[5] * y[2] + x[6] * y[3] */
+        acc3 = __SMLALD(x1, c0, acc3);
+
+      } while(--k);
+
+      /* For the next MAC operations, SIMD is not used
+       * So, the 16 bit pointer of inputB, py is updated */
+      py = (q15_t *) (pb);
+
+      /* If the srcBLen is not a multiple of 4, compute any remaining MACs here.
+       ** No loop unrolling is used. */
+      k = srcBLen % 0x4u;
+
+      if(k == 1u)
+      {
+        /* Read y[4] into the low halfword; the high halfword is cleared */
+        c0 = *py;
+        c0 = c0 & 0x0000FFFF;
+
+        /* Read x[6], x[7] */
+        x3 = *(q31_t *) px++;
+
+        /* Perform the multiply-accumulates (the X form pairs y[4] with the upper x sample) */
+        acc0 = __SMLALD(x0, c0, acc0);
+        acc1 = __SMLALD(x1, c0, acc1);
+        acc2 = __SMLALDX(x1, c0, acc2);
+        acc3 = __SMLALDX(x3, c0, acc3);
+      }
+
+      if(k == 2u)
+      {
+        /* Read y[4], y[5] */
+        c0 = *(pb);
+
+        /* Read x[6], x[7] */
+        x3 = *(q31_t *) px++;
+
+        /* Read x[7], x[8] */
+        x2 = *(q31_t *) px++;
+
+        /* Perform the multiply-accumulates */
+        acc0 = __SMLALD(x0, c0, acc0);
+        acc1 = __SMLALD(x1, c0, acc1);
+        acc2 = __SMLALD(x3, c0, acc2);
+        acc3 = __SMLALD(x2, c0, acc3);
+      }
+
+      if(k == 3u)
+      {
+        /* Read y[4], y[5] */
+        c0 = *pb++;
+
+        /* Read x[6], x[7] */
+        x3 = *(q31_t *) px++;
+
+        /* Read x[7], x[8] */
+        x2 = *(q31_t *) px++;
+
+        /* Perform the multiply-accumulates */
+        acc0 = __SMLALD(x0, c0, acc0);
+        acc1 = __SMLALD(x1, c0, acc1);
+        acc2 = __SMLALD(x3, c0, acc2);
+        acc3 = __SMLALD(x2, c0, acc3);
+
+        /* Read y[6] with a halfword load: it is the last inputB sample, so a 32-bit load would run past the buffer */
+        c0 = *((q15_t *) pb);
+        c0 = c0 & 0x0000FFFF;
+
+        /* Read x[8], x[9] */
+        x3 = *(q31_t *) px++;
+
+        /* Perform the multiply-accumulates */
+        acc0 = __SMLALDX(x1, c0, acc0);
+        acc1 = __SMLALD(x2, c0, acc1);
+        acc2 = __SMLALDX(x2, c0, acc2);
+        acc3 = __SMLALDX(x3, c0, acc3);
+      }
+
+      /* Store the result in the accumulator in the destination buffer. */
+      *pOut = (q15_t) (__SSAT(acc0 >> 15, 16));
+      /* Destination pointer is updated according to the address modifier, inc */
+      pOut += inc;
+
+      *pOut = (q15_t) (__SSAT(acc1 >> 15, 16));
+      pOut += inc;
+
+      *pOut = (q15_t) (__SSAT(acc2 >> 15, 16));
+      pOut += inc;
+
+      *pOut = (q15_t) (__SSAT(acc3 >> 15, 16));
+      pOut += inc;
+
+      /* Increment the count by 4 as 4 output values are computed */
+      count += 4u;
+
+      /* Update the inputA and inputB pointers for next MAC calculation */
+      px = pIn1 + count;
+      py = pIn2;
+      pb = (q31_t *) (py);
+
+
+      /* Decrement the loop counter */
+      blkCnt--;
+    }
+
+    /* If the blockSize2 is not a multiple of 4, compute any remaining output samples here.
+     ** No loop unrolling is used. */
+    blkCnt = blockSize2 % 0x4u;
+
+    while(blkCnt > 0u)
+    {
+      /* Accumulator is made zero for every iteration */
+      sum = 0;
+
+      /* Apply loop unrolling and compute 4 MACs simultaneously. */
+      k = srcBLen >> 2u;
+
+      /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.
+       ** a second loop below computes MACs for the remaining 1 to 3 samples. */
+      while(k > 0u)
+      {
+        /* Perform the multiply-accumulates */
+        sum += ((q63_t) * px++ * *py++);
+        sum += ((q63_t) * px++ * *py++);
+        sum += ((q63_t) * px++ * *py++);
+        sum += ((q63_t) * px++ * *py++);
+
+        /* Decrement the loop counter */
+        k--;
+      }
+
+      /* If the srcBLen is not a multiple of 4, compute any remaining MACs here.
+       ** No loop unrolling is used. */
+      k = srcBLen % 0x4u;
+
+      while(k > 0u)
+      {
+        /* Perform the multiply-accumulates */
+        sum += ((q63_t) * px++ * *py++);
+
+        /* Decrement the loop counter */
+        k--;
+      }
+
+      /* Store the result in the accumulator in the destination buffer. */
+      *pOut = (q15_t) (__SSAT(sum >> 15, 16));
+      /* Destination pointer is updated according to the address modifier, inc */
+      pOut += inc;
+
+      /* Increment count by 1, as one output value is computed */
+      count++;
+
+      /* Update the inputA and inputB pointers for next MAC calculation */
+      px = pIn1 + count;
+      py = pIn2;
+
+      /* Decrement the loop counter */
+      blkCnt--;
+    }
+  }
+  else
+  {
+    /* srcBLen is less than 4 here, so neither the srcBLen loop
+     * nor the blockSize2 loop is unrolled by 4 */
+    blkCnt = blockSize2;
+
+    while(blkCnt > 0u)
+    {
+      /* Accumulator is made zero for every iteration */
+      sum = 0;
+
+      /* Loop over srcBLen */
+      k = srcBLen;
+
+      while(k > 0u)
+      {
+        /* Perform the multiply-accumulate */
+        sum += ((q63_t) * px++ * *py++);
+
+        /* Decrement the loop counter */
+        k--;
+      }
+
+      /* Store the result in the accumulator in the destination buffer. */
+      *pOut = (q15_t) (__SSAT(sum >> 15, 16));
+      /* Destination pointer is updated according to the address modifier, inc */
+      pOut += inc;
+
+      /* Increment the MAC count */
+      count++;
+
+      /* Update the inputA and inputB pointers for next MAC calculation */
+      px = pIn1 + count;
+      py = pIn2;
+
+      /* Decrement the loop counter */
+      blkCnt--;
+    }
+  }
+
+  /* --------------------------
+   * Initializations of stage3
+   * -------------------------*/
+
+  /* sum = x[srcALen-srcBLen+1] * y[0] + x[srcALen-srcBLen+2] * y[1] +...+ x[srcALen-1] * y[srcBLen-2]
+   * sum = x[srcALen-srcBLen+2] * y[0] + x[srcALen-srcBLen+3] * y[1] +...+ x[srcALen-1] * y[srcBLen-3]
+   * ....
+   * sum =  x[srcALen-2] * y[0] + x[srcALen-1] * y[1]
+   * sum =  x[srcALen-1] * y[0]
+   */
+
+  /* In this stage the MAC operations are decreased by 1 for every iteration.
+     The count variable holds the number of MAC operations performed */
+  count = srcBLen - 1u;
+
+  /* Working pointer of inputA */
+  pSrc1 = (pIn1 + srcALen) - (srcBLen - 1u);
+  px = pSrc1;
+
+  /* Working pointer of inputB */
+  py = pIn2;
+
+  /* -------------------
+   * Stage3 process
+   * ------------------*/
+
+  while(blockSize3 > 0u)
+  {
+    /* Accumulator is made zero for every iteration */
+    sum = 0;
+
+    /* Apply loop unrolling and compute 4 MACs simultaneously. */
+    k = count >> 2u;
+
+    /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.
+     ** a second loop below computes MACs for the remaining 1 to 3 samples. */
+    while(k > 0u)
+    {
+      /* Perform the multiply-accumulates */
+      /* sum += x[srcALen - srcBLen + 1] * y[0] , sum += x[srcALen - srcBLen + 2] * y[1] */
+      sum = __SMLALD(*__SIMD32(px)++, *__SIMD32(py)++, sum);
+      /* sum += x[srcALen - srcBLen + 3] * y[2] , sum += x[srcALen - srcBLen + 4] * y[3] */
+      sum = __SMLALD(*__SIMD32(px)++, *__SIMD32(py)++, sum);
+
+      /* Decrement the loop counter */
+      k--;
+    }
+
+    /* If the count is not a multiple of 4, compute any remaining MACs here.
+     ** No loop unrolling is used. */
+    k = count % 0x4u;
+
+    while(k > 0u)
+    {
+      /* Single MAC with a plain multiply; __SMLALD is avoided for the reason given in stage 1 */
+      sum += ((q63_t) * px++ * *py++);
+
+      /* Decrement the loop counter */
+      k--;
+    }
+
+    /* Store the result in the accumulator in the destination buffer. */
+    *pOut = (q15_t) (__SSAT((sum >> 15), 16));
+    /* Destination pointer is updated according to the address modifier, inc */
+    pOut += inc;
+
+    /* Update the inputA and inputB pointers for next MAC calculation */
+    px = ++pSrc1;
+    py = pIn2;
+
+    /* Decrement the MAC count */
+    count--;
+
+    /* Decrement the loop counter */
+    blockSize3--;
+  }
+
+}
+ 
+/**  
+ * @} end of Corr group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FilteringFunctions/arm_correlate_q31.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,596 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_correlate_q31.c  
+*  
+* Description:	Q31 Correlation.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+*  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupFilters  
+ */ 
+ 
+/**  
+ * @addtogroup Corr  
+ * @{  
+ */ 
+ 
+/**
+ * @brief Correlate Q31 sequences
+ * @param[in] *pSrcA points to the first input sequence.
+ * @param[in] srcALen length of the first input sequence.
+ * @param[in] *pSrcB points to the second input sequence.
+ * @param[in] srcBLen length of the second input sequence.
+ * @param[out] *pDst points to the location where the output result is written.  Length 2 * max(srcALen, srcBLen) - 1.
+ * @return none.
+ *
+ * @details
+ * <b>Scaling and Overflow Behavior:</b>
+ *
+ * \par
+ * The function is implemented using an internal 64-bit accumulator.
+ * The accumulator has a 2.62 format and maintains full precision of the intermediate multiplication results but provides only a single guard bit.
+ * There is no saturation on intermediate additions.
+ * Thus, if the accumulator overflows it wraps around and distorts the result.
+ * The input signals should be scaled down to avoid intermediate overflows.
+ * Scale down one of the inputs by 1/min(srcALen, srcBLen) to avoid overflows since a
+ * maximum of min(srcALen, srcBLen) number of additions is carried internally.
+ * The 2.62 accumulator is right shifted by 31 bits and then converted to q31_t with a plain cast.
+ * No saturation is applied on that final conversion, so a value outside the 1.31 range is not clipped.
+ *
+ * \par
+ * See <code>arm_correlate_fast_q31()</code> for a faster but less precise implementation of this function.
+ *
+ * <b>Preconditions and output layout:</b>
+ *
+ * \par
+ * Both srcALen and srcBLen must be at least 1.  The loop counters are derived from
+ * (srcBLen - 1u) in unsigned arithmetic and wrap around when a length is zero.
+ *
+ * \par
+ * When srcALen >= srcBLen the function writes (srcALen - srcBLen) zeros followed by
+ * (srcALen + srcBLen - 1) correlation samples, i.e. 2 * srcALen - 1 values in total.
+ * When srcALen < srcBLen only pDst[0] ... pDst[srcALen + srcBLen - 2] are written (from the
+ * highest index downwards).  The remaining (srcBLen - srcALen) elements at the end of the
+ * buffer are not touched, so the caller has to clear pDst beforehand if those elements are
+ * expected to be zero.
+ */
+
+void arm_correlate_q31(
+  q31_t * pSrcA,
+  uint32_t srcALen,
+  q31_t * pSrcB,
+  uint32_t srcBLen,
+  q31_t * pDst)
+{
+  q31_t *pIn1;                                   /* longer input sequence ("inputA")  */
+  q31_t *pIn2;                                   /* shorter input sequence ("inputB") */
+  q31_t *pOut = pDst;                            /* output pointer                    */
+  q31_t *px;                                     /* working pointer into inputA       */
+  q31_t *py;                                     /* working pointer into inputB       */
+  q31_t *pSrc1;                                  /* per-stage base pointer            */
+  q63_t sum, acc0, acc1, acc2, acc3;             /* 2.62 accumulators                 */
+  q31_t x0, x1, x2, x3, c0;                      /* cached inputA samples and current inputB sample */
+  uint32_t j, k = 0u, count, blkCnt, outBlockSize, blockSize1, blockSize2, blockSize3;  /* loop counters */
+  int32_t inc = 1;                               /* destination address modifier      */
+
+
+  /* The shorter sequence is always made to slide across the longer one, so after
+   * this if/else srcBLen <= srcALen holds for the rest of the function.
+   * CORR(x, y) is the reverse of CORR(y, x); therefore, when the caller passes
+   * srcBLen > srcALen, the inputs are swapped and the output is written backwards
+   * (inc = -1) starting from the last computed sample.
+   * Instead of zero padding the shorter input, the zeros are placed in the output:
+   *  - srcALen > srcBLen: (srcALen - srcBLen) zeros are stored at the start of pDst.
+   *  - srcALen < srcBLen: the (srcBLen - srcALen) zeros belong at the end of pDst,
+   *    but this function does NOT store them; those elements are left untouched. */
+  if(srcALen >= srcBLen)
+  {
+    /* Initialization of inputA pointer */
+    pIn1 = (pSrcA);
+
+    /* Initialization of inputB pointer */
+    pIn2 = (pSrcB);
+
+    /* Number of output samples is calculated */
+    outBlockSize = (2u * srcALen) - 1u;
+
+    /* (outBlockSize - (srcALen + srcBLen - 1)) == (srcALen - srcBLen)
+     * leading output samples are made zero */
+    j = outBlockSize - (srcALen + (srcBLen - 1u));
+
+    while(j > 0u)
+    {
+      /* Zero is stored in the destination buffer */
+      *pOut++ = 0;
+
+      /* Decrement the loop counter */
+      j--;
+    }
+
+  }
+  else
+  {
+    /* Initialization of inputA pointer */
+    pIn1 = (pSrcB);
+
+    /* Initialization of inputB pointer */
+    pIn2 = (pSrcA);
+
+    /* Swap the lengths so that srcBLen is the shorter one */
+    j = srcBLen;
+    srcBLen = srcALen;
+    srcALen = j;
+
+    /* CORR(x, y) = Reverse order(CORR(y, x)) */
+    /* Hence set the destination pointer to point to the last computed output sample */
+    pOut = pDst + ((srcALen + srcBLen) - 2u);
+
+    /* Destination address modifier is set to -1 */
+    inc = -1;
+
+  }
+
+  /* The function is internally divided into three stages according to the number
+   * of multiplications between inputA samples and inputB samples:
+   *  - stage 1: the number of MACs grows by one for every output sample,
+   *  - stage 2: exactly srcBLen MACs are done for every output sample,
+   *  - stage 3: the number of MACs shrinks by one for every output sample.
+   * The loop counters of each stage are initialised here. */
+  blockSize1 = srcBLen - 1u;
+  blockSize2 = srcALen - (srcBLen - 1u);
+  blockSize3 = blockSize1;
+
+  /* --------------------------
+   * Initializations of stage1
+   * -------------------------*/
+
+  /* sum = x[0] * y[srcBLen - 1]
+   * sum = x[0] * y[srcBLen - 2] + x[1] * y[srcBLen - 1]
+   * ....
+   * sum = x[0] * y[1] + x[1] * y[2] +...+ x[srcBLen - 2] * y[srcBLen - 1]
+   */
+
+  /* In this stage the MAC operations are increased by 1 for every iteration.
+     The count variable holds the number of MAC operations performed */
+  count = 1u;
+
+  /* Working pointer of inputA */
+  px = pIn1;
+
+  /* Working pointer of inputB: starts at the last sample and moves one sample
+   * towards the start of inputB for every output */
+  pSrc1 = pIn2 + (srcBLen - 1u);
+  py = pSrc1;
+
+  /* ------------------------
+   * Stage1 process
+   * ----------------------*/
+
+  /* The first stage starts here */
+  while(blockSize1 > 0u)
+  {
+    /* Accumulator is made zero for every iteration */
+    sum = 0;
+
+    /* Apply loop unrolling and compute 4 MACs per pass. */
+    k = count >> 2;
+
+    /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.
+     ** a second loop below computes MACs for the remaining 1 to 3 samples. */
+    while(k > 0u)
+    {
+      sum += (q63_t) *px++ * (*py++);
+      sum += (q63_t) *px++ * (*py++);
+      sum += (q63_t) *px++ * (*py++);
+      sum += (q63_t) *px++ * (*py++);
+
+      /* Decrement the loop counter */
+      k--;
+    }
+
+    /* If the count is not a multiple of 4, compute any remaining MACs here.
+     ** No loop unrolling is used. */
+    k = count % 0x4u;
+
+    while(k > 0u)
+    {
+      /* Perform the multiply-accumulate */
+      sum += (q63_t) *px++ * (*py++);
+
+      /* Decrement the loop counter */
+      k--;
+    }
+
+    /* Convert 2.62 to 1.31 (no saturation) and store in the destination buffer. */
+    *pOut = (q31_t) (sum >> 31);
+    /* Destination pointer is updated according to the address modifier, inc */
+    pOut += inc;
+
+    /* Update the inputA and inputB pointers for next MAC calculation */
+    py = pSrc1 - count;
+    px = pIn1;
+
+    /* Increment the MAC count */
+    count++;
+
+    /* Decrement the loop counter */
+    blockSize1--;
+  }
+
+  /* --------------------------
+   * Initializations of stage2
+   * ------------------------*/
+
+  /* sum = x[0] * y[0] + x[1] * y[1] +...+ x[srcBLen-1] * y[srcBLen-1]
+   * sum = x[1] * y[0] + x[2] * y[1] +...+ x[srcBLen] * y[srcBLen-1]
+   * ....
+   * sum = x[srcALen-srcBLen] * y[0] + x[srcALen-srcBLen+1] * y[1] +...+ x[srcALen-1] * y[srcBLen-1]
+   */
+
+  /* Working pointer of inputA */
+  px = pIn1;
+
+  /* Working pointer of inputB */
+  py = pIn2;
+
+  /* count is the index into inputA at which the MACs of the next output sample
+   * start.  It is advanced by 4 after an unrolled block of four outputs and by 1
+   * after a single output, so that the remainder loop continues exactly where
+   * the unrolled loop stopped. */
+  count = 0u;
+
+  /* -------------------
+   * Stage2 process
+   * ------------------*/
+
+  /* Stage2 depends on srcBLen as in this stage srcBLen number of MACS are performed.
+   * So, to loop unroll over blockSize2,
+   * srcBLen should be greater than or equal to 4 */
+  if(srcBLen >= 4u)
+  {
+    /* Loop unroll over blockSize2, by 4 */
+    blkCnt = blockSize2 >> 2u;
+
+    while(blkCnt > 0u)
+    {
+      /* Set all accumulators to zero */
+      acc0 = 0;
+      acc1 = 0;
+      acc2 = 0;
+      acc3 = 0;
+
+      /* Pre-load the first three inputA samples of this block */
+      x0 = *(px++);
+      x1 = *(px++);
+      x2 = *(px++);
+
+      /* Apply loop unrolling and compute 4 MACs per accumulator per pass. */
+      k = srcBLen >> 2u;
+
+      /* First part of the processing with loop unrolling.
+       ** a second loop below computes MACs for the remaining 1 to 3 samples.
+       ** k is at least 1 here because srcBLen >= 4.
+       ** Indices in the comments below are relative to the first pass of a block. */
+      do
+      {
+        /* Read y[0] sample */
+        c0 = *(py++);
+
+        /* Read x[3] sample */
+        x3 = *(px++);
+
+        /* acc0 += x[0] * y[0], acc1 += x[1] * y[0], acc2 += x[2] * y[0], acc3 += x[3] * y[0] */
+        acc0 += ((q63_t) x0 * c0);
+        acc1 += ((q63_t) x1 * c0);
+        acc2 += ((q63_t) x2 * c0);
+        acc3 += ((q63_t) x3 * c0);
+
+        /* Read y[1] sample */
+        c0 = *(py++);
+
+        /* Read x[4] sample */
+        x0 = *(px++);
+
+        /* acc0 += x[1] * y[1], acc1 += x[2] * y[1], acc2 += x[3] * y[1], acc3 += x[4] * y[1] */
+        acc0 += ((q63_t) x1 * c0);
+        acc1 += ((q63_t) x2 * c0);
+        acc2 += ((q63_t) x3 * c0);
+        acc3 += ((q63_t) x0 * c0);
+
+        /* Read y[2] sample */
+        c0 = *(py++);
+
+        /* Read x[5] sample */
+        x1 = *(px++);
+
+        /* acc0 += x[2] * y[2], acc1 += x[3] * y[2], acc2 += x[4] * y[2], acc3 += x[5] * y[2] */
+        acc0 += ((q63_t) x2 * c0);
+        acc1 += ((q63_t) x3 * c0);
+        acc2 += ((q63_t) x0 * c0);
+        acc3 += ((q63_t) x1 * c0);
+
+        /* Read y[3] sample */
+        c0 = *(py++);
+
+        /* Read x[6] sample */
+        x2 = *(px++);
+
+        /* acc0 += x[3] * y[3], acc1 += x[4] * y[3], acc2 += x[5] * y[3], acc3 += x[6] * y[3] */
+        acc0 += ((q63_t) x3 * c0);
+        acc1 += ((q63_t) x0 * c0);
+        acc2 += ((q63_t) x1 * c0);
+        acc3 += ((q63_t) x2 * c0);
+
+
+      } while(--k);
+
+      /* If the srcBLen is not a multiple of 4, compute any remaining MACs here.
+       ** No loop unrolling is used. */
+      k = srcBLen % 0x4u;
+
+      while(k > 0u)
+      {
+        /* Read the next inputB sample */
+        c0 = *(py++);
+
+        /* Read the next inputA sample */
+        x3 = *(px++);
+
+        /* One more MAC for each of the four outputs */
+        acc0 += ((q63_t) x0 * c0);
+        acc1 += ((q63_t) x1 * c0);
+        acc2 += ((q63_t) x2 * c0);
+        acc3 += ((q63_t) x3 * c0);
+
+        /* Reuse the present samples for the next MAC */
+        x0 = x1;
+        x1 = x2;
+        x2 = x3;
+
+        /* Decrement the loop counter */
+        k--;
+      }
+
+      /* Convert 2.62 to 1.31 (no saturation) and store the four results. */
+      *pOut = (q31_t) (acc0 >> 31);
+      /* Destination pointer is updated according to the address modifier, inc */
+      pOut += inc;
+
+      *pOut = (q31_t) (acc1 >> 31);
+      pOut += inc;
+
+      *pOut = (q31_t) (acc2 >> 31);
+      pOut += inc;
+
+      *pOut = (q31_t) (acc3 >> 31);
+      pOut += inc;
+
+      /* Four outputs were produced: advance the inputA start index by 4 */
+      count += 4u;
+
+      /* Update the inputA and inputB pointers for next MAC calculation */
+      px = pIn1 + count;
+      py = pIn2;
+
+      /* Decrement the loop counter */
+      blkCnt--;
+    }
+
+    /* If the blockSize2 is not a multiple of 4, compute any remaining output samples here.
+     ** No loop unrolling over the outputs is used. */
+    blkCnt = blockSize2 % 0x4u;
+
+    while(blkCnt > 0u)
+    {
+      /* Accumulator is made zero for every iteration */
+      sum = 0;
+
+      /* Apply loop unrolling and compute 4 MACs per pass. */
+      k = srcBLen >> 2u;
+
+      /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.
+       ** a second loop below computes MACs for the remaining 1 to 3 samples. */
+      while(k > 0u)
+      {
+        /* Perform the multiply-accumulates */
+        sum += (q63_t) *px++ * (*py++);
+        sum += (q63_t) *px++ * (*py++);
+        sum += (q63_t) *px++ * (*py++);
+        sum += (q63_t) *px++ * (*py++);
+
+        /* Decrement the loop counter */
+        k--;
+      }
+
+      /* If the srcBLen is not a multiple of 4, compute any remaining MACs here.
+       ** No loop unrolling is used. */
+      k = srcBLen % 0x4u;
+
+      while(k > 0u)
+      {
+        /* Perform the multiply-accumulate */
+        sum += (q63_t) *px++ * (*py++);
+
+        /* Decrement the loop counter */
+        k--;
+      }
+
+      /* Convert 2.62 to 1.31 (no saturation) and store in the destination buffer. */
+      *pOut = (q31_t) (sum >> 31);
+      /* Destination pointer is updated according to the address modifier, inc */
+      pOut += inc;
+
+      /* One output was produced: advance the inputA start index by 1 */
+      count++;
+
+      /* Update the inputA and inputB pointers for next MAC calculation */
+      px = pIn1 + count;
+      py = pIn2;
+
+      /* Decrement the loop counter */
+      blkCnt--;
+    }
+  }
+  else
+  {
+    /* srcBLen is less than 4, so the blockSize2 loop is not unrolled */
+    blkCnt = blockSize2;
+
+    while(blkCnt > 0u)
+    {
+      /* Accumulator is made zero for every iteration */
+      sum = 0;
+
+      /* Loop over srcBLen */
+      k = srcBLen;
+
+      while(k > 0u)
+      {
+        /* Perform the multiply-accumulate */
+        sum += (q63_t) *px++ * (*py++);
+
+        /* Decrement the loop counter */
+        k--;
+      }
+
+      /* Convert 2.62 to 1.31 (no saturation) and store in the destination buffer. */
+      *pOut = (q31_t) (sum >> 31);
+      /* Destination pointer is updated according to the address modifier, inc */
+      pOut += inc;
+
+      /* One output was produced: advance the inputA start index by 1 */
+      count++;
+
+      /* Update the inputA and inputB pointers for next MAC calculation */
+      px = pIn1 + count;
+      py = pIn2;
+
+      /* Decrement the loop counter */
+      blkCnt--;
+    }
+  }
+
+  /* --------------------------
+   * Initializations of stage3
+   * -------------------------*/
+
+  /* sum = x[srcALen-srcBLen+1] * y[0] + x[srcALen-srcBLen+2] * y[1] +...+ x[srcALen-1] * y[srcBLen-2]
+   * sum = x[srcALen-srcBLen+2] * y[0] + x[srcALen-srcBLen+3] * y[1] +...+ x[srcALen-1] * y[srcBLen-3]
+   * ....
+   * sum = x[srcALen-2] * y[0] + x[srcALen-1] * y[1]
+   * sum = x[srcALen-1] * y[0]
+   */
+
+  /* In this stage the MAC operations are decreased by 1 for every iteration.
+     The count variable holds the number of MAC operations performed */
+  count = srcBLen - 1u;
+
+  /* Working pointer of inputA */
+  pSrc1 = pIn1 + (srcALen - (srcBLen - 1u));
+  px = pSrc1;
+
+  /* Working pointer of inputB */
+  py = pIn2;
+
+  /* -------------------
+   * Stage3 process
+   * ------------------*/
+
+  while(blockSize3 > 0u)
+  {
+    /* Accumulator is made zero for every iteration */
+    sum = 0;
+
+    /* Apply loop unrolling and compute 4 MACs per pass. */
+    k = count >> 2u;
+
+    /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.
+     ** a second loop below computes MACs for the remaining 1 to 3 samples. */
+    while(k > 0u)
+    {
+      /* Perform the multiply-accumulates */
+      sum += (q63_t) *px++ * (*py++);
+      sum += (q63_t) *px++ * (*py++);
+      sum += (q63_t) *px++ * (*py++);
+      sum += (q63_t) *px++ * (*py++);
+
+      /* Decrement the loop counter */
+      k--;
+    }
+
+    /* If the count is not a multiple of 4, compute any remaining MACs here.
+     ** No loop unrolling is used. */
+    k = count % 0x4u;
+
+    while(k > 0u)
+    {
+      /* Perform the multiply-accumulate */
+      sum += (q63_t) *px++ * (*py++);
+
+      /* Decrement the loop counter */
+      k--;
+    }
+
+    /* Convert 2.62 to 1.31 (no saturation) and store in the destination buffer. */
+    *pOut = (q31_t) (sum >> 31);
+    /* Destination pointer is updated according to the address modifier, inc */
+    pOut += inc;
+
+    /* Update the inputA and inputB pointers for next MAC calculation */
+    px = ++pSrc1;
+    py = pIn2;
+
+    /* Decrement the MAC count */
+    count--;
+
+    /* Decrement the loop counter */
+    blockSize3--;
+  }
+
+}
+ 
+/**  
+ * @} end of Corr group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FilteringFunctions/arm_correlate_q7.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,692 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_correlate_q7.c  
+*  
+* Description:	Process function for Q7 Correlation.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+*  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupFilters  
+ */ 
+ 
+/**  
+ * @addtogroup Corr  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief Correlation of Q7 sequences.  
+ * @param[in] *pSrcA points to the first input sequence.  
+ * @param[in] srcALen length of the first input sequence.  
+ * @param[in] *pSrcB points to the second input sequence.  
+ * @param[in] srcBLen length of the second input sequence.  
+ * @param[out] *pDst points to the location where the output result is written.  Length 2 * max(srcALen, srcBLen) - 1.  
+ * @return none.  
+ *  
+ * @details  
+ * <b>Scaling and Overflow Behavior:</b>  
+ *  
+ * \par  
+ * The function is implemented using a 32-bit internal accumulator.  
+ * Both the inputs are represented in 1.7 format and multiplications yield a 2.14 result.  
+ * The 2.14 intermediate results are accumulated in a 32-bit accumulator in 18.14 format.  
+ * This approach provides 17 guard bits and there is no risk of overflow as long as <code>max(srcALen, srcBLen)<131072</code>.  
+ * The 18.14 result is then truncated to 18.7 format by discarding the low 7 bits and saturated to 1.7 format.  
+ */ 
+ 
+void arm_correlate_q7( 
+  q7_t * pSrcA, 
+  uint32_t srcALen, 
+  q7_t * pSrcB, 
+  uint32_t srcBLen, 
+  q7_t * pDst) 
+{ 
+  q7_t *pIn1;                                    /* inputA pointer               */ 
+  q7_t *pIn2;                                    /* inputB pointer               */ 
+  q7_t *pOut = pDst;                             /* output pointer               */ 
+  q7_t *px;                                      /* Intermediate inputA pointer  */ 
+  q7_t *py;                                      /* Intermediate inputB pointer  */ 
+  q7_t *pSrc1;                                   /* Intermediate pointers        */ 
+  q31_t sum, acc0, acc1, acc2, acc3;             /* Accumulators                  */ 
+  q31_t input1, input2;                          /* temporary variables */ 
+  q15_t in1, in2;                                /* temporary variables */ 
+  q7_t x0, x1, x2, x3, c0, c1;                   /* temporary variables for holding input and coefficient values */ 
+  uint32_t j, k = 0u, count, blkCnt, outBlockSize, blockSize1, blockSize2, blockSize3;  /* loop counter                 */ 
+  int32_t inc = 1; 
+ 
+ 
+  /* The algorithm implementation is based on the lengths of the inputs. */ 
+  /* srcB is always made to slide across srcA. */ 
+  /* So srcBLen is always considered as shorter or equal to srcALen */ 
+  /* But CORR(x, y) is reverse of CORR(y, x) */ 
+  /* So, when srcBLen > srcALen, output pointer is made to point to the end of the output buffer */ 
+  /* and the destination pointer modifier, inc is set to -1 */ 
+  /* If srcALen > srcBLen, zero pad has to be done to srcB to make the two inputs of same length */ 
+  /* But to improve the performance,  
+   * we include zeroes in the output instead of zero padding either of the the inputs*/ 
+  /* If srcALen > srcBLen,  
+   * (srcALen - srcBLen) zeroes has to included in the starting of the output buffer */ 
+  /* If srcALen < srcBLen,  
+   * (srcALen - srcBLen) zeroes has to included in the ending of the output buffer */ 
+  if(srcALen >= srcBLen) 
+  { 
+    /* Initialization of inputA pointer */ 
+    pIn1 = (pSrcA); 
+ 
+    /* Initialization of inputB pointer */ 
+    pIn2 = (pSrcB); 
+ 
+    /* Number of output samples is calculated */ 
+    outBlockSize = (2u * srcALen) - 1u; 
+ 
+    /* When srcALen > srcBLen, zero padding is done to srcB  
+     * to make their lengths equal.  
+     * Instead, (outBlockSize - (srcALen + srcBLen - 1))  
+     * number of output samples are made zero */ 
+    j = outBlockSize - (srcALen + (srcBLen - 1u)); 
+ 
+    while(j > 0u) 
+    { 
+      /* Zero is stored in the destination buffer */ 
+      *pOut++ = 0; 
+ 
+      /* Decrement the loop counter */ 
+      j--; 
+    } 
+ 
+  } 
+  else 
+  { 
+    /* Initialization of inputA pointer */ 
+    pIn1 = (pSrcB); 
+ 
+    /* Initialization of inputB pointer */ 
+    pIn2 = (pSrcA); 
+ 
+    /* srcBLen is always considered as shorter or equal to srcALen */ 
+    j = srcBLen; 
+    srcBLen = srcALen; 
+    srcALen = j; 
+ 
+    /* CORR(x, y) = Reverse order(CORR(y, x)) */ 
+    /* Hence set the destination pointer to point to the last output sample */ 
+    pOut = pDst + ((srcALen + srcBLen) - 2u); 
+ 
+    /* Destination address modifier is set to -1 */ 
+    inc = -1; 
+ 
+  } 
+ 
+  /* The function is internally  
+   * divided into three parts according to the number of multiplications that has to be  
+   * taken place between inputA samples and inputB samples. In the first part of the  
+   * algorithm, the multiplications increase by one for every iteration.  
+   * In the second part of the algorithm, srcBLen number of multiplications are done.  
+   * In the third part of the algorithm, the multiplications decrease by one  
+   * for every iteration.*/ 
+  /* The algorithm is implemented in three stages.  
+   * The loop counters of each stage is initiated here. */ 
+  blockSize1 = srcBLen - 1u; 
+  blockSize2 = srcALen - (srcBLen - 1u); 
+  blockSize3 = blockSize1; 
+ 
+  /* --------------------------  
+   * Initializations of stage1  
+   * -------------------------*/ 
+ 
+  /* sum = x[0] * y[srcBlen - 1]  
+   * sum = x[0] * y[srcBlen - 2] + x[1] * y[srcBlen - 1]  
+   * ....  
+   * sum = x[0] * y[0] + x[1] * y[1] +...+ x[srcBLen - 1] * y[srcBLen - 1]  
+   */ 
+ 
+  /* In this stage the MAC operations are increased by 1 for every iteration.  
+     The count variable holds the number of MAC operations performed */ 
+  count = 1u; 
+ 
+  /* Working pointer of inputA */ 
+  px = pIn1; 
+ 
+  /* Working pointer of inputB */ 
+  pSrc1 = pIn2 + (srcBLen - 1u); 
+  py = pSrc1; 
+ 
+  /* ------------------------  
+   * Stage1 process  
+   * ----------------------*/ 
+ 
+  /* The first stage starts here */ 
+  while(blockSize1 > 0u) 
+  { 
+    /* Accumulator is made zero for every iteration */ 
+    sum = 0; 
+ 
+    /* Apply loop unrolling and compute 4 MACs simultaneously. */ 
+    k = count >> 2; 
+ 
+    /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.  
+     ** a second loop below computes MACs for the remaining 1 to 3 samples. */ 
+    while(k > 0u) 
+    { 
+      /* x[0] , x[1] */ 
+      in1 = (q15_t) * px++; 
+      in2 = (q15_t) * px++; 
+      input1 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16); 
+ 
+      /* y[srcBLen - 4] , y[srcBLen - 3] */ 
+      in1 = (q15_t) * py++; 
+      in2 = (q15_t) * py++; 
+      input2 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16); 
+ 
+      /* x[0] * y[srcBLen - 4] */ 
+      /* x[1] * y[srcBLen - 3] */ 
+      sum = __SMLAD(input1, input2, sum); 
+ 
+      /* x[2] , x[3] */ 
+      in1 = (q15_t) * px++; 
+      in2 = (q15_t) * px++; 
+      input1 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16); 
+ 
+      /* y[srcBLen - 2] , y[srcBLen - 1] */ 
+      in1 = (q15_t) * py++; 
+      in2 = (q15_t) * py++; 
+      input2 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16); 
+ 
+      /* x[2] * y[srcBLen - 2] */ 
+      /* x[3] * y[srcBLen - 1] */ 
+      sum = __SMLAD(input1, input2, sum); 
+ 
+ 
+      /* Decrement the loop counter */ 
+      k--; 
+    } 
+ 
+    /* If the count is not a multiple of 4, compute any remaining MACs here.  
+     ** No loop unrolling is used. */ 
+    k = count % 0x4u; 
+ 
+    while(k > 0u) 
+    { 
+      /* Perform the multiply-accumulates */ 
+      /* x[0] * y[srcBLen - 1] */ 
+      sum += (q31_t) ((q15_t) * px++ * *py++); 
+ 
+      /* Decrement the loop counter */ 
+      k--; 
+    } 
+ 
+    /* Store the result in the accumulator in the destination buffer. */ 
+    *pOut = (q7_t) (__SSAT(sum >> 7, 8)); 
+    /* Destination pointer is updated according to the address modifier, inc */ 
+    pOut += inc; 
+ 
+    /* Update the inputA and inputB pointers for next MAC calculation */ 
+    py = pSrc1 - count; 
+    px = pIn1; 
+ 
+    /* Increment the MAC count */ 
+    count++; 
+ 
+    /* Decrement the loop counter */ 
+    blockSize1--; 
+  } 
+ 
+  /* --------------------------  
+   * Initializations of stage2  
+   * ------------------------*/ 
+ 
+  /* sum = x[0] * y[0] + x[1] * y[1] +...+ x[srcBLen-1] * y[srcBLen-1]  
+   * sum = x[1] * y[0] + x[2] * y[1] +...+ x[srcBLen] * y[srcBLen-1]  
+   * ....  
+   * sum = x[srcALen-srcBLen-2] * y[0] + x[srcALen-srcBLen-1] * y[1] +...+ x[srcALen-1] * y[srcBLen-1]  
+   */ 
+ 
+  /* Working pointer of inputA */ 
+  px = pIn1; 
+ 
+  /* Working pointer of inputB */ 
+  py = pIn2; 
+ 
+  /* count is index by which the pointer pIn1 to be incremented */ 
+  count = 1u; 
+ 
+  /* -------------------  
+   * Stage2 process  
+   * ------------------*/ 
+ 
+  /* Stage2 depends on srcBLen as in this stage srcBLen number of MACS are performed.  
+   * So, to loop unroll over blockSize2,  
+   * srcBLen should be greater than or equal to 4 */ 
+  if(srcBLen >= 4u) 
+  { 
+    /* Loop unroll over blockSize2, by 4 */ 
+    blkCnt = blockSize2 >> 2u; 
+ 
+    while(blkCnt > 0u) 
+    { 
+      /* Set all accumulators to zero */ 
+      acc0 = 0; 
+      acc1 = 0; 
+      acc2 = 0; 
+      acc3 = 0; 
+ 
+      /* read x[0], x[1], x[2] samples */ 
+      x0 = *px++; 
+      x1 = *px++; 
+      x2 = *px++; 
+ 
+      /* Apply loop unrolling and compute 4 MACs simultaneously. */ 
+      k = srcBLen >> 2u; 
+ 
+      /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.  
+       ** a second loop below computes MACs for the remaining 1 to 3 samples. */ 
+      do 
+      { 
+        /* Read y[0] sample */ 
+        c0 = *py++; 
+        /* Read y[1] sample */ 
+        c1 = *py++; 
+ 
+        /* Read x[3] sample */ 
+        x3 = *px++; 
+ 
+        /* x[0] and x[1] are packed */ 
+        in1 = (q15_t) x0; 
+        in2 = (q15_t) x1; 
+ 
+        input1 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16); 
+ 
+        /* y[0] and y[1] are packed */ 
+        in1 = (q15_t) c0; 
+        in2 = (q15_t) c1; 
+ 
+        input2 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16); 
+ 
+        /* acc0 += x[0] * y[0] + x[1] * y[1]  */ 
+        acc0 = __SMLAD(input1, input2, acc0); 
+ 
+        /* x[1] and x[2] are packed */ 
+        in1 = (q15_t) x1; 
+        in2 = (q15_t) x2; 
+ 
+        input1 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16); 
+ 
+        /* acc1 += x[1] * y[0] + x[2] * y[1] */ 
+        acc1 = __SMLAD(input1, input2, acc1); 
+ 
+        /* x[2] and x[3] are packed */ 
+        in1 = (q15_t) x2; 
+        in2 = (q15_t) x3; 
+ 
+        input1 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16); 
+ 
+        /* acc2 += x[2] * y[0] + x[3] * y[1]  */ 
+        acc2 = __SMLAD(input1, input2, acc2); 
+ 
+        /* Read x[4] sample */ 
+        x0 = *(px++); 
+ 
+        /* x[3] and x[4] are packed */ 
+        in1 = (q15_t) x3; 
+        in2 = (q15_t) x0; 
+ 
+        input1 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16); 
+ 
+        /* acc3 += x[3] * y[0] + x[4] * y[1]  */ 
+        acc3 = __SMLAD(input1, input2, acc3); 
+ 
+        /* Read y[2] sample */ 
+        c0 = *py++; 
+        /* Read y[3] sample */ 
+        c1 = *py++; 
+ 
+        /* Read x[5] sample */ 
+        x1 = *px++; 
+ 
+        /* x[2] and x[3] are packed */ 
+        in1 = (q15_t) x2; 
+        in2 = (q15_t) x3; 
+ 
+        input1 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16); 
+ 
+        /* y[2] and y[3] are packed */ 
+        in1 = (q15_t) c0; 
+        in2 = (q15_t) c1; 
+ 
+        input2 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16); 
+ 
+        /* acc0 += x[2] * y[2] + x[3] * y[3]  */ 
+        acc0 = __SMLAD(input1, input2, acc0); 
+ 
+        /* x[3] and x[4] are packed */ 
+        in1 = (q15_t) x3; 
+        in2 = (q15_t) x0; 
+ 
+        input1 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16); 
+ 
+        /* acc1 += x[3] * y[2] + x[4] * y[3]  */ 
+        acc1 = __SMLAD(input1, input2, acc1); 
+ 
+        /* x[4] and x[5] are packed */ 
+        in1 = (q15_t) x0; 
+        in2 = (q15_t) x1; 
+ 
+        input1 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16); 
+ 
+        /* acc2 += x[4] * y[2] + x[5] * y[3]  */ 
+        acc2 = __SMLAD(input1, input2, acc2); 
+ 
+        /* Read x[6] sample */ 
+        x2 = *px++; 
+ 
+        /* x[5] and x[6] are packed */ 
+        in1 = (q15_t) x1; 
+        in2 = (q15_t) x2; 
+ 
+        input1 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16); 
+ 
+        /* acc3 += x[5] * y[2] + x[6] * y[3]  */ 
+        acc3 = __SMLAD(input1, input2, acc3); 
+ 
+      } while(--k); 
+ 
+      /* If the srcBLen is not a multiple of 4, compute any remaining MACs here.  
+       ** No loop unrolling is used. */ 
+      k = srcBLen % 0x4u; 
+ 
+      while(k > 0u) 
+      { 
+        /* Read y[4] sample */ 
+        c0 = *py++; 
+ 
+        /* Read x[7] sample */ 
+        x3 = *px++; 
+ 
+        /* Perform the multiply-accumulates */ 
+        /* acc0 +=  x[4] * y[4] */ 
+        acc0 += ((q15_t) x0 * c0); 
+        /* acc1 +=  x[5] * y[4] */ 
+        acc1 += ((q15_t) x1 * c0); 
+        /* acc2 +=  x[6] * y[4] */ 
+        acc2 += ((q15_t) x2 * c0); 
+        /* acc3 +=  x[7] * y[4] */ 
+        acc3 += ((q15_t) x3 * c0); 
+ 
+        /* Reuse the present samples for the next MAC */ 
+        x0 = x1; 
+        x1 = x2; 
+        x2 = x3; 
+ 
+        /* Decrement the loop counter */ 
+        k--; 
+      } 
+ 
+      /* Store the result in the accumulator in the destination buffer. */ 
+      *pOut = (q7_t) (__SSAT(acc0 >> 7, 8)); 
+      /* Destination pointer is updated according to the address modifier, inc */ 
+      pOut += inc; 
+ 
+      *pOut = (q7_t) (__SSAT(acc1 >> 7, 8)); 
+      pOut += inc; 
+ 
+      *pOut = (q7_t) (__SSAT(acc2 >> 7, 8)); 
+      pOut += inc; 
+ 
+      *pOut = (q7_t) (__SSAT(acc3 >> 7, 8)); 
+      pOut += inc; 
+ 
+      /* Update the inputA and inputB pointers for next MAC calculation */ 
+      px = pIn1 + (count * 4u); 
+      py = pIn2; 
+ 
+      /* Increment the pointer pIn1 index, count by 1 */ 
+      count++; 
+ 
+      /* Decrement the loop counter */ 
+      blkCnt--; 
+    } 
+ 
+    /* If the blockSize2 is not a multiple of 4, compute any remaining output samples here.  
+     ** No loop unrolling is used. */ 
+    blkCnt = blockSize2 % 0x4u; 
+ 
+    while(blkCnt > 0u) 
+    { 
+      /* Accumulator is made zero for every iteration */ 
+      sum = 0; 
+ 
+      /* Apply loop unrolling and compute 4 MACs simultaneously. */ 
+      k = srcBLen >> 2u; 
+ 
+      /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.  
+       ** a second loop below computes MACs for the remaining 1 to 3 samples. */ 
+      while(k > 0u) 
+      { 
+        /* Reading two inputs of SrcA buffer and packing */ 
+        in1 = (q15_t) * px++; 
+        in2 = (q15_t) * px++; 
+        input1 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16); 
+ 
+        /* Reading two inputs of SrcB buffer and packing */ 
+        in1 = (q15_t) * py++; 
+        in2 = (q15_t) * py++; 
+        input2 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16); 
+ 
+        /* Perform the multiply-accumulates */ 
+        sum = __SMLAD(input1, input2, sum); 
+ 
+        /* Reading two inputs of SrcA buffer and packing */ 
+        in1 = (q15_t) * px++; 
+        in2 = (q15_t) * px++; 
+        input1 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16); 
+ 
+        /* Reading two inputs of SrcB buffer and packing */ 
+        in1 = (q15_t) * py++; 
+        in2 = (q15_t) * py++; 
+        input2 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16); 
+ 
+        /* Perform the multiply-accumulates */ 
+        sum = __SMLAD(input1, input2, sum); 
+ 
+        /* Decrement the loop counter */ 
+        k--; 
+      } 
+ 
+      /* If the srcBLen is not a multiple of 4, compute any remaining MACs here.  
+       ** No loop unrolling is used. */ 
+      k = srcBLen % 0x4u; 
+ 
+      while(k > 0u) 
+      { 
+        /* Perform the multiply-accumulates */ 
+        sum += ((q15_t) * px++ * *py++); 
+ 
+        /* Decrement the loop counter */ 
+        k--; 
+      } 
+ 
+      /* Store the result in the accumulator in the destination buffer. */ 
+      *pOut = (q7_t) (__SSAT(sum >> 7, 8)); 
+      /* Destination pointer is updated according to the address modifier, inc */ 
+      pOut += inc; 
+ 
+      /* Update the inputA and inputB pointers for next MAC calculation */ 
+      px = pIn1 + count; 
+      py = pIn2; 
+ 
+      /* Increment the pointer pIn1 index, count by 1 */ 
+      count++; 
+ 
+      /* Decrement the loop counter */ 
+      blkCnt--; 
+    } 
+  } 
+  else 
+  { 
+    /* If the srcBLen is not a multiple of 4,  
+     * the blockSize2 loop cannot be unrolled by 4 */ 
+    blkCnt = blockSize2; 
+ 
+    while(blkCnt > 0u) 
+    { 
+      /* Accumulator is made zero for every iteration */ 
+      sum = 0; 
+ 
+      /* Loop over srcBLen */ 
+      k = srcBLen; 
+ 
+      while(k > 0u) 
+      { 
+        /* Perform the multiply-accumulate */ 
+        sum += ((q15_t) * px++ * *py++); 
+ 
+        /* Decrement the loop counter */ 
+        k--; 
+      } 
+ 
+      /* Store the result in the accumulator in the destination buffer. */ 
+      *pOut = (q7_t) (__SSAT(sum >> 7, 8)); 
+      /* Destination pointer is updated according to the address modifier, inc */ 
+      pOut += inc; 
+ 
+      /* Update the inputA and inputB pointers for next MAC calculation */ 
+      px = pIn1 + count; 
+      py = pIn2; 
+ 
+      /* Increment the MAC count */ 
+      count++; 
+ 
+      /* Decrement the loop counter */ 
+      blkCnt--; 
+    } 
+  } 
+ 
+  /* --------------------------  
+   * Initializations of stage3  
+   * -------------------------*/ 
+ 
+  /* sum += x[srcALen-srcBLen+1] * y[0] + x[srcALen-srcBLen+2] * y[1] +...+ x[srcALen-1] * y[srcBLen-1]  
+   * sum += x[srcALen-srcBLen+2] * y[0] + x[srcALen-srcBLen+3] * y[1] +...+ x[srcALen-1] * y[srcBLen-1]  
+   * ....  
+   * sum +=  x[srcALen-2] * y[0] + x[srcALen-1] * y[1]  
+   * sum +=  x[srcALen-1] * y[0]  
+   */ 
+ 
+  /* In this stage the MAC operations are decreased by 1 for every iteration.  
+     The count variable holds the number of MAC operations performed */ 
+  count = srcBLen - 1u; 
+ 
+  /* Working pointer of inputA */ 
+  pSrc1 = pIn1 + (srcALen - (srcBLen - 1u)); 
+  px = pSrc1; 
+ 
+  /* Working pointer of inputB */ 
+  py = pIn2; 
+ 
+  /* -------------------  
+   * Stage3 process  
+   * ------------------*/ 
+ 
+  while(blockSize3 > 0u) 
+  { 
+    /* Accumulator is made zero for every iteration */ 
+    sum = 0; 
+ 
+    /* Apply loop unrolling and compute 4 MACs simultaneously. */ 
+    k = count >> 2u; 
+ 
+    /* First part of the processing with loop unrolling.  Compute 4 MACs at a time.  
+     ** a second loop below computes MACs for the remaining 1 to 3 samples. */ 
+    while(k > 0u) 
+    { 
+      /* x[srcALen - srcBLen + 1] , x[srcALen - srcBLen + 2]  */ 
+      in1 = (q15_t) * px++; 
+      in2 = (q15_t) * px++; 
+      input1 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16); 
+ 
+      /* y[0] , y[1] */ 
+      in1 = (q15_t) * py++; 
+      in2 = (q15_t) * py++; 
+      input2 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16); 
+ 
+      /* sum += x[srcALen - srcBLen + 1] * y[0] */ 
+      /* sum += x[srcALen - srcBLen + 2] * y[1] */ 
+      sum = __SMLAD(input1, input2, sum); 
+ 
+      /* x[srcALen - srcBLen + 3] , x[srcALen - srcBLen + 4] */ 
+      in1 = (q15_t) * px++; 
+      in2 = (q15_t) * px++; 
+      input1 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16); 
+ 
+      /* y[2] , y[3] */ 
+      in1 = (q15_t) * py++; 
+      in2 = (q15_t) * py++; 
+      input2 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16); 
+ 
+      /* sum += x[srcALen - srcBLen + 3] * y[2] */ 
+      /* sum += x[srcALen - srcBLen + 4] * y[3] */ 
+      sum = __SMLAD(input1, input2, sum); 
+ 
+      /* Decrement the loop counter */ 
+      k--; 
+    } 
+ 
+    /* If the count is not a multiple of 4, compute any remaining MACs here.  
+     ** No loop unrolling is used. */ 
+    k = count % 0x4u; 
+ 
+    while(k > 0u) 
+    { 
+      /* Perform the multiply-accumulates */ 
+      sum += ((q15_t) * px++ * *py++); 
+ 
+      /* Decrement the loop counter */ 
+      k--; 
+    } 
+ 
+    /* Store the result in the accumulator in the destination buffer. */ 
+    *pOut = (q7_t) (__SSAT(sum >> 7, 8)); 
+    /* Destination pointer is updated according to the address modifier, inc */ 
+    pOut += inc; 
+ 
+    /* Update the inputA and inputB pointers for next MAC calculation */ 
+    px = ++pSrc1; 
+    py = pIn2; 
+ 
+    /* Decrement the MAC count */ 
+    count--; 
+ 
+    /* Decrement the loop counter */ 
+    blockSize3--; 
+  } 
+ 
+} 
+ 
+/**  
+ * @} end of Corr group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FilteringFunctions/arm_fir_decimate_f32.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,282 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:	    arm_fir_decimate_f32.c  
+*  
+* Description:	FIR decimation for floating-point sequences.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+*  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupFilters  
+ */ 
+ 
+/**  
+ * @defgroup FIR_decimate Finite Impulse Response (FIR) Decimator  
+ *  
+ * These functions combine an FIR filter together with a decimator.  
+ * They are used in multirate systems for reducing the sample rate of a signal without introducing aliasing distortion.  
+ * Conceptually, the functions are equivalent to the block diagram below:  
+ * \image html FIRDecimator.gif "Components included in the FIR Decimator functions"  
+ * When decimating by a factor of <code>M</code>, the signal should be prefiltered by a lowpass filter with a normalized  
+ * cutoff frequency of <code>1/M</code> in order to prevent aliasing distortion.  
+ * The user of the function is responsible for providing the filter coefficients.  
+ *  
+ * The FIR decimator functions provided in the CMSIS DSP Library combine the FIR filter and the decimator in an efficient manner.  
+ * Instead of calculating all of the FIR filter outputs and discarding <code>M-1</code> out of every <code>M</code>, only the  
+ * samples output by the decimator are computed.  
+ * The functions operate on blocks of input and output data.  
+ * <code>pSrc</code> points to an array of <code>blockSize</code> input values and  
+ * <code>pDst</code> points to an array of <code>blockSize/M</code> output values.  
+ * In order to have an integer number of output samples <code>blockSize</code>  
+ * must always be a multiple of the decimation factor <code>M</code>.  
+ *  
+ * The library provides separate functions for Q15, Q31 and floating-point data types.  
+ *  
+ * \par Algorithm:  
+ * The FIR portion of the algorithm uses the standard form filter:  
+ * <pre>  
+ *    y[n] = b[0] * x[n] + b[1] * x[n-1] + b[2] * x[n-2] + ...+ b[numTaps-1] * x[n-numTaps+1]  
+ * </pre>  
+ * where, <code>b[n]</code> are the filter coefficients.  
+ * \par 
+ * The <code>pCoeffs</code> points to a coefficient array of size <code>numTaps</code>.  
+ * Coefficients are stored in time reversed order.  
+ * \par  
+ * <pre>  
+ *    {b[numTaps-1], b[numTaps-2], b[numTaps-3], ..., b[1], b[0]}  
+ * </pre>  
+ * \par  
+ * <code>pState</code> points to a state array of size <code>numTaps + blockSize - 1</code>.  
+ * Samples in the state buffer are stored oldest first; the first <code>numTaps-1</code> entries are retained from the previous block:  
+ * \par  
+ * <pre>  
+ *    {x[-numTaps+1], x[-numTaps+2], ..., x[-2], x[-1], x[0], x[1], ..., x[blockSize-1]}  
+ * </pre>  
+ * The state variables are updated after each block of data is processed, the coefficients are untouched.  
+ *  
+ * \par Instance Structure  
+ * The coefficients and state variables for a filter are stored together in an instance data structure.  
+ * A separate instance structure must be defined for each filter.  
+ * Coefficient arrays may be shared among several instances while state variable array should be allocated separately.  
+ * There are separate instance structure declarations for each of the 3 supported data types.  
+ *  
+ * \par Initialization Functions  
+ * There is also an associated initialization function for each data type.  
+ * The initialization function performs the following operations:  
+ * - Sets the values of the internal structure fields.  
+ * - Zeros out the values in the state buffer.  
+ * - Checks to make sure that the size of the input is a multiple of the decimation factor.  
+ *  
+ * \par  
+ * Use of the initialization function is optional.  
+ * However, if the initialization function is used, then the instance structure cannot be placed into a const data section.  
+ * To place an instance structure into a const data section, the instance structure must be manually initialized.  
+ * The code below statically initializes each of the 3 different data type filter instance structures  
+ * <pre>  
+ *arm_fir_decimate_instance_f32 S = {M, numTaps, pCoeffs, pState};  
+ *arm_fir_decimate_instance_q31 S = {M, numTaps, pCoeffs, pState};  
+ *arm_fir_decimate_instance_q15 S = {M, numTaps, pCoeffs, pState};  
+ * </pre>  
+ * where <code>M</code> is the decimation factor; <code>numTaps</code> is the number of filter coefficients in the filter;  
+ * <code>pCoeffs</code> is the address of the coefficient buffer;  
+ * <code>pState</code> is the address of the state buffer.  
+ * Be sure to set the values in the state buffer to zeros when doing static initialization.  
+ *  
+ * \par Fixed-Point Behavior  
+ * Care must be taken when using the fixed-point versions of the FIR decimate filter functions.  
+ * In particular, the overflow and saturation behavior of the accumulator used in each function must be considered.  
+ * Refer to the function specific documentation below for usage guidelines.  
+ */ 
+ 
+/**  
+ * @addtogroup FIR_decimate  
+ * @{  
+ */ 
+ 
+  /**  
+   * @brief Processing function for the floating-point FIR decimator.  
+   * @param[in] *S        points to an instance of the floating-point FIR decimator structure.  
+   * @param[in] *pSrc     points to the block of input data.  
+   * @param[out] *pDst    points to the block of output data.  
+   * @param[in] blockSize number of input samples to process per call.  
+   * @return none.  
+   */ 
+ 
+void arm_fir_decimate_f32( 
+  const arm_fir_decimate_instance_f32 * S, 
+  float32_t * pSrc, 
+  float32_t * pDst, 
+  uint32_t blockSize) 
+{ 
+  float32_t *pState = S->pState;                 /* Start of the filter window for the current output sample */ 
+  float32_t *pCoeffs = S->pCoeffs;               /* Coefficient pointer */ 
+  float32_t *pStateCurnt;                        /* Write position for incoming samples in the state buffer */ 
+  float32_t *px, *pb;                            /* Temporary pointers for state and coefficient buffers */ 
+  float32_t sum0;                                /* Accumulator */ 
+  float32_t x0, c0;                              /* Temporary variables to hold state and coefficient values */ 
+  uint32_t numTaps = S->numTaps;                 /* Number of filter coefficients in the filter */ 
+  uint32_t i, tapCnt, blkCnt, outBlockSize = blockSize / S->M;  /* Loop counters; outBlockSize = number of outputs (S->M must be non-zero) */ 
+ 
+  /* S->pState buffer contains the (numTaps - 1) most recent samples of the previous frame */ 
+  /* pStateCurnt points to the location where the new input data should be written */ 
+  pStateCurnt = S->pState + (numTaps - 1u); 
+ 
+  /* Total number of output samples to be computed */ 
+  blkCnt = outBlockSize; 
+ 
+  while(blkCnt > 0u) 
+  { 
+    /* Copy decimation factor (S->M) new input samples into the state buffer */ 
+    i = S->M; 
+ 
+    do 
+    { 
+      *pStateCurnt++ = *pSrc++; 
+ 
+    } while(--i); 
+ 
+    /* Set accumulator to zero */ 
+    sum0 = 0.0f; 
+ 
+    /* Initialize state pointer to the oldest sample of the current window */ 
+    px = pState; 
+ 
+    /* Initialize coeff pointer (coefficients are stored time reversed) */ 
+    pb = pCoeffs; 
+ 
+    /* Loop unrolling.  Process 4 taps at a time. */ 
+    tapCnt = numTaps >> 2; 
+ 
+    /* Loop over the number of taps.  Unroll by a factor of 4.  
+     ** Repeat until fewer than 4 taps remain; the loop below handles those. */ 
+    while(tapCnt > 0u) 
+    { 
+      /* Read the b[numTaps-1] coefficient */ 
+      c0 = *(pb++); 
+ 
+      /* Read x[n-numTaps+1] sample */ 
+      x0 = *(px++); 
+ 
+      /* Perform the multiply-accumulate */ 
+      sum0 += x0 * c0; 
+ 
+      /* Read the b[numTaps-2] coefficient */ 
+      c0 = *(pb++); 
+ 
+      /* Read x[n-numTaps+2] sample */ 
+      x0 = *(px++); 
+ 
+      /* Perform the multiply-accumulate */ 
+      sum0 += x0 * c0; 
+ 
+      /* Read the b[numTaps-3] coefficient */ 
+      c0 = *(pb++); 
+ 
+      /* Read x[n-numTaps+3] sample */ 
+      x0 = *(px++); 
+ 
+      /* Perform the multiply-accumulate */ 
+      sum0 += x0 * c0; 
+ 
+      /* Read the b[numTaps-4] coefficient */ 
+      c0 = *(pb++); 
+ 
+      /* Read x[n-numTaps+4] sample */ 
+      x0 = *(px++); 
+ 
+      /* Perform the multiply-accumulate */ 
+      sum0 += x0 * c0; 
+ 
+      /* Decrement the loop counter */ 
+      tapCnt--; 
+    } 
+ 
+    /* If the filter length is not a multiple of 4, compute the remaining 1 to 3 filter taps */ 
+    tapCnt = numTaps % 0x4u; 
+ 
+    while(tapCnt > 0u) 
+    { 
+      /* Read the next coefficient */ 
+      c0 = *(pb++); 
+ 
+      /* Fetch the matching state sample */ 
+      x0 = *(px++); 
+ 
+      /* Perform the multiply-accumulate */ 
+      sum0 += x0 * c0; 
+ 
+      /* Decrement the loop counter */ 
+      tapCnt--; 
+    } 
+ 
+    /* Advance the state pointer by the decimation factor  
+     * so the next output is computed M input samples later */ 
+    pState = pState + S->M; 
+ 
+    /* The result is in the accumulator, store in the destination buffer. */ 
+    *pDst++ = sum0; 
+ 
+    /* Decrement the loop counter */ 
+    blkCnt--; 
+  } 
+ 
+  /* Processing is complete.  
+   ** Now copy the last numTaps - 1 samples to the start of the state buffer.  
+   ** This prepares the state buffer for the next function call. */ 
+ 
+  /* Points to the start of the state buffer */ 
+  pStateCurnt = S->pState; 
+ 
+  i = (numTaps - 1u) >> 2; 
+ 
+  /* Copy four samples per iteration */ 
+  while(i > 0u) 
+  { 
+    *pStateCurnt++ = *pState++; 
+    *pStateCurnt++ = *pState++; 
+    *pStateCurnt++ = *pState++; 
+    *pStateCurnt++ = *pState++; 
+ 
+    /* Decrement the loop counter */ 
+    i--; 
+  } 
+ 
+  i = (numTaps - 1u) % 0x04u; 
+ 
+  /* Copy the remaining 0 to 3 samples */ 
+  while(i > 0u) 
+  { 
+    *pStateCurnt++ = *pState++; 
+ 
+    /* Decrement the loop counter */ 
+    i--; 
+  } 
+} 
+ 
+/**  
+ * @} end of FIR_decimate group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FilteringFunctions/arm_fir_decimate_fast_q15.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,196 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:	    arm_fir_decimate_fast_q15.c  
+*  
+* Description:	Fast Q15 FIR Decimator.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupFilters  
+ */ 
+ 
+/**  
+ * @addtogroup FIR_decimate  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief Processing function for the Q15 FIR decimator (fast variant).  
+ * @param[in] *S points to an instance of the Q15 FIR decimator structure.  
+ * @param[in] *pSrc points to the block of input data.  
+ * @param[out] *pDst points to the block of output data  
+ * @param[in] blockSize number of input samples to process per call.  
+ * @return none  
+ *  
+ * <b>Scaling and Overflow Behavior:</b>  
+ * \par  
+ * This fast version uses a 32-bit accumulator with 2.30 format.  
+ * The accumulator maintains full precision of the intermediate multiplication results but provides only a single guard bit.  
+ * Thus, if the accumulator result overflows it wraps around and distorts the result.  
+ * In order to avoid overflows completely the input signal must be scaled down by log2(numTaps) bits (log2 is read as log to the base 2).  
+ * The 2.30 accumulator is then truncated to 2.15 format and saturated to yield the 1.15 result.  
+ *  
+ * \par  
+ * Refer to the function <code>arm_fir_decimate_q15()</code> for a slower implementation of this function which uses 64-bit accumulation to avoid wrap around distortion.  
+ * Both the slow and the fast versions use the same instance structure.  
+ * Use the function <code>arm_fir_decimate_init_q15()</code> to initialize the filter structure.  
+ */ 
+ 
+void arm_fir_decimate_fast_q15( 
+  const arm_fir_decimate_instance_q15 * S, 
+  q15_t * pSrc, 
+  q15_t * pDst, 
+  uint32_t blockSize) 
+{ 
+  q15_t *pState = S->pState;                     /* Start of the filter window for the current output sample */ 
+  q15_t *pCoeffs = S->pCoeffs;                   /* Coefficient pointer */ 
+  q15_t *pStateCurnt;                            /* Write position for incoming samples in the state buffer */ 
+  q15_t *px;                                     /* Temporary pointer for state buffer */ 
+  q15_t *pb;                                     /* Temporary pointer coefficient buffer */ 
+  q31_t x0, c0;                                  /* Hold two packed q15 values (unrolled loop) or one sign-extended value (tail loop) */ 
+  q31_t sum0;                                    /* Accumulator (2.30 format) */ 
+  uint32_t numTaps = S->numTaps;                 /* Number of taps */ 
+  uint32_t i, blkCnt, tapCnt, outBlockSize = blockSize / S->M;  /* Loop counters; outBlockSize = number of outputs (S->M must be non-zero) */ 
+ 
+ 
+  /* S->pState buffer contains the (numTaps - 1) most recent samples of the previous frame */ 
+  /* pStateCurnt points to the location where the new input data should be written */ 
+  pStateCurnt = S->pState + (numTaps - 1u); 
+ 
+  /* Total number of output samples to be computed */ 
+  blkCnt = outBlockSize; 
+ 
+  while(blkCnt > 0u) 
+  { 
+    /* Copy decimation factor (S->M) new input samples into the state buffer */ 
+    i = S->M; 
+ 
+    do 
+    { 
+      *pStateCurnt++ = *pSrc++; 
+ 
+    } while(--i); 
+ 
+    /* Set sum to zero */ 
+    sum0 = 0; 
+ 
+    /* Initialize state pointer to the oldest sample of the current window */ 
+    px = pState; 
+ 
+    /* Initialize coeff pointer (coefficients are stored time reversed) */ 
+    pb = pCoeffs; 
+ 
+    /* Loop unrolling.  Process 4 taps at a time. */ 
+    tapCnt = numTaps >> 2; 
+ 
+    /* Loop over the number of taps.  Unroll by a factor of 4.  
+     ** Repeat until fewer than 4 taps remain; the loop below handles those. */ 
+    while(tapCnt > 0u) 
+    { 
+      /* Read the b[numTaps-1] and b[numTaps-2] coefficients as one packed word */ 
+      c0 = *__SIMD32(pb)++; 
+ 
+      /* Read the x[n-numTaps+1] and x[n-numTaps+2] samples as one packed word */ 
+      x0 = *__SIMD32(px)++; 
+ 
+      /* Perform the dual multiply-accumulate */ 
+      sum0 = __SMLAD(x0, c0, sum0); 
+ 
+      /* Read the b[numTaps-3] and b[numTaps-4] coefficients as one packed word */ 
+      c0 = *__SIMD32(pb)++; 
+ 
+      /* Read the x[n-numTaps+3] and x[n-numTaps+4] samples as one packed word */ 
+      x0 = *__SIMD32(px)++; 
+ 
+      /* Perform the dual multiply-accumulate */ 
+      sum0 = __SMLAD(x0, c0, sum0); 
+ 
+      /* Decrement the loop counter */ 
+      tapCnt--; 
+    } 
+ 
+    /* If the filter length is not a multiple of 4, compute the remaining 1 to 3 filter taps */ 
+    tapCnt = numTaps % 0x4u; 
+ 
+    while(tapCnt > 0u) 
+    { 
+      /* Read one coefficient (sign-extended to 32 bits) */ 
+      c0 = *pb++; 
+ 
+      /* Fetch one state sample (sign-extended to 32 bits) */ 
+      x0 = *px++; 
+ 
+      /* Single MAC: clear x0's sign-extension halfword so SMLAD's high-halfword product is 0 (otherwise two negatives add a spurious +1) */ 
+      sum0 = __SMLAD(x0 & 0x0000FFFF, c0, sum0); 
+ 
+      /* Decrement the loop counter */ 
+      tapCnt--; 
+    } 
+ 
+    /* Advance the state pointer by the decimation factor  
+     * so the next output is computed M input samples later */ 
+    pState = pState + S->M; 
+ 
+    /* Store filter output: the accumulator is in 2.30 format, so shift down by 15 */ 
+    /* to get 2.15 and saturate to 16 bits to yield the documented 1.15 result */ 
+    *pDst++ = (q15_t) (__SSAT((sum0 >> 15), 16)); 
+ 
+    /* Decrement the loop counter */ 
+    blkCnt--; 
+  } 
+ 
+  /* Processing is complete.  
+   ** Now copy the last numTaps - 1 samples to the start of the state buffer.  
+   ** This prepares the state buffer for the next function call. */ 
+ 
+  /* Points to the start of the state buffer */ 
+  pStateCurnt = S->pState; 
+ 
+  i = (numTaps - 1u) >> 2u; 
+ 
+  /* Copy four samples per iteration as two packed 32-bit words */ 
+  while(i > 0u) 
+  { 
+    *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++; 
+    *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++; 
+ 
+    /* Decrement the loop counter */ 
+    i--; 
+  } 
+ 
+  i = (numTaps - 1u) % 0x04u; 
+ 
+  /* Copy the remaining 0 to 3 samples */ 
+  while(i > 0u) 
+  { 
+    *pStateCurnt++ = *pState++; 
+ 
+    /* Decrement the loop counter */ 
+    i--; 
+  } 
+} 
+ 
+/**  
+ * @} end of FIR_decimate group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FilteringFunctions/arm_fir_decimate_fast_q31.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,217 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:	    arm_fir_decimate_fast_q31.c  
+*  
+* Description:	Fast Q31 FIR Decimator.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupFilters  
+ */ 
+ 
+/**  
+ * @addtogroup FIR_decimate  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief Processing function for the Q31 FIR decimator (fast variant).  
+ * @param[in] *S points to an instance of the Q31 FIR decimator structure.  
+ * @param[in] *pSrc points to the block of input data.  
+ * @param[out] *pDst points to the block of output data  
+ * @param[in] blockSize number of input samples to process per call.  
+ * @return none  
+ *  
+ * <b>Scaling and Overflow Behavior:</b>  
+ *  
+ * \par  
+ * This function is optimized for speed at the expense of fixed-point precision and overflow protection.  
+ * The result of each 1.31 x 1.31 multiplication is truncated to 2.30 format.  
+ * These intermediate results are added to a 2.30 accumulator.  
+ * Finally, the accumulator is shifted left by one bit and truncated to a 1.31 result; no saturation is applied at this step.  
+ * The fast version has the same overflow behavior as the standard version and provides less precision since it discards the low 32 bits of each multiplication result.  
+ * In order to avoid overflows completely the input signal must be scaled down by log2(numTaps) bits (where log2 is read as log to the base 2).  
+ *  
+ * \par  
+ * Refer to the function <code>arm_fir_decimate_q31()</code> for a slower implementation of this function which uses a 64-bit accumulator to provide higher precision.  
+ * Both the slow and the fast versions use the same instance structure.  
+ * Use the function <code>arm_fir_decimate_init_q31()</code> to initialize the filter structure.  
+ */ 
+ 
+void arm_fir_decimate_fast_q31( 
+  arm_fir_decimate_instance_q31 * S, 
+  q31_t * pSrc, 
+  q31_t * pDst, 
+  uint32_t blockSize) 
+{ 
+  q31_t *pState = S->pState;                     /* Start of the filter window for the current output sample */ 
+  q31_t *pCoeffs = S->pCoeffs;                   /* Coefficient pointer */ 
+  q31_t *pStateCurnt;                            /* Write position for incoming samples in the state buffer */ 
+  q31_t x0, c0;                                  /* Temporary variables to hold state and coefficient values */ 
+  q31_t *px;                                     /* Temporary pointers for state buffer */ 
+  q31_t *pb;                                     /* Temporary pointers for coefficient buffer */ 
+  q63_t sum0;                                    /* Accumulator; only ever holds a value narrowed to q31_t by each MAC */ 
+  uint32_t numTaps = S->numTaps;                 /* Number of taps */ 
+  uint32_t i, tapCnt, blkCnt, outBlockSize = blockSize / S->M;  /* Loop counters; outBlockSize = number of outputs (S->M must be non-zero) */ 
+ 
+ 
+  /* S->pState buffer contains the (numTaps - 1) most recent samples of the previous frame */ 
+  /* pStateCurnt points to the location where the new input data should be written */ 
+  pStateCurnt = S->pState + (numTaps - 1u); 
+ 
+  /* Total number of output samples to be computed */ 
+  blkCnt = outBlockSize; 
+ 
+  while(blkCnt > 0u) 
+  { 
+    /* Copy decimation factor (S->M) new input samples into the state buffer */ 
+    i = S->M; 
+ 
+    do 
+    { 
+      *pStateCurnt++ = *pSrc++; 
+ 
+    } while(--i); 
+ 
+    /* Set accumulator to zero */ 
+    sum0 = 0; 
+ 
+    /* Initialize state pointer to the oldest sample of the current window */ 
+    px = pState; 
+ 
+    /* Initialize coeff pointer (coefficients are stored time reversed) */ 
+    pb = pCoeffs; 
+ 
+    /* Loop unrolling.  Process 4 taps at a time. */ 
+    tapCnt = numTaps >> 2; 
+ 
+    /* Loop over the number of taps.  Unroll by a factor of 4.  
+     ** Repeat until fewer than 4 taps remain; the loop below handles those. */ 
+    while(tapCnt > 0u) 
+    { 
+      /* Read the b[numTaps-1] coefficient */ 
+      c0 = *(pb++); 
+ 
+      /* Read x[n-numTaps+1] sample */ 
+      x0 = *(px++); 
+ 
+      /* Multiply-accumulate, keeping only the high 32 bits of the 64-bit result */ 
+      sum0 = (q31_t) ((((q63_t) x0 * c0) + (sum0 << 32)) >> 32); 
+ 
+      /* Read the b[numTaps-2] coefficient */ 
+      c0 = *(pb++); 
+ 
+      /* Read x[n-numTaps+2] sample */ 
+      x0 = *(px++); 
+ 
+      /* Multiply-accumulate, keeping only the high 32 bits of the 64-bit result */ 
+      sum0 = (q31_t) ((((q63_t) x0 * c0) + (sum0 << 32)) >> 32); 
+ 
+      /* Read the b[numTaps-3] coefficient */ 
+      c0 = *(pb++); 
+ 
+      /* Read x[n-numTaps+3] sample */ 
+      x0 = *(px++); 
+ 
+      /* Multiply-accumulate, keeping only the high 32 bits of the 64-bit result */ 
+      sum0 = (q31_t) ((((q63_t) x0 * c0) + (sum0 << 32)) >> 32); 
+ 
+      /* Read the b[numTaps-4] coefficient */ 
+      c0 = *(pb++); 
+ 
+      /* Read x[n-numTaps+4] sample */ 
+      x0 = *(px++); 
+ 
+      /* Multiply-accumulate, keeping only the high 32 bits of the 64-bit result */ 
+      sum0 = (q31_t) ((((q63_t) x0 * c0) + (sum0 << 32)) >> 32); 
+ 
+      /* Decrement the loop counter */ 
+      tapCnt--; 
+    } 
+ 
+    /* If the filter length is not a multiple of 4, compute the remaining 1 to 3 filter taps */ 
+    tapCnt = numTaps % 0x4u; 
+ 
+    while(tapCnt > 0u) 
+    { 
+      /* Read the next coefficient */ 
+      c0 = *(pb++); 
+ 
+      /* Fetch the matching state sample */ 
+      x0 = *(px++); 
+ 
+      /* Multiply-accumulate, keeping only the high 32 bits of the 64-bit result */ 
+      sum0 = (q31_t) ((((q63_t) x0 * c0) + (sum0 << 32)) >> 32); 
+ 
+      /* Decrement the loop counter */ 
+      tapCnt--; 
+    } 
+ 
+    /* Advance the state pointer by the decimation factor  
+     * so the next output is computed M input samples later */ 
+    pState = pState + S->M; 
+ 
+    /* Shift the accumulator left by one and store it; the cast truncates, no saturation is applied here */ 
+    *pDst++ = (q31_t) (sum0 << 1); 
+ 
+    /* Decrement the loop counter */ 
+    blkCnt--; 
+  } 
+ 
+  /* Processing is complete.  
+   ** Now copy the last numTaps - 1 samples to the start of the state buffer.  
+   ** This prepares the state buffer for the next function call. */ 
+ 
+  /* Points to the start of the state buffer */ 
+  pStateCurnt = S->pState; 
+ 
+  i = (numTaps - 1u) >> 2u; 
+ 
+  /* Copy four samples per iteration */ 
+  while(i > 0u) 
+  { 
+    *pStateCurnt++ = *pState++; 
+    *pStateCurnt++ = *pState++; 
+    *pStateCurnt++ = *pState++; 
+    *pStateCurnt++ = *pState++; 
+ 
+    /* Decrement the loop counter */ 
+    i--; 
+  } 
+ 
+  i = (numTaps - 1u) % 0x04u; 
+ 
+  /* Copy the remaining 0 to 3 samples */ 
+  while(i > 0u) 
+  { 
+    *pStateCurnt++ = *pState++; 
+ 
+    /* Decrement the loop counter */ 
+    i--; 
+  } 
+} 
+ 
+/**  
+ * @} end of FIR_decimate group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FilteringFunctions/arm_fir_decimate_init_f32.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,106 @@
+/*-----------------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:        arm_fir_decimate_init_f32.c  
+*  
+* Description:  Floating-point FIR Decimator initialization function.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* ---------------------------------------------------------------------------*/ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupFilters  
+ */ 
+ 
+/**  
+ * @addtogroup FIR_decimate  
+ * @{  
+ */ 
+ 
+/**
+ * @brief  Initialization function for the floating-point FIR decimator.
+ * @param[in,out] *S points to an instance of the floating-point FIR decimator structure.
+ * @param[in] numTaps  number of coefficients in the filter.
+ * @param[in] M  decimation factor; must be non-zero.
+ * @param[in] *pCoeffs points to the filter coefficients.
+ * @param[in] *pState points to the state buffer.
+ * @param[in] blockSize number of input samples to process per call.
+ * @return    The function returns ARM_MATH_SUCCESS if initialization was successful or ARM_MATH_LENGTH_ERROR if
+ * <code>M</code> is zero or <code>blockSize</code> is not a multiple of <code>M</code>.
+ *
+ * <b>Description:</b>
+ * \par
+ * <code>pCoeffs</code> points to the array of filter coefficients stored in time reversed order:
+ * <pre>
+ *    {b[numTaps-1], b[numTaps-2], b[numTaps-3], ..., b[1], b[0]}
+ * </pre>
+ * \par
+ * <code>pState</code> points to the array of state variables.
+ * <code>pState</code> is of length <code>numTaps+blockSize-1</code> words where <code>blockSize</code> is the number of input samples passed to <code>arm_fir_decimate_f32()</code>.
+ * <code>M</code> is the decimation factor.  On error neither <code>S</code> nor <code>pState</code> is modified.
+ */
+
+arm_status arm_fir_decimate_init_f32(
+  arm_fir_decimate_instance_f32 * S,
+  uint16_t numTaps,
+  uint8_t M,
+  float32_t * pCoeffs,
+  float32_t * pState,
+  uint32_t blockSize)
+{
+  arm_status status;
+
+  /* Reject M == 0 first (it would divide by zero); blockSize must then be a multiple of M */
+  if((M == 0u) || ((blockSize % M) != 0u))
+  {
+    /* Set status as ARM_MATH_LENGTH_ERROR */
+    status = ARM_MATH_LENGTH_ERROR;
+  }
+  else
+  {
+    /* Assign filter taps */
+    S->numTaps = numTaps;
+
+    /* Assign coefficient pointer */
+    S->pCoeffs = pCoeffs;
+
+    /* Clear the state buffer; its size is always (blockSize + numTaps - 1) samples */
+    memset(pState, 0, (numTaps + (blockSize - 1u)) * sizeof(float32_t));
+
+    /* Assign state pointer */
+    S->pState = pState;
+
+    /* Assign Decimation Factor */
+    S->M = M;
+
+    status = ARM_MATH_SUCCESS;
+  }
+
+  return (status);
+
+}
+ 
+/**  
+ * @} end of FIR_decimate group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FilteringFunctions/arm_fir_decimate_init_q15.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,108 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:        arm_fir_decimate_init_q15.c  
+*  
+* Description:  Initialization function for the Q15 FIR Decimator.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* ------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupFilters  
+ */ 
+ 
+/**  
+ * @addtogroup FIR_decimate  
+ * @{  
+ */ 
+ 
+/**
+ * @brief  Initialization function for the Q15 FIR decimator.
+ * @param[in,out] *S points to an instance of the Q15 FIR decimator structure.
+ * @param[in] numTaps  number of coefficients in the filter.
+ * @param[in] M  decimation factor; must be non-zero.
+ * @param[in] *pCoeffs points to the filter coefficients.
+ * @param[in] *pState points to the state buffer.
+ * @param[in] blockSize number of input samples to process per call.
+ * @return    The function returns ARM_MATH_SUCCESS if initialization was successful or ARM_MATH_LENGTH_ERROR if
+ * <code>M</code> is zero or <code>blockSize</code> is not a multiple of <code>M</code>.
+ *
+ * <b>Description:</b>
+ * \par
+ * <code>pCoeffs</code> points to the array of filter coefficients stored in time reversed order:
+ * <pre>
+ *    {b[numTaps-1], b[numTaps-2], b[numTaps-3], ..., b[1], b[0]}
+ * </pre>
+ * \par
+ * <code>pState</code> points to the array of state variables.
+ * <code>pState</code> is of length <code>numTaps+blockSize-1</code> samples where <code>blockSize</code> is the number of input samples
+ * to the call <code>arm_fir_decimate_q15()</code>.
+ * <code>M</code> is the decimation factor.  On error neither <code>S</code> nor <code>pState</code> is modified.
+ */
+
+arm_status arm_fir_decimate_init_q15(
+  arm_fir_decimate_instance_q15 * S,
+  uint16_t numTaps,
+  uint8_t M,
+  q15_t * pCoeffs,
+  q15_t * pState,
+  uint32_t blockSize)
+{
+
+  arm_status status;
+
+  /* Reject M == 0 first (it would divide by zero); blockSize must then be a multiple of M */
+  if((M == 0u) || ((blockSize % M) != 0u))
+  {
+    /* Set status as ARM_MATH_LENGTH_ERROR */
+    status = ARM_MATH_LENGTH_ERROR;
+  }
+  else
+  {
+    /* Assign filter taps */
+    S->numTaps = numTaps;
+
+    /* Assign coefficient pointer */
+    S->pCoeffs = pCoeffs;
+
+    /* Clear the state buffer.  The size of buffer is always (blockSize + numTaps - 1) samples */
+    memset(pState, 0, (numTaps + (blockSize - 1u)) * sizeof(q15_t));
+
+    /* Assign state pointer */
+    S->pState = pState;
+
+    /* Assign Decimation factor */
+    S->M = M;
+
+    status = ARM_MATH_SUCCESS;
+  }
+
+  return (status);
+
+}
+ 
+/**  
+ * @} end of FIR_decimate group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FilteringFunctions/arm_fir_decimate_init_q31.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,106 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:        arm_fir_decimate_init_q31.c  
+*  
+* Description:  Initialization function for Q31 FIR Decimation filter.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* ------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupFilters  
+ */ 
+ 
+/**  
+ * @addtogroup FIR_decimate  
+ * @{  
+ */ 
+ 
+/**
+ * @brief  Initialization function for the Q31 FIR decimator.
+ * @param[in,out] *S points to an instance of the Q31 FIR decimator structure.
+ * @param[in] numTaps  number of coefficients in the filter.
+ * @param[in] M  decimation factor; must be non-zero.
+ * @param[in] *pCoeffs points to the filter coefficients.
+ * @param[in] *pState points to the state buffer.
+ * @param[in] blockSize number of input samples to process per call.
+ * @return    The function returns ARM_MATH_SUCCESS if initialization was successful or ARM_MATH_LENGTH_ERROR if
+ * <code>M</code> is zero or <code>blockSize</code> is not a multiple of <code>M</code>.
+ *
+ * <b>Description:</b>
+ * \par
+ * <code>pCoeffs</code> points to the array of filter coefficients stored in time reversed order:
+ * <pre>
+ *    {b[numTaps-1], b[numTaps-2], b[numTaps-3], ..., b[1], b[0]}
+ * </pre>
+ * \par
+ * <code>pState</code> points to the array of state variables.
+ * <code>pState</code> is of length <code>numTaps+blockSize-1</code> words where <code>blockSize</code> is the number of input samples passed to <code>arm_fir_decimate_q31()</code>.
+ * <code>M</code> is the decimation factor.  On error neither <code>S</code> nor <code>pState</code> is modified.
+ */
+
+arm_status arm_fir_decimate_init_q31(
+  arm_fir_decimate_instance_q31 * S,
+  uint16_t numTaps,
+  uint8_t M,
+  q31_t * pCoeffs,
+  q31_t * pState,
+  uint32_t blockSize)
+{
+  arm_status status;
+
+  /* Reject M == 0 first (it would divide by zero); blockSize must then be a multiple of M */
+  if((M == 0u) || ((blockSize % M) != 0u))
+  {
+    /* Set status as ARM_MATH_LENGTH_ERROR */
+    status = ARM_MATH_LENGTH_ERROR;
+  }
+  else
+  {
+    /* Assign filter taps */
+    S->numTaps = numTaps;
+
+    /* Assign coefficient pointer */
+    S->pCoeffs = pCoeffs;
+
+    /* Clear the state buffer.  The size is always (blockSize + numTaps - 1) samples */
+    memset(pState, 0, (numTaps + (blockSize - 1u)) * sizeof(q31_t));
+
+    /* Assign state pointer */
+    S->pState = pState;
+
+    /* Assign Decimation factor */
+    S->M = M;
+
+    status = ARM_MATH_SUCCESS;
+  }
+
+  return (status);
+
+}
+ 
+/**  
+ * @} end of FIR_decimate group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FilteringFunctions/arm_fir_decimate_q15.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,198 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:	    arm_fir_decimate_q15.c  
+*  
+* Description:	Q15 FIR Decimator.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupFilters  
+ */ 
+ 
+/**  
+ * @addtogroup FIR_decimate  
+ * @{  
+ */ 
+ 
+/**
+ * @brief Processing function for the Q15 FIR decimator.
+ * @param[in] *S points to an instance of the Q15 FIR decimator structure.
+ * @param[in] *pSrc points to the block of input data.
+ * @param[out] *pDst points to the block of output data; <code>blockSize / M</code> samples are written.
+ * @param[in] blockSize number of input samples to process per call.
+ * @return none.
+ *
+ * <b>Scaling and Overflow Behavior:</b>
+ * \par
+ * The function is implemented using a 64-bit internal accumulator.
+ * Both coefficients and state variables are represented in 1.15 format and multiplications yield a 2.30 result.
+ * The 2.30 intermediate results are accumulated in a 64-bit accumulator in 34.30 format.
+ * There is no risk of internal overflow with this approach and the full precision of intermediate multiplications is preserved.
+ * After all additions have been performed, the accumulator is truncated to 34.15 format by discarding low 15 bits.
+ * Lastly, the accumulator is saturated to yield a result in 1.15 format.
+ *
+ * \par
+ * Refer to the function <code>arm_fir_decimate_fast_q15()</code> for a faster but less precise implementation of this function.
+ */
+
+void arm_fir_decimate_q15(
+  const arm_fir_decimate_instance_q15 * S,
+  q15_t * pSrc,
+  q15_t * pDst,
+  uint32_t blockSize)
+{
+  q15_t *pState = S->pState;                     /* State pointer */
+  q15_t *pCoeffs = S->pCoeffs;                   /* Coefficient pointer */
+  q15_t *pStateCurnt;                            /* Points to the current sample of the state */
+  q15_t *px;                                     /* Temporary pointer for state buffer */
+  q15_t *pb;                                     /* Temporary pointer coefficient buffer */
+  q31_t x0, c0;                                  /* Temporary variables to hold state and coefficient values */
+  q63_t sum0;                                    /* Accumulator */
+  uint32_t numTaps = S->numTaps;                 /* Number of taps */
+  uint32_t i, blkCnt, tapCnt, outBlockSize = blockSize / S->M;  /* Loop counters; S->M must be non-zero */
+
+
+  /* S->pState buffer contains previous frame (numTaps - 1) samples */
+  /* pStateCurnt points to the location where the new input data should be written */
+  pStateCurnt = S->pState + (numTaps - 1u);
+
+  /* Total number of output samples to be computed */
+  blkCnt = outBlockSize;
+
+  while(blkCnt > 0u)
+  {
+    /* Copy M (decimation factor) new input samples into the state buffer */
+    i = S->M;
+
+    do
+    {
+      *pStateCurnt++ = *pSrc++;
+
+    } while(--i);
+
+    /* Set sum to zero */
+    sum0 = 0;
+
+    /* Initialize state pointer */
+    px = pState;
+
+    /* Initialize coeff pointer */
+    pb = pCoeffs;
+
+    /* Loop unrolling.  Process 4 taps at a time. */
+    tapCnt = numTaps >> 2;
+
+    /* Loop over the number of taps.  Unroll by a factor of 4.
+     ** Each iteration consumes two packed pairs of coefficients and state samples. */
+    while(tapCnt > 0u)
+    {
+      /* Read the first pair of coefficients as one packed 32-bit word */
+      c0 = *__SIMD32(pb)++;
+
+      /* Read the matching pair of state samples */
+      x0 = *__SIMD32(px)++;
+
+      /* Dual 16x16 multiply-accumulate into the 64-bit accumulator */
+      sum0 = __SMLALD(x0, c0, sum0);
+
+      /* Read the second pair of coefficients */
+      c0 = *__SIMD32(pb)++;
+
+      /* Read the matching pair of state samples */
+      x0 = *__SIMD32(px)++;
+
+      /* Dual 16x16 multiply-accumulate into the 64-bit accumulator */
+      sum0 = __SMLALD(x0, c0, sum0);
+
+      /* Decrement the loop counter */
+      tapCnt--;
+    }
+
+    /* If the filter length is not a multiple of 4, compute the remaining filter taps */
+    tapCnt = numTaps % 0x4u;
+
+    while(tapCnt > 0u)
+    {
+      /* Read one coefficient (sign-extended to 32 bits) */
+      c0 = *pb++;
+
+      /* Fetch 1 state variable (sign-extended to 32 bits) */
+      x0 = *px++;
+
+      /* Plain MAC: __SMLALD would also multiply the sign-extension halfwords (+1 error when both are negative) */
+      sum0 += (q63_t) x0 * c0;
+
+      /* Decrement the loop counter */
+      tapCnt--;
+    }
+
+    /* Advance the state pointer by the decimation factor
+     * to process the next group of decimation factor number samples */
+    pState = pState + S->M;
+
+    /* The accumulator holds the result in 34.30 format; */
+    /* shift right by 15 to reach 34.15, then saturate to 1.15 */
+    *pDst++ = (q15_t) (__SSAT((sum0 >> 15), 16));
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* Processing is complete.
+   ** Now copy the last numTaps - 1 samples to the start of the state buffer.
+   ** This prepares the state buffer for the next function call. */
+
+  /* Points to the start of the state buffer */
+  pStateCurnt = S->pState;
+
+  i = (numTaps - 1u) >> 2u;
+
+  /* Copy four samples per iteration as two packed 32-bit words */
+  while(i > 0u)
+  {
+    *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++;
+    *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++;
+
+    /* Decrement the loop counter */
+    i--;
+  }
+
+  i = (numTaps - 1u) % 0x04u;
+
+  /* Copy the remaining samples one at a time */
+  while(i > 0u)
+  {
+    *pStateCurnt++ = *pState++;
+
+    /* Decrement the loop counter */
+    i--;
+  }
+}
+ 
+/**  
+ * @} end of FIR_decimate group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FilteringFunctions/arm_fir_decimate_q31.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,216 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:	    arm_fir_decimate_q31.c  
+*  
+* Description:	Q31 FIR Decimator.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupFilters  
+ */ 
+ 
+/**  
+ * @addtogroup FIR_decimate  
+ * @{  
+ */ 
+ 
+/**
+ * @brief Processing function for the Q31 FIR decimator.
+ * @param[in] *S points to an instance of the Q31 FIR decimator structure.
+ * @param[in] *pSrc points to the block of input data.
+ * @param[out] *pDst points to the block of output data; <code>blockSize / M</code> samples are written.
+ * @param[in] blockSize number of input samples to process per call.
+ * @return none
+ *
+ * <b>Scaling and Overflow Behavior:</b>
+ * \par
+ * The function is implemented using an internal 64-bit accumulator.
+ * The accumulator has a 2.62 format and maintains full precision of the intermediate multiplication results but provides only a single guard bit.
+ * Thus, if the accumulator result overflows it wraps around rather than clip.
+ * In order to avoid overflows completely the input signal must be scaled down by log2(numTaps) bits (where log2 is read as log to the base 2).
+ * After all multiply-accumulates are performed, the 2.62 accumulator is shifted right by 31 bits and truncated to 1.31 format; no saturation is applied.
+ *
+ * \par
+ * Refer to the function <code>arm_fir_decimate_fast_q31()</code> for a faster but less precise implementation of this function.
+ */
+
+void arm_fir_decimate_q31(
+  const arm_fir_decimate_instance_q31 * S,
+  q31_t * pSrc,
+  q31_t * pDst,
+  uint32_t blockSize)
+{
+  q31_t *pState = S->pState;                     /* State pointer */
+  q31_t *pCoeffs = S->pCoeffs;                   /* Coefficient pointer */
+  q31_t *pStateCurnt;                            /* Points to the current sample of the state */
+  q31_t x0, c0;                                  /* Temporary variables to hold state and coefficient values */
+  q31_t *px;                                     /* Temporary pointers for state buffer */
+  q31_t *pb;                                     /* Temporary pointers for coefficient buffer */
+  q63_t sum0;                                    /* Accumulator */
+  uint32_t numTaps = S->numTaps;                 /* Number of taps */
+  uint32_t i, tapCnt, blkCnt, outBlockSize = blockSize / S->M;  /* Loop counters; S->M must be non-zero */
+
+
+  /* S->pState buffer contains previous frame (numTaps - 1) samples */
+  /* pStateCurnt points to the location where the new input data should be written */
+  pStateCurnt = S->pState + (numTaps - 1u);
+
+  /* Total number of output samples to be computed */
+  blkCnt = outBlockSize;
+
+  while(blkCnt > 0u)
+  {
+    /* Copy M (decimation factor) new input samples into the state buffer */
+    i = S->M;
+
+    do
+    {
+      *pStateCurnt++ = *pSrc++;
+
+    } while(--i);
+
+    /* Set accumulator to zero */
+    sum0 = 0;
+
+    /* Initialize state pointer */
+    px = pState;
+
+    /* Initialize coeff pointer */
+    pb = pCoeffs;
+
+    /* Loop unrolling.  Process 4 taps at a time. */
+    tapCnt = numTaps >> 2;
+
+    /* Loop over the number of taps.  Unroll by a factor of 4.
+     ** Each iteration performs four coefficient/sample multiply-accumulates. */
+    while(tapCnt > 0u)
+    {
+      /* Read the first coefficient of this group */
+      c0 = *(pb++);
+
+      /* Read the matching state sample */
+      x0 = *(px++);
+
+      /* Perform the multiply-accumulate in 64 bits */
+      sum0 += (q63_t) x0 *c0;
+
+      /* Read the second coefficient of this group */
+      c0 = *(pb++);
+
+      /* Read the matching state sample */
+      x0 = *(px++);
+
+      /* Perform the multiply-accumulate in 64 bits */
+      sum0 += (q63_t) x0 *c0;
+
+      /* Read the third coefficient of this group */
+      c0 = *(pb++);
+
+      /* Read the matching state sample */
+      x0 = *(px++);
+
+      /* Perform the multiply-accumulate in 64 bits */
+      sum0 += (q63_t) x0 *c0;
+
+      /* Read the fourth coefficient of this group */
+      c0 = *(pb++);
+
+      /* Read the matching state sample */
+      x0 = *(px++);
+
+      /* Perform the multiply-accumulate in 64 bits */
+      sum0 += (q63_t) x0 *c0;
+
+      /* Decrement the loop counter */
+      tapCnt--;
+    }
+
+    /* If the filter length is not a multiple of 4, compute the remaining filter taps */
+    tapCnt = numTaps % 0x4u;
+
+    while(tapCnt > 0u)
+    {
+      /* Read coefficients */
+      c0 = *(pb++);
+
+      /* Fetch 1 state variable */
+      x0 = *(px++);
+
+      /* Perform the multiply-accumulate in 64 bits */
+      sum0 += (q63_t) x0 *c0;
+
+      /* Decrement the loop counter */
+      tapCnt--;
+    }
+
+    /* Advance the state pointer by the decimation factor
+     * to process the next group of decimation factor number samples */
+    pState = pState + S->M;
+
+    /* Shift the 2.62 accumulator right by 31 and truncate to q31_t; no saturation is applied here */
+    *pDst++ = (q31_t) (sum0 >> 31);
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* Processing is complete.
+   ** Now copy the last numTaps - 1 samples to the start of the state buffer.
+   ** This prepares the state buffer for the next function call. */
+
+  /* Points to the start of the state buffer */
+  pStateCurnt = S->pState;
+
+  i = (numTaps - 1u) >> 2u;
+
+  /* Copy four samples per iteration */
+  while(i > 0u)
+  {
+    *pStateCurnt++ = *pState++;
+    *pStateCurnt++ = *pState++;
+    *pStateCurnt++ = *pState++;
+    *pStateCurnt++ = *pState++;
+
+    /* Decrement the loop counter */
+    i--;
+  }
+
+  i = (numTaps - 1u) % 0x04u;
+
+  /* Copy the remaining samples one at a time */
+  while(i > 0u)
+  {
+    *pStateCurnt++ = *pState++;
+
+    /* Decrement the loop counter */
+    i--;
+  }
+}
+ 
+/**  
+ * @} end of FIR_decimate group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FilteringFunctions/arm_fir_f32.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,355 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:	    arm_fir_f32.c  
+*  
+* Description:	Floating-point FIR filter processing function.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.5  2010/04/26   
+* 	 incorporated review comments and updated with latest CMSIS layer  
+*  
+* Version 0.0.3  2010/03/10   
+*    Initial version  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupFilters  
+ */ 
+ 
+/**  
+ * @defgroup FIR Finite Impulse Response (FIR) Filters  
+ *  
+ * This set of functions implements Finite Impulse Response (FIR) filters  
+ * for Q7, Q15, Q31, and floating-point data types.  Fast versions of Q15 and Q31 are also provided.  
+ * The functions operate on blocks of input and output data and each call to the function processes  
+ * <code>blockSize</code> samples through the filter.  <code>pSrc</code> and  
+ * <code>pDst</code> points to input and output arrays containing <code>blockSize</code> values.  
+ *  
+ * \par Algorithm:  
+ * The FIR filter algorithm is based upon a sequence of multiply-accumulate (MAC) operations.  
+ * Each filter coefficient <code>b[n]</code> is multiplied by a state variable which equals a previous input sample <code>x[n]</code>.  
+ * <pre>  
+ *    y[n] = b[0] * x[n] + b[1] * x[n-1] + b[2] * x[n-2] + ...+ b[numTaps-1] * x[n-numTaps+1]  
+ * </pre>  
+ * \par  
+ * \image html FIR.gif "Finite Impulse Response filter"  
+ * \par  
+ * <code>pCoeffs</code> points to a coefficient array of size <code>numTaps</code>.  
+ * Coefficients are stored in time reversed order.  
+ * \par  
+ * <pre>  
+ *    {b[numTaps-1], b[numTaps-2], b[numTaps-3], ..., b[1], b[0]}
+ * </pre>  
+ * \par  
+ * <code>pState</code> points to a state array of size <code>numTaps + blockSize - 1</code>.  
+ * Samples in the state buffer are stored in the following order.  
+ * \par  
+ * <pre>  
+ *    {x[n-numTaps+1], x[n-numTaps], x[n-numTaps-1], x[n-numTaps-2]....x[0], x[1], ..., x[blockSize-1]}  
+ * </pre>  
+ * \par  
+ * Note that the length of the state buffer exceeds the length of the coefficient array by <code>blockSize-1</code>.  
+ * The increased state buffer length allows circular addressing, which is traditionally used in the FIR filters,  
+ * to be avoided and yields a significant speed improvement.  
+ * The state variables are updated after each block of data is processed; the coefficients are untouched.  
+ * \par Instance Structure  
+ * The coefficients and state variables for a filter are stored together in an instance data structure.  
+ * A separate instance structure must be defined for each filter.  
+ * Coefficient arrays may be shared among several instances while state variable arrays cannot be shared.  
+ * There are separate instance structure declarations for each of the 4 supported data types.  
+ *  
+ * \par Initialization Functions  
+ * There is also an associated initialization function for each data type.  
+ * The initialization function performs the following operations:  
+ * - Sets the values of the internal structure fields.  
+ * - Zeros out the values in the state buffer.  
+ *  
+ * \par  
+ * Use of the initialization function is optional.  
+ * However, if the initialization function is used, then the instance structure cannot be placed into a const data section.  
+ * To place an instance structure into a const data section, the instance structure must be manually initialized.  
+ * Set the values in the state buffer to zeros before static initialization.  
+ * The code below statically initializes each of the 4 different data type filter instance structures  
+ * <pre>  
+ *arm_fir_instance_f32 S = {numTaps, pState, pCoeffs};  
+ *arm_fir_instance_q31 S = {numTaps, pState, pCoeffs};  
+ *arm_fir_instance_q15 S = {numTaps, pState, pCoeffs};  
+ *arm_fir_instance_q7 S =  {numTaps, pState, pCoeffs};  
+ * </pre>  
+ *  
+ * where <code>numTaps</code> is the number of filter coefficients in the filter; <code>pState</code> is the address of the state buffer;  
+ * <code>pCoeffs</code> is the address of the coefficient buffer.  
+ *  
+ * \par Fixed-Point Behavior  
+ * Care must be taken when using the fixed-point versions of the FIR filter functions.  
+ * In particular, the overflow and saturation behavior of the accumulator used in each function must be considered.  
+ * Refer to the function specific documentation below for usage guidelines.  
+ */ 
+ 
+/**  
+ * @addtogroup FIR  
+ * @{  
+ */ 
+ 
+/**
+ * @brief Processing function for the floating-point FIR filter.
+ * @param[in]  *S points to an instance of the floating-point FIR filter structure.
+ * @param[in]  *pSrc points to the block of input data.
+ * @param[out] *pDst points to the block of output data.
+ * @param[in]  blockSize number of samples to process per call.
+ * @return     none.
+ *
+ */
+
+void arm_fir_f32(
+  const arm_fir_instance_f32 * S,
+  float32_t * pSrc,
+  float32_t * pDst,
+  uint32_t blockSize)
+{
+  float32_t *pState = S->pState;                 /* State pointer */
+  float32_t *pCoeffs = S->pCoeffs;               /* Coefficient pointer */
+  float32_t *pStateCurnt;                        /* Points to the current sample of the state */
+  float32_t *px, *pb;                            /* Temporary pointers for state and coefficient buffers */
+  float32_t acc0, acc1, acc2, acc3;              /* Accumulators */
+  float32_t x0, x1, x2, x3, c0;                  /* Temporary variables to hold state and coefficient values */
+  uint32_t numTaps = S->numTaps;                 /* Number of filter coefficients in the filter */
+  uint32_t i, tapCnt, blkCnt;                    /* Loop counters */
+
+  /* S->pState points to state array which contains previous frame (numTaps - 1) samples */
+  /* pStateCurnt points to the location where the new input data should be written */
+  pStateCurnt = &(S->pState[(numTaps - 1u)]);
+
+  /* Apply loop unrolling and compute 4 output values simultaneously.
+   * The variables acc0 ... acc3 hold output values that are being computed:
+   *
+   *    acc0 =  b[numTaps-1] * x[n-numTaps-1] + b[numTaps-2] * x[n-numTaps-2] + b[numTaps-3] * x[n-numTaps-3] +...+ b[0] * x[0]
+   *    acc1 =  b[numTaps-1] * x[n-numTaps] +   b[numTaps-2] * x[n-numTaps-1] + b[numTaps-3] * x[n-numTaps-2] +...+ b[0] * x[1]
+   *    acc2 =  b[numTaps-1] * x[n-numTaps+1] + b[numTaps-2] * x[n-numTaps] +   b[numTaps-3] * x[n-numTaps-1] +...+ b[0] * x[2]
+   *    acc3 =  b[numTaps-1] * x[n-numTaps+2] + b[numTaps-2] * x[n-numTaps+1] + b[numTaps-3] * x[n-numTaps]   +...+ b[0] * x[3]
+   */
+  blkCnt = blockSize >> 2;
+
+  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
+   ** a second loop below computes the remaining 1 to 3 samples. */
+  while(blkCnt > 0u)
+  {
+    /* Copy four new input samples into the state buffer */
+    *pStateCurnt++ = *pSrc++;
+    *pStateCurnt++ = *pSrc++;
+    *pStateCurnt++ = *pSrc++;
+    *pStateCurnt++ = *pSrc++;
+
+    /* Set all accumulators to zero */
+    acc0 = 0.0f;
+    acc1 = 0.0f;
+    acc2 = 0.0f;
+    acc3 = 0.0f;
+
+    /* Initialize state pointer */
+    px = pState;
+
+    /* Initialize coeff pointer */
+    pb = (pCoeffs);
+
+    /* Pre-load the first three state samples of the window; the tap loop fetches one more per coefficient */
+    x0 = *px++;
+    x1 = *px++;
+    x2 = *px++;
+
+    /* Loop unrolling.  Process 4 taps at a time. */
+    tapCnt = numTaps >> 2u;
+
+    /* Loop over the number of taps.  Unroll by a factor of 4.
+     ** The roles of x0..x3 rotate after every coefficient so each sample is loaded only once. */
+    while(tapCnt > 0u)
+    {
+      /* Read the first coefficient of this group */
+      c0 = *(pb++);
+
+      /* Fetch the next state sample into x3 */
+      x3 = *(px++);
+
+      /* acc0 += c0 * x0 */
+      acc0 += x0 * c0;
+
+      /* acc1 += c0 * x1 */
+      acc1 += x1 * c0;
+
+      /* acc2 += c0 * x2 */
+      acc2 += x2 * c0;
+
+      /* acc3 += c0 * x3 */
+      acc3 += x3 * c0;
+
+      /* Read the second coefficient of this group */
+      c0 = *(pb++);
+
+      /* Fetch the next state sample, reusing x0 which is no longer needed */
+      x0 = *(px++);
+
+      /* Perform the multiply-accumulates with the window shifted by one sample */
+      acc0 += x1 * c0;
+      acc1 += x2 * c0;
+      acc2 += x3 * c0;
+      acc3 += x0 * c0;
+
+      /* Read the third coefficient of this group */
+      c0 = *(pb++);
+
+      /* Fetch the next state sample, reusing x1 which is no longer needed */
+      x1 = *(px++);
+
+      /* Perform the multiply-accumulates with the window shifted by two samples */
+      acc0 += x2 * c0;
+      acc1 += x3 * c0;
+      acc2 += x0 * c0;
+      acc3 += x1 * c0;
+
+      /* Read the fourth coefficient of this group */
+      c0 = *(pb++);
+
+      /* Fetch the next state sample, reusing x2 which is no longer needed */
+      x2 = *(px++);
+
+      /* Perform the multiply-accumulates; x0, x1, x2 are back in window order afterwards */
+      acc0 += x3 * c0;
+      acc1 += x0 * c0;
+      acc2 += x1 * c0;
+      acc3 += x2 * c0;
+
+      tapCnt--;
+    }
+
+    /* If the filter length is not a multiple of 4, compute the remaining filter taps */
+    tapCnt = numTaps % 0x4u;
+
+    while(tapCnt > 0u)
+    {
+      /* Read coefficients */
+      c0 = *(pb++);
+
+      /* Fetch 1 state variable */
+      x3 = *(px++);
+
+      /* Perform the multiply-accumulates */
+      acc0 += x0 * c0;
+      acc1 += x1 * c0;
+      acc2 += x2 * c0;
+      acc3 += x3 * c0;
+
+      /* Slide the sample window by one for the next coefficient */
+      x0 = x1;
+      x1 = x2;
+      x2 = x3;
+
+      /* Decrement the loop counter */
+      tapCnt--;
+    }
+
+    /* Advance the state pointer by 4 to process the next group of 4 samples */
+    pState = pState + 4;
+
+    /* Store the four accumulated results in the destination buffer. */
+    *pDst++ = acc0;
+    *pDst++ = acc1;
+    *pDst++ = acc2;
+    *pDst++ = acc3;
+
+    blkCnt--;
+  }
+
+  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
+   ** No loop unrolling is used. */
+  blkCnt = blockSize % 0x4u;
+
+  while(blkCnt > 0u)
+  {
+    /* Copy one sample at a time into state buffer */
+    *pStateCurnt++ = *pSrc++;
+
+    /* Set the accumulator to zero */
+    acc0 = 0.0f;
+
+    /* Initialize state pointer */
+    px = pState;
+
+    /* Initialize Coefficient pointer */
+    pb = (pCoeffs);
+
+    i = numTaps;
+
+    /* Perform the multiply-accumulates (do-while: the body runs at least once, so numTaps must be >= 1) */
+    do
+    {
+      acc0 += *px++ * *pb++;
+      i--;
+
+    } while(i > 0u);
+
+    /* The result is stored in the destination buffer. */
+    *pDst++ = acc0;
+
+    /* Advance state pointer by 1 for the next sample */
+    pState = pState + 1;
+
+    blkCnt--;
+  }
+
+  /* Processing is complete.
+   ** Now copy the last numTaps - 1 samples to the start of the state buffer.
+   ** This prepares the state buffer for the next function call. */
+
+  /* Points to the start of the state buffer */
+  pStateCurnt = S->pState;
+
+  tapCnt = (numTaps - 1u) >> 2u;
+
+  /* Copy four samples per iteration */
+  while(tapCnt > 0u)
+  {
+    *pStateCurnt++ = *pState++;
+    *pStateCurnt++ = *pState++;
+    *pStateCurnt++ = *pState++;
+    *pStateCurnt++ = *pState++;
+
+    /* Decrement the loop counter */
+    tapCnt--;
+  }
+
+  /* Calculate remaining number of copies */
+  tapCnt = (numTaps - 1u) % 0x4u;
+
+  /* Copy the remaining float32_t samples one at a time */
+  while(tapCnt > 0u)
+  {
+    *pStateCurnt++ = *pState++;
+
+    /* Decrement the loop counter */
+    tapCnt--;
+  }
+}
+ 
+/**  
+ * @} end of FIR group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FilteringFunctions/arm_fir_fast_q15.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,267 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:        arm_fir_fast_q15.c  
+*  
+* Description:  Q15 Fast FIR filter processing function.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.9  2010/08/16   
+*    Initial version  
+*  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupFilters  
+ */ 
+ 
+/**  
+ * @addtogroup FIR  
+ * @{  
+ */ 
+ 
+/**  
+ * @param[in] *S points to an instance of the Q15 FIR filter structure (set up by arm_fir_init_q15(), so numTaps is even and >= 4).  
+ * @param[in] *pSrc points to the block of input data.  
+ * @param[out] *pDst points to the block of output data.  
+ * @param[in] blockSize number of samples to process per call.  
+ * @return none.  
+ *  
+ * <b>Scaling and Overflow Behavior:</b>  
+ * \par  
+ * This fast version uses a 32-bit accumulator with 2.30 format.  
+ * The accumulator maintains full precision of the intermediate multiplication results but provides only a single guard bit.  
+ * Thus, if the accumulator result overflows it wraps around and distorts the result.  
+ * In order to avoid overflows completely the input signal must be scaled down by log2(numTaps) bits.  
+ * The 2.30 accumulator is then shifted right by 15 bits (2.15 format) and saturated with __SSAT to yield the 1.15 result.  
+ *  
+ * \par  
+ * Refer to the function <code>arm_fir_q15()</code> for a slower implementation of this function which uses 64-bit accumulation to avoid wrap around distortion.  Both the slow and the fast versions use the same instance structure.  
+ * Use the function <code>arm_fir_init_q15()</code> to initialize the filter structure.  
+ */ 
+ 
+void arm_fir_fast_q15( 
+  const arm_fir_instance_q15 * S, 
+  q15_t * pSrc, 
+  q15_t * pDst, 
+  uint32_t blockSize) 
+{ 
+  q15_t *pState = S->pState;                     /* State pointer */ 
+  q15_t *pCoeffs = S->pCoeffs;                   /* Coefficient pointer */ 
+  q15_t *pStateCurnt;                            /* Points to the current sample of the state */ 
+  q15_t *px1;                                    /* Temporary q15 pointer for state buffer */ 
+  q31_t *pb;                                     /* Temporary pointer for coefficient buffer (two q15 coefficients per read) */ 
+  q31_t *px2;                                    /* Temporary q31 pointer for SIMD state buffer accesses */ 
+  q31_t x0, x1, x2, x3, c0;                      /* Temporary variables to hold SIMD state and coefficient values */ 
+  q31_t acc0, acc1, acc2, acc3;                  /* Accumulators */ 
+  uint32_t numTaps = S->numTaps;                 /* Number of taps in the filter */ 
+  uint32_t tapCnt, blkCnt;                       /* Loop counters */ 
+ 
+  /* S->pState points to buffer which contains previous frame (numTaps - 1) samples */ 
+  /* pStateCurnt points to the location where the new input data should be written */ 
+  pStateCurnt = &(S->pState[(numTaps - 1u)]); 
+ 
+  /* Apply loop unrolling and compute 4 output values simultaneously.  
+   * The variables acc0 ... acc3 hold output values that are being computed:  
+   *  
+   *    acc0 =  b[numTaps-1] * x[n-numTaps-1] + b[numTaps-2] * x[n-numTaps-2] + b[numTaps-3] * x[n-numTaps-3] +...+ b[0] * x[0]  
+   *    acc1 =  b[numTaps-1] * x[n-numTaps] +   b[numTaps-2] * x[n-numTaps-1] + b[numTaps-3] * x[n-numTaps-2] +...+ b[0] * x[1]  
+   *    acc2 =  b[numTaps-1] * x[n-numTaps+1] + b[numTaps-2] * x[n-numTaps] +   b[numTaps-3] * x[n-numTaps-1] +...+ b[0] * x[2]  
+   *    acc3 =  b[numTaps-1] * x[n-numTaps+2] + b[numTaps-2] * x[n-numTaps+1] + b[numTaps-3] * x[n-numTaps]   +...+ b[0] * x[3]  
+   */ 
+  blkCnt = blockSize >> 2; 
+ 
+  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.  
+   ** a second loop below computes the remaining 1 to 3 samples. */ 
+  while(blkCnt > 0u) 
+  { 
+    /* Copy four new input samples into the state buffer.  
+     ** Use 32-bit SIMD to move the 16-bit data.  Only requires two copies. */ 
+    *__SIMD32(pStateCurnt)++ = *__SIMD32(pSrc)++; 
+    *__SIMD32(pStateCurnt)++ = *__SIMD32(pSrc)++; 
+ 
+    /* Set all accumulators to zero */ 
+    acc0 = 0; 
+    acc1 = 0; 
+    acc2 = 0; 
+    acc3 = 0; 
+ 
+    /* Initialize state pointer of type q15 */ 
+    px1 = pState; 
+ 
+    /* Initialize coeff pointer of type q31 */ 
+    pb = (q31_t *) (pCoeffs); 
+ 
+    /* Read the first two samples from the state buffer:  x[n-N], x[n-N-1] */ 
+    x0 = *(q31_t *) (px1++); 
+ 
+    /* px1 advanced by one q15 only, so this 32-bit read overlaps the previous one: x[n-N-1], x[n-N-2] */ 
+    x1 = *(q31_t *) (px1++); 
+ 
+    /* Loop over the number of taps.  Unroll by a factor of 4.  
+     ** do/while runs at least once, which is why numTaps must be >= 4. */ 
+    tapCnt = numTaps >> 2; 
+    do 
+    { 
+      /* Read the first two coefficients using SIMD:  b[N] and b[N-1] coefficients */ 
+      c0 = *(pb++); 
+ 
+      /* acc0 +=  b[N] * x[n-N] + b[N-1] * x[n-N-1] */ 
+      acc0 = __SMLAD(x0, c0, acc0); 
+ 
+      /* acc1 +=  b[N] * x[n-N-1] + b[N-1] * x[n-N-2] */ 
+      acc1 = __SMLAD(x1, c0, acc1); 
+ 
+      /* Read state x[n-N-2], x[n-N-3] */ 
+      x2 = *(q31_t *) (px1++); 
+ 
+      /* Read state x[n-N-3], x[n-N-4] */ 
+      x3 = *(q31_t *) (px1++); 
+ 
+      /* acc2 +=  b[N] * x[n-N-2] + b[N-1] * x[n-N-3] */ 
+      acc2 = __SMLAD(x2, c0, acc2); 
+ 
+      /* acc3 +=  b[N] * x[n-N-3] + b[N-1] * x[n-N-4] */ 
+      acc3 = __SMLAD(x3, c0, acc3); 
+ 
+      /* Read coefficients b[N-2], b[N-3] */ 
+      c0 = *(pb++); 
+ 
+      /* acc0 +=  b[N-2] * x[n-N-2] + b[N-3] * x[n-N-3] */ 
+      acc0 = __SMLAD(x2, c0, acc0); 
+ 
+      /* acc1 +=  b[N-2] * x[n-N-3] + b[N-3] * x[n-N-4] */ 
+      acc1 = __SMLAD(x3, c0, acc1); 
+ 
+      /* Read state x[n-N-4], x[n-N-5] */ 
+      x0 = *(q31_t *) (px1++); 
+ 
+      /* Read state x[n-N-5], x[n-N-6] */ 
+      x1 = *(q31_t *) (px1++); 
+ 
+      /* acc2 +=  b[N-2] * x[n-N-4] + b[N-3] * x[n-N-5] */ 
+      acc2 = __SMLAD(x0, c0, acc2); 
+ 
+      /* acc3 +=  b[N-2] * x[n-N-5] + b[N-3] * x[n-N-6] */ 
+      acc3 = __SMLAD(x1, c0, acc3); 
+      tapCnt--; 
+ 
+    } 
+    while(tapCnt > 0u); 
+ 
+    /* If the filter length is not a multiple of 4, compute the remaining filter taps.  
+     ** This is always 2 taps since the filter length is always even. */ 
+    if((numTaps & 0x3u) != 0u) 
+    { 
+      /* Read 2 coefficients */ 
+      c0 = *(pb++); 
+      /* x0 and x1 were already loaded by the last unrolled pass; fetch the two remaining state pairs */ 
+      x2 = *(q31_t *) (px1++); 
+      x3 = *(q31_t *) (px1++); 
+ 
+      /* Perform the multiply-accumulates */ 
+      acc0 = __SMLAD(x0, c0, acc0); 
+      acc1 = __SMLAD(x1, c0, acc1); 
+      acc2 = __SMLAD(x2, c0, acc2); 
+      acc3 = __SMLAD(x3, c0, acc3); 
+    } 
+ 
+    /* The results in the 4 accumulators are in 2.30 format.  Convert to 1.15 with saturation  
+     ** (__SSAT to 16 bits, so an out-of-range value clips instead of wrapping), then store the 4 outputs. */ 
+    *__SIMD32(pDst)++ = __PKHBT(__SSAT((acc0 >> 15), 16), __SSAT((acc1 >> 15), 16), 16u); 
+    *__SIMD32(pDst)++ = __PKHBT(__SSAT((acc2 >> 15), 16), __SSAT((acc3 >> 15), 16), 16u); 
+ 
+ 
+    /* Advance the state pointer by 4 to process the next group of 4 samples */ 
+    pState = pState + 4; 
+ 
+    /* Decrement the loop counter */ 
+    blkCnt--; 
+  } 
+ 
+  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.  
+   ** No loop unrolling is used. */ 
+  blkCnt = blockSize % 0x4u; 
+  while(blkCnt > 0u) 
+  { 
+    /* Copy one sample into the state buffer */ 
+    *pStateCurnt++ = *pSrc++; 
+ 
+    /* Set the accumulator to zero */ 
+    acc0 = 0; 
+ 
+    /* Use SIMD to hold states and coefficients: each pass consumes two taps */ 
+    px2 = (q31_t *) pState; 
+    pb = (q31_t *) (pCoeffs); 
+    tapCnt = numTaps >> 1; 
+ 
+    do 
+    { 
+      acc0 = __SMLAD(*px2++, *(pb++), acc0); 
+      tapCnt--; 
+    } 
+    while(tapCnt > 0u); 
+ 
+    /* The result is in 2.30 format.  Convert to 1.15 with saturation.  
+     ** Then store the output in the destination buffer. */ 
+    *pDst++ = (q15_t) (__SSAT((acc0 >> 15), 16)); 
+ 
+    /* Advance state pointer by 1 for the next sample */ 
+    pState = pState + 1; 
+ 
+    /* Decrement the loop counter */ 
+    blkCnt--; 
+  } 
+ 
+  /* Processing is complete.  
+   ** Now copy the last numTaps - 1 samples to the start of the state buffer.  
+   ** This prepares the state buffer for the next function call. */ 
+ 
+  /* Points to the start of the state buffer */ 
+  pStateCurnt = S->pState; 
+  /* Number of groups of four q15 samples, each moved with two 32-bit copies */ 
+  tapCnt = (numTaps - 1u) >> 2; 
+ 
+  while(tapCnt > 0u) 
+  { 
+    *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++; 
+    *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++; 
+ 
+    tapCnt--; 
+  } 
+ 
+  /* Calculation of count for remaining q15_t data */ 
+  tapCnt = (numTaps - 1u) % 0x4u; 
+ 
+  /* copy remaining data */ 
+  while(tapCnt > 0u) 
+  { 
+    *pStateCurnt++ = *pState++; 
+ 
+    /* Decrement the loop counter */ 
+    tapCnt--; 
+  } 
+} 
+ 
+/**  
+ * @} end of FIR group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FilteringFunctions/arm_fir_fast_q31.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,300 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:	    arm_fir_fast_q31.c  
+*  
+* Description:	Processing function for the Q31 Fast FIR filter.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.9  2010/08/27   
+*    Initial version  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupFilters  
+ */ 
+ 
+/**  
+ * @addtogroup FIR  
+ * @{  
+ */ 
+ 
+/**  
+ * @param[in] *S points to an instance of the Q31 FIR filter structure.  
+ * @param[in] *pSrc points to the block of input data.  
+ * @param[out] *pDst points to the block of output data.  
+ * @param[in] blockSize number of samples to process per call.  
+ * @return none.  
+ *  
+ * <b>Scaling and Overflow Behavior:</b>  
+ *  
+ * \par  
+ * This function is optimized for speed at the expense of fixed-point precision and overflow protection.  
+ * The result of each 1.31 x 1.31 multiplication is truncated to 2.30 format.  
+ * These intermediate results are added to a 2.30 accumulator.  
+ * Finally, the accumulator is shifted left by one bit to give the 1.31 result; this implementation applies no saturation at that step.  
+ * The fast version has the same overflow behavior as the standard version and provides less precision since it discards the low 32 bits of each multiplication result.  
+ * In order to avoid overflows completely the input signal must be scaled down by log2(numTaps) bits.  
+ *  
+ * \par  
+ * Refer to the function <code>arm_fir_q31()</code> for a slower implementation of this function which uses a 64-bit accumulator to provide higher precision.  Both the slow and the fast versions use the same instance structure.  
+ * Use the function <code>arm_fir_init_q31()</code> to initialize the filter structure.  
+ */ 
+ 
+void arm_fir_fast_q31( 
+  const arm_fir_instance_q31 * S, 
+  q31_t * pSrc, 
+  q31_t * pDst, 
+  uint32_t blockSize) 
+{ 
+  q31_t *pState = S->pState;                     /* State pointer */ 
+  q31_t *pCoeffs = S->pCoeffs;                   /* Coefficient pointer */ 
+  q31_t *pStateCurnt;                            /* Points to the current sample of the state */ 
+  q31_t x0, x1, x2, x3;                          /* Temporary variables to hold state */ 
+  q31_t c0;                                      /* Temporary variable to hold coefficient value */ 
+  q31_t *px;                                     /* Temporary pointer for state */ 
+  q31_t *pb;                                     /* Temporary pointer for coefficient buffer */ 
+  q63_t acc0, acc1, acc2, acc3;                  /* Accumulators (declared q63_t, but every update is cast back to q31_t) */ 
+  uint32_t numTaps = S->numTaps;                 /* Number of filter coefficients in the filter */ 
+  uint32_t i, tapCnt, blkCnt;                    /* Loop counters */ 
+ 
+  /* S->pState points to buffer which contains previous frame (numTaps - 1) samples */ 
+  /* pStateCurnt points to the location where the new input data should be written */ 
+  pStateCurnt = &(S->pState[(numTaps - 1u)]); 
+ 
+  /* Apply loop unrolling and compute 4 output values simultaneously.  
+   * The variables acc0 ... acc3 hold output values that are being computed:  
+   *  
+   *    acc0 =  b[numTaps-1] * x[n-numTaps-1] + b[numTaps-2] * x[n-numTaps-2] + b[numTaps-3] * x[n-numTaps-3] +...+ b[0] * x[0]  
+   *    acc1 =  b[numTaps-1] * x[n-numTaps] +   b[numTaps-2] * x[n-numTaps-1] + b[numTaps-3] * x[n-numTaps-2] +...+ b[0] * x[1]  
+   *    acc2 =  b[numTaps-1] * x[n-numTaps+1] + b[numTaps-2] * x[n-numTaps] +   b[numTaps-3] * x[n-numTaps-1] +...+ b[0] * x[2]  
+   *    acc3 =  b[numTaps-1] * x[n-numTaps+2] + b[numTaps-2] * x[n-numTaps+1] + b[numTaps-3] * x[n-numTaps]   +...+ b[0] * x[3]  
+   */ 
+  blkCnt = blockSize >> 2; 
+ 
+  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.  
+   ** a second loop below computes the remaining 1 to 3 samples. */ 
+  while(blkCnt > 0u) 
+  { 
+    /* Copy four new input samples into the state buffer */ 
+    *pStateCurnt++ = *pSrc++; 
+    *pStateCurnt++ = *pSrc++; 
+    *pStateCurnt++ = *pSrc++; 
+    *pStateCurnt++ = *pSrc++; 
+ 
+    /* Set all accumulators to zero */ 
+    acc0 = 0; 
+    acc1 = 0; 
+    acc2 = 0; 
+    acc3 = 0; 
+ 
+    /* Initialize state pointer */ 
+    px = pState; 
+ 
+    /* Initialize coefficient pointer */ 
+    pb = pCoeffs; 
+ 
+    /* Read the first three samples from the state buffer:  
+     *  x[n-numTaps], x[n-numTaps-1], x[n-numTaps-2] */ 
+    x0 = *(px++); 
+    x1 = *(px++); 
+    x2 = *(px++); 
+ 
+    /* Loop unrolling.  Process 4 taps at a time. */ 
+    tapCnt = numTaps >> 2; 
+    i = tapCnt; 
+ 
+    while(i > 0u) 
+    { 
+      /* Read the b[numTaps] coefficient */ 
+      c0 = *(pb++); 
+ 
+      /* Read x[n-numTaps-3] sample */ 
+      x3 = *(px++); 
+ 
+      /* acc0 += high 32 bits of (b[numTaps] * x[n-numTaps]); the low 32 bits of the product are discarded */ 
+      acc0 = (q31_t) ((((q63_t) x0 * c0) + (acc0 << 32)) >> 32); 
+ 
+      /* acc1 +=  b[numTaps] * x[n-numTaps-1] */ 
+      acc1 = (q31_t) ((((q63_t) x1 * c0) + (acc1 << 32)) >> 32); 
+ 
+      /* acc2 +=  b[numTaps] * x[n-numTaps-2] */ 
+      acc2 = (q31_t) ((((q63_t) x2 * c0) + (acc2 << 32)) >> 32); 
+ 
+      /* acc3 +=  b[numTaps] * x[n-numTaps-3] */ 
+      acc3 = (q31_t) ((((q63_t) x3 * c0) + (acc3 << 32)) >> 32); 
+ 
+      /* Read the b[numTaps-1] coefficient */ 
+      c0 = *(pb++); 
+ 
+      /* Read x[n-numTaps-4] sample; it overwrites x0, which is no longer needed */ 
+      x0 = *(px++); 
+ 
+      /* Multiply-accumulate with the window rotated by one: x1, x2, x3, x0 */ 
+      acc0 = (q31_t) ((((q63_t) x1 * c0) + (acc0 << 32)) >> 32); 
+      acc1 = (q31_t) ((((q63_t) x2 * c0) + (acc1 << 32)) >> 32); 
+      acc2 = (q31_t) ((((q63_t) x3 * c0) + (acc2 << 32)) >> 32); 
+      acc3 = (q31_t) ((((q63_t) x0 * c0) + (acc3 << 32)) >> 32); 
+ 
+      /* Read the b[numTaps-2] coefficient */ 
+      c0 = *(pb++); 
+ 
+      /* Read x[n-numTaps-5] sample; it overwrites x1 */ 
+      x1 = *(px++); 
+ 
+      /* Multiply-accumulate with the window rotated by two: x2, x3, x0, x1 */ 
+      acc0 = (q31_t) ((((q63_t) x2 * c0) + (acc0 << 32)) >> 32); 
+      acc1 = (q31_t) ((((q63_t) x3 * c0) + (acc1 << 32)) >> 32); 
+      acc2 = (q31_t) ((((q63_t) x0 * c0) + (acc2 << 32)) >> 32); 
+      acc3 = (q31_t) ((((q63_t) x1 * c0) + (acc3 << 32)) >> 32); 
+ 
+      /* Read the b[numTaps-3] coefficients */ 
+      c0 = *(pb++); 
+ 
+      /* Read x[n-numTaps-6] sample; it overwrites x2 */ 
+      x2 = *(px++); 
+ 
+      /* Multiply-accumulate with the window rotated by three: x3, x0, x1, x2 */ 
+      acc0 = (q31_t) ((((q63_t) x3 * c0) + (acc0 << 32)) >> 32); 
+      acc1 = (q31_t) ((((q63_t) x0 * c0) + (acc1 << 32)) >> 32); 
+      acc2 = (q31_t) ((((q63_t) x1 * c0) + (acc2 << 32)) >> 32); 
+      acc3 = (q31_t) ((((q63_t) x2 * c0) + (acc3 << 32)) >> 32); 
+      i--; 
+    } 
+ 
+    /* If the filter length is not a multiple of 4, compute the remaining filter taps one at a time */ 
+ 
+    i = numTaps - (tapCnt * 4u); 
+    while(i > 0u) 
+    { 
+      /* Read coefficients */ 
+      c0 = *(pb++); 
+ 
+      /* Fetch 1 state variable; x0, x1, x2 are carried over from the previous pass */ 
+      x3 = *(px++); 
+ 
+      /* Perform the multiply-accumulates */ 
+      acc0 = (q31_t) ((((q63_t) x0 * c0) + (acc0 << 32)) >> 32); 
+      acc1 = (q31_t) ((((q63_t) x1 * c0) + (acc1 << 32)) >> 32); 
+      acc2 = (q31_t) ((((q63_t) x2 * c0) + (acc2 << 32)) >> 32); 
+      acc3 = (q31_t) ((((q63_t) x3 * c0) + (acc3 << 32)) >> 32); 
+ 
+      /* Slide the state window by one sample for the next tap */ 
+      x0 = x1; 
+      x1 = x2; 
+      x2 = x3; 
+ 
+      /* Decrement the loop counter */ 
+      i--; 
+    } 
+ 
+    /* Advance the state pointer by 4 to process the next group of 4 samples */ 
+    pState = pState + 4; 
+ 
+    /* The results in the 4 accumulators are in 2.30 format.  Convert to 1.31 by shifting left one bit  
+     ** (no saturation is applied), then store the 4 outputs in the destination buffer. */ 
+    *pDst++ = (q31_t) (acc0 << 1); 
+    *pDst++ = (q31_t) (acc1 << 1); 
+    *pDst++ = (q31_t) (acc2 << 1); 
+    *pDst++ = (q31_t) (acc3 << 1); 
+ 
+    /* Decrement the samples loop counter */ 
+    blkCnt--; 
+  } 
+ 
+ 
+  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.  
+   ** No loop unrolling is used. */ 
+  blkCnt = blockSize % 4u; 
+ 
+  while(blkCnt > 0u) 
+  { 
+    /* Copy one sample at a time into state buffer */ 
+    *pStateCurnt++ = *pSrc++; 
+ 
+    /* Set the accumulator to zero */ 
+    acc0 = 0; 
+ 
+    /* Initialize state pointer */ 
+    px = pState; 
+ 
+    /* Initialize Coefficient pointer */ 
+    pb = (pCoeffs); 
+ 
+    i = numTaps; 
+ 
+    /* Perform the multiply-accumulates; do/while always executes at least one tap */ 
+    do 
+    { 
+      acc0 = (q31_t) ((((q63_t) * (px++) * (*(pb++))) + (acc0 << 32)) >> 32); 
+      i--; 
+    } while(i > 0u); 
+ 
+    /* The result is in 2.30 format.  Convert to 1.31 by shifting left one bit (no saturation).  
+     ** Then store the output in the destination buffer. */ 
+    *pDst++ = (q31_t) (acc0 << 1); 
+ 
+    /* Advance state pointer by 1 for the next sample */ 
+    pState = pState + 1; 
+ 
+    /* Decrement the samples loop counter */ 
+    blkCnt--; 
+  } 
+ 
+  /* Processing is complete.  
+   ** Now copy the last numTaps - 1 samples to the start of the state buffer.  
+   ** This prepares the state buffer for the next function call. */ 
+ 
+  /* Points to the start of the state buffer */ 
+  pStateCurnt = S->pState; 
+ 
+  tapCnt = (numTaps - 1u) >> 2u; 
+ 
+  /* Copy the history four samples per pass */ 
+  while(tapCnt > 0u) 
+  { 
+    *pStateCurnt++ = *pState++; 
+    *pStateCurnt++ = *pState++; 
+    *pStateCurnt++ = *pState++; 
+    *pStateCurnt++ = *pState++; 
+ 
+    /* Decrement the loop counter */ 
+    tapCnt--; 
+  } 
+ 
+  /* Calculate remaining number of copies */ 
+  tapCnt = (numTaps - 1u) % 0x4u; 
+ 
+  /* Copy the remaining q31_t data */ 
+  while(tapCnt > 0u) 
+  { 
+    *pStateCurnt++ = *pState++; 
+ 
+    /* Decrement the loop counter */ 
+    tapCnt--; 
+  } 
+ 
+} 
+ 
+/**  
+ * @} end of FIR group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FilteringFunctions/arm_fir_init_f32.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,88 @@
+/*-----------------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:        arm_fir_init_f32.c  
+*  
+* Description:  Floating-point FIR filter initialization function.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.5  2010/04/26   
+* 	 incorporated review comments and updated with latest CMSIS layer  
+*  
+* Version 0.0.3  2010/03/10   
+*    Initial version  
+* ---------------------------------------------------------------------------*/ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupFilters  
+ */ 
+ 
+/**  
+ * @addtogroup FIR  
+ * @{  
+ */ 
+ 
+/**  
+ * @details  
+ *  
+ * @param[in,out] *S points to an instance of the floating-point FIR filter structure.  
+ * @param[in] 	  numTaps  Number of filter coefficients in the filter.  
+ * @param[in]     *pCoeffs points to the filter coefficients buffer.  
+ * @param[in]     *pState points to the state buffer.  
+ * @param[in] 	  blockSize number of samples that are processed per call.  
+ * @return 		  none.  
+ *  
+ * <b>Description:</b>  
+ * \par  
+ * <code>pCoeffs</code> points to the array of filter coefficients stored in time reversed order:  
+ * <pre>  
+ *    {b[numTaps-1], b[numTaps-2], b[numTaps-3], ..., b[1], b[0]}  
+ * </pre>  
+ * \par  
+ * <code>pState</code> points to the array of state variables.  
+ * <code>pState</code> is of length <code>numTaps+blockSize-1</code> samples, where <code>blockSize</code> is the number of input samples processed by each call to <code>arm_fir_f32()</code>.  
+ */ 
+ 
+void arm_fir_init_f32( 
+  arm_fir_instance_f32 * S, 
+  uint16_t numTaps, 
+  float32_t * pCoeffs, 
+  float32_t * pState, 
+  uint32_t blockSize) 
+{ 
+  /* The delay line holds (blockSize + numTaps - 1) samples */ 
+  uint32_t stateLen = numTaps + (blockSize - 1u); 
+ 
+  /* Record the filter length and where its coefficients live */ 
+  S->numTaps = numTaps; 
+  S->pCoeffs = pCoeffs; 
+ 
+  /* Start from an all-zero delay line */ 
+  memset(pState, 0, stateLen * sizeof(float32_t)); 
+ 
+  /* Attach the cleared delay line to the instance */ 
+  S->pState = pState; 
+} 
+ 
+/**  
+ * @} end of FIR group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FilteringFunctions/arm_fir_init_q15.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,118 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:        arm_fir_init_q15.c  
+*  
+* Description:  Q15 FIR filter initialization function.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.5  2010/04/26   
+* 	 incorporated review comments and updated with latest CMSIS layer  
+*  
+* Version 0.0.3  2010/03/10   
+*    Initial version  
+* ------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupFilters  
+ */ 
+ 
+/**  
+ * @addtogroup FIR  
+ * @{  
+ */ 
+ 
+/**  
+ * @param[in,out]  *S points to an instance of the Q15 FIR filter structure.  
+ * @param[in] 	   numTaps  Number of filter coefficients in the filter. Must be even and greater than or equal to 4.  
+ * @param[in]      *pCoeffs points to the filter coefficients buffer.  
+ * @param[in]      *pState points to the state buffer.  
+ * @param[in]      blockSize is number of samples processed per call.  
+ * @return The function returns ARM_MATH_SUCCESS if initialization is successful, or ARM_MATH_ARGUMENT_ERROR if  
+ * <code>numTaps</code> is odd or less than 4.  
+ *  
+ * <b>Description:</b>  
+ * \par  
+ * <code>pCoeffs</code> points to the array of filter coefficients stored in time reversed order:  
+ * <pre>  
+ *    {b[numTaps-1], b[numTaps-2], b[numTaps-3], ..., b[1], b[0]}  
+ * </pre>  
+ * Note that <code>numTaps</code> must be even and greater than or equal to 4.  
+ * To implement an odd length filter simply increase <code>numTaps</code> by 1 and set the last coefficient to zero.  
+ * For example, to implement a filter with <code>numTaps=3</code> and coefficients  
+ * <pre>  
+ *     {0.3, -0.8, 0.3}  
+ * </pre>  
+ * set <code>numTaps=4</code> and use the coefficients:  
+ * <pre>  
+ *     {0.3, -0.8, 0.3, 0}.  
+ * </pre>  
+ * Similarly, to implement a two point filter  
+ * <pre>  
+ *     {0.3, -0.3}  
+ * </pre>  
+ * set <code>numTaps=4</code> and use the coefficients:  
+ * <pre>  
+ *     {0.3, -0.3, 0, 0}.  
+ * </pre>  
+ * \par  
+ * <code>pState</code> points to the array of state variables.  
+ * <code>pState</code> is of length <code>numTaps+blockSize-1</code>, where <code>blockSize</code> is the number of input samples processed by each call to <code>arm_fir_q15()</code>.  
+ */ 
+ 
+arm_status arm_fir_init_q15( 
+  arm_fir_instance_q15 * S, 
+  uint16_t numTaps, 
+  q15_t * pCoeffs, 
+  q15_t * pState, 
+  uint32_t blockSize) 
+{ 
+  uint32_t stateLen;                             /* Length of the state buffer in samples */ 
+ 
+  /* Reject odd filter lengths and lengths below 4; the instance is left untouched */ 
+  if((numTaps < 4u) || ((numTaps & 0x1u) != 0u)) 
+  { 
+    return (ARM_MATH_ARGUMENT_ERROR); 
+  } 
+ 
+  /* Record the filter length */ 
+  S->numTaps = numTaps; 
+ 
+  /* Record where the coefficients live */ 
+  S->pCoeffs = pCoeffs; 
+ 
+  /* The delay line holds (blockSize + numTaps - 1) samples */ 
+  stateLen = numTaps + (blockSize - 1u); 
+ 
+  /* Start from an all-zero delay line */ 
+  memset(pState, 0, stateLen * sizeof(q15_t)); 
+ 
+  /* Attach the cleared delay line to the instance */ 
+  S->pState = pState; 
+ 
+  /* All arguments were accepted */ 
+  return (ARM_MATH_SUCCESS); 
+} 
+ 
+/**  
+ * @} end of FIR group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FilteringFunctions/arm_fir_init_q31.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,88 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:	    arm_fir_init_q31.c  
+*  
+* Description:	Q31 FIR filter initialization function.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.5  2010/04/26   
+* 	 incorporated review comments and updated with latest CMSIS layer  
+*  
+* Version 0.0.3  2010/03/10   
+*    Initial version  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupFilters  
+ */ 
+ 
+/**  
+ * @addtogroup FIR  
+ * @{  
+ */ 
+ 
+/**  
+ * @details  
+ *  
+ * @param[in,out] *S points to an instance of the Q31 FIR filter structure.  
+ * @param[in] 	  numTaps  Number of filter coefficients in the filter.  
+ * @param[in] 	  *pCoeffs points to the filter coefficients buffer.  
+ * @param[in] 	  *pState points to the state buffer.  
+ * @param[in] 	  blockSize number of samples that are processed per call.  
+ * @return        none.  
+ *  
+ * <b>Description:</b>  
+ * \par  
+ * <code>pCoeffs</code> points to the array of filter coefficients stored in time reversed order:  
+ * <pre>  
+ *    {b[numTaps-1], b[numTaps-2], b[numTaps-3], ..., b[1], b[0]}  
+ * </pre>  
+ * \par  
+ * <code>pState</code> points to the array of state variables.  
+ * <code>pState</code> is of length <code>numTaps+blockSize-1</code> samples, where <code>blockSize</code> is the number of input samples processed by each call to <code>arm_fir_q31()</code>.  
+ */ 
+ 
+void arm_fir_init_q31( 
+  arm_fir_instance_q31 * S, 
+  uint16_t numTaps, 
+  q31_t * pCoeffs, 
+  q31_t * pState, 
+  uint32_t blockSize) 
+{ 
+  /* The delay line holds (blockSize + numTaps - 1) samples */ 
+  uint32_t stateLen = blockSize + ((uint32_t) numTaps - 1u); 
+ 
+  /* Record the filter length and where its coefficients live */ 
+  S->numTaps = numTaps; 
+  S->pCoeffs = pCoeffs; 
+ 
+  /* Start from an all-zero delay line */ 
+  memset(pState, 0, stateLen * sizeof(q31_t)); 
+ 
+  /* Attach the cleared delay line to the instance */ 
+  S->pState = pState; 
+} 
+ 
+/**  
+ * @} end of FIR group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FilteringFunctions/arm_fir_init_q7.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,86 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:        arm_fir_init_q7.c  
+*  
+* Description:  Q7 FIR filter initialization function.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.5  2010/04/26   
+* 	 incorporated review comments and updated with latest CMSIS layer  
+*  
+* Version 0.0.3  2010/03/10   
+*    Initial version  
+* ------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupFilters  
+ */ 
+ 
+/**  
+ * @addtogroup FIR  
+ * @{  
+ */ 
+/**  
+ * @param[in,out] *S points to an instance of the Q7 FIR filter structure.  
+ * @param[in] 	  numTaps  Number of filter coefficients in the filter.  
+ * @param[in] 	  *pCoeffs points to the filter coefficients buffer.  
+ * @param[in]     *pState points to the state buffer.  
+ * @param[in]     blockSize number of samples that are processed per call.  
+ * @return     	  none  
+ *  
+ * <b>Description:</b>  
+ * \par  
+ * <code>pCoeffs</code> points to the array of filter coefficients stored in time reversed order:  
+ * <pre>  
+ *    {b[numTaps-1], b[numTaps-2], b[numTaps-3], ..., b[1], b[0]}  
+ * </pre>  
+ * \par  
+ * <code>pState</code> points to the array of state variables.  
+ * <code>pState</code> is of length <code>numTaps+blockSize-1</code> samples, where <code>blockSize</code> is the number of input samples processed by each call to <code>arm_fir_q7()</code>.  
+ */ 
+ 
+void arm_fir_init_q7( 
+  arm_fir_instance_q7 * S, 
+  uint16_t numTaps, 
+  q7_t * pCoeffs, 
+  q7_t * pState, 
+  uint32_t blockSize) 
+{ 
+  /* The delay line holds (blockSize + numTaps - 1) samples */ 
+  uint32_t stateLen = numTaps + (blockSize - 1u); 
+ 
+  /* Record the filter length */ 
+  S->numTaps = numTaps; 
+  /* Record where the coefficients live */ 
+  S->pCoeffs = pCoeffs; 
+ 
+  /* Start from an all-zero delay line */ 
+  memset(pState, 0, stateLen * sizeof(q7_t)); 
+ 
+  /* Attach the cleared delay line to the instance */ 
+  S->pState = pState; 
+} 
+ 
+/**  
+ * @} end of FIR group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FilteringFunctions/arm_fir_interpolate_f32.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,304 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:	    arm_fir_interpolate_f32.c  
+*  
+* Description:	FIR interpolation for floating-point sequences.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @defgroup FIR_Interpolate Finite Impulse Response (FIR) Interpolator  
+ *  
+ * These functions combine an upsampler (zero stuffer) and an FIR filter.  
+ * They are used in multirate systems for increasing the sample rate of a signal without introducing high frequency images.  
+ * Conceptually, the functions are equivalent to the block diagram below:  
+ * \image html FIRInterpolator.gif "Components included in the FIR Interpolator functions"  
+ * After upsampling by a factor of <code>L</code>, the signal should be filtered by a lowpass filter with a normalized  
+ * cutoff frequency of <code>1/L</code> in order to eliminate high frequency copies of the spectrum.  
+ * The user of the function is responsible for providing the filter coefficients.  
+ *  
+ * The FIR interpolator functions provided in the CMSIS DSP Library combine the upsampler and FIR filter in an efficient manner.  
+ * The upsampler inserts <code>L-1</code> zeros between each sample.  
+ * Instead of multiplying by these zero values, the FIR filter is designed to skip them.  
+ * This leads to an efficient implementation without any wasted effort.  
+ * The functions operate on blocks of input and output data.  
+ * <code>pSrc</code> points to an array of <code>blockSize</code> input values and  
+ * <code>pDst</code> points to an array of <code>blockSize*L</code> output values.  
+ *  
+ * The library provides separate functions for Q15, Q31, and floating-point data types.  
+ *  
+ * \par Algorithm:  
+ * The functions use a polyphase filter structure:  
+ * <pre>  
+ *    y[n] = b[0] * x[n] + b[L]   * x[n-1] + ... + b[L*(phaseLength-1)] * x[n-phaseLength+1]  
+ *    y[n+1] = b[1] * x[n] + b[L+1] * x[n-1] + ... + b[L*(phaseLength-1)+1] * x[n-phaseLength+1]  
+ *    ...  
+ *    y[n+(L-1)] = b[L-1] * x[n] + b[2*L-1] * x[n-1] + ....+ b[L*(phaseLength-1)+(L-1)] * x[n-phaseLength+1]  
+ * </pre>  
+ * This approach is more efficient than straightforward upsample-then-filter algorithms.  
+ * With this method the computation is reduced by a factor of <code>1/L</code> when compared to using a standard FIR filter.  
+ * \par  
+ * <code>pCoeffs</code> points to a coefficient array of size <code>numTaps</code>.  
+ * <code>numTaps</code> must be a multiple of the interpolation factor <code>L</code> and this is checked by the  
+ * initialization functions.  
+ * Internally, the function divides the FIR filter's impulse response into shorter filters of length  
+ * <code>phaseLength=numTaps/L</code>.  
+ * Coefficients are stored in time reversed order.  
+ * \par  
+ * <pre>  
+ *    {b[numTaps-1], b[numTaps-2], b[numTaps-3], ..., b[1], b[0]}  
+ * </pre>  
+ * \par  
+ * <code>pState</code> points to a state array of size <code>blockSize + phaseLength - 1</code>.  
+ * Samples in the state buffer are stored in the order:  
+ * \par  
+ * <pre>  
+ *    {x[n-phaseLength+1], x[n-phaseLength+2], ..., x[n-1], x[n], x[n+1], ..., x[n+blockSize-1]}  
+ * </pre>  
+ * The state variables are updated after each block of data is processed, the coefficients are untouched.  
+ *  
+ * \par Instance Structure  
+ * The coefficients and state variables for a filter are stored together in an instance data structure.  
+ * A separate instance structure must be defined for each filter.  
+ * Coefficient arrays may be shared among several instances while state variable array should be allocated separately.  
+ * There are separate instance structure declarations for each of the 3 supported data types.  
+ *  
+ * \par Initialization Functions  
+ * There is also an associated initialization function for each data type.  
+ * The initialization function performs the following operations:  
+ * - Sets the values of the internal structure fields.  
+ * - Zeros out the values in the state buffer.  
+ * - Checks to make sure that the length of the filter is a multiple of the interpolation factor.  
+ *  
+ * \par  
+ * Use of the initialization function is optional.  
+ * However, if the initialization function is used, then the instance structure cannot be placed into a const data section.  
+ * To place an instance structure into a const data section, the instance structure must be manually initialized.  
+ * The code below statically initializes each of the 3 different data type filter instance structures  
+ * <pre>  
+ * arm_fir_interpolate_instance_f32 S = {L, phaseLength, pCoeffs, pState};  
+ * arm_fir_interpolate_instance_q31 S = {L, phaseLength, pCoeffs, pState};  
+ * arm_fir_interpolate_instance_q15 S = {L, phaseLength, pCoeffs, pState};  
+ * </pre>  
+ * where <code>L</code> is the interpolation factor; <code>phaseLength=numTaps/L</code> is the  
+ * length of each of the shorter FIR filters used internally,  
+ * <code>pCoeffs</code> is the address of the coefficient buffer;  
+ * <code>pState</code> is the address of the state buffer.  
+ * Be sure to set the values in the state buffer to zeros when doing static initialization.  
+ *  
+ * \par Fixed-Point Behavior  
+ * Care must be taken when using the fixed-point versions of the FIR interpolate filter functions.  
+ * In particular, the overflow and saturation behavior of the accumulator used in each function must be considered.  
+ * Refer to the function specific documentation below for usage guidelines.  
+ */ 
+ 
+/**  
+ * @addtogroup FIR_Interpolate  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief Processing function for the floating-point FIR interpolator.  
+ * @param[in] *S        points to an instance of the floating-point FIR interpolator structure.  
+ * @param[in] *pSrc     points to the block of input data.  
+ * @param[out] *pDst    points to the block of output data.  
+ * @param[in] blockSize number of input samples to process per call.  
+ * @return none.  
+ */ 
+ 
+void arm_fir_interpolate_f32( 
+  const arm_fir_interpolate_instance_f32 * S, 
+  float32_t * pSrc, 
+  float32_t * pDst, 
+  uint32_t blockSize) 
+{ 
+  float32_t *pState = S->pState;                 /* Start of the state window used for the current input sample */ 
+  float32_t *pCoeffs = S->pCoeffs;               /* Coefficient pointer */ 
+  float32_t *pStateCurnt;                        /* Write position for the next input sample in the state buffer */ 
+  float32_t *ptr1, *ptr2;                        /* Running read pointers: ptr1 walks the state, ptr2 the coefficients */ 
+  float32_t sum0;                                /* Accumulator for one output sample */ 
+  float32_t x0, c0;                              /* Temporary variables to hold state and coefficient values */ 
+  uint32_t i, blkCnt, j;                         /* Loop counters */ 
+  uint16_t phaseLen = S->phaseLength, tapCnt;    /* Length of each polyphase filter component, and tap loop counter */ 
+ 
+ 
+  /* The first (phaseLen - 1) entries of S->pState hold the samples saved at the end of the previous call */ 
+  /* pStateCurnt points to the location where the new input data should be written */ 
+  pStateCurnt = S->pState + (phaseLen - 1u); 
+ 
+  /* Total number of input samples */ 
+  blkCnt = blockSize; 
+ 
+  /* Loop over the blockSize: each input sample produces S->L output samples. */ 
+  while(blkCnt > 0u) 
+  { 
+    /* Copy new input sample into the state buffer */ 
+    *pStateCurnt++ = *pSrc++; 
+ 
+    /* j selects the polyphase branch: the branch for index j starts at pCoeffs[S->L - j] */ 
+    j = 1u; 
+ 
+    /* Loop over the Interpolation factor. */ 
+    i = S->L; 
+    while(i > 0u) 
+    { 
+      /* Set accumulator to zero */ 
+      sum0 = 0.0f; 
+ 
+      /* Initialize state pointer */ 
+      ptr1 = pState; 
+ 
+      /* Initialize coefficient pointer */ 
+      ptr2 = pCoeffs + (S->L - j); 
+ 
+      /* Loop over the polyPhase length. Unroll by a factor of 4.  
+       ** Each pass consumes four state samples and four coefficients spaced S->L apart. */ 
+      tapCnt = phaseLen >> 2u; 
+      while(tapCnt > 0u) 
+      { 
+ 
+        /* Read the coefficient */ 
+        c0 = *(ptr2); 
+ 
+        /* Upsampling is done by stuffing L-1 zeros between each sample.  
+         * So instead of multiplying zeros with coefficients,  
+         * advance the coefficient pointer by the interpolation factor. */ 
+        ptr2 += S->L; 
+ 
+        /* Read the input sample */ 
+        x0 = *(ptr1++); 
+ 
+        /* Perform the multiply-accumulate */ 
+        sum0 += x0 * c0; 
+ 
+        /* Read the coefficient */ 
+        c0 = *(ptr2); 
+ 
+        /* Advance the coefficient pointer by the interpolation factor. */ 
+        ptr2 += S->L; 
+ 
+        /* Read the input sample */ 
+        x0 = *(ptr1++); 
+ 
+        /* Perform the multiply-accumulate */ 
+        sum0 += x0 * c0; 
+ 
+        /* Read the coefficient */ 
+        c0 = *(ptr2); 
+ 
+        /* Advance the coefficient pointer by the interpolation factor. */ 
+        ptr2 += S->L; 
+ 
+        /* Read the input sample */ 
+        x0 = *(ptr1++); 
+ 
+        /* Perform the multiply-accumulate */ 
+        sum0 += x0 * c0; 
+ 
+        /* Read the coefficient */ 
+        c0 = *(ptr2); 
+ 
+        /* Advance the coefficient pointer by the interpolation factor. */ 
+        ptr2 += S->L; 
+ 
+        /* Read the input sample */ 
+        x0 = *(ptr1++); 
+ 
+        /* Perform the multiply-accumulate */ 
+        sum0 += x0 * c0; 
+ 
+        /* Decrement the loop counter */ 
+        tapCnt--; 
+      } 
+ 
+      /* If the polyPhase length is not a multiple of 4, compute the remaining filter taps */ 
+      tapCnt = phaseLen % 0x4u; 
+ 
+      while(tapCnt > 0u) 
+      { 
+        /* Perform the multiply-accumulate */ 
+        sum0 += *(ptr1++) * (*ptr2); 
+ 
+        /* Advance the coefficient pointer by the interpolation factor. */ 
+        ptr2 += S->L; 
+ 
+        /* Decrement the loop counter */ 
+        tapCnt--; 
+      } 
+ 
+      /* The result is in the accumulator, store in the destination buffer. */ 
+      *pDst++ = sum0; 
+ 
+      /* Move on to the next polyphase branch */ 
+      j++; 
+ 
+      /* Decrement the loop counter */ 
+      i--; 
+    } 
+ 
+    /* Advance the state pointer by 1  
+     * so the next input sample is filtered against a window shifted by one sample */ 
+    pState = pState + 1; 
+ 
+    /* Decrement the loop counter */ 
+    blkCnt--; 
+  } 
+ 
+  /* Processing is complete.  
+   ** Now copy the last phaseLen - 1 samples to the start of the state buffer.  
+   ** This prepares the state buffer for the next function call. */ 
+ 
+  /* Points to the start of the state buffer */ 
+  pStateCurnt = S->pState; 
+ 
+  tapCnt = (phaseLen - 1u) >> 2u; 
+ 
+  /* Copy data, four samples per pass */ 
+  while(tapCnt > 0u) 
+  { 
+    *pStateCurnt++ = *pState++; 
+    *pStateCurnt++ = *pState++; 
+    *pStateCurnt++ = *pState++; 
+    *pStateCurnt++ = *pState++; 
+ 
+    /* Decrement the loop counter */ 
+    tapCnt--; 
+  } 
+ 
+  tapCnt = (phaseLen - 1u) % 0x04u; 
+ 
+  while(tapCnt > 0u) 
+  { 
+    *pStateCurnt++ = *pState++; 
+ 
+    /* Decrement the loop counter */ 
+    tapCnt--; 
+  } 
+} 
+ 
+ /**  
+  * @} end of FIR_Interpolate group  
+  */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FilteringFunctions/arm_fir_interpolate_init_f32.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,110 @@
+/*-----------------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:        arm_fir_interpolate_init_f32.c  
+*  
+* Description:  Floating-point FIR interpolator initialization function  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* ---------------------------------------------------------------------------*/ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupFilters  
+ */ 
+ 
+/**  
+ * @addtogroup FIR_Interpolate  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief  Initialization function for the floating-point FIR interpolator.  
+ * @param[in,out] *S        points to an instance of the floating-point FIR interpolator structure.  
+ * @param[in]     L         upsample factor.  
+ * @param[in]     numTaps   number of filter coefficients in the filter.  
+ * @param[in]     *pCoeffs  points to the filter coefficient buffer.  
+ * @param[in]     *pState   points to the state buffer.  
+ * @param[in]     blockSize number of input samples to process per call.  
+ * @return        The function returns ARM_MATH_SUCCESS if initialization was successful or ARM_MATH_LENGTH_ERROR if  
+ * the filter length <code>numTaps</code> is not a multiple of the interpolation factor <code>L</code>.  
+ *  
+ * <b>Description:</b>  
+ * \par  
+ * <code>pCoeffs</code> points to the array of filter coefficients stored in time reversed order:  
+ * <pre>  
+ *    {b[numTaps-1], b[numTaps-2], b[numTaps-3], ..., b[1], b[0]}  
+ * </pre>  
+ * The length of the filter <code>numTaps</code> must be a multiple of the interpolation factor <code>L</code>.  
+ * \par  
+ * <code>pState</code> points to the array of state variables.  
+ * <code>pState</code> is of length <code>(numTaps/L)+blockSize-1</code> words  
+ * where <code>blockSize</code> is the number of input samples processed by each call to <code>arm_fir_interpolate_f32()</code>.  
+ */ 
+ 
+arm_status arm_fir_interpolate_init_f32( 
+  arm_fir_interpolate_instance_f32 * S, 
+  uint8_t L, 
+  uint16_t numTaps, 
+  float32_t * pCoeffs, 
+  float32_t * pState, 
+  uint32_t blockSize) 
+{ 
+  arm_status status; 
+ 
+  /* L must be non-zero (numTaps % 0 is undefined) and must divide the filter length */ 
+  if((L == 0u) || ((numTaps % L) != 0u)) 
+  { 
+    /* Reject the configuration; the instance and the state buffer are left untouched */ 
+    status = ARM_MATH_LENGTH_ERROR; 
+  } 
+  else 
+  { 
+ 
+    /* Assign coefficient pointer */ 
+    S->pCoeffs = pCoeffs; 
+ 
+    /* Assign Interpolation factor */ 
+    S->L = L; 
+ 
+    /* Length of each polyphase sub-filter (exact, since L divides numTaps) */ 
+    S->phaseLength = numTaps / L; 
+ 
+    /* Clear the state buffer; it always holds phaseLength + blockSize - 1 float32_t samples */ 
+    memset(pState, 0, 
+           (blockSize + 
+            ((uint32_t) S->phaseLength - 1u)) * sizeof(float32_t)); 
+ 
+    /* Assign state pointer */ 
+    S->pState = pState; 
+ 
+    status = ARM_MATH_SUCCESS; 
+  } 
+ 
+  return (status); 
+ 
+} 
+ 
+ /**  
+  * @} end of FIR_Interpolate group  
+  */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FilteringFunctions/arm_fir_interpolate_init_q15.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,109 @@
+/*-----------------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:        arm_fir_interpolate_init_q15.c  
+*  
+* Description:  Q15 FIR interpolator initialization function  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* ---------------------------------------------------------------------------*/ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupFilters  
+ */ 
+ 
+/**  
+ * @addtogroup FIR_Interpolate  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief  Initialization function for the Q15 FIR interpolator.  
+ * @param[in,out] *S        points to an instance of the Q15 FIR interpolator structure.  
+ * @param[in]     L         upsample factor.  
+ * @param[in]     numTaps   number of filter coefficients in the filter.  
+ * @param[in]     *pCoeffs  points to the filter coefficient buffer.  
+ * @param[in]     *pState   points to the state buffer.  
+ * @param[in]     blockSize number of input samples to process per call.  
+ * @return        The function returns ARM_MATH_SUCCESS if initialization was successful or ARM_MATH_LENGTH_ERROR if  
+ * the filter length <code>numTaps</code> is not a multiple of the interpolation factor <code>L</code>.  
+ *  
+ * <b>Description:</b>  
+ * \par  
+ * <code>pCoeffs</code> points to the array of filter coefficients stored in time reversed order:  
+ * <pre>  
+ *    {b[numTaps-1], b[numTaps-2], b[numTaps-3], ..., b[1], b[0]}  
+ * </pre>  
+ * The length of the filter <code>numTaps</code> must be a multiple of the interpolation factor <code>L</code>.  
+ * \par  
+ * <code>pState</code> points to the array of state variables.  
+ * <code>pState</code> is of length <code>(numTaps/L)+blockSize-1</code> samples  
+ * where <code>blockSize</code> is the number of input samples processed by each call to <code>arm_fir_interpolate_q15()</code>.  
+ */ 
+ 
+arm_status arm_fir_interpolate_init_q15( 
+  arm_fir_interpolate_instance_q15 * S, 
+  uint8_t L, 
+  uint16_t numTaps, 
+  q15_t * pCoeffs, 
+  q15_t * pState, 
+  uint32_t blockSize) 
+{ 
+  arm_status status; 
+ 
+  /* L must be non-zero (numTaps % 0 is undefined) and must divide the filter length */ 
+  if((L == 0u) || ((numTaps % L) != 0u)) 
+  { 
+    /* Reject the configuration; the instance and the state buffer are left untouched */ 
+    status = ARM_MATH_LENGTH_ERROR; 
+  } 
+  else 
+  { 
+ 
+    /* Assign coefficient pointer */ 
+    S->pCoeffs = pCoeffs; 
+ 
+    /* Assign Interpolation factor */ 
+    S->L = L; 
+ 
+    /* Length of each polyphase sub-filter (exact, since L divides numTaps) */ 
+    S->phaseLength = numTaps / L; 
+ 
+    /* Clear the state buffer; it always holds phaseLength + blockSize - 1 q15_t samples */ 
+    memset(pState, 0, 
+           (blockSize + ((uint32_t) S->phaseLength - 1u)) * sizeof(q15_t)); 
+ 
+    /* Assign state pointer */ 
+    S->pState = pState; 
+ 
+    status = ARM_MATH_SUCCESS; 
+  } 
+ 
+  return (status); 
+ 
+} 
+ 
+ /**  
+  * @} end of FIR_Interpolate group  
+  */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FilteringFunctions/arm_fir_interpolate_init_q31.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,110 @@
+/*-----------------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:        arm_fir_interpolate_init_q31.c  
+*  
+* Description:  Q31 FIR interpolator initialization function  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* ---------------------------------------------------------------------------*/ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupFilters  
+ */ 
+ 
+/**  
+ * @addtogroup FIR_Interpolate  
+ * @{  
+ */ 
+ 
+ 
+/**  
+ * @brief  Initialization function for the Q31 FIR interpolator.  
+ * @param[in,out] *S        points to an instance of the Q31 FIR interpolator structure.  
+ * @param[in]     L         upsample factor.  
+ * @param[in]     numTaps   number of filter coefficients in the filter.  
+ * @param[in]     *pCoeffs  points to the filter coefficient buffer.  
+ * @param[in]     *pState   points to the state buffer.  
+ * @param[in]     blockSize number of input samples to process per call.  
+ * @return        The function returns ARM_MATH_SUCCESS if initialization was successful or ARM_MATH_LENGTH_ERROR if  
+ * the filter length <code>numTaps</code> is not a multiple of the interpolation factor <code>L</code>.  
+ *  
+ * <b>Description:</b>  
+ * \par  
+ * <code>pCoeffs</code> points to the array of filter coefficients stored in time reversed order:  
+ * <pre>  
+ *    {b[numTaps-1], b[numTaps-2], b[numTaps-3], ..., b[1], b[0]}  
+ * </pre>  
+ * The length of the filter <code>numTaps</code> must be a multiple of the interpolation factor <code>L</code>.  
+ * \par  
+ * <code>pState</code> points to the array of state variables.  
+ * <code>pState</code> is of length <code>(numTaps/L)+blockSize-1</code> words  
+ * where <code>blockSize</code> is the number of input samples processed by each call to <code>arm_fir_interpolate_q31()</code>.  
+ */ 
+ 
+arm_status arm_fir_interpolate_init_q31( 
+  arm_fir_interpolate_instance_q31 * S, 
+  uint8_t L, 
+  uint16_t numTaps, 
+  q31_t * pCoeffs, 
+  q31_t * pState, 
+  uint32_t blockSize) 
+{ 
+  arm_status status; 
+ 
+  /* L must be non-zero (numTaps % 0 is undefined) and must divide the filter length */ 
+  if((L == 0u) || ((numTaps % L) != 0u)) 
+  { 
+    /* Reject the configuration; the instance and the state buffer are left untouched */ 
+    status = ARM_MATH_LENGTH_ERROR; 
+  } 
+  else 
+  { 
+ 
+    /* Assign coefficient pointer */ 
+    S->pCoeffs = pCoeffs; 
+ 
+    /* Assign Interpolation factor */ 
+    S->L = L; 
+ 
+    /* Length of each polyphase sub-filter (exact, since L divides numTaps) */ 
+    S->phaseLength = numTaps / L; 
+ 
+    /* Clear the state buffer; it always holds phaseLength + blockSize - 1 q31_t samples */ 
+    memset(pState, 0, 
+           (blockSize + ((uint32_t) S->phaseLength - 1u)) * sizeof(q31_t)); 
+ 
+    /* Assign state pointer */ 
+    S->pState = pState; 
+ 
+    status = ARM_MATH_SUCCESS; 
+  } 
+ 
+  return (status); 
+ 
+} 
+ 
+ /**  
+  * @} end of FIR_Interpolate group  
+  */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FilteringFunctions/arm_fir_interpolate_q15.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,232 @@
+/*-----------------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_fir_interpolate_q15.c  
+*  
+* Description:	Q15 FIR interpolation.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* ---------------------------------------------------------------------------*/ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupFilters  
+ */ 
+ 
+/**  
+ * @addtogroup FIR_Interpolate  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief Processing function for the Q15 FIR interpolator.  
+ * @param[in] *S        points to an instance of the Q15 FIR interpolator structure.  
+ * @param[in] *pSrc     points to the block of input data.  
+ * @param[out] *pDst    points to the block of output data.  
+ * @param[in] blockSize number of input samples to process per call.  
+ * @return none.  
+ *  
+ * <b>Scaling and Overflow Behavior:</b>  
+ * \par  
+ * The function is implemented using a 64-bit internal accumulator.  
+ * Both coefficients and state variables are represented in 1.15 format and multiplications yield a 2.30 result.  
+ * The 2.30 intermediate results are accumulated in a 64-bit accumulator in 34.30 format.  
+ * There is no risk of internal overflow with this approach and the full precision of intermediate multiplications is preserved.  
+ * After all additions have been performed, the accumulator is truncated to 34.15 format by discarding low 15 bits.  
+ * Lastly, the accumulator is saturated to yield a result in 1.15 format.  
+ */ 
+ 
+void arm_fir_interpolate_q15( 
+  const arm_fir_interpolate_instance_q15 * S, 
+  q15_t * pSrc, 
+  q15_t * pDst, 
+  uint32_t blockSize) 
+{ 
+  q15_t *pState = S->pState;                     /* State pointer                                            */ 
+  q15_t *pCoeffs = S->pCoeffs;                   /* Coefficient pointer                                      */ 
+  q15_t *pStateCurnt;                            /* Points to the current sample of the state                */ 
+  q15_t *ptr1, *ptr2;                            /* Temporary pointers for state and coefficient buffers     */ 
+  q63_t sum0;                                    /* 64-bit accumulator for one output sample                 */ 
+  q15_t x0, c0, c1;                              /* Temporary variables to hold state and coefficient values */ 
+  q31_t c, x;                                    /* Packed pairs of coefficients / state samples             */ 
+  uint32_t i, blkCnt, j, tapCnt;                 /* Loop counters                                            */ 
+  uint16_t phaseLen = S->phaseLength;            /* Length of each polyphase filter component */ 
+ 
+ 
+  /* The first (phaseLen - 1) entries of S->pState hold the samples saved at the end of the previous call */ 
+  /* pStateCurnt points to the location where the new input data should be written */ 
+  pStateCurnt = S->pState + (phaseLen - 1u); 
+ 
+  /* Total number of input samples */ 
+  blkCnt = blockSize; 
+ 
+  /* Loop over the blockSize: each input sample produces S->L output samples. */ 
+  while(blkCnt > 0u) 
+  { 
+    /* Copy new input sample into the state buffer */ 
+    *pStateCurnt++ = *pSrc++; 
+ 
+    /* j selects the polyphase branch: the branch for index j starts at pCoeffs[S->L - j] */ 
+    j = 1u; 
+ 
+    /* Loop over the Interpolation factor. */ 
+    i = S->L; 
+    while(i > 0u) 
+    { 
+      /* Set accumulator to zero */ 
+      sum0 = 0; 
+ 
+      /* Initialize state pointer */ 
+      ptr1 = pState; 
+ 
+      /* Initialize coefficient pointer */ 
+      ptr2 = pCoeffs + (S->L - j); 
+ 
+      /* Loop over the polyPhase length. Unroll by a factor of 4.  
+       ** Each pass consumes four state samples and four coefficients spaced S->L apart. */ 
+      tapCnt = (uint32_t) phaseLen >> 2u; 
+      while(tapCnt > 0u) 
+      { 
+        /* Read the coefficient */ 
+        c0 = *(ptr2); 
+ 
+        /* Upsampling is done by stuffing L-1 zeros between each sample.  
+         * So instead of multiplying zeros with coefficients,  
+         * advance the coefficient pointer by the interpolation factor. */ 
+        ptr2 += S->L; 
+ 
+        /* Read the coefficient */ 
+        c1 = *(ptr2); 
+ 
+        /* Advance the coefficient pointer by the interpolation factor. */ 
+        ptr2 += S->L; 
+ 
+        /* Pack the coefficients */ 
+        c = __PKHBT(c0, c1, 16); 
+ 
+        /* Read two consecutive input samples */ 
+        x = *__SIMD32(ptr1)++; 
+ 
+        /* Perform the dual multiply-accumulate */ 
+        sum0 = __SMLALD(x, c, sum0); 
+ 
+        /* Read the coefficient */ 
+        c0 = *(ptr2); 
+ 
+        /* Upsampling is done by stuffing L-1 zeros between each sample.  
+         * So instead of multiplying zeros with coefficients,  
+         * advance the coefficient pointer by the interpolation factor. */ 
+        ptr2 += S->L; 
+ 
+        /* Read the coefficient */ 
+        c1 = *(ptr2); 
+ 
+        /* Advance the coefficient pointer by the interpolation factor. */ 
+        ptr2 += S->L; 
+ 
+        /* Pack the coefficients */ 
+        c = __PKHBT(c0, c1, 16); 
+ 
+        /* Read two consecutive input samples */ 
+        x = *__SIMD32(ptr1)++; 
+ 
+        /* Perform the dual multiply-accumulate */ 
+        sum0 = __SMLALD(x, c, sum0); 
+ 
+        /* Decrement the loop counter */ 
+        tapCnt--; 
+      } 
+ 
+      /* If the polyPhase length is not a multiple of 4, compute the remaining filter taps */ 
+      tapCnt = (uint32_t) phaseLen & 0x3u; 
+ 
+      while(tapCnt > 0u) 
+      { 
+        /* Read the coefficient */ 
+        c0 = *(ptr2); 
+ 
+        /* Advance the coefficient pointer by the interpolation factor. */ 
+        ptr2 += S->L; 
+ 
+        /* Read the input sample */ 
+        x0 = *(ptr1++); 
+        /* Scalar multiply-accumulate. __SMLALD must not be used here: it would also  
+         * multiply the sign-extension halfwords of x0 and c0 (adding 1 when both are negative). */ 
+        sum0 += (q63_t) x0 * c0; 
+ 
+        /* Decrement the loop counter */ 
+        tapCnt--; 
+      } 
+ 
+      /* Drop the low 15 bits of the accumulator and saturate to 1.15 before storing the result. */ 
+      *pDst++ = (q15_t) (__SSAT((sum0 >> 15), 16)); 
+ 
+      /* Move on to the next polyphase branch */ 
+      j++; 
+ 
+      /* Decrement the loop counter */ 
+      i--; 
+    } 
+ 
+    /* Advance the state pointer by 1  
+     * so the next input sample is filtered against a window shifted by one sample */ 
+    pState = pState + 1; 
+ 
+    /* Decrement the loop counter */ 
+    blkCnt--; 
+  } 
+ 
+  /* Processing is complete.  
+   ** Now copy the last phaseLen - 1 samples to the start of the state buffer.  
+   ** This prepares the state buffer for the next function call. */ 
+ 
+  /* Points to the start of the state buffer */ 
+  pStateCurnt = S->pState; 
+ 
+  i = ((uint32_t) phaseLen - 1u) >> 2u; 
+ 
+  /* Copy data, four samples per pass (two 32-bit transfers of two q15_t each) */ 
+  while(i > 0u) 
+  { 
+    *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++; 
+    *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++; 
+ 
+    /* Decrement the loop counter */ 
+    i--; 
+  } 
+ 
+  i = ((uint32_t) phaseLen - 1u) % 0x04u; 
+ 
+  while(i > 0u) 
+  { 
+    *pStateCurnt++ = *pState++; 
+ 
+    /* Decrement the loop counter */ 
+    i--; 
+  } 
+ 
+} 
+ 
+ /**  
+  * @} end of FIR_Interpolate group  
+  */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FilteringFunctions/arm_fir_interpolate_q31.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,239 @@
+/*-----------------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_fir_interpolate_q31.c  
+*  
+* Description:	Q31 FIR interpolation.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* ---------------------------------------------------------------------------*/ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupFilters  
+ */ 
+ 
+/**  
+ * @addtogroup FIR_Interpolate  
+ * @{  
+ */ 
+ 
+/**
+ * @brief Processing function for the Q31 FIR interpolator.
+ * @param[in]  *S        points to an instance of the Q31 FIR interpolator structure.
+ *                       The structure is only read; the state buffer it points to is updated.
+ * @param[in]  *pSrc     points to the block of input data (blockSize samples are consumed).
+ * @param[out] *pDst     points to the block of output data (S->L samples are produced per input sample).
+ * @param[in]  blockSize number of input samples to process per call.
+ * @return none.
+ *
+ * <b>Scaling and Overflow Behavior:</b>
+ * \par
+ * Each sample/coefficient product is widened to 64 bits and summed in a 64-bit accumulator
+ * (2.62 format for 1.31 operands), so intermediate products keep full precision but there is
+ * only a single guard bit; an overflowing sum is not clipped.
+ * To rule out overflow the input signal should be scaled down by <code>1/(numTaps/L)</code>,
+ * since <code>numTaps/L</code> (the polyphase length) additions are made per output sample.
+ * Once all multiply-accumulates are done the accumulator is shifted right by 31 bits and
+ * converted to q31_t with a plain cast; this function applies no explicit saturation.
+ */
+
+
+void arm_fir_interpolate_q31(
+  const arm_fir_interpolate_instance_q31 * S,
+  q31_t * pSrc,
+  q31_t * pDst,
+  uint32_t blockSize)
+{
+  q31_t *pWindow = S->pState;                    /* Oldest sample of the current filter window */
+  q31_t *pCoeffs = S->pCoeffs;                   /* Start of the coefficient array */
+  q31_t *pWrite;                                 /* Write cursor inside the state buffer */
+  q31_t *pSamp, *pCoef;                          /* Sample and coefficient cursors for one output */
+  q63_t acc;                                     /* 64-bit accumulator */
+  uint32_t samplesLeft, phasesLeft, phase;       /* Loop counters */
+  uint16_t phaseLen = S->phaseLength, cnt;       /* Polyphase component length and a short counter */
+
+
+  /* The first (phaseLen - 1) state entries hold the history of the previous call,
+   * so fresh input is appended right behind them. */
+  pWrite = S->pState + ((q31_t) phaseLen - 1);
+
+  /* One pass per input sample */
+  for(samplesLeft = blockSize; samplesLeft > 0u; samplesLeft--)
+  {
+    /* Append the new input sample to the state buffer */
+    *pWrite++ = *pSrc++;
+
+    /* phase selects the polyphase branch: coefficients start at index (L - phase) */
+    phase = 1u;
+
+    /* One output per interpolation phase */
+    for(phasesLeft = S->L; phasesLeft > 0u; phasesLeft--)
+    {
+      acc = 0;
+      pSamp = pWindow;
+      pCoef = pCoeffs + (S->L - phase);
+
+      /* Main tap loop, four taps per iteration.
+       * The zero-stuffed samples of the upsampled signal are never multiplied;
+       * instead the coefficient cursor strides by L so that only the taps
+       * that meet real samples are used. */
+      for(cnt = phaseLen >> 2; cnt > 0u; cnt--)
+      {
+        acc += (q63_t) (*pSamp++) * (*pCoef);
+        pCoef += S->L;
+
+        acc += (q63_t) (*pSamp++) * (*pCoef);
+        pCoef += S->L;
+
+        acc += (q63_t) (*pSamp++) * (*pCoef);
+        pCoef += S->L;
+
+        acc += (q63_t) (*pSamp++) * (*pCoef);
+        pCoef += S->L;
+      }
+
+      /* Left-over taps when the polyphase length is not a multiple of 4 */
+      for(cnt = phaseLen & 0x3u; cnt > 0u; cnt--)
+      {
+        acc += (q63_t) (*pSamp++) * (*pCoef);
+        pCoef += S->L;
+      }
+
+      /* Scale the accumulator back down and emit the output sample */
+      *pDst++ = (q31_t) (acc >> 31);
+
+      /* Move on to the next polyphase branch */
+      phase++;
+    }
+
+    /* Slide the filter window forward by one input sample */
+    pWindow = pWindow + 1;
+  }
+
+  /* All outputs are computed.
+   * Move the most recent (phaseLen - 1) samples to the start of the state buffer
+   * so that they serve as history for the next call. */
+  pWrite = S->pState;
+
+  /* Copy four samples per iteration */
+  for(cnt = (phaseLen - 1u) >> 2u; cnt > 0u; cnt--)
+  {
+    *pWrite++ = *pWindow++;
+    *pWrite++ = *pWindow++;
+    *pWrite++ = *pWindow++;
+    *pWrite++ = *pWindow++;
+  }
+
+  /* Copy the remaining zero to three samples */
+  for(cnt = (phaseLen - 1u) % 0x04u; cnt > 0u; cnt--)
+  {
+    *pWrite++ = *pWindow++;
+  }
+}
+ 
+ /**  
+  * @} end of FIR_Interpolate group  
+  */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FilteringFunctions/arm_fir_lattice_f32.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,413 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:	    arm_fir_lattice_f32.c  
+*  
+* Description:	Processing function for the floating-point FIR Lattice filter.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupFilters  
+ */ 
+ 
+/**  
+ * @defgroup FIR_Lattice Finite Impulse Response (FIR) Lattice Filters  
+ *  
+ * This set of functions implements Finite Impulse Response (FIR) lattice filters  
+ * for Q15, Q31 and floating-point data types.  Lattice filters are used in a   
+ * variety of adaptive filter applications.  The filter structure is feedforward and  
+ * the net impulse response is finite length.  
+ * The functions operate on blocks  
+ * of input and output data and each call to the function processes  
+ * <code>blockSize</code> samples through the filter.  <code>pSrc</code> and  
+ * <code>pDst</code> point to input and output arrays containing <code>blockSize</code> values.  
+ *  
+ * \par Algorithm:  
+ * \image html FIRLattice.gif "Finite Impulse Response Lattice filter"  
+ * The following difference equation is implemented:  
+ * <pre>  
+ *    f0[n] = g0[n] = x[n]  
+ *    fm[n] = fm-1[n] + km * gm-1[n-1] for m = 1, 2, ...M  
+ *    gm[n] = km * fm-1[n] + gm-1[n-1] for m = 1, 2, ...M  
+ *    y[n] = fM[n]  
+ * </pre>  
+ * \par  
+ * <code>pCoeffs</code> points to the array of reflection coefficients of size <code>numStages</code>.  
+ * Reflection Coefficients are stored in the following order.  
+ * \par  
+ * <pre>  
+ *    {k1, k2, ..., kM}  
+ * </pre>  
+ * where M is number of stages  
+ * \par  
+ * <code>pState</code> points to a state array of size <code>numStages</code>.  
+ * The state variables (g values) hold previous inputs and are stored in the following order.  
+ * <pre>  
+ *    {g0[n], g1[n], g2[n] ...gM-1[n]}  
+ * </pre>  
+ * The state variables are updated after each block of data is processed; the coefficients are untouched.  
+ * \par Instance Structure  
+ * The coefficients and state variables for a filter are stored together in an instance data structure.  
+ * A separate instance structure must be defined for each filter.  
+ * Coefficient arrays may be shared among several instances while state variable arrays cannot be shared.  
+ * There are separate instance structure declarations for each of the 3 supported data types.  
+ *  
+ * \par Initialization Functions  
+ * There is also an associated initialization function for each data type.  
+ * The initialization function performs the following operations:  
+ * - Sets the values of the internal structure fields.  
+ * - Zeros out the values in the state buffer.  
+ *  
+ * \par  
+ * Use of the initialization function is optional.  
+ * However, if the initialization function is used, then the instance structure cannot be placed into a const data section.  
+ * To place an instance structure into a const data section, the instance structure must be manually initialized.  
+ * Set the values in the state buffer to zeros and then manually initialize the instance structure as follows:  
+ * <pre>  
+ *arm_fir_lattice_instance_f32 S = {numStages, pState, pCoeffs};  
+ *arm_fir_lattice_instance_q31 S = {numStages, pState, pCoeffs};  
+ *arm_fir_lattice_instance_q15 S = {numStages, pState, pCoeffs};  
+ * </pre>  
+ * \par  
+ * where <code>numStages</code> is the number of stages in the filter; <code>pState</code> is the address of the state buffer;  
+ * <code>pCoeffs</code> is the address of the coefficient buffer.  
+ * \par Fixed-Point Behavior  
+ * Care must be taken when using the fixed-point versions of the FIR Lattice filter functions.  
+ * In particular, the overflow and saturation behavior of the accumulator used in each function must be considered.  
+ * Refer to the function specific documentation below for usage guidelines.  
+ */ 
+ 
+/**  
+ * @addtogroup FIR_Lattice  
+ * @{  
+ */ 
+ 
+ 
+  /**
+   * @brief Processing function for the floating-point FIR lattice filter.
+   * @param[in]  *S        points to an instance of the floating-point FIR lattice structure.
+   *                       The structure itself is only read; the state buffer S->pState is updated in place.
+   * @param[in]  *pSrc     points to the block of input data; blockSize samples are read.
+   * @param[out] *pDst     points to the block of output data; blockSize samples are written.
+   * @param[in]  blockSize number of samples to process.
+   * @return none.
+   *
+   * \par Implementation notes
+   * Samples are processed in groups of four (blockSize >> 2 groups); the remaining
+   * blockSize % 4 samples go through a one-sample-at-a-time loop at the end.
+   * In the four-sample path the g values of the first three samples of a group are carried
+   * in local variables from one sample to the next, and only the g value belonging to the
+   * fourth (latest) sample of the group is written back to the state buffer for each stage.
+   *
+   * NOTE(review): both paths read the first coefficient and the first state entry
+   * unconditionally and evaluate (numStages - 1u) in unsigned arithmetic, so numStages is
+   * presumably required to be at least 1 - confirm against the callers.
+   */
+
+void arm_fir_lattice_f32(
+  const arm_fir_lattice_instance_f32 * S,
+  float32_t * pSrc,
+  float32_t * pDst,
+  uint32_t blockSize)
+{
+  float32_t *pState;                             /* Start of the state buffer */
+  float32_t *pCoeffs = S->pCoeffs;               /* Start of the reflection coefficient array */
+  float32_t *px;                                 /* Cursor walking through the state buffer */
+  float32_t *pk;                                 /* Cursor walking through the coefficients */
+  float32_t fcurr1, fnext1, gcurr1, gnext1;      /* f/g values of the first sample; gcurr1 is the delayed g read from state */
+  float32_t fcurr2, fnext2, gnext2;              /* f/g values of the second sample of a group */
+  float32_t fcurr3, fnext3, gnext3;              /* f/g values of the third sample of a group */
+  float32_t fcurr4, fnext4, gnext4;              /* f/g values of the fourth sample of a group */
+  uint32_t numStages = S->numStages;             /* Number of stages in the filter */
+  uint32_t blkCnt, stageCnt;                     /* Block and stage loop counters */
+
+  /* gcurr1 is always reloaded from the state buffer before it is used */
+  gcurr1 = 0.0f;
+  pState = &S->pState[0];
+
+  /* Number of complete groups of four samples */
+  blkCnt = blockSize >> 2;
+
+  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
+     A second loop below computes the remaining 1 to 3 samples. */
+  while(blkCnt > 0u)
+  {
+
+    /* Read the first two samples of the group from the input buffer */
+    /* f0(n) = x(n), f0(n+1) = x(n+1) */
+    fcurr1 = *pSrc++;
+    fcurr2 = *pSrc++;
+
+    /* Restart at the first coefficient */
+    pk = (pCoeffs);
+
+    /* Restart at the first state entry */
+    px = pState;
+
+    /* Read g0(n-1) from state */
+    gcurr1 = *px;
+
+    /* First stage, first sample */
+    /* f1(n) = f0(n) +  K1 * g0(n-1) */
+    fnext1 = fcurr1 + ((*pk) * gcurr1);
+    /* g1(n) = f0(n) * K1  +  g0(n-1) */
+    gnext1 = (fcurr1 * (*pk)) + gcurr1;
+
+    /* First stage, second sample */
+    /* g0 equals the input, so the delayed g0 for this sample is the previous input fcurr1 */
+    fnext2 = fcurr2 + ((*pk) * fcurr1);
+    gnext2 = (fcurr2 * (*pk)) + fcurr1;
+
+    /* Read the last two samples of the group from the input buffer */
+    /* f0(n+2) = x(n+2), f0(n+3) = x(n+3) */
+    fcurr3 = *pSrc++;
+    fcurr4 = *pSrc++;
+
+    /* Store only the last input sample of the group in the state buffer;
+       it is the delayed g0 needed by the next group or by the tail loop */
+    *px++ = fcurr4;
+
+    /* First stage, third sample (delayed g0 is fcurr2) */
+    fnext3 = fcurr3 + ((*pk) * fcurr2);
+    gnext3 = (fcurr3 * (*pk)) + fcurr2;
+
+    /* First stage, fourth sample (delayed g0 is fcurr3); also advances to the next coefficient */
+    fnext4 = fcurr4 + ((*pk) * fcurr3);
+    gnext4 = (fcurr4 * (*pk++)) + fcurr3;
+
+    /* The stage outputs become the inputs of the next stage */
+    fcurr1 = fnext1;
+    fcurr2 = fnext2;
+    fcurr3 = fnext3;
+    fcurr4 = fnext4;
+
+    /* Number of complete groups of four among the remaining (numStages - 1) stages */
+    stageCnt = (numStages - 1u) >> 2u;
+
+    /* Loop over the remaining stages, four stages per iteration.
+     ** Inside the loop the roles of fcurr and fnext alternate from stage to stage,
+     ** so no copy is needed between stages; after the fourth stage the results
+     ** are back in fcurr1..fcurr4. */
+
+    /* Process 2nd, 3rd, 4th and 5th stages ... here */
+    while(stageCnt > 0u)
+    {
+      /* Read g1(n-1), g5(n-1) .... from state */
+      gcurr1 = *px;
+
+      /* Overwrite it with the same stage's g value for the fourth sample of this group */
+      *px++ = gnext4;
+
+      /* Stage with K2, K6...., first sample */
+      /* f2(n) = f1(n) +  K2 * g1(n-1) */
+      fnext1 = fcurr1 + ((*pk) * gcurr1);
+      /* Second sample: its delayed g is the g of the first sample (gnext1) */
+      fnext2 = fcurr2 + ((*pk) * gnext1);
+      /* Third sample: its delayed g is the g of the second sample */
+      fnext3 = fcurr3 + ((*pk) * gnext2);
+      /* Fourth sample: its delayed g is the g of the third sample */
+      fnext4 = fcurr4 + ((*pk) * gnext3);
+
+      /* g2(n) = f1(n) * K2  +  g1(n-1) */
+      /* g values are updated from the fourth sample down to the first, so each
+         update still sees the previous stage's g of the preceding sample */
+      gnext4 = (fcurr4 * (*pk)) + gnext3;
+      gnext3 = (fcurr3 * (*pk)) + gnext2;
+      gnext2 = (fcurr2 * (*pk)) + gnext1;
+      gnext1 = (fcurr1 * (*pk++)) + gcurr1;
+
+
+      /* Read g2(n-1), g6(n-1) .... from state */
+      gcurr1 = *px;
+
+      /* Overwrite it with the same stage's g value for the fourth sample of this group */
+      *px++ = gnext4;
+
+      /* Stage with K3, K7...., inputs are in fnext, outputs go to fcurr */
+      /* First sample */
+      /* f3(n) = f2(n) +  K3 * g2(n-1) */
+      fcurr1 = fnext1 + ((*pk) * gcurr1);
+      /* Second sample */
+      fcurr2 = fnext2 + ((*pk) * gnext1);
+      /* Third sample */
+      fcurr3 = fnext3 + ((*pk) * gnext2);
+      /* Fourth sample */
+      fcurr4 = fnext4 + ((*pk) * gnext3);
+
+      /* g values for the next stage, fourth sample first */
+      /* g3(n) = f2(n) * K3  +  g2(n-1) */
+      gnext4 = (fnext4 * (*pk)) + gnext3;
+      gnext3 = (fnext3 * (*pk)) + gnext2;
+      gnext2 = (fnext2 * (*pk)) + gnext1;
+      gnext1 = (fnext1 * (*pk++)) + gcurr1;
+
+
+      /* Read g3(n-1), g7(n-1) .... from state */
+      gcurr1 = *px;
+
+      /* Overwrite it with the same stage's g value for the fourth sample of this group */
+      *px++ = gnext4;
+
+      /* Stage with K4, K8...., inputs are in fcurr, outputs go to fnext */
+      /* First sample */
+      /* f4(n) = f3(n) +  K4 * g3(n-1) */
+      fnext1 = fcurr1 + ((*pk) * gcurr1);
+      /* Second sample */
+      fnext2 = fcurr2 + ((*pk) * gnext1);
+      /* Third sample */
+      fnext3 = fcurr3 + ((*pk) * gnext2);
+      /* Fourth sample */
+      fnext4 = fcurr4 + ((*pk) * gnext3);
+
+      /* g4(n) = f3(n) * K4  +  g3(n-1) */
+      /* g values for the next stage, fourth sample first */
+      gnext4 = (fcurr4 * (*pk)) + gnext3;
+      gnext3 = (fcurr3 * (*pk)) + gnext2;
+      gnext2 = (fcurr2 * (*pk)) + gnext1;
+      gnext1 = (fcurr1 * (*pk++)) + gcurr1;
+
+      /* Read g4(n-1), g8(n-1) .... from state */
+      gcurr1 = *px;
+
+      /* Overwrite it with the same stage's g value for the fourth sample of this group */
+      *px++ = gnext4;
+
+      /* Stage with K5, K9...., inputs are in fnext, outputs go to fcurr */
+      /* First sample */
+      /* f5(n) = f4(n) +  K5 * g4(n-1) */
+      fcurr1 = fnext1 + ((*pk) * gcurr1);
+      /* Second sample */
+      fcurr2 = fnext2 + ((*pk) * gnext1);
+      /* Third sample */
+      fcurr3 = fnext3 + ((*pk) * gnext2);
+      /* Fourth sample */
+      fcurr4 = fnext4 + ((*pk) * gnext3);
+
+      /* g values for the next stage, fourth sample first */
+      /* g5(n) = f4(n) * K5  +  g4(n-1) */
+      gnext4 = (fnext4 * (*pk)) + gnext3;
+      gnext3 = (fnext3 * (*pk)) + gnext2;
+      gnext2 = (fnext2 * (*pk)) + gnext1;
+      gnext1 = (fnext1 * (*pk++)) + gcurr1;
+
+      stageCnt--;
+    }
+
+    /* If (numStages - 1) is not a multiple of 4, process the remaining 1 to 3 stages one at a time */
+    stageCnt = (numStages - 1u) % 0x4u;
+
+    while(stageCnt > 0u)
+    {
+      /* Read this stage's delayed g value from state */
+      gcurr1 = *px;
+
+      /* Overwrite it with the same stage's g value for the fourth sample of this group */
+      *px++ = gnext4;
+
+      /* f outputs of this stage for the four samples */
+      fnext1 = fcurr1 + ((*pk) * gcurr1);
+      fnext2 = fcurr2 + ((*pk) * gnext1);
+      fnext3 = fcurr3 + ((*pk) * gnext2);
+      fnext4 = fcurr4 + ((*pk) * gnext3);
+
+      /* g outputs of this stage: gm(n) = fm-1(n) * Km  +  gm-1(n-1), fourth sample first */
+      gnext4 = (fcurr4 * (*pk)) + gnext3;
+      gnext3 = (fcurr3 * (*pk)) + gnext2;
+      gnext2 = (fcurr2 * (*pk)) + gnext1;
+      gnext1 = (fcurr1 * (*pk++)) + gcurr1;
+
+      /* The stage outputs become the inputs of the next stage */
+      fcurr1 = fnext1;
+      fcurr2 = fnext2;
+      fcurr3 = fnext3;
+      fcurr4 = fnext4;
+
+      stageCnt--;
+
+    }
+
+    /* The f values of the last stage are the four outputs; store them in the destination buffer. */
+    /* y(n) = fM(n) */
+    *pDst++ = fcurr1;
+    *pDst++ = fcurr2;
+    *pDst++ = fcurr3;
+    *pDst++ = fcurr4;
+
+    blkCnt--;
+  }
+
+  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
+   ** No loop unrolling is used. */
+  blkCnt = blockSize % 0x4u;
+
+  while(blkCnt > 0u)
+  {
+    /* f0(n) = x(n) */
+    fcurr1 = *pSrc++;
+
+    /* Restart at the first coefficient */
+    pk = (pCoeffs);
+
+    /* Restart at the first state entry */
+    px = pState;
+
+    /* Read g0(n-1) from the state buffer */
+    gcurr1 = *px;
+
+    /* First stage */
+    /* f1(n) = f0(n) +  K1 * g0(n-1) */
+    fnext1 = fcurr1 + ((*pk) * gcurr1);
+    /* g1(n) = f0(n) * K1  +  g0(n-1) */
+    gnext1 = (fcurr1 * (*pk++)) + gcurr1;
+
+    /* Save g0(n) = x(n) in the state buffer */
+    *px++ = fcurr1;
+
+    /* f1(n) is saved in fcurr1
+       for next stage processing */
+    fcurr1 = fnext1;
+
+    /* Remaining stages after the first one */
+    stageCnt = (numStages - 1u);
+
+    /* stage loop */
+    while(stageCnt > 0u)
+    {
+      /* Read the previous stage's delayed g value, gm-1(n-1), from the state buffer */
+      gcurr1 = *px;
+
+      /* Replace it with gm-1(n), computed by the previous stage */
+      *px++ = gnext1;
+
+      /* Sample processing for K2, K3.... */
+      /* fm(n) = fm-1(n) +  Km * gm-1(n-1) */
+      fnext1 = fcurr1 + ((*pk) * gcurr1);
+      /* gm(n) = fm-1(n) * Km  +  gm-1(n-1) */
+      gnext1 = (fcurr1 * (*pk++)) + gcurr1;
+
+      /* fm(n) is saved in fcurr1
+         for next stage processing */
+      fcurr1 = fnext1;
+
+      stageCnt--;
+
+    }
+
+    /* y(n) = fM(n) */
+    *pDst++ = fcurr1;
+
+    blkCnt--;
+
+  }
+}
+ 
+/**  
+ * @} end of FIR_Lattice group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FilteringFunctions/arm_fir_lattice_init_f32.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,72 @@
+/*-----------------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:        arm_fir_lattice_init_f32.c  
+*  
+* Description:  Floating-point FIR Lattice filter initialization function.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* ---------------------------------------------------------------------------*/ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupFilters  
+ */ 
+ 
+/**  
+ * @addtogroup FIR_Lattice  
+ * @{  
+ */ 
+ 
+/**
+ * @brief Initialization function for the floating-point FIR lattice filter.
+ * @param[in,out] *S       points to the floating-point FIR lattice instance to fill in.
+ * @param[in] numStages    number of filter stages.
+ * @param[in] *pCoeffs     points to the coefficient buffer.  The array is of length numStages.
+ * @param[in,out] *pState  points to the state buffer.  The array is of length numStages
+ *                         and is cleared to zero by this function.
+ * @return none.
+ */
+
+void arm_fir_lattice_init_f32(
+  arm_fir_lattice_instance_f32 * S,
+  uint16_t numStages,
+  float32_t * pCoeffs,
+  float32_t * pState)
+{
+  /* Start from an all-zero history: clear numStages state entries */
+  memset(pState, 0, sizeof(float32_t) * (numStages));
+
+  /* Record the filter description in the instance structure */
+  S->pState = pState;
+  S->pCoeffs = pCoeffs;
+  S->numStages = numStages;
+}
+ 
+/**  
+ * @} end of FIR_Lattice group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FilteringFunctions/arm_fir_lattice_init_q15.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,72 @@
+/*-----------------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:        arm_fir_lattice_init_q15.c  
+*  
+* Description:  Q15 FIR Lattice filter initialization function.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* ---------------------------------------------------------------------------*/ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupFilters  
+ */ 
+ 
+/**  
+ * @addtogroup FIR_Lattice  
+ * @{  
+ */ 
+ 
+  /**
+   * @brief Initialization function for the Q15 FIR lattice filter.
+   * @param[in,out] *S       points to the Q15 FIR lattice instance to fill in.
+   * @param[in] numStages    number of filter stages.
+   * @param[in] *pCoeffs     points to the coefficient buffer.  The array is of length numStages.
+   * @param[in,out] *pState  points to the state buffer.  The array is of length numStages
+   *                         and is cleared to zero by this function.
+   * @return none.
+   */
+
+void arm_fir_lattice_init_q15(
+  arm_fir_lattice_instance_q15 * S,
+  uint16_t numStages,
+  q15_t * pCoeffs,
+  q15_t * pState)
+{
+  /* Start from an all-zero history: clear numStages state entries */
+  memset(pState, 0, sizeof(q15_t) * (numStages));
+
+  /* Record the filter description in the instance structure */
+  S->pState = pState;
+  S->pCoeffs = pCoeffs;
+  S->numStages = numStages;
+}
+ 
+/**  
+ * @} end of FIR_Lattice group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FilteringFunctions/arm_fir_lattice_init_q31.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,72 @@
+/*-----------------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:        arm_fir_lattice_init_q31.c  
+*  
+* Description:  Q31 FIR lattice filter initialization function.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* ---------------------------------------------------------------------------*/ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupFilters  
+ */ 
+ 
+/**  
+ * @addtogroup FIR_Lattice  
+ * @{  
+ */ 
+ 
+  /**
+   * @brief Initialization function for the Q31 FIR lattice filter.
+   * @param[in,out] *S       points to the Q31 FIR lattice instance to fill in.
+   * @param[in] numStages    number of filter stages.
+   * @param[in] *pCoeffs     points to the coefficient buffer.  The array is of length numStages.
+   * @param[in,out] *pState  points to the state buffer.  The array is of length numStages
+   *                         and is cleared to zero by this function.
+   * @return none.
+   */
+
+void arm_fir_lattice_init_q31(
+  arm_fir_lattice_instance_q31 * S,
+  uint16_t numStages,
+  q31_t * pCoeffs,
+  q31_t * pState)
+{
+  /* Start from an all-zero history: clear numStages state entries */
+  memset(pState, 0, sizeof(q31_t) * (numStages));
+
+  /* Record the filter description in the instance structure */
+  S->pState = pState;
+  S->pCoeffs = pCoeffs;
+  S->numStages = numStages;
+}
+ 
+/**  
+ * @} end of FIR_Lattice group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FilteringFunctions/arm_fir_lattice_q15.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,426 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:	    arm_fir_lattice_q15.c  
+*  
+* Description:	Q15 FIR lattice filter processing function.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupFilters  
+ */ 
+ 
+/**  
+ * @addtogroup FIR_Lattice  
+ * @{  
+ */ 
+ 
+ 
+/**  
+ * @brief Processing function for the Q15 FIR lattice filter.  
+ * @param[in]  *S        points to an instance of the Q15 FIR lattice structure; its state buffer is read and updated.  
+ * @param[in]  *pSrc     points to the block of input data.  
+ * @param[out] *pDst     points to the block of output data.  
+ * @param[in]  blockSize number of samples to process.  
+ * @return none.  Every stage result is passed through __SSAT(x, 16) before it is used or stored.  
+ */ 
+ 
+void arm_fir_lattice_q15( 
+  const arm_fir_lattice_instance_q15 * S, 
+  q15_t * pSrc, 
+  q15_t * pDst, 
+  uint32_t blockSize) 
+{ 
+  q15_t *pState;                                 /* State pointer */ 
+  q15_t *pCoeffs = S->pCoeffs;                   /* Coefficient pointer */ 
+  q15_t *px;                                     /* Walking state pointer, reset for every sample group */ 
+  q15_t *pk;                                     /* Walking coefficient pointer, reset for every sample group */ 
+  q31_t fcurnt1, fnext1, gcurnt1 = 0, gnext1;    /* f/g temporaries for sample n (also used by the tail loop) */ 
+  q31_t fcurnt2, fnext2, gnext2;                 /* f/g temporaries for sample n+1 */ 
+  q31_t fcurnt3, fnext3, gnext3;                 /* f/g temporaries for sample n+2 */ 
+  q31_t fcurnt4, fnext4, gnext4;                 /* f/g temporaries for sample n+3 */ 
+  uint32_t numStages = S->numStages;             /* Number of stages in the filter */ 
+  uint32_t blkCnt, stageCnt;                     /* Loop counters for sample groups and stages */ 
+ 
+  pState = &S->pState[0]; 
+ 
+  blkCnt = blockSize >> 2u; 
+ 
+  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.  
+   ** A second loop below computes the remaining 1 to 3 samples. */ 
+  while(blkCnt > 0u) 
+  { 
+ 
+    /* Read two samples from input buffer */ 
+    /* f0(n) = x(n), f0(n+1) = x(n+1) */ 
+    fcurnt1 = *pSrc++; 
+    fcurnt2 = *pSrc++; 
+ 
+    /* Initialize coeff pointer */ 
+    pk = (pCoeffs); 
+ 
+    /* Initialize state pointer */ 
+    px = pState; 
+ 
+    /* Read g0(n-1) from state (slot 0 holds the previously stored input sample) */ 
+    gcurnt1 = *px; 
+ 
+    /* Process first sample for first tap */ 
+    /* f1(n) = f0(n) +  K1 * g0(n-1) */ 
+    fnext1 = (q31_t) ((gcurnt1 * (*pk)) >> 15u) + fcurnt1; 
+    fnext1 = __SSAT(fnext1, 16); 
+ 
+    /* g1(n) = f0(n) * K1  +  g0(n-1) */ 
+    gnext1 = (q31_t) ((fcurnt1 * (*pk)) >> 15u) + gcurnt1; 
+    gnext1 = __SSAT(gnext1, 16); 
+ 
+    /* Process second sample for first tap; g0(n) is simply x(n), i.e. fcurnt1 */ 
+    /* f1(n+1) = f0(n+1) + K1 * g0(n),  g1(n+1) = f0(n+1) * K1 + g0(n) */ 
+    fnext2 = (q31_t) ((fcurnt1 * (*pk)) >> 15u) + fcurnt2; 
+    fnext2 = __SSAT(fnext2, 16); 
+ 
+    gnext2 = (q31_t) ((fcurnt2 * (*pk)) >> 15u) + fcurnt1; 
+    gnext2 = __SSAT(gnext2, 16); 
+ 
+ 
+    /* Read next two samples from input buffer */ 
+    /* f0(n+2) = x(n+2), f0(n+3) = x(n+3) */ 
+    fcurnt3 = *pSrc++; 
+    fcurnt4 = *pSrc++; 
+ 
+    /* Store only the last input sample x(n+3) in state slot 0;  
+       it is the g0(n-1) value read when the next samples are processed */ 
+    *px++ = (q15_t) fcurnt4; 
+ 
+    /* Process third sample for first tap (g0(n+1) = x(n+1) = fcurnt2) */ 
+    fnext3 = (q31_t) ((fcurnt2 * (*pk)) >> 15u) + fcurnt3; 
+    fnext3 = __SSAT(fnext3, 16); 
+    gnext3 = (q31_t) ((fcurnt3 * (*pk)) >> 15u) + fcurnt2; 
+    gnext3 = __SSAT(gnext3, 16); 
+ 
+    /* Process fourth sample for first tap (g0(n+2) = x(n+2) = fcurnt3); K1 is consumed here */ 
+    fnext4 = (q31_t) ((fcurnt3 * (*pk)) >> 15u) + fcurnt4; 
+    fnext4 = __SSAT(fnext4, 16); 
+    gnext4 = (q31_t) ((fcurnt4 * (*pk++)) >> 15u) + fcurnt3; 
+    gnext4 = __SSAT(gnext4, 16); 
+ 
+    /* Update of f values for next coefficient set processing */ 
+    fcurnt1 = fnext1; 
+    fcurnt2 = fnext2; 
+    fcurnt3 = fnext3; 
+    fcurnt4 = fnext4; 
+ 
+ 
+    /* Remaining numStages - 1 taps, 4 per pass.  NOTE(review): numStages == 0 makes the unsigned numStages - 1u wrap. */ 
+    stageCnt = (numStages - 1u) >> 2; 
+ 
+ 
+    /* Loop over the taps, unrolled by a factor of 4.  
+     ** Runs (numStages - 1) / 4 times; leftover taps are handled after this loop. */ 
+ 
+    /* Process 2nd, 3rd, 4th and 5th taps ... here */ 
+    while(stageCnt > 0u) 
+    { 
+      /* Read g1(n-1), g5(n-1) .... from state */ 
+      gcurnt1 = *px; 
+ 
+      /* Save the previous tap's g for the fourth sample, g1(n+3), in the slot just read */ 
+      *px++ = (q15_t) gnext4; 
+ 
+      /* Process first sample for 2nd, 6th .. tap */ 
+      /* Sample processing for K2, K6.... */ 
+      /* f2(n) = f1(n) +  K2 * g1(n-1) */ 
+      fnext1 = (q31_t) ((gcurnt1 * (*pk)) >> 15u) + fcurnt1; 
+      fnext1 = __SSAT(fnext1, 16); 
+ 
+ 
+      /* Process second sample for 2nd, 6th .. tap */ 
+      /* f2(n+1) = f1(n+1) + K2 * g1(n); gnext1 still holds g1(n) */ 
+      fnext2 = (q31_t) ((gnext1 * (*pk)) >> 15u) + fcurnt2; 
+      fnext2 = __SSAT(fnext2, 16); 
+      /* Process third sample for 2nd, 6th .. tap */ 
+      fnext3 = (q31_t) ((gnext2 * (*pk)) >> 15u) + fcurnt3; 
+      fnext3 = __SSAT(fnext3, 16); 
+      /* Process fourth sample for 2nd, 6th .. tap */ 
+      /* f2(n+3) = f1(n+3) + K2 * g1(n+2) */ 
+      fnext4 = (q31_t) ((gnext3 * (*pk)) >> 15u) + fcurnt4; 
+      fnext4 = __SSAT(fnext4, 16); 
+ 
+      /* g2(n) = f1(n) * K2  +  g1(n-1) */ 
+      /* g values are updated from sample 4 down to 1 so each line still reads the previous tap's g */ 
+      gnext4 = (q31_t) ((fcurnt4 * (*pk)) >> 15u) + gnext3; 
+      gnext4 = __SSAT(gnext4, 16); 
+      gnext3 = (q31_t) ((fcurnt3 * (*pk)) >> 15u) + gnext2; 
+      gnext3 = __SSAT(gnext3, 16); 
+ 
+      gnext2 = (q31_t) ((fcurnt2 * (*pk)) >> 15u) + gnext1; 
+      gnext2 = __SSAT(gnext2, 16); 
+ 
+      gnext1 = (q31_t) ((fcurnt1 * (*pk++)) >> 15u) + gcurnt1; 
+      gnext1 = __SSAT(gnext1, 16); 
+ 
+ 
+      /* Read g2(n-1), g6(n-1) .... from state */ 
+      gcurnt1 = *px; 
+ 
+      /* Save g2(n+3) in the slot just read */ 
+      *px++ = (q15_t) gnext4; 
+ 
+      /* Sample processing for K3, K7.... */ 
+      /* Process first sample for 3rd, 7th .. tap; results go to fcurnt*, inputs come from fnext* */ 
+      /* f3(n) = f2(n) +  K3 * g2(n-1) */ 
+      fcurnt1 = (q31_t) ((gcurnt1 * (*pk)) >> 15u) + fnext1; 
+      fcurnt1 = __SSAT(fcurnt1, 16); 
+ 
+      /* Process second sample for 3rd, 7th .. tap */ 
+      fcurnt2 = (q31_t) ((gnext1 * (*pk)) >> 15u) + fnext2; 
+      fcurnt2 = __SSAT(fcurnt2, 16); 
+ 
+      /* Process third sample for 3rd, 7th .. tap */ 
+      fcurnt3 = (q31_t) ((gnext2 * (*pk)) >> 15u) + fnext3; 
+      fcurnt3 = __SSAT(fcurnt3, 16); 
+ 
+      /* Process fourth sample for 3rd, 7th .. tap */ 
+      fcurnt4 = (q31_t) ((gnext3 * (*pk)) >> 15u) + fnext4; 
+      fcurnt4 = __SSAT(fcurnt4, 16); 
+ 
+      /* Calculation of state values for next stage */ 
+      /* g3(n) = f2(n) * K3  +  g2(n-1) */ 
+      gnext4 = (q31_t) ((fnext4 * (*pk)) >> 15u) + gnext3; 
+      gnext4 = __SSAT(gnext4, 16); 
+ 
+      gnext3 = (q31_t) ((fnext3 * (*pk)) >> 15u) + gnext2; 
+      gnext3 = __SSAT(gnext3, 16); 
+ 
+      gnext2 = (q31_t) ((fnext2 * (*pk)) >> 15u) + gnext1; 
+      gnext2 = __SSAT(gnext2, 16); 
+ 
+      gnext1 = (q31_t) ((fnext1 * (*pk++)) >> 15u) + gcurnt1; 
+      gnext1 = __SSAT(gnext1, 16); 
+ 
+      /* Read g3(n-1), g7(n-1) .... from state */ 
+      gcurnt1 = *px; 
+ 
+      /* Save g3(n+3) in the slot just read */ 
+      *px++ = (q15_t) gnext4; 
+ 
+      /* Sample processing for K4, K8.... */ 
+      /* Process first sample for 4th, 8th .. tap */ 
+      /* f4(n) = f3(n) +  K4 * g3(n-1) */ 
+      fnext1 = (q31_t) ((gcurnt1 * (*pk)) >> 15u) + fcurnt1; 
+      fnext1 = __SSAT(fnext1, 16); 
+ 
+      /* Process second sample for 4th, 8th .. tap */ 
+      /* f4(n+1) = f3(n+1) + K4 * g3(n) */ 
+      fnext2 = (q31_t) ((gnext1 * (*pk)) >> 15u) + fcurnt2; 
+      fnext2 = __SSAT(fnext2, 16); 
+ 
+      /* Process third sample for 4th, 8th .. tap */ 
+      fnext3 = (q31_t) ((gnext2 * (*pk)) >> 15u) + fcurnt3; 
+      fnext3 = __SSAT(fnext3, 16); 
+ 
+      /* Process fourth sample for 4th, 8th .. tap */ 
+      fnext4 = (q31_t) ((gnext3 * (*pk)) >> 15u) + fcurnt4; 
+      fnext4 = __SSAT(fnext4, 16); 
+ 
+      /* g4(n) = f3(n) * K4  +  g3(n-1) */ 
+      /* Calculation of state values for next stage */ 
+      gnext4 = (q31_t) ((fcurnt4 * (*pk)) >> 15u) + gnext3; 
+      gnext4 = __SSAT(gnext4, 16); 
+ 
+      gnext3 = (q31_t) ((fcurnt3 * (*pk)) >> 15u) + gnext2; 
+      gnext3 = __SSAT(gnext3, 16); 
+ 
+      gnext2 = (q31_t) ((fcurnt2 * (*pk)) >> 15u) + gnext1; 
+      gnext2 = __SSAT(gnext2, 16); 
+      gnext1 = (q31_t) ((fcurnt1 * (*pk++)) >> 15u) + gcurnt1; 
+      gnext1 = __SSAT(gnext1, 16); 
+ 
+ 
+      /* Read g4(n-1), g8(n-1) .... from state */ 
+      gcurnt1 = *px; 
+ 
+      /* Save g4(n+3) in the slot just read */ 
+      *px++ = (q15_t) gnext4; 
+ 
+      /* Sample processing for K5, K9.... */ 
+      /* Process first sample for 5th, 9th .. tap */ 
+      /* f5(n) = f4(n) +  K5 * g4(n-1) */ 
+      fcurnt1 = (q31_t) ((gcurnt1 * (*pk)) >> 15u) + fnext1; 
+      fcurnt1 = __SSAT(fcurnt1, 16); 
+ 
+      /* Process second sample for 5th, 9th .. tap */ 
+      fcurnt2 = (q31_t) ((gnext1 * (*pk)) >> 15u) + fnext2; 
+      fcurnt2 = __SSAT(fcurnt2, 16); 
+ 
+      /* Process third sample for 5th, 9th .. tap */ 
+      fcurnt3 = (q31_t) ((gnext2 * (*pk)) >> 15u) + fnext3; 
+      fcurnt3 = __SSAT(fcurnt3, 16); 
+ 
+      /* Process fourth sample for 5th, 9th .. tap */ 
+      fcurnt4 = (q31_t) ((gnext3 * (*pk)) >> 15u) + fnext4; 
+      fcurnt4 = __SSAT(fcurnt4, 16); 
+ 
+      /* Calculation of state values for next stage */ 
+      /* g5(n) = f4(n) * K5  +  g4(n-1) */ 
+      gnext4 = (q31_t) ((fnext4 * (*pk)) >> 15u) + gnext3; 
+      gnext4 = __SSAT(gnext4, 16); 
+      gnext3 = (q31_t) ((fnext3 * (*pk)) >> 15u) + gnext2; 
+      gnext3 = __SSAT(gnext3, 16); 
+      gnext2 = (q31_t) ((fnext2 * (*pk)) >> 15u) + gnext1; 
+      gnext2 = __SSAT(gnext2, 16); 
+      gnext1 = (q31_t) ((fnext1 * (*pk++)) >> 15u) + gcurnt1; 
+      gnext1 = __SSAT(gnext1, 16); 
+ 
+      stageCnt--; 
+    } 
+ 
+    /* If (numStages - 1) is not a multiple of 4, compute the remaining 1 to 3 taps one at a time */ 
+    stageCnt = (numStages - 1u) % 0x4u; 
+ 
+    while(stageCnt > 0u) 
+    { 
+      gcurnt1 = *px; 
+ 
+      /* Save the previous tap's g for the fourth sample in the slot just read */ 
+      *px++ = (q15_t) gnext4; 
+ 
+      /* Process all four samples for one leftover tap: f(k) = f(k-1) + K(k) * g(k-1) of the preceding sample */ 
+      fnext1 = (q31_t) ((gcurnt1 * (*pk)) >> 15u) + fcurnt1; 
+      fnext1 = __SSAT(fnext1, 16); 
+      fnext2 = (q31_t) ((gnext1 * (*pk)) >> 15u) + fcurnt2; 
+      fnext2 = __SSAT(fnext2, 16); 
+ 
+      fnext3 = (q31_t) ((gnext2 * (*pk)) >> 15u) + fcurnt3; 
+      fnext3 = __SSAT(fnext3, 16); 
+ 
+      fnext4 = (q31_t) ((gnext3 * (*pk)) >> 15u) + fcurnt4; 
+      fnext4 = __SSAT(fnext4, 16); 
+ 
+      /* g(k) = f(k-1) * K(k) + g(k-1) of the preceding sample; updated from sample 4 down to 1 */ 
+      gnext4 = (q31_t) ((fcurnt4 * (*pk)) >> 15u) + gnext3; 
+      gnext4 = __SSAT(gnext4, 16); 
+      gnext3 = (q31_t) ((fcurnt3 * (*pk)) >> 15u) + gnext2; 
+      gnext3 = __SSAT(gnext3, 16); 
+      gnext2 = (q31_t) ((fcurnt2 * (*pk)) >> 15u) + gnext1; 
+      gnext2 = __SSAT(gnext2, 16); 
+      gnext1 = (q31_t) ((fcurnt1 * (*pk++)) >> 15u) + gcurnt1; 
+      gnext1 = __SSAT(gnext1, 16); 
+ 
+      /* Update of f values for next coefficient set processing */ 
+      fcurnt1 = fnext1; 
+      fcurnt2 = fnext2; 
+      fcurnt3 = fnext3; 
+      fcurnt4 = fnext4; 
+ 
+      stageCnt--; 
+ 
+    } 
+ 
+    /* Store the four outputs y(n..n+3) = fN(n..n+3) as two 32-bit writes of two q15 values each. */ 
+    /* NOTE(review): __SIMD32/__PKHBT are defined outside this file; the halfword order presumably assumes little-endian - confirm. */ 
+    *__SIMD32(pDst)++ = __PKHBT(fcurnt1, fcurnt2, 16); 
+    *__SIMD32(pDst)++ = __PKHBT(fcurnt3, fcurnt4, 16); 
+ 
+    blkCnt--; 
+  } 
+ 
+  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.  
+   ** No loop unrolling is used. */ 
+  blkCnt = blockSize % 0x4u; 
+ 
+  while(blkCnt > 0u) 
+  { 
+    /* f0(n) = x(n) */ 
+    fcurnt1 = *pSrc++; 
+ 
+    /* Initialize coeff pointer */ 
+    pk = (pCoeffs); 
+ 
+    /* Initialize state pointer */ 
+    px = pState; 
+ 
+    /* Read g0(n-1) from state slot 0 */ 
+    gcurnt1 = *px; 
+ 
+    /* for sample 1 processing */ 
+    /* f1(n) = f0(n) +  K1 * g0(n-1) */ 
+    fnext1 = (((q31_t) gcurnt1 * (*pk)) >> 15u) + fcurnt1; 
+    fnext1 = __SSAT(fnext1, 16); 
+ 
+ 
+    /* g1(n) = f0(n) * K1  +  g0(n-1) */ 
+    gnext1 = (((q31_t) fcurnt1 * (*pk++)) >> 15u) + gcurnt1; 
+    gnext1 = __SSAT(gnext1, 16); 
+ 
+    /* Save g0(n) = x(n) in state slot 0 for the next sample */ 
+    *px++ = (q15_t) fcurnt1; 
+ 
+    /* f1(n) is saved in fcurnt1  
+       for next stage processing */ 
+    fcurnt1 = fnext1; 
+ 
+    stageCnt = (numStages - 1u); 
+ 
+    /* Stage loop over taps 2..numStages.  NOTE(review): wraps to a huge count if numStages == 0. */ 
+    while(stageCnt > 0u) 
+    { 
+      /* Read the previous sample's g for this slot, g(k-1)(n-1) */ 
+      gcurnt1 = *px; 
+ 
+      /* Overwrite the slot with g(k-1)(n) computed by the previous tap */ 
+      *px++ = (q15_t) gnext1; 
+ 
+      /* Sample processing for K2, K3.... */ 
+      /* f2(n) = f1(n) +  K2 * g1(n-1) */ 
+      fnext1 = (((q31_t) gcurnt1 * (*pk)) >> 15u) + fcurnt1; 
+      fnext1 = __SSAT(fnext1, 16); 
+ 
+      /* g2(n) = f1(n) * K2  +  g1(n-1) */ 
+      gnext1 = (((q31_t) fcurnt1 * (*pk++)) >> 15u) + gcurnt1; 
+      gnext1 = __SSAT(gnext1, 16); 
+ 
+ 
+      /* The new f value is saved in fcurnt1  
+         for next stage processing */ 
+      fcurnt1 = fnext1; 
+ 
+      stageCnt--; 
+ 
+    } 
+ 
+    /* y(n) = fN(n) */ 
+    *pDst++ = __SSAT(fcurnt1, 16); 
+ 
+ 
+    blkCnt--; 
+ 
+  } 
+} 
+ 
+/**  
+ * @} end of FIR_Lattice group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FilteringFunctions/arm_fir_lattice_q31.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,358 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:	    arm_fir_lattice_q31.c  
+*  
+* Description:	Q31 FIR lattice filter processing function.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupFilters  
+ */ 
+ 
+/**  
+ * @addtogroup FIR_Lattice  
+ * @{  
+ */ 
+ 
+ 
+/**  
+ * @brief Processing function for the Q31 FIR lattice filter.  
+ * @param[in]  *S        points to an instance of the Q31 FIR lattice structure; its state buffer is read and updated.  
+ * @param[in]  *pSrc     points to the block of input data.  
+ * @param[out] *pDst     points to the block of output data.  
+ * @param[in]  blockSize number of samples to process.  
+ * @return none.  
+ *  
+ * @details  
+ * <b>Scaling and Overflow Behavior:</b>  
+ * In order to avoid overflows the input signal must be scaled down by 2*log2(numStages) bits.  
+ */ 
+ 
+void arm_fir_lattice_q31( 
+  const arm_fir_lattice_instance_q31 * S, 
+  q31_t * pSrc, 
+  q31_t * pDst, 
+  uint32_t blockSize) 
+{ 
+  q31_t *pState;                                 /* State pointer */ 
+  q31_t *pCoeffs = S->pCoeffs;                   /* Coefficient pointer */ 
+  q31_t *px;                                     /* Walking state pointer, reset for every sample group */ 
+  q31_t *pk;                                     /* Walking coefficient pointer, reset for every sample group */ 
+  q31_t fcurr1, fnext1, gcurr1 = 0, gnext1;      /* 32-bit f/g temporaries for sample n (also used by the tail loop) */ 
+  q63_t fcurr2, fnext2, gnext2;                  /* 64-bit f/g temporaries for sample n+1 */ 
+  q63_t fcurr3, fnext3, gnext3;                  /* 64-bit f/g temporaries for sample n+2 */ 
+  q63_t fcurr4, fnext4, gnext4;                  /* 64-bit f/g temporaries for sample n+3 */ 
+  uint32_t numStages = S->numStages;             /* Number of stages in the filter */ 
+  uint32_t blkCnt, stageCnt;                     /* Loop counters for sample groups and stages */ 
+ 
+  pState = &S->pState[0]; 
+ 
+  blkCnt = blockSize >> 2u; 
+ 
+  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.  
+   ** A second loop below computes the remaining 1 to 3 samples.  No saturation is applied in this function. */ 
+  while(blkCnt > 0u) 
+  { 
+ 
+    /* Read two samples from input buffer */ 
+    /* f0(n) = x(n) */ 
+    fcurr1 = *pSrc++; 
+    /* f0(n+1) = x(n+1) */ 
+    fcurr2 = *pSrc++; 
+ 
+    /* Initialize coeff pointer */ 
+    pk = (pCoeffs); 
+ 
+    /* Initialize state pointer */ 
+    px = pState; 
+ 
+    /* Read g0(n-1) from state (slot 0 holds the previously stored input sample) */ 
+    gcurr1 = *px; 
+ 
+    /* Process first sample for first tap */ 
+    /* f1(n) = f0(n) +  K1 * g0(n-1) */ 
+    fnext1 = (q31_t) (((q63_t) gcurr1 * (*pk)) >> 31) + fcurr1; 
+    /* g1(n) = f0(n) * K1  +  g0(n-1) */ 
+    gnext1 = (q31_t) (((q63_t) fcurr1 * (*pk)) >> 31) + gcurr1; 
+ 
+    /* Process second sample for first tap; g0(n) is simply x(n), i.e. fcurr1 */ 
+    /* f1(n+1) = f0(n+1) + K1 * g0(n),  g1(n+1) = f0(n+1) * K1 + g0(n) */ 
+    fnext2 = (q31_t) (((q63_t) fcurr1 * (*pk)) >> 31) + fcurr2; 
+    gnext2 = (q31_t) (((q63_t) fcurr2 * (*pk)) >> 31) + fcurr1; 
+ 
+ 
+    /* Read next two samples from input buffer */ 
+    /* f0(n+2) = x(n+2), f0(n+3) = x(n+3) */ 
+    fcurr3 = *pSrc++; 
+    fcurr4 = *pSrc++; 
+ 
+    /* Store only the last input sample x(n+3) in state slot 0;  
+       it is the g0(n-1) value read when the next samples are processed */ 
+    *px++ = (q31_t) fcurr4; 
+ 
+    /* Process third sample for first tap (g0(n+1) = x(n+1) = fcurr2) */ 
+    fnext3 = (q31_t) (((q63_t) fcurr2 * (*pk)) >> 31) + fcurr3; 
+    gnext3 = (q31_t) (((q63_t) fcurr3 * (*pk)) >> 31) + fcurr2; 
+ 
+    /* Process fourth sample for first tap (g0(n+2) = x(n+2) = fcurr3); K1 is consumed here */ 
+    fnext4 = (q31_t) (((q63_t) fcurr3 * (*pk)) >> 31) + fcurr4; 
+    gnext4 = (q31_t) (((q63_t) fcurr4 * (*pk++)) >> 31) + fcurr3; 
+ 
+    /* g1(n+3), held in gnext4, is not stored here; */ 
+    /* it is written to state slot 1 at the top of the tap loops below */ 
+ 
+    /* Update of f values for next coefficient set processing */ 
+    fcurr1 = fnext1; 
+    fcurr2 = fnext2; 
+    fcurr3 = fnext3; 
+    fcurr4 = fnext4; 
+ 
+ 
+    /* Remaining numStages - 1 taps, 4 per pass.  NOTE(review): numStages == 0 makes the unsigned numStages - 1u wrap. */ 
+    stageCnt = (numStages - 1u) >> 2u; 
+ 
+ 
+    /* Loop over the taps, unrolled by a factor of 4.  
+     ** Runs (numStages - 1) / 4 times; leftover taps are handled after this loop. */ 
+ 
+    /* Process 2nd, 3rd, 4th and 5th taps ... here */ 
+    while(stageCnt > 0u) 
+    { 
+      /* Read g1(n-1), g5(n-1) .... from state */ 
+      gcurr1 = *px; 
+ 
+      /* Save the previous tap's g for the fourth sample, g1(n+3), narrowed to q31_t */ 
+      *px++ = (q31_t) gnext4; 
+ 
+      /* Process first sample for 2nd, 6th .. tap */ 
+      /* Sample processing for K2, K6.... */ 
+      /* f2(n) = f1(n) +  K2 * g1(n-1) */ 
+      fnext1 = (q31_t) (((q63_t) gcurr1 * (*pk)) >> 31) + fcurr1; 
+      /* Process second sample for 2nd, 6th .. tap */ 
+      /* f2(n+1) = f1(n+1) + K2 * g1(n); gnext1 still holds g1(n) */ 
+      fnext2 = (q31_t) (((q63_t) gnext1 * (*pk)) >> 31) + fcurr2; 
+      /* Process third sample for 2nd, 6th .. tap */ 
+      fnext3 = (q31_t) (((q63_t) gnext2 * (*pk)) >> 31) + fcurr3; 
+      /* Process fourth sample for 2nd, 6th .. tap */ 
+      fnext4 = (q31_t) (((q63_t) gnext3 * (*pk)) >> 31) + fcurr4; 
+ 
+      /* g2(n) = f1(n) * K2  +  g1(n-1) */ 
+      /* g values are updated from sample 4 down to 1 so each line still reads the previous tap's g */ 
+      gnext4 = (q31_t) (((q63_t) fcurr4 * (*pk)) >> 31) + gnext3; 
+      gnext3 = (q31_t) (((q63_t) fcurr3 * (*pk)) >> 31) + gnext2; 
+      gnext2 = (q31_t) (((q63_t) fcurr2 * (*pk)) >> 31) + gnext1; 
+      gnext1 = (q31_t) (((q63_t) fcurr1 * (*pk++)) >> 31) + gcurr1; 
+ 
+ 
+      /* Read g2(n-1), g6(n-1) .... from state */ 
+      gcurr1 = *px; 
+ 
+      /* Save g2(n+3) in the slot just read */ 
+      *px++ = (q31_t) gnext4; 
+ 
+      /* Sample processing for K3, K7.... */ 
+      /* Process first sample for 3rd, 7th .. tap; results go to fcurr*, inputs come from fnext* */ 
+      /* f3(n) = f2(n) +  K3 * g2(n-1) */ 
+      fcurr1 = (q31_t) (((q63_t) gcurr1 * (*pk)) >> 31) + fnext1; 
+      /* Process second sample for 3rd, 7th .. tap */ 
+      fcurr2 = (q31_t) (((q63_t) gnext1 * (*pk)) >> 31) + fnext2; 
+      /* Process third sample for 3rd, 7th .. tap */ 
+      fcurr3 = (q31_t) (((q63_t) gnext2 * (*pk)) >> 31) + fnext3; 
+      /* Process fourth sample for 3rd, 7th .. tap */ 
+      fcurr4 = (q31_t) (((q63_t) gnext3 * (*pk)) >> 31) + fnext4; 
+ 
+      /* Calculation of state values for next stage */ 
+      /* g3(n+3) = f2(n+3) * K3 + g2(n+2) */ 
+      gnext4 = (q31_t) (((q63_t) fnext4 * (*pk)) >> 31) + gnext3; 
+      gnext3 = (q31_t) (((q63_t) fnext3 * (*pk)) >> 31) + gnext2; 
+      /* g3(n+1) = f2(n+1) * K3 + g2(n) */ 
+      gnext2 = (q31_t) (((q63_t) fnext2 * (*pk)) >> 31) + gnext1; 
+ 
+      /* g3(n) = f2(n) * K3  +  g2(n-1) */ 
+      /* K3 is consumed by this last use (pk++) */ 
+      gnext1 = (q31_t) (((q63_t) fnext1 * (*pk++)) >> 31) + gcurr1; 
+ 
+      /* Read g3(n-1), g7(n-1) .... from state */ 
+      gcurr1 = *px; 
+ 
+      /* Save g3(n+3) in the slot just read */ 
+      *px++ = (q31_t) gnext4; 
+ 
+      /* Sample processing for K4, K8.... */ 
+      /* Process first sample for 4th, 8th .. tap */ 
+      /* f4(n) = f3(n) +  K4 * g3(n-1) */ 
+      fnext1 = (q31_t) (((q63_t) gcurr1 * (*pk)) >> 31) + fcurr1; 
+      /* Process second sample for 4th, 8th .. tap */ 
+      /* f4(n+1) = f3(n+1) + K4 * g3(n) */ 
+      fnext2 = (q31_t) (((q63_t) gnext1 * (*pk)) >> 31) + fcurr2; 
+      /* Process third sample for 4th, 8th .. tap */ 
+      fnext3 = (q31_t) (((q63_t) gnext2 * (*pk)) >> 31) + fcurr3; 
+      /* Process fourth sample for 4th, 8th .. tap */ 
+      fnext4 = (q31_t) (((q63_t) gnext3 * (*pk)) >> 31) + fcurr4; 
+ 
+      /* g4(n) = f3(n) * K4  +  g3(n-1) */ 
+      /* Calculation of state values for next stage */ 
+      gnext4 = (q31_t) (((q63_t) fcurr4 * (*pk)) >> 31) + gnext3; 
+      gnext3 = (q31_t) (((q63_t) fcurr3 * (*pk)) >> 31) + gnext2; 
+      gnext2 = (q31_t) (((q63_t) fcurr2 * (*pk)) >> 31) + gnext1; 
+      gnext1 = (q31_t) (((q63_t) fcurr1 * (*pk++)) >> 31) + gcurr1; 
+ 
+      /* Read g4(n-1), g8(n-1) .... from state */ 
+      gcurr1 = *px; 
+ 
+      /* Save g4(n+3) in the slot just read */ 
+      *px++ = (q31_t) gnext4; 
+ 
+      /* Sample processing for K5, K9.... */ 
+      /* Process first sample for 5th, 9th .. tap */ 
+      /* f5(n) = f4(n) +  K5 * g4(n-1) */ 
+      fcurr1 = (q31_t) (((q63_t) gcurr1 * (*pk)) >> 31) + fnext1; 
+      /* Process second sample for 5th, 9th .. tap */ 
+      fcurr2 = (q31_t) (((q63_t) gnext1 * (*pk)) >> 31) + fnext2; 
+      /* Process third sample for 5th, 9th .. tap */ 
+      fcurr3 = (q31_t) (((q63_t) gnext2 * (*pk)) >> 31) + fnext3; 
+      /* Process fourth sample for 5th, 9th .. tap */ 
+      fcurr4 = (q31_t) (((q63_t) gnext3 * (*pk)) >> 31) + fnext4; 
+ 
+      /* Calculation of state values for next stage */ 
+      /* g5(n) = f4(n) * K5  +  g4(n-1) */ 
+      gnext4 = (q31_t) (((q63_t) fnext4 * (*pk)) >> 31) + gnext3; 
+      gnext3 = (q31_t) (((q63_t) fnext3 * (*pk)) >> 31) + gnext2; 
+      gnext2 = (q31_t) (((q63_t) fnext2 * (*pk)) >> 31) + gnext1; 
+      gnext1 = (q31_t) (((q63_t) fnext1 * (*pk++)) >> 31) + gcurr1; 
+ 
+      stageCnt--; 
+    } 
+ 
+    /* If (numStages - 1) is not a multiple of 4, compute the remaining 1 to 3 taps one at a time */ 
+    stageCnt = (numStages - 1u) % 0x4u; 
+ 
+    while(stageCnt > 0u) 
+    { 
+      gcurr1 = *px; 
+ 
+      /* Save the previous tap's g for the fourth sample in the slot just read */ 
+      *px++ = (q31_t) gnext4; 
+ 
+      /* Process all four samples for one leftover tap: f(k) = f(k-1) + K(k) * g(k-1) of the preceding sample */ 
+      fnext1 = (q31_t) (((q63_t) gcurr1 * (*pk)) >> 31) + fcurr1; 
+      fnext2 = (q31_t) (((q63_t) gnext1 * (*pk)) >> 31) + fcurr2; 
+      fnext3 = (q31_t) (((q63_t) gnext2 * (*pk)) >> 31) + fcurr3; 
+      fnext4 = (q31_t) (((q63_t) gnext3 * (*pk)) >> 31) + fcurr4; 
+ 
+      /* g(k) = f(k-1) * K(k) + g(k-1) of the preceding sample; updated from sample 4 down to 1 */ 
+      gnext4 = (q31_t) (((q63_t) fcurr4 * (*pk)) >> 31) + gnext3; 
+      gnext3 = (q31_t) (((q63_t) fcurr3 * (*pk)) >> 31) + gnext2; 
+      gnext2 = (q31_t) (((q63_t) fcurr2 * (*pk)) >> 31) + gnext1; 
+      gnext1 = (q31_t) (((q63_t) fcurr1 * (*pk++)) >> 31) + gcurr1; 
+ 
+      /* Update of f values for next coefficient set processing */ 
+      fcurr1 = fnext1; 
+      fcurr2 = fnext2; 
+      fcurr3 = fnext3; 
+      fcurr4 = fnext4; 
+ 
+      stageCnt--; 
+ 
+    } 
+ 
+    /* Store the four outputs y(n..n+3) = fN(n..n+3) in the destination buffer. */ 
+    /* Samples 2 to 4 are held in q63_t temporaries and are narrowed to q31_t by the casts below */ 
+    *pDst++ = fcurr1; 
+    *pDst++ = (q31_t) fcurr2; 
+    *pDst++ = (q31_t) fcurr3; 
+    *pDst++ = (q31_t) fcurr4; 
+ 
+    blkCnt--; 
+  } 
+ 
+  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.  
+   ** No loop unrolling is used. */ 
+  blkCnt = blockSize % 0x4u; 
+ 
+  while(blkCnt > 0u) 
+  { 
+    /* f0(n) = x(n) */ 
+    fcurr1 = *pSrc++; 
+ 
+    /* Initialize coeff pointer */ 
+    pk = (pCoeffs); 
+ 
+    /* Initialize state pointer */ 
+    px = pState; 
+ 
+    /* Read g0(n-1) from state slot 0 */ 
+    gcurr1 = *px; 
+ 
+    /* for sample 1 processing */ 
+    /* f1(n) = f0(n) +  K1 * g0(n-1) */ 
+    fnext1 = (q31_t) (((q63_t) gcurr1 * (*pk)) >> 31) + fcurr1; 
+    /* g1(n) = f0(n) * K1  +  g0(n-1) */ 
+    gnext1 = (q31_t) (((q63_t) fcurr1 * (*pk++)) >> 31) + gcurr1; 
+    /* Save g0(n) = x(n) in state slot 0 for the next sample */ 
+    *px++ = fcurr1; 
+ 
+    /* f1(n) is saved in fcurr1  
+       for next stage processing */ 
+    fcurr1 = fnext1; 
+ 
+    stageCnt = (numStages - 1u); 
+ 
+    /* Stage loop over taps 2..numStages.  NOTE(review): wraps to a huge count if numStages == 0. */ 
+    while(stageCnt > 0u) 
+    { 
+      /* Read the previous sample's g for this slot, g(k-1)(n-1) */ 
+      gcurr1 = *px; 
+ 
+      /* Overwrite the slot with g(k-1)(n) computed by the previous tap */ 
+      *px++ = gnext1; 
+ 
+      /* Sample processing for K2, K3.... */ 
+      /* f2(n) = f1(n) +  K2 * g1(n-1) */ 
+      fnext1 = (q31_t) (((q63_t) gcurr1 * (*pk)) >> 31) + fcurr1; 
+      /* g2(n) = f1(n) * K2  +  g1(n-1) */ 
+      gnext1 = (q31_t) (((q63_t) fcurr1 * (*pk++)) >> 31) + gcurr1; 
+ 
+      /* The new f value is saved in fcurr1  
+         for next stage processing */ 
+      fcurr1 = fnext1; 
+ 
+      stageCnt--; 
+ 
+    } 
+ 
+    /* y(n) = fN(n) */ 
+    *pDst++ = fcurr1; 
+ 
+    blkCnt--; 
+ 
+  } 
+} 
+ 
+/**  
+ * @} end of FIR_Lattice group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FilteringFunctions/arm_fir_q15.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,274 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:        arm_fir_q15.c  
+*  
+* Description:  Q15 FIR filter processing function.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.5  2010/04/26   
+* 	 incorporated review comments and updated with latest CMSIS layer  
+*  
+* Version 0.0.3  2010/03/10   
+*    Initial version  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupFilters  
+ */ 
+ 
+/**  
+ * @addtogroup FIR  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief Processing function for the Q15 FIR filter.  
+ * @param[in] *S points to an instance of the Q15 FIR structure.  
+ * @param[in] *pSrc points to the block of input data.  
+ * @param[out] *pDst points to the block of output data.  
+ * @param[in]  blockSize number of samples to process per call.  
+ * @return none.  
+ *  
+ * <b>Scaling and Overflow Behavior:</b>  
+ * \par  
+ * The function is implemented using a 64-bit internal accumulator.  
+ * Both coefficients and state variables are represented in 1.15 format and multiplications yield a 2.30 result.  
+ * The 2.30 intermediate results are accumulated in a 64-bit accumulator in 34.30 format.  
+ * There is no risk of internal overflow with this approach and the full precision of intermediate multiplications is preserved.  
+ * After all additions have been performed, the accumulator is truncated to 34.15 format by discarding low 15 bits.  
+ * Lastly, the accumulator is saturated to yield a result in 1.15 format.  
+ *  
+ * \par  
+ * Refer to the function <code>arm_fir_fast_q15()</code> for a faster but less precise implementation of this function.  
+ */ 
+ 
+void arm_fir_q15( 
+  const arm_fir_instance_q15 * S, 
+  q15_t * pSrc, 
+  q15_t * pDst, 
+  uint32_t blockSize) 
+{ 
+  q15_t *pState = S->pState;                     /* Start of the state window used for the current output sample(s) */ 
+  q15_t *pCoeffs = S->pCoeffs;                   /* Coefficient pointer */ 
+  q15_t *pStateCurnt;                            /* Write position for new input samples in the state buffer */ 
+  q15_t *px1;                                    /* Temporary q15 pointer for state buffer */ 
+  q31_t *pb;                                     /* Temporary pointer reading the coefficient buffer two q15 values at a time */ 
+  q31_t *px2;                                    /* Temporary q31 pointer for SIMD state buffer accesses */ 
+  q31_t x0, x1, x2, x3, c0;                      /* Temporary variables to hold SIMD state and coefficient values */ 
+  q63_t acc0, acc1, acc2, acc3;                  /* Accumulators */ 
+  uint32_t numTaps = S->numTaps;                 /* Number of taps in the filter */ 
+  uint32_t tapCnt, blkCnt;                       /* Loop counters */ 
+ 
+  /* S->pState points to state array which contains previous frame (numTaps - 1) samples */ 
+  /* pStateCurnt points to the location where the new input data should be written */ 
+  pStateCurnt = &(S->pState[(numTaps - 1u)]); 
+ 
+  /* Apply loop unrolling and compute 4 output values simultaneously.  
+   * The variables acc0 ... acc3 hold output values that are being computed:  
+   *  
+   *    acc0 =  b[numTaps-1] * x[n-numTaps-1] + b[numTaps-2] * x[n-numTaps-2] + b[numTaps-3] * x[n-numTaps-3] +...+ b[0] * x[0]  
+   *    acc1 =  b[numTaps-1] * x[n-numTaps] +   b[numTaps-2] * x[n-numTaps-1] + b[numTaps-3] * x[n-numTaps-2] +...+ b[0] * x[1]  
+   *    acc2 =  b[numTaps-1] * x[n-numTaps+1] + b[numTaps-2] * x[n-numTaps] +   b[numTaps-3] * x[n-numTaps-1] +...+ b[0] * x[2]  
+   *    acc3 =  b[numTaps-1] * x[n-numTaps+2] + b[numTaps-2] * x[n-numTaps+1] + b[numTaps-3] * x[n-numTaps]   +...+ b[0] * x[3]  
+   */ 
+  blkCnt = blockSize >> 2; 
+ 
+  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.  
+   ** A second loop below computes the remaining 1 to 3 samples. */ 
+  while(blkCnt > 0u) 
+  { 
+    /* Copy four new input samples into the state buffer.  
+     ** Use 32-bit SIMD to move the 16-bit data.  Only requires two copies. */ 
+    *__SIMD32(pStateCurnt)++ = *__SIMD32(pSrc)++; 
+    *__SIMD32(pStateCurnt)++ = *__SIMD32(pSrc)++; 
+ 
+    /* Set all accumulators to zero */ 
+    acc0 = 0; 
+    acc1 = 0; 
+    acc2 = 0; 
+    acc3 = 0; 
+ 
+    /* Initialize state pointer of type q15 */ 
+    px1 = pState; 
+ 
+    /* Initialize coeff pointer of type q31 */ 
+    pb = (q31_t *) (pCoeffs); 
+ 
+    /* Read the first two samples from the state buffer:  x[n-N], x[n-N-1] */ 
+    x0 = *(q31_t *) (px1++); 
+ 
+    /* Read the second and third samples (px1 steps one q15 at a time, so x1 overlaps x0 by one sample): x[n-N-1], x[n-N-2] */ 
+    x1 = *(q31_t *) (px1++); 
+ 
+    /* Loop over the taps, four per iteration.  
+     ** NOTE: do-while - the body runs at least once, and tapCnt (unsigned) wraps on decrement if numTaps < 4. */ 
+    tapCnt = numTaps >> 2; 
+    do 
+    { 
+      /* Read the first two coefficients using SIMD:  b[N] and b[N-1] coefficients */ 
+      c0 = *(pb++); 
+ 
+      /* acc0 +=  b[N] * x[n-N] + b[N-1] * x[n-N-1] */ 
+      acc0 = __SMLALD(x0, c0, acc0); 
+ 
+      /* acc1 +=  b[N] * x[n-N-1] + b[N-1] * x[n-N-2] */ 
+      acc1 = __SMLALD(x1, c0, acc1); 
+ 
+      /* Read state x[n-N-2], x[n-N-3] */ 
+      x2 = *(q31_t *) (px1++); 
+ 
+      /* Read state x[n-N-3], x[n-N-4] */ 
+      x3 = *(q31_t *) (px1++); 
+ 
+      /* acc2 +=  b[N] * x[n-N-2] + b[N-1] * x[n-N-3] */ 
+      acc2 = __SMLALD(x2, c0, acc2); 
+ 
+      /* acc3 +=  b[N] * x[n-N-3] + b[N-1] * x[n-N-4] */ 
+      acc3 = __SMLALD(x3, c0, acc3); 
+ 
+      /* Read coefficients b[N-2], b[N-3] */ 
+      c0 = *(pb++); 
+ 
+      /* acc0 +=  b[N-2] * x[n-N-2] + b[N-3] * x[n-N-3] */ 
+      acc0 = __SMLALD(x2, c0, acc0); 
+ 
+      /* acc1 +=  b[N-2] * x[n-N-3] + b[N-3] * x[n-N-4] */ 
+      acc1 = __SMLALD(x3, c0, acc1); 
+ 
+      /* Read state x[n-N-4], x[n-N-5] */ 
+      x0 = *(q31_t *) (px1++); 
+ 
+      /* Read state x[n-N-5], x[n-N-6] */ 
+      x1 = *(q31_t *) (px1++); 
+ 
+      /* acc2 +=  b[N-2] * x[n-N-4] + b[N-3] * x[n-N-5] */ 
+      acc2 = __SMLALD(x0, c0, acc2); 
+ 
+      /* acc3 +=  b[N-2] * x[n-N-5] + b[N-3] * x[n-N-6] */ 
+      acc3 = __SMLALD(x1, c0, acc3); 
+      tapCnt--; 
+ 
+    } 
+    while(tapCnt > 0u); 
+ 
+    /* If the filter length is not a multiple of 4, compute the remaining filter taps.  
+     ** Exactly one coefficient pair (2 taps) is handled here, i.e. the filter length is assumed to be even. */ 
+    if((numTaps & 0x3u) != 0u) 
+    { 
+      /* Read 2 coefficients */ 
+      c0 = *(pb++); 
+      /* Fetch two more packed state words; x0 and x1 still hold the pairs loaded at the end of the loop above */ 
+      x2 = *(q31_t *) (px1++); 
+      x3 = *(q31_t *) (px1++); 
+ 
+      /* Perform the multiply-accumulates */ 
+      acc0 = __SMLALD(x0, c0, acc0); 
+      acc1 = __SMLALD(x1, c0, acc1); 
+      acc2 = __SMLALD(x2, c0, acc2); 
+      acc3 = __SMLALD(x3, c0, acc3); 
+    } 
+ 
+    /* The results in the 4 accumulators are in 34.30 format.  Shift right by 15 and saturate to 16 bits (1.15).  
+     ** Then pack two results per 32-bit word and store the 4 outputs in the destination buffer. */ 
+    *__SIMD32(pDst)++ = 
+      __PKHBT(__SSAT((acc0 >> 15), 16), __SSAT((acc1 >> 15), 16), 16); 
+    *__SIMD32(pDst)++ = 
+      __PKHBT(__SSAT((acc2 >> 15), 16), __SSAT((acc3 >> 15), 16), 16); 
+ 
+ 
+    /* Advance the state pointer by 4 to process the next group of 4 samples */ 
+    pState = pState + 4; 
+ 
+    /* Decrement the loop counter */ 
+    blkCnt--; 
+  } 
+ 
+  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.  
+   ** No loop unrolling is used. */ 
+  blkCnt = blockSize % 0x4u; 
+  while(blkCnt > 0u) 
+  { 
+    /* Copy one sample into the state buffer */ 
+    *pStateCurnt++ = *pSrc++; 
+ 
+    /* Set the accumulator to zero */ 
+    acc0 = 0; 
+ 
+    /* Use SIMD: each 32-bit read carries two q15 values, hence numTaps / 2 iterations (do-while: runs at least once) */ 
+    px2 = (q31_t *) pState; 
+    pb = (q31_t *) (pCoeffs); 
+    tapCnt = numTaps >> 1; 
+ 
+    do 
+    { 
+      acc0 = __SMLALD(*px2++, *(pb++), acc0); 
+      tapCnt--; 
+    } 
+    while(tapCnt > 0u); 
+ 
+    /* The result is in 34.30 format.  Shift right by 15 and saturate to 16 bits (1.15).  
+     ** Then store the output in the destination buffer. */ 
+    *pDst++ = (q15_t) (__SSAT((acc0 >> 15), 16)); 
+ 
+    /* Advance state pointer by 1 for the next sample */ 
+    pState = pState + 1; 
+ 
+    /* Decrement the loop counter */ 
+    blkCnt--; 
+  } 
+ 
+  /* Processing is complete.  
+   ** Now copy the last numTaps - 1 samples to the start of the state buffer.  
+   ** This prepares the state buffer for the next function call. */ 
+ 
+  /* Points to the start of the state buffer */ 
+  pStateCurnt = S->pState; 
+ 
+  /* Number of groups of four q15 samples; each group is copied with two 32-bit writes */ 
+  tapCnt = (numTaps - 1u) >> 2; 
+ 
+  while(tapCnt > 0u) 
+  { 
+    *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++; 
+    *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++; 
+ 
+    tapCnt--; 
+ 
+  } 
+ 
+  /* Calculation of count for remaining q15_t data */ 
+  tapCnt = (numTaps - 1u) % 0x4u; 
+ 
+  /* copy remaining data */ 
+  while(tapCnt > 0u) 
+  { 
+    *pStateCurnt++ = *pState++; 
+ 
+    /* Decrement the loop counter */ 
+    tapCnt--; 
+  } 
+} 
+ 
+/**  
+ * @} end of FIR group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FilteringFunctions/arm_fir_q31.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,300 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:	    arm_fir_q31.c  
+*  
+* Description:	Q31 FIR filter processing function.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.5  2010/04/26   
+* 	 incorporated review comments and updated with latest CMSIS layer  
+*  
+* Version 0.0.3  2010/03/10   
+*    Initial version  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupFilters  
+ */ 
+ 
+/**  
+ * @addtogroup FIR  
+ * @{  
+ */ 
+ 
+/** @brief Processing function for the Q31 FIR filter.  
+ * @param[in] *S points to an instance of the Q31 FIR filter structure.  
+ * @param[in] *pSrc points to the block of input data.  
+ * @param[out] *pDst points to the block of output data.  
+ * @param[in] blockSize number of samples to process per call.  
+ * @return none.  
+ *  
+ * @details  
+ * <b>Scaling and Overflow Behavior:</b>  
+ * \par  
+ * The function is implemented using an internal 64-bit accumulator.  
+ * The accumulator has a 2.62 format and maintains full precision of the intermediate multiplication results but provides only a single guard bit.  
+ * Thus, if the accumulator result overflows, it wraps around rather than clipping.  
+ * In order to avoid overflows completely, the input signal must be scaled down by log2(numTaps) bits.  
+ * After all multiply-accumulates are performed, the 2.62 accumulator is right shifted by 31 bits and converted to 1.31 format; this function applies no explicit saturation.  
+ *  
+ * \par  
+ * Refer to the function <code>arm_fir_fast_q31()</code> for a faster but less precise implementation of this filter.  
+ */ 
+ 
+void arm_fir_q31( 
+  const arm_fir_instance_q31 * S, 
+  q31_t * pSrc, 
+  q31_t * pDst, 
+  uint32_t blockSize) 
+{ 
+  q31_t *pState = S->pState;                     /* Start of the state window used for the current output sample(s) */ 
+  q31_t *pCoeffs = S->pCoeffs;                   /* Coefficient pointer */ 
+  q31_t *pStateCurnt;                            /* Write position for new input samples in the state buffer */ 
+  q31_t x0, x1, x2, x3;                          /* Temporary variables to hold state */ 
+  q31_t c0;                                      /* Temporary variable to hold coefficient value */ 
+  q31_t *px;                                     /* Temporary pointer for state */ 
+  q31_t *pb;                                     /* Temporary pointer for coefficient buffer */ 
+  q63_t acc0, acc1, acc2, acc3;                  /* Accumulators */ 
+  uint32_t numTaps = S->numTaps;                 /* Number of filter coefficients in the filter */ 
+  uint32_t i, tapCnt, blkCnt;                    /* Loop counters */ 
+ 
+  /* S->pState points to state array which contains previous frame (numTaps - 1) samples */ 
+  /* pStateCurnt points to the location where the new input data should be written */ 
+  pStateCurnt = &(S->pState[(numTaps - 1u)]); 
+ 
+  /* Apply loop unrolling and compute 4 output values simultaneously.  
+   * The variables acc0 ... acc3 hold output values that are being computed:  
+   *  
+   *    acc0 =  b[numTaps-1] * x[n-numTaps-1] + b[numTaps-2] * x[n-numTaps-2] + b[numTaps-3] * x[n-numTaps-3] +...+ b[0] * x[0]  
+   *    acc1 =  b[numTaps-1] * x[n-numTaps] +   b[numTaps-2] * x[n-numTaps-1] + b[numTaps-3] * x[n-numTaps-2] +...+ b[0] * x[1]  
+   *    acc2 =  b[numTaps-1] * x[n-numTaps+1] + b[numTaps-2] * x[n-numTaps] +   b[numTaps-3] * x[n-numTaps-1] +...+ b[0] * x[2]  
+   *    acc3 =  b[numTaps-1] * x[n-numTaps+2] + b[numTaps-2] * x[n-numTaps+1] + b[numTaps-3] * x[n-numTaps]   +...+ b[0] * x[3]  
+   */ 
+  blkCnt = blockSize >> 2; 
+ 
+  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.  
+   ** A second loop below computes the remaining 1 to 3 samples. */ 
+  while(blkCnt > 0u) 
+  { 
+    /* Copy four new input samples into the state buffer */ 
+    *pStateCurnt++ = *pSrc++; 
+    *pStateCurnt++ = *pSrc++; 
+    *pStateCurnt++ = *pSrc++; 
+    *pStateCurnt++ = *pSrc++; 
+ 
+    /* Set all accumulators to zero */ 
+    acc0 = 0; 
+    acc1 = 0; 
+    acc2 = 0; 
+    acc3 = 0; 
+ 
+    /* Initialize state pointer */ 
+    px = pState; 
+ 
+    /* Initialize coefficient pointer */ 
+    pb = pCoeffs; 
+ 
+    /* Read the first three samples from the state buffer:  
+     *  x[n-numTaps], x[n-numTaps-1], x[n-numTaps-2] */ 
+    x0 = *(px++); 
+    x1 = *(px++); 
+    x2 = *(px++); 
+ 
+    /* Loop unrolling.  Process 4 taps at a time; x0..x3 rotate roles so only one new state sample is read per tap. */ 
+    tapCnt = numTaps >> 2; 
+    i = tapCnt; 
+ 
+    while(i > 0u) 
+    { 
+      /* Read the b[numTaps] coefficient */ 
+      c0 = *(pb++); 
+ 
+      /* Read x[n-numTaps-3] sample */ 
+      x3 = *(px++); 
+ 
+      /* acc0 +=  b[numTaps] * x[n-numTaps] */ 
+      acc0 += ((q63_t) x0 * c0); 
+ 
+      /* acc1 +=  b[numTaps] * x[n-numTaps-1] */ 
+      acc1 += ((q63_t) x1 * c0); 
+ 
+      /* acc2 +=  b[numTaps] * x[n-numTaps-2] */ 
+      acc2 += ((q63_t) x2 * c0); 
+ 
+      /* acc3 +=  b[numTaps] * x[n-numTaps-3] */ 
+      acc3 += ((q63_t) x3 * c0); 
+ 
+      /* Read the b[numTaps-1] coefficient */ 
+      c0 = *(pb++); 
+ 
+      /* Read x[n-numTaps-4] sample */ 
+      x0 = *(px++); 
+ 
+      /* Perform the multiply-accumulates */ 
+      acc0 += ((q63_t) x1 * c0); 
+      acc1 += ((q63_t) x2 * c0); 
+      acc2 += ((q63_t) x3 * c0); 
+      acc3 += ((q63_t) x0 * c0); 
+ 
+      /* Read the b[numTaps-2] coefficient */ 
+      c0 = *(pb++); 
+ 
+      /* Read x[n-numTaps-5] sample */ 
+      x1 = *(px++); 
+ 
+      /* Perform the multiply-accumulates */ 
+      acc0 += ((q63_t) x2 * c0); 
+      acc1 += ((q63_t) x3 * c0); 
+      acc2 += ((q63_t) x0 * c0); 
+      acc3 += ((q63_t) x1 * c0); 
+      /* Read the b[numTaps-3] coefficient */ 
+      c0 = *(pb++); 
+ 
+      /* Read x[n-numTaps-6] sample */ 
+      x2 = *(px++); 
+ 
+      /* Perform the multiply-accumulates */ 
+      acc0 += ((q63_t) x3 * c0); 
+      acc1 += ((q63_t) x0 * c0); 
+      acc2 += ((q63_t) x1 * c0); 
+      acc3 += ((q63_t) x2 * c0); 
+      i--; 
+    } 
+ 
+    /* If the filter length is not a multiple of 4, compute the remaining filter taps */ 
+ 
+    i = numTaps - (tapCnt * 4u); 
+    while(i > 0u) 
+    { 
+      /* Read one coefficient */ 
+      c0 = *(pb++); 
+ 
+      /* Fetch 1 state variable */ 
+      x3 = *(px++); 
+ 
+      /* Perform the multiply-accumulates */ 
+      acc0 += ((q63_t) x0 * c0); 
+      acc1 += ((q63_t) x1 * c0); 
+      acc2 += ((q63_t) x2 * c0); 
+      acc3 += ((q63_t) x3 * c0); 
+ 
+      /* Shift the state window by one sample for the next tap */ 
+      x0 = x1; 
+      x1 = x2; 
+      x2 = x3; 
+ 
+      /* Decrement the loop counter */ 
+      i--; 
+    } 
+ 
+    /* Advance the state pointer by 4 to process the next group of 4 samples */ 
+    pState = pState + 4; 
+ 
+    /* The results in the 4 accumulators are in 2.62 format.  Shift right by 31 and cast to q31_t (no explicit saturation).  
+     ** Then store the 4 outputs in the destination buffer. */ 
+    *pDst++ = (q31_t) (acc0 >> 31u); 
+    *pDst++ = (q31_t) (acc1 >> 31u); 
+    *pDst++ = (q31_t) (acc2 >> 31u); 
+    *pDst++ = (q31_t) (acc3 >> 31u); 
+ 
+    /* Decrement the samples loop counter */ 
+    blkCnt--; 
+  } 
+ 
+ 
+  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.  
+   ** No loop unrolling is used. */ 
+  blkCnt = blockSize % 4u; 
+ 
+  while(blkCnt > 0u) 
+  { 
+    /* Copy one sample at a time into state buffer */ 
+    *pStateCurnt++ = *pSrc++; 
+ 
+    /* Set the accumulator to zero */ 
+    acc0 = 0; 
+ 
+    /* Initialize state pointer */ 
+    px = pState; 
+ 
+    /* Initialize Coefficient pointer */ 
+    pb = (pCoeffs); 
+ 
+    i = numTaps; 
+ 
+    /* Perform the multiply-accumulates (do-while: the body runs at least once, and i wraps on decrement if numTaps is 0) */ 
+    do 
+    { 
+      acc0 += (q63_t) * (px++) * (*(pb++)); 
+      i--; 
+    } while(i > 0u); 
+ 
+    /* The result is in 2.62 format.  Shift right by 31 and cast to q31_t (no explicit saturation).  
+     ** Then store the output in the destination buffer. */ 
+    *pDst++ = (q31_t) (acc0 >> 31u); 
+ 
+    /* Advance state pointer by 1 for the next sample */ 
+    pState = pState + 1; 
+ 
+    /* Decrement the samples loop counter */ 
+    blkCnt--; 
+  } 
+ 
+  /* Processing is complete.  
+   ** Now copy the last numTaps - 1 samples to the start of the state buffer.  
+   ** This prepares the state buffer for the next function call. */ 
+ 
+  /* Points to the start of the state buffer */ 
+  pStateCurnt = S->pState; 
+ 
+  tapCnt = (numTaps - 1u) >> 2u; 
+ 
+  /* Copy data, four samples per iteration */ 
+  while(tapCnt > 0u) 
+  { 
+    *pStateCurnt++ = *pState++; 
+    *pStateCurnt++ = *pState++; 
+    *pStateCurnt++ = *pState++; 
+    *pStateCurnt++ = *pState++; 
+ 
+    /* Decrement the loop counter */ 
+    tapCnt--; 
+  } 
+ 
+  /* Calculate remaining number of copies */ 
+  tapCnt = (numTaps - 1u) % 0x4u; 
+ 
+  /* Copy the remaining q31_t data */ 
+  while(tapCnt > 0u) 
+  { 
+    *pStateCurnt++ = *pState++; 
+ 
+    /* Decrement the loop counter */ 
+    tapCnt--; 
+  } 
+ 
+} 
+ 
+/**  
+ * @} end of FIR group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FilteringFunctions/arm_fir_q7.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,207 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:        arm_fir_q7.c  
+*  
+* Description:  Q7 FIR filter processing function.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.5  2010/04/26   
+* 	 incorporated review comments and updated with latest CMSIS layer  
+*  
+* Version 0.0.3  2010/03/10   
+*    Initial version  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupFilters  
+ */ 
+ 
+/**  
+ * @addtogroup FIR  
+ * @{  
+ */ 
+ 
+/** @brief Processing function for the Q7 FIR filter.  
+ * @param[in]   *S points to an instance of the Q7 FIR filter structure.  
+ * @param[in]   *pSrc points to the block of input data.  
+ * @param[out]  *pDst points to the block of output data.  
+ * @param[in]   blockSize number of samples to process per call.  
+ * @return 	none.  
+ *  
+ * <b>Scaling and Overflow Behavior:</b>  
+ * \par  
+ * The function is implemented using a 32-bit internal accumulator.  
+ * Both coefficients and state variables are represented in 1.7 format and multiplications yield a 2.14 result.  
+ * The 2.14 intermediate results are accumulated in a 32-bit accumulator in 18.14 format.  
+ * There is no risk of internal overflow with this approach and the full precision of intermediate multiplications is preserved.  
+ * The accumulator is converted to 18.7 format by discarding the low 7 bits.  
+ * Finally, the result is saturated to 1.7 format.  
+ */ 
+ 
+void arm_fir_q7( 
+  const arm_fir_instance_q7 * S, 
+  q7_t * pSrc, 
+  q7_t * pDst, 
+  uint32_t blockSize) 
+{ 
+  uint32_t numTaps = S->numTaps;                 /* Number of taps in the filter */ 
+  uint32_t i, blkCnt;                            /* Loop counters */ 
+  q7_t *pState = S->pState;                      /* Start of the state window used for the current output sample */ 
+  q7_t *pCoeffs = S->pCoeffs;                    /* Coefficient pointer */ 
+  q7_t *px, *pb;                                 /* Temporary pointers to state and coeff */ 
+  q31_t acc = 0;                                 /* Accumulator */ 
+  q31_t input1, input2;                          /* Two q7 values widened to 16 bits and packed into one 32-bit word */ 
+  q15_t in1, in2;                                /* q7 values sign-extended to 16 bits before packing */ 
+  q7_t *pStateCurnt;                             /* Write position for new input samples in the state buffer */ 
+ 
+  /* All blockSize inputs are appended to the state buffer first; the filter loop then produces one output per sample. */ 
+  /* S->pState points to state array which contains previous frame (numTaps - 1) samples */ 
+  /* pStateCurnt points to the location where the new input data should be written */ 
+  pStateCurnt = S->pState + (numTaps - 1u); 
+ 
+  i = blockSize >> 2u; 
+ 
+  /* Copy four new input samples into the state buffer.  
+   ** Use 32-bit SIMD to move the four 8-bit data.  Only requires one copy for every four samples. */ 
+  while(i > 0u) 
+  { 
+    *__SIMD32(pStateCurnt)++ = *__SIMD32(pSrc)++; 
+    i--; 
+  } 
+ 
+  i = blockSize % 0x4u; 
+ 
+  /* Copy remaining samples into the state buffer. */ 
+  while(i > 0u) 
+  { 
+    *pStateCurnt++ = *pSrc++; 
+    i--; 
+  } 
+ 
+  blkCnt = blockSize; 
+ 
+  /* Filter every sample of the block, one output per iteration */ 
+  while(blkCnt > 0u) 
+  { 
+    /* Set accumulator to zero */ 
+    acc = 0; 
+ 
+    /* Initialize state pointer of type q7 */ 
+    px = pState; 
+ 
+    /* Initialize coeff pointer of type q7 */ 
+    pb = pCoeffs; 
+ 
+    i = numTaps >> 2u; 
+ 
+    /* Loop over the number of taps.  Unroll by a factor of 4.  
+     ** The numTaps % 4 leftover taps are handled by the scalar loop below. */ 
+    while(i > 0u) 
+    { 
+      /* Reading two inputs of state buffer and packing */ 
+      in1 = (q15_t) * px++; 
+      in2 = (q15_t) * px++; 
+      input1 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16); 
+ 
+      /* Reading two inputs of coefficient buffer and packing */ 
+      in1 = (q15_t) * pb++; 
+      in2 = (q15_t) * pb++; 
+      input2 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16); 
+ 
+      /* Multiply-accumulate the 2 packed state/coefficient pairs using SMLAD and store the result in the accumulator. */ 
+      acc = __SMLAD(input1, input2, acc); 
+ 
+      /* Reading two inputs of state buffer and packing */ 
+      in1 = (q15_t) * px++; 
+      in2 = (q15_t) * px++; 
+      input1 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16); 
+ 
+      /* Reading two inputs of coefficient buffer and packing */ 
+      in1 = (q15_t) * pb++; 
+      in2 = (q15_t) * pb++; 
+      input2 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16); 
+ 
+      /* Multiply-accumulate the 2 packed state/coefficient pairs using SMLAD and store the result in the accumulator. */ 
+      acc = __SMLAD(input1, input2, acc); 
+ 
+      /* Decrement the tap loop counter */ 
+      i--; 
+    } 
+ 
+    i = numTaps % 0x4u; 
+ 
+    /* If the filter length is not a multiple of 4, compute the remaining filter taps one at a time */ 
+    while(i > 0u) 
+    { 
+      /* Scalar MAC: __SMLAD would also multiply the sign-extension halfwords, adding +1 when both operands are negative */ 
+      acc += (q31_t) (*px++) * (*pb++); 
+      i--; 
+    } 
+ 
+    /* Discard the low 7 bits and saturate the result to 8 bits */ 
+    acc = __SSAT((acc >> 7), 8); 
+ 
+    /*Store filter output */ 
+    *pDst++ = (q7_t) (acc); 
+ 
+    /* Advance the state pointer by 1 to process the next sample */ 
+    pState = pState + 1; 
+ 
+    /* Decrement the loop counter */ 
+    blkCnt--; 
+  } 
+ 
+  /* Processing is complete.  
+   ** Now copy the last numTaps - 1 samples to the start of the state buffer.  
+   ** This prepares the state buffer for the next function call. */ 
+ 
+  /* Points to the start of the state buffer */ 
+  pStateCurnt = S->pState; 
+ 
+  /* Number of 32-bit writes, each moving four q7 samples */ 
+  i = (numTaps - 1u) >> 2u; 
+ 
+  /* Copy four values using integer pointer */ 
+  while(i > 0u) 
+  { 
+    *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++; 
+ 
+    i--; 
+ 
+  } 
+ 
+  /* Calculation of count for remaining q7_t data */ 
+  i = (numTaps - 1u) % 0x4u; 
+ 
+  /* Copy of remaining q7_t data */ 
+  while(i > 0u) 
+  { 
+    *pStateCurnt++ = *pState++; 
+    i--; 
+  } 
+ 
+} 
+ 
+/**  
+ * @} end of FIR group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FilteringFunctions/arm_fir_sparse_f32.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,275 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:	    arm_fir_sparse_f32.c  
+*  
+* Description:	Floating-point sparse FIR filter processing function. 
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* ------------------------------------------------------------------- */ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupFilters  
+ */ 
+ 
+/**  
+ * @defgroup FIR_Sparse Finite Impulse Response (FIR) Sparse Filters  
+ *  
+ * This group of functions implements sparse FIR filters.   
+ * Sparse FIR filters are equivalent to standard FIR filters except that most of the coefficients are equal to zero. 
+ * Sparse filters are used for simulating reflections in communications and audio applications. 
+ * 
+ * There are separate functions for Q7, Q15, Q31, and floating-point data types.  
+ * The functions operate on blocks of input and output data and each call to the function processes  
+ * <code>blockSize</code> samples through the filter.  <code>pSrc</code> and  
+ * <code>pDst</code> point to the input and output arrays respectively, each containing <code>blockSize</code> values.  
+ *  
+ * \par Algorithm:  
+ * The sparse filter instance structure contains an array of tap indices <code>pTapDelay</code> which specifies the locations of the non-zero coefficients. 
+ * This is in addition to the coefficient array <code>b</code>. 
+ * The implementation essentially skips the multiplications by zero and leads to an efficient realization. 
+ * <pre> 
+ *     y[n] = b[0] * x[n-pTapDelay[0]] + b[1] * x[n-pTapDelay[1]] + b[2] * x[n-pTapDelay[2]] + ...+ b[numTaps-1] * x[n-pTapDelay[numTaps-1]]  
+ * </pre>  
+ * \par  
+ * \image html FIRSparse.gif "Sparse FIR filter.  b[n] represents the filter coefficients" 
+ * \par  
+ * <code>pCoeffs</code> points to a coefficient array of size <code>numTaps</code>;  
+ * <code>pTapDelay</code> points to an array of nonzero indices and is also of size <code>numTaps</code>; 
+ * <code>pState</code> points to a state array of size <code>maxDelay + blockSize</code>, where 
+ * <code>maxDelay</code> is the largest offset value that is ever used in the <code>pTapDelay</code> array. 
+ * Some of the processing functions also require temporary working buffers. 
+ * 
+ * \par Instance Structure  
+ * The coefficients and state variables for a filter are stored together in an instance data structure.  
+ * A separate instance structure must be defined for each filter.  
+ * Coefficient and offset arrays may be shared among several instances while state variable arrays cannot be shared.  
+ * There are separate instance structure declarations for each of the 4 supported data types.  
+ *  
+ * \par Initialization Functions  
+ * There is also an associated initialization function for each data type.  
+ * The initialization function performs the following operations:  
+ * - Sets the values of the internal structure fields.  
+ * - Zeros out the values in the state buffer.  
+ *  
+ * \par  
+ * Use of the initialization function is optional.  
+ * However, if the initialization function is used, then the instance structure cannot be placed into a const data section.  
+ * To place an instance structure into a const data section, the instance structure must be manually initialized.  
+ * Set the values in the state buffer to zeros before static initialization.  
+ * The code below statically initializes each of the 4 different data type filter instance structures  
+ * <pre>  
+ *arm_fir_sparse_instance_f32 S = {numTaps, 0, pState, pCoeffs, maxDelay, pTapDelay};  
+ *arm_fir_sparse_instance_q31 S = {numTaps, 0, pState, pCoeffs, maxDelay, pTapDelay};  
+ *arm_fir_sparse_instance_q15 S = {numTaps, 0, pState, pCoeffs, maxDelay, pTapDelay};  
+ *arm_fir_sparse_instance_q7 S =  {numTaps, 0, pState, pCoeffs, maxDelay, pTapDelay};  
+ * </pre>  
+ * \par  
+ *  
+ * \par Fixed-Point Behavior  
+ * Care must be taken when using the fixed-point versions of the sparse FIR filter functions.  
+ * In particular, the overflow and saturation behavior of the accumulator used in each function must be considered.  
+ * Refer to the function specific documentation below for usage guidelines.  
+ */ 
+ 
+/**  
+ * @addtogroup FIR_Sparse  
+ * @{  
+ */ 
+ 
+/**
+ * @brief Processing function for the floating-point sparse FIR filter.
+ * @param[in,out] *S          points to an instance of the floating-point sparse FIR structure.
+ *                            Its state buffer and <code>stateIndex</code> are handed to the circular-write helper.
+ * @param[in]     *pSrc       points to the block of input data.
+ * @param[out]    *pDst       points to the block of output data.
+ * @param[in,out] *pScratchIn points to a temporary buffer of size blockSize.
+ *                            It is overwritten with delayed state samples for every tap.
+ * @param[in]     blockSize   number of input samples to process per call.
+ * @return none.
+ *
+ * \par
+ * Exactly <code>numTaps</code> entries of the coefficient array and of the
+ * tap-delay array are read; nothing beyond the last tap is accessed.
+ * If <code>numTaps</code> is zero the input is still stored in the state
+ * buffer and the output block is set to zero.
+ */
+
+void arm_fir_sparse_f32(
+  arm_fir_sparse_instance_f32 * S,
+  float32_t * pSrc,
+  float32_t * pDst,
+  float32_t * pScratchIn,
+  uint32_t blockSize)
+{
+
+  float32_t *pState = S->pState;                 /* State buffer */
+  float32_t *pCoeffs = S->pCoeffs;               /* Coefficient array */
+  int32_t *pTapDelay = S->pTapDelay;             /* Offsets of the non-zero taps */
+  uint32_t delaySize = S->maxDelay + blockSize;  /* State buffer length */
+  uint32_t numTaps = S->numTaps;                 /* Number of non-zero taps */
+  float32_t *px;                                 /* Working pointer into the scratch buffer */
+  float32_t *pOut;                               /* Working pointer into the destination buffer */
+  float32_t coeff;                               /* Coefficient of the current tap */
+  int32_t readIndex;                             /* Read index into the state buffer */
+  uint32_t tapIdx, blkCnt;                       /* Loop counters */
+
+  /* blockSize input samples are copied into the state buffer.
+   * stateIndex is passed by address as the write position. */
+  arm_circularWrite_f32((int32_t *) pState, delaySize, &S->stateIndex, 1,
+                        (int32_t *) pSrc, 1, blockSize);
+
+  /* A filter without taps produces silence; also avoids touching
+   * pCoeffs[0] / pTapDelay[0] when they do not exist. */
+  if(numTaps == 0u)
+  {
+    pOut = pDst;
+    blkCnt = blockSize;
+
+    while(blkCnt > 0u)
+    {
+      *pOut++ = 0.0f;
+      blkCnt--;
+    }
+
+    return;
+  }
+
+  /* One pass over the block per tap. The coefficient and the tap delay are
+   * fetched at the top of the iteration, so only indices 0..numTaps-1 of
+   * either array are ever read. */
+  for(tapIdx = 0u; tapIdx < numTaps; tapIdx++)
+  {
+    coeff = pCoeffs[tapIdx];
+
+    /* Position in the state buffer of the first delayed sample for this tap */
+    readIndex = ((int32_t) S->stateIndex - (int32_t) blockSize) - pTapDelay[tapIdx];
+
+    /* Wraparound of readIndex */
+    if(readIndex < 0)
+    {
+      readIndex += (int32_t) delaySize;
+    }
+
+    /* blockSize samples are read from the state buffer into the scratch buffer */
+    arm_circularRead_f32((int32_t *) pState, delaySize, &readIndex, 1,
+                         (int32_t *) pScratchIn, (int32_t *) pScratchIn, blockSize, 1,
+                         blockSize);
+
+    px = pScratchIn;
+    pOut = pDst;
+
+    /* Loop over the blockSize. Unroll by a factor of 4. */
+    blkCnt = blockSize >> 2u;
+
+    if(tapIdx == 0u)
+    {
+      /* First tap initialises the output block */
+      while(blkCnt > 0u)
+      {
+        *pOut++ = *px++ * coeff;
+        *pOut++ = *px++ * coeff;
+        *pOut++ = *px++ * coeff;
+        *pOut++ = *px++ * coeff;
+
+        blkCnt--;
+      }
+
+      /* Remaining samples when blockSize is not a multiple of 4 */
+      blkCnt = blockSize % 0x4u;
+
+      while(blkCnt > 0u)
+      {
+        *pOut++ = *px++ * coeff;
+
+        blkCnt--;
+      }
+    }
+    else
+    {
+      /* Every further tap accumulates onto the output block */
+      while(blkCnt > 0u)
+      {
+        *pOut++ += *px++ * coeff;
+        *pOut++ += *px++ * coeff;
+        *pOut++ += *px++ * coeff;
+        *pOut++ += *px++ * coeff;
+
+        blkCnt--;
+      }
+
+      /* Remaining samples when blockSize is not a multiple of 4 */
+      blkCnt = blockSize % 0x4u;
+
+      while(blkCnt > 0u)
+      {
+        *pOut++ += *px++ * coeff;
+
+        blkCnt--;
+      }
+    }
+  }
+
+}
+ 
+/**  
+ * @} end of FIR_Sparse group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FilteringFunctions/arm_fir_sparse_init_f32.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,96 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:        arm_fir_sparse_init_f32.c  
+*  
+* Description:	Floating-point sparse FIR filter initialization function. 
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* ---------------------------------------------------------------------------*/ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupFilters  
+ */ 
+ 
+/**  
+ * @addtogroup FIR_Sparse  
+ * @{  
+ */ 
+ 
+/**
+ * @brief  Initialization function for the floating-point sparse FIR filter.
+ * @param[in,out] *S         points to an instance of the floating-point sparse FIR structure.
+ * @param[in]     numTaps    number of nonzero coefficients in the filter.
+ * @param[in]     *pCoeffs   points to the array of filter coefficients.
+ * @param[in,out] *pState    points to the state buffer; it is cleared by this function.
+ * @param[in]     *pTapDelay points to the array of offset times.
+ * @param[in]     maxDelay   maximum offset time supported.
+ * @param[in]     blockSize  number of samples that will be processed per block.
+ * @return none
+ *
+ * <b>Description:</b>
+ * \par
+ * The arguments are stored in the instance structure, <code>stateIndex</code> is reset to zero
+ * and the first <code>maxDelay + blockSize</code> elements of <code>pState</code> are set to zero.
+ * \par
+ * <code>pCoeffs</code> holds the filter coefficients and has length <code>numTaps</code>.
+ * <code>pState</code> holds the filter's state variables and must be of length
+ * <code>maxDelay + blockSize</code>, where <code>maxDelay</code>
+ * is the maximum number of delay line values.
+ * <code>blockSize</code> is the
+ * number of samples processed by the <code>arm_fir_sparse_f32()</code> function.
+ */
+
+void arm_fir_sparse_init_f32(
+  arm_fir_sparse_instance_f32 * S,
+  uint16_t numTaps,
+  float32_t * pCoeffs,
+  float32_t * pState,
+  int32_t * pTapDelay,
+  uint16_t maxDelay,
+  uint32_t blockSize)
+{
+  /* Number of state elements: always maxDelay + blockSize */
+  uint32_t stateLength = maxDelay + blockSize;
+
+  /* Filter description: delays, coefficients and their count */
+  S->maxDelay = maxDelay;
+  S->pTapDelay = pTapDelay;
+  S->pCoeffs = pCoeffs;
+  S->numTaps = numTaps;
+
+  /* Writing starts at the beginning of the state buffer */
+  S->stateIndex = 0u;
+
+  /* Zero the delay line, then attach it to the instance */
+  memset(pState, 0, stateLength * sizeof(float32_t));
+  S->pState = pState;
+}
+ 
+/**  
+ * @} end of FIR_Sparse group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FilteringFunctions/arm_fir_sparse_init_q15.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,96 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:        arm_fir_sparse_init_q15.c  
+*  
+* Description:	Q15 sparse FIR filter initialization function. 
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* ---------------------------------------------------------------------------*/ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupFilters  
+ */ 
+ 
+/**  
+ * @addtogroup FIR_Sparse  
+ * @{  
+ */ 
+ 
+/**
+ * @brief  Initialization function for the Q15 sparse FIR filter.
+ * @param[in,out] *S         points to an instance of the Q15 sparse FIR structure.
+ * @param[in]     numTaps    number of nonzero coefficients in the filter.
+ * @param[in]     *pCoeffs   points to the array of filter coefficients.
+ * @param[in,out] *pState    points to the state buffer; it is cleared by this function.
+ * @param[in]     *pTapDelay points to the array of offset times.
+ * @param[in]     maxDelay   maximum offset time supported.
+ * @param[in]     blockSize  number of samples that will be processed per block.
+ * @return none
+ *
+ * <b>Description:</b>
+ * \par
+ * The arguments are stored in the instance structure, <code>stateIndex</code> is reset to zero
+ * and the first <code>maxDelay + blockSize</code> elements of <code>pState</code> are set to zero.
+ * \par
+ * <code>pCoeffs</code> holds the filter coefficients and has length <code>numTaps</code>.
+ * <code>pState</code> holds the filter's state variables and must be of length
+ * <code>maxDelay + blockSize</code>, where <code>maxDelay</code>
+ * is the maximum number of delay line values.
+ * <code>blockSize</code> is the
+ * number of samples processed by the <code>arm_fir_sparse_q15()</code> function.
+ */
+
+void arm_fir_sparse_init_q15(
+  arm_fir_sparse_instance_q15 * S,
+  uint16_t numTaps,
+  q15_t * pCoeffs,
+  q15_t * pState,
+  int32_t * pTapDelay,
+  uint16_t maxDelay,
+  uint32_t blockSize)
+{
+  /* Number of state elements: always maxDelay + blockSize */
+  uint32_t stateLength = maxDelay + blockSize;
+
+  /* Filter description: delays, coefficients and their count */
+  S->maxDelay = maxDelay;
+  S->pTapDelay = pTapDelay;
+  S->pCoeffs = pCoeffs;
+  S->numTaps = numTaps;
+
+  /* Writing starts at the beginning of the state buffer */
+  S->stateIndex = 0u;
+
+  /* Zero the delay line, then attach it to the instance */
+  memset(pState, 0, stateLength * sizeof(q15_t));
+  S->pState = pState;
+}
+ 
+/**  
+ * @} end of FIR_Sparse group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FilteringFunctions/arm_fir_sparse_init_q31.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,95 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:        arm_fir_sparse_init_q31.c  
+*  
+* Description:	Q31 sparse FIR filter initialization function. 
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* ---------------------------------------------------------------------------*/ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupFilters  
+ */ 
+ 
+/**  
+ * @addtogroup FIR_Sparse  
+ * @{  
+ */ 
+ 
+/**
+ * @brief  Initialization function for the Q31 sparse FIR filter.
+ * @param[in,out] *S         points to an instance of the Q31 sparse FIR structure.
+ * @param[in]     numTaps    number of nonzero coefficients in the filter.
+ * @param[in]     *pCoeffs   points to the array of filter coefficients.
+ * @param[in,out] *pState    points to the state buffer; it is cleared by this function.
+ * @param[in]     *pTapDelay points to the array of offset times.
+ * @param[in]     maxDelay   maximum offset time supported.
+ * @param[in]     blockSize  number of samples that will be processed per block.
+ * @return none
+ *
+ * <b>Description:</b>
+ * \par
+ * The arguments are stored in the instance structure, <code>stateIndex</code> is reset to zero
+ * and the first <code>maxDelay + blockSize</code> elements of <code>pState</code> are set to zero.
+ * \par
+ * <code>pCoeffs</code> holds the filter coefficients and has length <code>numTaps</code>.
+ * <code>pState</code> holds the filter's state variables and must be of length
+ * <code>maxDelay + blockSize</code>, where <code>maxDelay</code>
+ * is the maximum number of delay line values.
+ * <code>blockSize</code> is the number of samples processed by the <code>arm_fir_sparse_q31()</code> function.
+ */
+
+void arm_fir_sparse_init_q31(
+  arm_fir_sparse_instance_q31 * S,
+  uint16_t numTaps,
+  q31_t * pCoeffs,
+  q31_t * pState,
+  int32_t * pTapDelay,
+  uint16_t maxDelay,
+  uint32_t blockSize)
+{
+  /* Number of state elements: always maxDelay + blockSize */
+  uint32_t stateLength = maxDelay + blockSize;
+
+  /* Filter description: delays, coefficients and their count */
+  S->maxDelay = maxDelay;
+  S->pTapDelay = pTapDelay;
+  S->pCoeffs = pCoeffs;
+  S->numTaps = numTaps;
+
+  /* Writing starts at the beginning of the state buffer */
+  S->stateIndex = 0u;
+
+  /* Zero the delay line, then attach it to the instance */
+  memset(pState, 0, stateLength * sizeof(q31_t));
+  S->pState = pState;
+}
+ 
+/**  
+ * @} end of FIR_Sparse group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FilteringFunctions/arm_fir_sparse_init_q7.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,96 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:        arm_fir_sparse_init_q7.c  
+*  
+* Description:	Q7 sparse FIR filter initialization function. 
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* ---------------------------------------------------------------------------*/ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupFilters  
+ */ 
+ 
+/**  
+ * @addtogroup FIR_Sparse  
+ * @{  
+ */ 
+ 
+/**
+ * @brief  Initialization function for the Q7 sparse FIR filter.
+ * @param[in,out] *S         points to an instance of the Q7 sparse FIR structure.
+ * @param[in]     numTaps    number of nonzero coefficients in the filter.
+ * @param[in]     *pCoeffs   points to the array of filter coefficients.
+ * @param[in,out] *pState    points to the state buffer; it is cleared by this function.
+ * @param[in]     *pTapDelay points to the array of offset times.
+ * @param[in]     maxDelay   maximum offset time supported.
+ * @param[in]     blockSize  number of samples that will be processed per block.
+ * @return none
+ *
+ * <b>Description:</b>
+ * \par
+ * The arguments are stored in the instance structure, <code>stateIndex</code> is reset to zero
+ * and the first <code>maxDelay + blockSize</code> elements of <code>pState</code> are set to zero.
+ * \par
+ * <code>pCoeffs</code> holds the filter coefficients and has length <code>numTaps</code>.
+ * <code>pState</code> holds the filter's state variables and must be of length
+ * <code>maxDelay + blockSize</code>, where <code>maxDelay</code>
+ * is the maximum number of delay line values.
+ * <code>blockSize</code> is the
+ * number of samples processed by the <code>arm_fir_sparse_q7()</code> function.
+ */
+
+void arm_fir_sparse_init_q7(
+  arm_fir_sparse_instance_q7 * S,
+  uint16_t numTaps,
+  q7_t * pCoeffs,
+  q7_t * pState,
+  int32_t * pTapDelay,
+  uint16_t maxDelay,
+  uint32_t blockSize)
+{
+  /* Number of state elements: always maxDelay + blockSize */
+  uint32_t stateLength = maxDelay + blockSize;
+
+  /* Filter description: delays, coefficients and their count */
+  S->maxDelay = maxDelay;
+  S->pTapDelay = pTapDelay;
+  S->pCoeffs = pCoeffs;
+  S->numTaps = numTaps;
+
+  /* Writing starts at the beginning of the state buffer */
+  S->stateIndex = 0u;
+
+  /* Zero the delay line, then attach it to the instance */
+  memset(pState, 0, stateLength * sizeof(q7_t));
+  S->pState = pState;
+}
+ 
+/**  
+ * @} end of FIR_Sparse group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FilteringFunctions/arm_fir_sparse_q15.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,255 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:	    arm_fir_sparse_q15.c  
+*  
+* Description:	Q15 sparse FIR filter processing function. 
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* ------------------------------------------------------------------- */ 
+#include "arm_math.h" 
+ 
+/**  
+ * @addtogroup FIR_Sparse  
+ * @{  
+ */ 
+ 
+/**
+ * @brief Processing function for the Q15 sparse FIR filter.
+ * @param[in,out] *S           points to an instance of the Q15 sparse FIR structure.
+ *                             Its state buffer and <code>stateIndex</code> are handed to the circular-write helper.
+ * @param[in]     *pSrc        points to the block of input data.
+ * @param[out]    *pDst        points to the block of output data.
+ * @param[in,out] *pScratchIn  points to a temporary buffer of size blockSize.
+ *                             It is overwritten with delayed state samples for every tap.
+ * @param[in,out] *pScratchOut points to a temporary buffer of size blockSize.
+ *                             It holds the 32-bit accumulators of the output block.
+ * @param[in]     blockSize    number of input samples to process per call.
+ * @return none.
+ *
+ * <b>Scaling and Overflow Behavior:</b>
+ * \par
+ * The function is implemented using an internal 32-bit accumulator.
+ * The 1.15 x 1.15 multiplications yield a 2.30 result and these are added to a 2.30 accumulator.
+ * Thus the full precision of the multiplications is maintained but there is only a single guard bit in the accumulator.
+ * If the accumulator result overflows it will wrap around rather than saturate.
+ * After all multiply-accumulates are performed, the 2.30 accumulator is truncated to 2.15 format and then saturated to 1.15 format.
+ * In order to avoid overflows the input signal or coefficients must be scaled down by log2(numTaps) bits.
+ *
+ * \par
+ * Exactly <code>numTaps</code> entries of the coefficient array and of the
+ * tap-delay array are read; nothing beyond the last tap is accessed.
+ * If <code>numTaps</code> is zero the input is still stored in the state
+ * buffer and the output block is set to zero.
+ */
+
+
+void arm_fir_sparse_q15(
+  arm_fir_sparse_instance_q15 * S,
+  q15_t * pSrc,
+  q15_t * pDst,
+  q15_t * pScratchIn,
+  q31_t * pScratchOut,
+  uint32_t blockSize)
+{
+
+  q15_t *pState = S->pState;                     /* State buffer */
+  q15_t *pCoeffs = S->pCoeffs;                   /* Coefficient array */
+  int32_t *pTapDelay = S->pTapDelay;             /* Offsets of the non-zero taps */
+  uint32_t delaySize = S->maxDelay + blockSize;  /* State buffer length */
+  uint32_t numTaps = S->numTaps;                 /* Number of non-zero taps */
+  q15_t *px;                                     /* Working pointer into pScratchIn */
+  q15_t *pOut = pDst;                            /* Working pointer into the destination buffer */
+  q31_t *pAcc;                                   /* Working pointer into pScratchOut */
+  q15_t coeff;                                   /* Coefficient of the current tap */
+  q31_t in1, in2;                                /* Temporary variables */
+  int32_t readIndex;                             /* Read index into the state buffer */
+  uint32_t tapIdx, blkCnt;                       /* Loop counters */
+
+  /* blockSize input samples are copied into the state buffer.
+   * stateIndex is passed by address as the write position. */
+  arm_circularWrite_q15(pState, delaySize, &S->stateIndex, 1, pSrc, 1, blockSize);
+
+  /* A filter without taps produces silence; also avoids touching
+   * pCoeffs[0] / pTapDelay[0] when they do not exist. */
+  if(numTaps == 0u)
+  {
+    blkCnt = blockSize;
+
+    while(blkCnt > 0u)
+    {
+      *pOut++ = 0;
+      blkCnt--;
+    }
+
+    return;
+  }
+
+  /* One pass over the block per tap. The coefficient and the tap delay are
+   * fetched at the top of the iteration, so only indices 0..numTaps-1 of
+   * either array are ever read. */
+  for(tapIdx = 0u; tapIdx < numTaps; tapIdx++)
+  {
+    coeff = pCoeffs[tapIdx];
+
+    /* Position in the state buffer of the first delayed sample for this tap.
+     * Computed in signed arithmetic so the wraparound test below is meaningful. */
+    readIndex = ((int32_t) S->stateIndex - (int32_t) blockSize) - pTapDelay[tapIdx];
+
+    /* Wraparound of readIndex */
+    if(readIndex < 0)
+    {
+      readIndex += (int32_t) delaySize;
+    }
+
+    /* blockSize samples are read from the state buffer into the scratch buffer */
+    arm_circularRead_q15(pState, delaySize, &readIndex, 1,
+                         pScratchIn, pScratchIn, blockSize, 1, blockSize);
+
+    px = pScratchIn;
+    pAcc = pScratchOut;
+
+    /* Loop over the blockSize. Unroll by a factor of 4. */
+    blkCnt = blockSize >> 2;
+
+    if(tapIdx == 0u)
+    {
+      /* First tap initialises the accumulators */
+      while(blkCnt > 0u)
+      {
+        *pAcc++ = ((q31_t) * px++ * coeff);
+        *pAcc++ = ((q31_t) * px++ * coeff);
+        *pAcc++ = ((q31_t) * px++ * coeff);
+        *pAcc++ = ((q31_t) * px++ * coeff);
+
+        blkCnt--;
+      }
+
+      /* Remaining samples when blockSize is not a multiple of 4 */
+      blkCnt = blockSize % 0x4u;
+
+      while(blkCnt > 0u)
+      {
+        *pAcc++ = ((q31_t) * px++ * coeff);
+
+        blkCnt--;
+      }
+    }
+    else
+    {
+      /* Every further tap is accumulated */
+      while(blkCnt > 0u)
+      {
+        *pAcc++ += (q31_t) * px++ * coeff;
+        *pAcc++ += (q31_t) * px++ * coeff;
+        *pAcc++ += (q31_t) * px++ * coeff;
+        *pAcc++ += (q31_t) * px++ * coeff;
+
+        blkCnt--;
+      }
+
+      /* Remaining samples when blockSize is not a multiple of 4 */
+      blkCnt = blockSize % 0x4u;
+
+      while(blkCnt > 0u)
+      {
+        *pAcc++ += (q31_t) * px++ * coeff;
+
+        blkCnt--;
+      }
+    }
+  }
+
+  /* All the output values are in the pScratchOut buffer.
+     Convert them into 1.15 format, saturate and store in the destination buffer. */
+  pAcc = pScratchOut;
+
+  /* Loop over the blockSize, four outputs (two packed words) per iteration. */
+  blkCnt = blockSize >> 2;
+
+  while(blkCnt > 0u)
+  {
+    in1 = *pAcc++;
+    in2 = *pAcc++;
+    *__SIMD32(pOut)++ =
+      __PKHBT((q15_t) __SSAT(in1 >> 15, 16), (q15_t) __SSAT(in2 >> 15, 16),
+              16);
+
+    in1 = *pAcc++;
+    in2 = *pAcc++;
+    *__SIMD32(pOut)++ =
+      __PKHBT((q15_t) __SSAT(in1 >> 15, 16), (q15_t) __SSAT(in2 >> 15, 16),
+              16);
+
+    blkCnt--;
+
+  }
+
+  /* If the blockSize is not a multiple of 4,
+     remaining samples are processed in the below loop */
+  blkCnt = blockSize % 0x4u;
+
+  while(blkCnt > 0u)
+  {
+    *pOut++ = (q15_t) __SSAT(*pAcc++ >> 15, 16);
+    blkCnt--;
+  }
+}
+ 
+/**  
+ * @} end of FIR_Sparse group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FilteringFunctions/arm_fir_sparse_q31.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,265 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:	    arm_fir_sparse_q31.c  
+*  
+* Description:	Q31 sparse FIR filter processing function. 
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* ------------------------------------------------------------------- */ 
+#include "arm_math.h" 
+ 
+ 
+/**  
+ * @addtogroup FIR_Sparse  
+ * @{  
+ */ 
+ 
+/**
+ * @brief Processing function for the Q31 sparse FIR filter.
+ * @param[in,out] *S          points to an instance of the Q31 sparse FIR structure.
+ *                            The state buffer receives the new input block and
+ *                            the address of stateIndex is handed to the circular write helper.
+ * @param[in]     *pSrc       points to the block of input data.
+ * @param[out]    *pDst       points to the block of output data. It also serves as
+ *                            the accumulator while the taps are summed.
+ * @param[in]     *pScratchIn points to a temporary buffer of size blockSize.
+ * @param[in]     blockSize   number of input samples to process per call.
+ * @return none.
+ *
+ * <b>Scaling and Overflow Behavior:</b>
+ * \par
+ * The function is implemented using an internal 32-bit accumulator.
+ * The 1.31 x 1.31 multiplications are truncated to 2.30 format.
+ * This leads to loss of precision on the intermediate multiplications and provides only a single guard bit.
+ * If the accumulator result overflows, it wraps around rather than saturate.
+ * In order to avoid overflows the input signal or coefficients must be scaled down by log2(numTaps) bits.
+ *
+ * \par
+ * Exactly numTaps elements of pCoeffs and of pTapDelay are read.
+ * If numTaps is zero the input block is still written to the state buffer
+ * and the output block is set to zero.
+ */
+
+void arm_fir_sparse_q31(
+  arm_fir_sparse_instance_q31 * S,
+  q31_t * pSrc,
+  q31_t * pDst,
+  q31_t * pScratchIn,
+  uint32_t blockSize)
+{
+
+  q31_t *pState = S->pState;                     /* State buffer (circular delay line) */
+  q31_t *pCoeffs = S->pCoeffs;                   /* Coefficients of the non-zero taps */
+  int32_t *pTapDelay = S->pTapDelay;             /* Offsets of the non-zero taps */
+  uint32_t delaySize = S->maxDelay + blockSize;  /* State buffer length */
+  uint32_t numTaps = S->numTaps;                 /* Number of non-zero taps */
+  q31_t *px;                                     /* Working pointer into the scratch buffer */
+  q31_t *pOut = pDst;                            /* Working pointer into the output buffer */
+  q63_t out;                                     /* Wide temporary used while accumulating */
+  q31_t coeff;                                   /* Coefficient of the tap being processed */
+  int32_t readIndex;                             /* Read index of the state buffer */
+  uint32_t tap, blkCnt;                          /* Loop counters */
+
+  /* blockSize input samples are copied into the state buffer. */
+  /* S->stateIndex gives the starting position to write in the state buffer */
+  arm_circularWrite_f32((int32_t *) pState, delaySize, &S->stateIndex, 1,
+                        (int32_t *) pSrc, 1, blockSize);
+
+  /* A filter without taps has an all-zero output. Handling it here also
+   * keeps the tap loop from touching the (empty) coefficient arrays. */
+  if(numTaps == 0u)
+  {
+    for(blkCnt = blockSize; blkCnt > 0u; blkCnt--)
+    {
+      *pOut++ = 0;
+    }
+    return;
+  }
+
+  /* Loop over the non-zero taps. */
+  for(tap = 0u; tap < numTaps; tap++)
+  {
+    /* The coefficient and the delay are fetched at the top of the iteration,
+     * so nothing beyond element (numTaps - 1) of either array is ever read. */
+    coeff = pCoeffs[tap];
+
+    /* Read Index, from where the state buffer should be read, is calculated. */
+    readIndex = ((int32_t) S->stateIndex - (int32_t) blockSize) - pTapDelay[tap];
+
+    /* Wraparound of readIndex */
+    if(readIndex < 0)
+    {
+      readIndex += (int32_t) delaySize;
+    }
+
+    /* blockSize samples are read from the state buffer into the scratch buffer */
+    arm_circularRead_f32((int32_t *) pState, delaySize, &readIndex, 1,
+                         (int32_t *) pScratchIn, (int32_t *) pScratchIn, blockSize, 1,
+                         blockSize);
+
+    /* Restart the working pointers for this tap */
+    px = pScratchIn;
+    pOut = pDst;
+
+    /* Loop over the blockSize. Unroll by a factor of 4. */
+    blkCnt = blockSize >> 2;
+
+    if(tap == 0u)
+    {
+      /* First tap: the products initialise the output buffer */
+      while(blkCnt > 0u)
+      {
+        *pOut++ = (q31_t) (((q63_t) * px++ * coeff) >> 32);
+        *pOut++ = (q31_t) (((q63_t) * px++ * coeff) >> 32);
+        *pOut++ = (q31_t) (((q63_t) * px++ * coeff) >> 32);
+        *pOut++ = (q31_t) (((q63_t) * px++ * coeff) >> 32);
+
+        blkCnt--;
+      }
+
+      /* If the blockSize is not a multiple of 4,
+       * compute the remaining samples */
+      blkCnt = blockSize % 0x4u;
+
+      while(blkCnt > 0u)
+      {
+        *pOut++ = (q31_t) (((q63_t) * px++ * coeff) >> 32);
+
+        blkCnt--;
+      }
+    }
+    else
+    {
+      /* Remaining taps: multiply-accumulate into the output buffer */
+      while(blkCnt > 0u)
+      {
+        out = *pOut;
+        out += ((q63_t) * px++ * coeff) >> 32;
+        *pOut++ = (q31_t) (out);
+
+        out = *pOut;
+        out += ((q63_t) * px++ * coeff) >> 32;
+        *pOut++ = (q31_t) (out);
+
+        out = *pOut;
+        out += ((q63_t) * px++ * coeff) >> 32;
+        *pOut++ = (q31_t) (out);
+
+        out = *pOut;
+        out += ((q63_t) * px++ * coeff) >> 32;
+        *pOut++ = (q31_t) (out);
+
+        blkCnt--;
+      }
+
+      /* If the blockSize is not a multiple of 4,
+       * compute the remaining samples */
+      blkCnt = blockSize % 0x4u;
+
+      while(blkCnt > 0u)
+      {
+        out = *pOut;
+        out += ((q63_t) * px++ * coeff) >> 32;
+        *pOut++ = (q31_t) (out);
+
+        blkCnt--;
+      }
+    }
+  }
+
+  /* Working output pointer is updated */
+  pOut = pDst;
+
+  /* The accumulated 2.30 values are converted to 1.31 format by a left
+   * shift of one bit. The shift is done on the unsigned representation so
+   * that negative values are not left-shifted as signed integers. */
+  /* Loop over the blockSize. Unroll by a factor of 4.
+   * process 4 output samples at a time. */
+  blkCnt = blockSize >> 2;
+
+  while(blkCnt > 0u)
+  {
+    *pOut = (q31_t) ((uint32_t) * pOut << 1);
+    pOut++;
+    *pOut = (q31_t) ((uint32_t) * pOut << 1);
+    pOut++;
+    *pOut = (q31_t) ((uint32_t) * pOut << 1);
+    pOut++;
+    *pOut = (q31_t) ((uint32_t) * pOut << 1);
+    pOut++;
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* If the blockSize is not a multiple of 4,
+   * process the remaining output samples */
+  blkCnt = blockSize % 0x4u;
+
+  while(blkCnt > 0u)
+  {
+    *pOut = (q31_t) ((uint32_t) * pOut << 1);
+    pOut++;
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+}
+ 
+/**  
+ * @} end of FIR_Sparse group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FilteringFunctions/arm_fir_sparse_q7.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,262 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:	    arm_fir_sparse_q7.c  
+*  
+* Description:	Q7 sparse FIR filter processing function. 
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* ------------------------------------------------------------------- */ 
+#include "arm_math.h" 
+ 
+ 
+/**  
+ * @ingroup groupFilters  
+ */ 
+ 
+/**  
+ * @addtogroup FIR_Sparse  
+ * @{  
+ */ 
+ 
+ 
+/**
+ * @brief Processing function for the Q7 sparse FIR filter.
+ * @param[in,out] *S           points to an instance of the Q7 sparse FIR structure.
+ *                             The state buffer receives the new input block and
+ *                             the address of stateIndex is handed to the circular write helper.
+ * @param[in]     *pSrc        points to the block of input data.
+ * @param[out]    *pDst        points to the block of output data
+ * @param[in]     *pScratchIn  points to a temporary buffer of size blockSize.
+ * @param[in]     *pScratchOut points to a temporary buffer of size blockSize.
+ * @param[in]     blockSize    number of input samples to process per call.
+ * @return none.
+ *
+ * <b>Scaling and Overflow Behavior:</b>
+ * \par
+ * The function is implemented using a 32-bit internal accumulator.
+ * Both coefficients and state variables are represented in 1.7 format and multiplications yield a 2.14 result.
+ * The 2.14 intermediate results are accumulated in a 32-bit accumulator in 18.14 format.
+ * There is no risk of internal overflow with this approach and the full precision of intermediate multiplications is preserved.
+ * The accumulator is then converted to 18.7 format by discarding the low 7 bits.
+ * Finally, the result is saturated to 1.7 format.
+ *
+ * \par
+ * Exactly numTaps elements of pCoeffs and of pTapDelay are read.
+ * If numTaps is zero the input block is still written to the state buffer
+ * and the output block is set to zero.
+ */
+
+void arm_fir_sparse_q7(
+  arm_fir_sparse_instance_q7 * S,
+  q7_t * pSrc,
+  q7_t * pDst,
+  q7_t * pScratchIn,
+  q31_t * pScratchOut,
+  uint32_t blockSize)
+{
+
+  q7_t *pState = S->pState;                      /* State buffer (circular delay line) */
+  q7_t *pCoeffs = S->pCoeffs;                    /* Coefficients of the non-zero taps */
+  int32_t *pTapDelay = S->pTapDelay;             /* Offsets of the non-zero taps */
+  uint32_t delaySize = S->maxDelay + blockSize;  /* State buffer length */
+  uint32_t numTaps = S->numTaps;                 /* Number of non-zero taps */
+  q7_t *px;                                      /* Working pointer into the scratch buffer of state values */
+  q31_t *pAcc;                                   /* Working pointer into the scratch buffer of output values */
+  q7_t *pOut = pDst;                             /* Destination pointer */
+  q7_t coeff;                                    /* Coefficient of the tap being processed */
+  int32_t readIndex;                             /* Read index of the state buffer */
+  uint32_t tap, blkCnt;                          /* Loop counters */
+  q7_t in1, in2, in3, in4;                       /* Saturated outputs waiting to be packed */
+
+  /* blockSize input samples are copied into the state buffer. */
+  /* S->stateIndex gives the starting position to write in the state buffer */
+  arm_circularWrite_q7(pState, (int32_t) delaySize, &S->stateIndex, 1, pSrc, 1,
+                       blockSize);
+
+  /* A filter without taps has an all-zero output. Handling it here also
+   * keeps the tap loop from touching the (empty) coefficient arrays. */
+  if(numTaps == 0u)
+  {
+    for(blkCnt = blockSize; blkCnt > 0u; blkCnt--)
+    {
+      *pOut++ = 0;
+    }
+    return;
+  }
+
+  /* Loop over the non-zero taps. */
+  for(tap = 0u; tap < numTaps; tap++)
+  {
+    /* The coefficient and the delay are fetched at the top of the iteration,
+     * so nothing beyond element (numTaps - 1) of either array is ever read. */
+    coeff = pCoeffs[tap];
+
+    /* Read Index, from where the state buffer should be read, is calculated. */
+    readIndex = ((int32_t) S->stateIndex - (int32_t) blockSize) - pTapDelay[tap];
+
+    /* Wraparound of readIndex */
+    if(readIndex < 0)
+    {
+      readIndex += (int32_t) delaySize;
+    }
+
+    /* blockSize samples are read from the state buffer into the scratch buffer */
+    arm_circularRead_q7(pState, (int32_t) delaySize, &readIndex, 1, pScratchIn, pScratchIn,
+                        (int32_t) blockSize, 1, blockSize);
+
+    /* Restart the working pointers for this tap */
+    px = pScratchIn;
+    pAcc = pScratchOut;
+
+    /* Loop over the blockSize. Unroll by a factor of 4. */
+    blkCnt = blockSize >> 2;
+
+    if(tap == 0u)
+    {
+      /* First tap: the products initialise the accumulator buffer */
+      while(blkCnt > 0u)
+      {
+        *pAcc++ = ((q31_t) * px++ * coeff);
+        *pAcc++ = ((q31_t) * px++ * coeff);
+        *pAcc++ = ((q31_t) * px++ * coeff);
+        *pAcc++ = ((q31_t) * px++ * coeff);
+
+        blkCnt--;
+      }
+
+      /* If the blockSize is not a multiple of 4,
+       * compute the remaining samples */
+      blkCnt = blockSize % 0x4u;
+
+      while(blkCnt > 0u)
+      {
+        *pAcc++ = ((q31_t) * px++ * coeff);
+
+        blkCnt--;
+      }
+    }
+    else
+    {
+      /* Remaining taps: multiply-accumulate into the accumulator buffer */
+      while(blkCnt > 0u)
+      {
+        *pAcc++ += (q31_t) * px++ * coeff;
+        *pAcc++ += (q31_t) * px++ * coeff;
+        *pAcc++ += (q31_t) * px++ * coeff;
+        *pAcc++ += (q31_t) * px++ * coeff;
+
+        blkCnt--;
+      }
+
+      /* If the blockSize is not a multiple of 4,
+       * compute the remaining samples */
+      blkCnt = blockSize % 0x4u;
+
+      while(blkCnt > 0u)
+      {
+        *pAcc++ += (q31_t) * px++ * coeff;
+
+        blkCnt--;
+      }
+    }
+  }
+
+  /* All the output values are in the pScratchOut buffer.
+     Drop the low 7 bits, saturate to 8 bits (1.7 format) and store in the destination buffer. */
+  pAcc = pScratchOut;
+
+  /* Loop over the blockSize, four outputs packed into one 32-bit store. */
+  blkCnt = blockSize >> 2;
+
+  while(blkCnt > 0u)
+  {
+    in1 = (q7_t) __SSAT(*pAcc++ >> 7, 8);
+    in2 = (q7_t) __SSAT(*pAcc++ >> 7, 8);
+    in3 = (q7_t) __SSAT(*pAcc++ >> 7, 8);
+    in4 = (q7_t) __SSAT(*pAcc++ >> 7, 8);
+
+    *__SIMD32(pOut)++ = __PACKq7(in1, in2, in3, in4);
+
+    /* Decrement the blockSize loop counter */
+    blkCnt--;
+  }
+
+  /* If the blockSize is not a multiple of 4,
+     remaining samples are processed in the below loop */
+  blkCnt = blockSize % 0x4u;
+
+  while(blkCnt > 0u)
+  {
+    *pOut++ = (q7_t) __SSAT(*pAcc++ >> 7, 8);
+
+    /* Decrement the blockSize loop counter */
+    blkCnt--;
+  }
+}
+ 
+/**  
+ * @} end of FIR_Sparse group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FilteringFunctions/arm_iir_lattice_f32.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,313 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:	    arm_iir_lattice_f32.c  
+*  
+* Description:	Floating-point IIR Lattice filter processing function.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupFilters  
+ */ 
+ 
+/**  
+ * @defgroup IIR_Lattice Infinite Impulse Response (IIR) Lattice Filters  
+ *  
+ * This set of functions implements lattice filters  
+ * for Q15, Q31 and floating-point data types.  Lattice filters are used in a   
+ * variety of adaptive filter applications.  The filter structure has feedforward and  
+ * feedback components and the net impulse response is infinite length.  
+ * The functions operate on blocks  
+ * of input and output data and each call to the function processes  
+ * <code>blockSize</code> samples through the filter.  <code>pSrc</code> and  
+ * <code>pDst</code> point to input and output arrays containing <code>blockSize</code> values.  
+  
+ * \par Algorithm:  
+ * \image html IIRLattice.gif "Infinite Impulse Response Lattice filter"  
+ * <pre>  
+ *    fN(n)   =  x(n)  
+ *    fm-1(n) = fm(n) - km * gm-1(n-1)   for m = N, N-1, ...1  
+ *    gm(n)   = km * fm-1(n) + gm-1(n-1) for m = N, N-1, ...1  
+ *    y(n)    = vN * gN(n) + vN-1 * gN-1(n) + ...+ v0 * g0(n)  
+ * </pre>  
+ * \par  
+ * <code>pkCoeffs</code> points to array of reflection coefficients of size <code>numStages</code>.   
+ * Reflection coefficients are stored in time-reversed order.  
+ * \par  
+ * <pre>  
+ *    {kN, kN-1, ....k1}  
+ * </pre>  
+ * <code>pvCoeffs</code> points to the array of ladder coefficients of size <code>(numStages+1)</code>.   
+ * Ladder coefficients are stored in time-reversed order.  
+ * \par  
+ * <pre>  
+ *    {vN, vN-1, ...v0}  
+ * </pre>  
+ * <code>pState</code> points to a state array of size <code>numStages + blockSize</code>.  
+ * The state variables shown in the figure above (the g values) are stored in the <code>pState</code> array.  
+ * The state variables are updated after each block of data is processed; the coefficients are untouched.  
+ * \par Instance Structure  
+ * The coefficients and state variables for a filter are stored together in an instance data structure.  
+ * A separate instance structure must be defined for each filter.  
+ * Coefficient arrays may be shared among several instances while state variable arrays cannot be shared.  
+ * There are separate instance structure declarations for each of the 3 supported data types.  
+  *  
+ * \par Initialization Functions  
+ * There is also an associated initialization function for each data type.  
+ * The initialization function performs the following operations:  
+ * - Sets the values of the internal structure fields.  
+ * - Zeros out the values in the state buffer.  
+ *  
+ * \par  
+ * Use of the initialization function is optional.  
+ * However, if the initialization function is used, then the instance structure cannot be placed into a const data section.  
+ * To place an instance structure into a const data section, the instance structure must be manually initialized.  
+ * Set the values in the state buffer to zeros and then manually initialize the instance structure as follows:  
+ * <pre>  
+ *arm_iir_lattice_instance_f32 S = {numStages, pState, pkCoeffs, pvCoeffs};  
+ *arm_iir_lattice_instance_q31 S = {numStages, pState, pkCoeffs, pvCoeffs};  
+ *arm_iir_lattice_instance_q15 S = {numStages, pState, pkCoeffs, pvCoeffs};  
+ * </pre>  
+ * \par  
+ * where <code>numStages</code> is the number of stages in the filter; <code>pState</code> points to the state buffer array;  
+ * <code>pkCoeffs</code> points to array of the reflection coefficients; <code>pvCoeffs</code> points to the array of ladder coefficients.  
+ * \par Fixed-Point Behavior  
+ * Care must be taken when using the fixed-point versions of the IIR lattice filter functions.  
+ * In particular, the overflow and saturation behavior of the accumulator used in each function must be considered.  
+ * Refer to the function specific documentation below for usage guidelines.  
+ */ 
+ 
+/**  
+ * @addtogroup IIR_Lattice  
+ * @{  
+ */ 
+ 
+/**
+ * @brief Processing function for the floating-point IIR lattice filter.
+ * @param[in] *S points to an instance of the floating-point IIR lattice structure.
+ *               The structure itself is not modified, but the state buffer it
+ *               points to is rewritten on every call.
+ * @param[in] *pSrc points to the block of input data.
+ * @param[out] *pDst points to the block of output data.
+ * @param[in] blockSize number of samples to process.
+ * @return none.
+ */
+
+void arm_iir_lattice_f32(
+  const arm_iir_lattice_instance_f32 * S,
+  float32_t * pSrc,
+  float32_t * pDst,
+  uint32_t blockSize)
+{
+  uint32_t numStages = S->numStages;             /* number of stages */
+  uint32_t restStages = numStages - 1u;          /* stages that follow the first one */
+  float32_t *pWindow = S->pState;                /* start of the state values used by the current sample */
+  float32_t *pG;                                 /* state value being read and then overwritten */
+  float32_t *pK, *pV;                            /* reflection and ladder coefficient pointers */
+  float32_t *pFrom, *pTo;                        /* pointers for the final state copy */
+  float32_t f, g, k;                             /* forward value, backward value, reflection coefficient */
+  float32_t y;                                   /* output accumulator */
+  uint32_t n, i;                                 /* loop counters */
+
+  /* One pass of the lattice per input sample */
+  for(n = blockSize; n > 0u; n--)
+  {
+    /* fN(n) = x(n) */
+    f = *pSrc++;
+
+    pG = pWindow;
+    pK = S->pkCoeffs;
+    pV = S->pvCoeffs;
+    y = 0.0f;
+
+    /* First stage:
+     *   fN-1(n) = fN(n) - kN * gN-1(n-1)
+     *   gN(n)   = kN * fN-1(n) + gN-1(n-1)
+     * gN(n) replaces the state value just read and contributes gN(n) * vN
+     * to the output. */
+    k = *pK++;
+    g = *pG;
+    f = f - (k * g);
+    g = (f * k) + g;
+    *pG++ = g;
+    y += (g * (*pV++));
+
+    /* Following stages, four per iteration. Every stage repeats the
+     * recurrence above with the next k, v and state value. */
+    for(i = restStages >> 2; i > 0u; i--)
+    {
+      k = *pK++;
+      g = *pG;
+      f = f - (k * g);
+      g = (f * k) + g;
+      y += (g * (*pV++));
+      *pG++ = g;
+
+      k = *pK++;
+      g = *pG;
+      f = f - (k * g);
+      g = (f * k) + g;
+      y += (g * (*pV++));
+      *pG++ = g;
+
+      k = *pK++;
+      g = *pG;
+      f = f - (k * g);
+      g = (f * k) + g;
+      y += (g * (*pV++));
+      *pG++ = g;
+
+      k = *pK++;
+      g = *pG;
+      f = f - (k * g);
+      g = (f * k) + g;
+      y += (g * (*pV++));
+      *pG++ = g;
+    }
+
+    /* Stages left over when (numStages - 1) is not a multiple of 4 */
+    for(i = restStages % 0x4u; i > 0u; i--)
+    {
+      k = *pK++;
+      g = *pG;
+      f = f - (k * g);
+      g = (f * k) + g;
+      y += (g * (*pV++));
+      *pG++ = g;
+    }
+
+    /* y(n) += g0(n) * v0, where g0(n) is the last forward value */
+    y += (f * (*pV));
+
+    /* g0(n) is stored right after the numStages values written above */
+    *pG = f;
+
+    /* write out into pDst */
+    *pDst++ = y;
+
+    /* The next sample works on the state values one position further on */
+    pWindow++;
+  }
+
+  /* Processing is complete. Copy numStages state values, starting at offset
+     blockSize, back to the start of the state buffer in preparation for the
+     next block. */
+  pTo = S->pState;
+  pFrom = S->pState + blockSize;
+
+  /* Copy four values per iteration */
+  for(i = numStages >> 2u; i > 0u; i--)
+  {
+    *pTo++ = *pFrom++;
+    *pTo++ = *pFrom++;
+    *pTo++ = *pFrom++;
+    *pTo++ = *pFrom++;
+  }
+
+  /* Copy the remaining values */
+  for(i = numStages % 0x4u; i > 0u; i--)
+  {
+    *pTo++ = *pFrom++;
+  }
+
+}
+ 
+ 
+ 
+ 
+/**  
+ * @} end of IIR_Lattice group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FilteringFunctions/arm_iir_lattice_init_f32.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,80 @@
+/*-----------------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:        arm_iir_lattice_init_f32.c  
+*  
+* Description:  Floating-point IIR lattice filter initialization function.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* ---------------------------------------------------------------------------*/ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupFilters  
+ */ 
+ 
+/**  
+ * @addtogroup IIR_Lattice  
+ * @{  
+ */ 
+ 
+/**
+ * @brief Initialization function for the floating-point IIR lattice filter.
+ * @param[in,out] *S points to an instance of the floating-point IIR lattice structure; all of its fields are set here.
+ * @param[in] numStages number of stages in the filter.
+ * @param[in] *pkCoeffs points to the reflection coefficient buffer.  The array is of length numStages.
+ * @param[in] *pvCoeffs points to the ladder coefficient buffer.  The array is of length numStages+1.
+ * @param[in,out] *pState points to the state buffer.  The array is of length numStages+blockSize and is cleared to zero.
+ * @param[in] blockSize number of samples to process.
+ * @return none.
+ */
+
+void arm_iir_lattice_init_f32(
+  arm_iir_lattice_instance_f32 * S,
+  uint16_t numStages,
+  float32_t * pkCoeffs,
+  float32_t * pvCoeffs,
+  float32_t * pState,
+  uint32_t blockSize)
+{
+  /* The state buffer always holds blockSize + numStages values */
+  uint32_t stateLength = numStages + blockSize;
+
+  /* Start from an all-zero state */
+  memset(pState, 0, stateLength * sizeof(float32_t));
+
+  /* Record the filter size and the caller's buffers in the instance */
+  S->numStages = numStages;
+  S->pState = pState;
+  S->pkCoeffs = pkCoeffs;
+  S->pvCoeffs = pvCoeffs;
+}
+ 
+  /**  
+   * @} end of IIR_Lattice group  
+   */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FilteringFunctions/arm_iir_lattice_init_q15.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,80 @@
+/*-----------------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:        arm_iir_lattice_init_q15.c  
+*  
+* Description:  Q15 IIR lattice filter initialization function.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* ---------------------------------------------------------------------------*/ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupFilters  
+ */ 
+ 
+/**  
+ * @addtogroup IIR_Lattice  
+ * @{  
+ */ 
+ 
+  /**  
+   * @brief Initialization function for the Q15 IIR lattice filter.  
+   * @param[in] *S points to an instance of the Q15 IIR lattice structure.  
+   * @param[in] numStages  number of stages in the filter.  
+   * @param[in] *pkCoeffs points to reflection coefficient buffer.  The array is of length numStages.  
+   * @param[in] *pvCoeffs points to ladder coefficient buffer.  The array is of length numStages+1.  
+   * @param[in] *pState points to state buffer.  The array is of length numStages+blockSize.  
+   * @param[in] blockSize number of samples to process per call.  
+   * @return none.  
+   */ 
+ 
+void arm_iir_lattice_init_q15( 
+  arm_iir_lattice_instance_q15 * S, 
+  uint16_t numStages, 
+  q15_t * pkCoeffs, 
+  q15_t * pvCoeffs, 
+  q15_t * pState, 
+  uint32_t blockSize) 
+{ 
+  /* Record the number of lattice stages in the instance */ 
+  S->numStages = numStages; 
+ 
+  /* Store the reflection coefficient pointer (the buffer itself is not copied) */ 
+  S->pkCoeffs = pkCoeffs; 
+ 
+  /* Store the ladder coefficient pointer (the buffer itself is not copied) */ 
+  S->pvCoeffs = pvCoeffs; 
+ 
+  /* Zero the state buffer: (numStages + blockSize) elements of q15_t */ 
+  memset(pState, 0, (numStages + blockSize) * sizeof(q15_t)); 
+ 
+  /* Store the state buffer pointer in the instance */ 
+  S->pState = pState; 
+ 
+ 
+} 
+ 
+/**  
+ * @} end of IIR_Lattice group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FilteringFunctions/arm_iir_lattice_init_q31.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,80 @@
+/*-----------------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:        arm_iir_lattice_init_q31.c  
+*  
+* Description:  Initialization function for the Q31 IIR lattice filter.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* ---------------------------------------------------------------------------*/ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupFilters  
+ */ 
+ 
+/**  
+ * @addtogroup IIR_Lattice  
+ * @{  
+ */ 
+ 
+  /**  
+   * @brief Initialization function for the Q31 IIR lattice filter.  
+   * @param[in] *S points to an instance of the Q31 IIR lattice structure.  
+   * @param[in] numStages number of stages in the filter.  
+   * @param[in] *pkCoeffs points to the reflection coefficient buffer.  The array is of length numStages.  
+   * @param[in] *pvCoeffs points to the ladder coefficient buffer.  The array is of length numStages+1.  
+   * @param[in] *pState points to the state buffer.  The array is of length numStages+blockSize.  
+   * @param[in] blockSize number of samples to process.  
+   * @return none.  
+   */ 
+ 
+void arm_iir_lattice_init_q31( 
+  arm_iir_lattice_instance_q31 * S, 
+  uint16_t numStages, 
+  q31_t * pkCoeffs, 
+  q31_t * pvCoeffs, 
+  q31_t * pState, 
+  uint32_t blockSize) 
+{ 
+  /* Record the number of lattice stages in the instance */ 
+  S->numStages = numStages; 
+ 
+  /* Store the reflection coefficient pointer (the buffer itself is not copied) */ 
+  S->pkCoeffs = pkCoeffs; 
+ 
+  /* Store the ladder coefficient pointer (the buffer itself is not copied) */ 
+  S->pvCoeffs = pvCoeffs; 
+ 
+  /* Zero the state buffer: (numStages + blockSize) elements of q31_t */ 
+  memset(pState, 0, (numStages + blockSize) * sizeof(q31_t)); 
+ 
+  /* Store the state buffer pointer in the instance */ 
+  S->pState = pState; 
+ 
+ 
+} 
+ 
+/**  
+ * @} end of IIR_Lattice group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FilteringFunctions/arm_iir_lattice_q15.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,281 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:	    arm_iir_lattice_q15.c  
+*  
+* Description:	Q15 IIR lattice filter processing function.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupFilters  
+ */ 
+ 
+/**  
+ * @addtogroup IIR_Lattice  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief Processing function for the Q15 IIR lattice filter.  
+ * @param[in] *S points to an instance of the Q15 IIR lattice structure.  
+ * @param[in] *pSrc points to the block of input data.  
+ * @param[out] *pDst points to the block of output data.  
+ * @param[in] blockSize number of samples to process.  
+ * @return none.  
+ *  
+ * @details  
+ * <b>Scaling and Overflow Behavior:</b>  
+ * \par  
+ * The function is implemented using a 64-bit internal accumulator.  
+ * Both coefficients and state variables are represented in 1.15 format and multiplications yield a 2.30 result.  
+ * The 2.30 intermediate results are accumulated in a 64-bit accumulator in 34.30 format.  
+ * There is no risk of internal overflow with this approach and the full precision of intermediate multiplications is preserved.  
+ * After all additions have been performed, the accumulator is truncated to 34.15 format by discarding low 15 bits.  
+ * Lastly, the accumulator is saturated to yield a result in 1.15 format.  
+ */ 
+ 
+void arm_iir_lattice_q15( 
+  const arm_iir_lattice_instance_q15 * S, 
+  q15_t * pSrc, 
+  q15_t * pDst, 
+  uint32_t blockSize) 
+{ 
+  q31_t fcurr, fnext, gcurr = 0, gnext;          /* Temporary variables for lattice stages */ 
+  q15_t gnext1, gnext2;                          /* Saturated g values of a stage pair, packed before the dual MAC */ 
+  uint32_t stgCnt;                               /* Loop counter for the final state copy */ 
+  q63_t acc;                                     /* Accumulator */ 
+  uint32_t blkCnt, tapCnt;                       /* Loop counters for samples and stages */ 
+  q15_t *px1, *px2, *pk, *pv;                    /* Temporary pointers for state (read/write) and coefficients */ 
+  uint32_t numStages = S->numStages;             /* Number of stages */ 
+  q15_t *pState;                                 /* State pointer */ 
+  q15_t *pStateCurnt;                            /* State current pointer */ 
+  q15_t out;                                     /* Temporary variable for output */ 
+  q31_t v;                                       /* Two ladder coefficients read as one 32-bit word */ 
+ 
+ 
+  blkCnt = blockSize; 
+ 
+  pState = &S->pState[0]; 
+ 
+  /* Sample processing: one pass of this loop per input sample */ 
+  while(blkCnt > 0u) 
+  { 
+    /* Read Sample from input buffer */ 
+    /* fN(n) = x(n) */ 
+    fcurr = *pSrc++; 
+ 
+    /* Initialize state read pointer */ 
+    px1 = pState; 
+    /* Initialize state write pointer */ 
+    px2 = pState; 
+    /* Set accumulator to zero */ 
+    acc = 0; 
+    /* Initialize Ladder coeff pointer */ 
+    pv = &S->pvCoeffs[0]; 
+    /* Initialize Reflection coeff pointer */ 
+    pk = &S->pkCoeffs[0]; 
+ 
+ 
+    /* Process sample for first tap */ 
+    gcurr = *px1++; 
+    /* fN-1(n) = fN(n) - kN * gN-1(n-1) */ 
+    fnext = fcurr - (((q31_t) gcurr * (*pk)) >> 15); 
+    fnext = __SSAT(fnext, 16); 
+    /* gN(n) = kN * fN-1(n) + gN-1(n-1) */ 
+    gnext = (((q31_t) fnext * (*pk++)) >> 15) + gcurr; 
+    gnext = __SSAT(gnext, 16); 
+    /* write gN(n) into state for next sample processing */ 
+    *px2++ = (q15_t) gnext; 
+    /* y(n) += gN(n) * vN  */ 
+    acc += (q31_t) ((gnext * (*pv++))); 
+ 
+ 
+    /* Update f values for next coefficient processing */ 
+    fcurr = fnext; 
+ 
+    /* Loop unrolling.  Process 4 taps at a time (the first tap was handled above). */ 
+    tapCnt = (numStages - 1u) >> 2; 
+ 
+    while(tapCnt > 0u) 
+    { 
+ 
+      /* Process sample for 2nd, 6th ...taps */ 
+      /* Read gN-2(n-1) from state buffer */ 
+      gcurr = *px1++; 
+      /* Process sample for 2nd, 6th .. taps */ 
+      /* fN-2(n) = fN-1(n) - kN-1 * gN-2(n-1) */ 
+      fnext = fcurr - (((q31_t) gcurr * (*pk)) >> 15); 
+      fnext = __SSAT(fnext, 16); 
+      /* gN-1(n) = kN-1 * fN-2(n) + gN-2(n-1) */ 
+      gnext = (((q31_t) fnext * (*pk++)) >> 15) + gcurr; 
+      gnext1 = (q15_t) __SSAT(gnext, 16); 
+      /* write gN-1(n) into state */ 
+      *px2++ = (q15_t) gnext1; 
+ 
+ 
+      /* Process sample for 3rd, 7th ...taps */ 
+      /* Read gN-3(n-1) from state */ 
+      gcurr = *px1++; 
+      /* Process sample for 3rd, 7th .. taps */ 
+      /* fN-3(n) = fN-2(n) - kN-2 * gN-3(n-1) */ 
+      fcurr = fnext - (((q31_t) gcurr * (*pk)) >> 15); 
+      fcurr = __SSAT(fcurr, 16); 
+      /* gN-2(n) = kN-2 * fN-3(n) + gN-3(n-1) */ 
+      gnext = (((q31_t) fcurr * (*pk++)) >> 15) + gcurr; 
+      gnext2 = (q15_t) __SSAT(gnext, 16); 
+      /* write gN-2(n) into state */ 
+      *px2++ = (q15_t) gnext2; 
+ 
+      /* Read vN-1 and vN-2 at a time */ 
+      v = *__SIMD32(pv)++; 
+ 
+ 
+      /* Pack gN-1(n) and gN-2(n) */ 
+      gnext = __PKHBT(gnext1, gnext2, 16); 
+      /* y(n) += gN-1(n) * vN-1  */ 
+      /* process for gN-5(n) * vN-5, gN-9(n) * vN-9 ... */ 
+      /* y(n) += gN-2(n) * vN-2  */ 
+      /* process for gN-6(n) * vN-6, gN-10(n) * vN-10 ... */ 
+      acc = __SMLALD(gnext, v, acc); 
+ 
+ 
+      /* Process sample for 4th, 8th ...taps */ 
+      /* Read gN-4(n-1) from state */ 
+      gcurr = *px1++; 
+      /* Process sample for 4th, 8th .. taps */ 
+      /* fN-4(n) = fN-3(n) - kN-3 * gN-4(n-1) */ 
+      fnext = fcurr - (((q31_t) gcurr * (*pk)) >> 15); 
+      fnext = __SSAT(fnext, 16); 
+      /* gN-3(n) = kN-3 * fN-4(n) + gN-4(n-1) */ 
+      gnext = (((q31_t) fnext * (*pk++)) >> 15) + gcurr; 
+      gnext1 = (q15_t) __SSAT(gnext, 16); 
+      /* write  gN-3(n) for the next sample process */ 
+      *px2++ = (q15_t) gnext1; 
+ 
+ 
+      /* Process sample for 5th, 9th ...taps */ 
+      /* Read gN-5(n-1) from state */ 
+      gcurr = *px1++; 
+      /* Process sample for 5th, 9th .. taps */ 
+      /* fN-5(n) = fN-4(n) - kN-4 * gN-5(n-1) */ 
+      fcurr = fnext - (((q31_t) gcurr * (*pk)) >> 15); 
+      fcurr = __SSAT(fcurr, 16); 
+      /* gN-4(n) = kN-4 * fN-5(n) + gN-5(n-1) */ 
+      gnext = (((q31_t) fcurr * (*pk++)) >> 15) + gcurr; 
+      gnext2 = (q15_t) __SSAT(gnext, 16); 
+      /* write      gN-4(n) for the next sample process */ 
+      *px2++ = (q15_t) gnext2; 
+ 
+      /* Read vN-3 and vN-4 at a time */ 
+      v = *__SIMD32(pv)++; 
+ 
+      /* Pack gN-3(n) and gN-4(n) */ 
+      gnext = __PKHBT(gnext1, gnext2, 16); 
+      /* y(n) += gN-4(n) * vN-4  */ 
+      /* process for gN-8(n) * vN-8, gN-12(n) * vN-12 ... */ 
+      /* y(n) += gN-3(n) * vN-3  */ 
+      /* process for gN-7(n) * vN-7, gN-11(n) * vN-11 ... */ 
+      acc = __SMLALD(gnext, v, acc); 
+ 
+      tapCnt--; 
+ 
+    } 
+ 
+    fnext = fcurr; 
+ 
+    /* If (numStages - 1) is not a multiple of 4, compute the remaining filter taps one at a time */ 
+    tapCnt = (numStages - 1u) % 0x4u; 
+ 
+    while(tapCnt > 0u) 
+    { 
+      gcurr = *px1++; 
+      /* Process sample for last taps */ 
+      fnext = fcurr - (((q31_t) gcurr * (*pk)) >> 15); 
+      fnext = __SSAT(fnext, 16); 
+      gnext = (((q31_t) fnext * (*pk++)) >> 15) + gcurr; 
+      gnext = __SSAT(gnext, 16); 
+      /* Output samples for last taps */ 
+      acc += (q31_t) (((q31_t) gnext * (*pv++))); 
+      *px2++ = (q15_t) gnext; 
+      fcurr = fnext; 
+ 
+      tapCnt--; 
+    } 
+ 
+    /* y(n) += g0(n) * v0 */ 
+    acc += (q31_t) (((q31_t) fnext * (*pv++))); 
+ 
+    out = (q15_t) __SSAT(acc >> 15, 16); 
+    *px2++ = (q15_t) fnext; 
+ 
+    /* write out into pDst */ 
+    *pDst++ = out; 
+ 
+    /* Advance the state pointer by 1 to process the next sample */ 
+    pState = pState + 1u; 
+    blkCnt--; 
+ 
+  } 
+ 
+  /* Processing is complete. Now copy last S->numStages samples to start of the buffer  
+     for the preparation of next frame process */ 
+  /* Points to the start of the state buffer */ 
+  pStateCurnt = &S->pState[0]; 
+  pState = &S->pState[blockSize]; 
+ 
+  stgCnt = (numStages >> 2u); 
+ 
+  /* copy data: each iteration performs two 32-bit copies */ 
+  while(stgCnt > 0u) 
+  { 
+    *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++; 
+    *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++; 
+ 
+    /* Decrement the loop counter */ 
+    stgCnt--; 
+ 
+  } 
+ 
+  /* Calculation of count for remaining q15_t data */ 
+  stgCnt = (numStages) % 0x4u; 
+ 
+  /* copy the remaining q15_t values one at a time */ 
+  while(stgCnt > 0u) 
+  { 
+    *pStateCurnt++ = *pState++; 
+ 
+    /* Decrement the loop counter */ 
+    stgCnt--; 
+  } 
+ 
+} 
+ 
+ 
+ 
+ 
+/**  
+ * @} end of IIR_Lattice group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FilteringFunctions/arm_iir_lattice_q31.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,253 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:	    arm_iir_lattice_q31.c  
+*  
+* Description:	Q31 IIR lattice filter processing function.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupFilters  
+ */ 
+ 
+/**  
+ * @addtogroup IIR_Lattice  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief Processing function for the Q31 IIR lattice filter.  
+ * @param[in] *S points to an instance of the Q31 IIR lattice structure.  
+ * @param[in] *pSrc points to the block of input data.  
+ * @param[out] *pDst points to the block of output data.  
+ * @param[in] blockSize number of samples to process.  
+ * @return none.  
+ *  
+ * @details  
+ * <b>Scaling and Overflow Behavior:</b>  
+ * \par  
+ * The function is implemented using an internal 64-bit accumulator.  
+ * The accumulator has a 2.62 format and maintains full precision of the intermediate multiplication results but provides only a single guard bit.  
+ * Thus, if the accumulator result overflows, it wraps around rather than clipping.  
+ * In order to avoid overflows completely the input signal must be scaled down by 2*log2(numStages) bits.  
+ * After all multiply-accumulates are performed, the 2.62 accumulator is shifted right by 31 bits and truncated to 1.31 format; the output conversion itself does not saturate.  
+ */ 
+ 
+void arm_iir_lattice_q31( 
+  const arm_iir_lattice_instance_q31 * S, 
+  q31_t * pSrc, 
+  q31_t * pDst, 
+  uint32_t blockSize) 
+{ 
+  q31_t fcurr, fnext, gcurr = 0, gnext;          /* Temporary variables for lattice stages */ 
+  q63_t acc;                                     /* Accumulator */ 
+  uint32_t blkCnt, tapCnt;                       /* Loop counters for samples and stages */ 
+  q31_t *px1, *px2, *pk, *pv;                    /* Temporary pointers for state (read/write) and coefficients */ 
+  uint32_t numStages = S->numStages;             /* Number of stages */ 
+  q31_t *pState;                                 /* State pointer */ 
+  q31_t *pStateCurnt;                            /* State current pointer */ 
+ 
+  blkCnt = blockSize; 
+ 
+  pState = &S->pState[0]; 
+ 
+  /* Sample processing: one pass of this loop per input sample */ 
+  while(blkCnt > 0u) 
+  { 
+    /* Read Sample from input buffer */ 
+    /* fN(n) = x(n) */ 
+    fcurr = *pSrc++; 
+ 
+    /* Initialize state read pointer */ 
+    px1 = pState; 
+    /* Initialize state write pointer */ 
+    px2 = pState; 
+    /* Set accumulator to zero */ 
+    acc = 0; 
+    /* Initialize Ladder coeff pointer */ 
+    pv = &S->pvCoeffs[0]; 
+    /* Initialize Reflection coeff pointer */ 
+    pk = &S->pkCoeffs[0]; 
+ 
+ 
+    /* Process sample for first tap */ 
+    gcurr = *px1++; 
+    /* fN-1(n) = fN(n) - kN * gN-1(n-1) */ 
+    fnext = __QSUB(fcurr, (q31_t) (((q63_t) gcurr * (*pk)) >> 31)); 
+    /* gN(n) = kN * fN-1(n) + gN-1(n-1) */ 
+    gnext = __QADD(gcurr, (q31_t) (((q63_t) fnext * (*pk++)) >> 31)); 
+    /* write gN(n) into state for next sample processing */ 
+    *px2++ = gnext; 
+    /* y(n) += gN(n) * vN  */ 
+    acc += ((q63_t) gnext * *pv++); 
+ 
+    /* Update f values for next coefficient processing */ 
+    fcurr = fnext; 
+ 
+    /* Loop unrolling.  Process 4 taps at a time (the first tap was handled above). */ 
+    tapCnt = (numStages - 1u) >> 2; 
+ 
+    while(tapCnt > 0u) 
+    { 
+ 
+      /* Process sample for 2nd, 6th .. taps */ 
+      /* Read gN-2(n-1) from state buffer */ 
+      gcurr = *px1++; 
+      /* fN-2(n) = fN-1(n) - kN-1 * gN-2(n-1) */ 
+      fnext = __QSUB(fcurr, (q31_t) (((q63_t) gcurr * (*pk)) >> 31)); 
+      /* gN-1(n) = kN-1 * fN-2(n) + gN-2(n-1) */ 
+      gnext = __QADD(gcurr, (q31_t) (((q63_t) fnext * (*pk++)) >> 31)); 
+      /* y(n) += gN-1(n) * vN-1  */ 
+      /* process for gN-5(n) * vN-5, gN-9(n) * vN-9 ... */ 
+      acc += ((q63_t) gnext * *pv++); 
+      /* write gN-1(n) into state for next sample processing */ 
+      *px2++ = gnext; 
+ 
+      /* Process sample for 3rd, 7th ...taps */ 
+      /* Read gN-3(n-1) from state buffer */ 
+      gcurr = *px1++; 
+      /* Process sample for 3rd, 7th .. taps */ 
+      /* fN-3(n) = fN-2(n) - kN-2 * gN-3(n-1) */ 
+      fcurr = __QSUB(fnext, (q31_t) (((q63_t) gcurr * (*pk)) >> 31)); 
+      /* gN-2(n) = kN-2 * fN-3(n) + gN-3(n-1) */ 
+      gnext = __QADD(gcurr, (q31_t) (((q63_t) fcurr * (*pk++)) >> 31)); 
+      /* y(n) += gN-2(n) * vN-2  */ 
+      /* process for gN-6(n) * vN-6, gN-10(n) * vN-10 ... */ 
+      acc += ((q63_t) gnext * *pv++); 
+      /* write gN-2(n) into state for next sample processing */ 
+      *px2++ = gnext; 
+ 
+ 
+      /* Process sample for 4th, 8th ...taps */ 
+      /* Read gN-4(n-1) from state buffer */ 
+      gcurr = *px1++; 
+      /* Process sample for 4th, 8th .. taps */ 
+      /* fN-4(n) = fN-3(n) - kN-3 * gN-4(n-1) */ 
+      fnext = __QSUB(fcurr, (q31_t) (((q63_t) gcurr * (*pk)) >> 31)); 
+      /* gN-3(n) = kN-3 * fN-4(n) + gN-4(n-1) */ 
+      gnext = __QADD(gcurr, (q31_t) (((q63_t) fnext * (*pk++)) >> 31)); 
+      /* y(n) += gN-3(n) * vN-3  */ 
+      /* process for gN-7(n) * vN-7, gN-11(n) * vN-11 ... */ 
+      acc += ((q63_t) gnext * *pv++); 
+      /* write gN-3(n) into state for next sample processing */ 
+      *px2++ = gnext; 
+ 
+ 
+      /* Process sample for 5th, 9th ...taps */ 
+      /* Read gN-5(n-1) from state buffer */ 
+      gcurr = *px1++; 
+      /* Process sample for 5th, 9th .. taps */ 
+      /* fN-5(n) = fN-4(n) - kN-4 * gN-5(n-1) */ 
+      fcurr = __QSUB(fnext, (q31_t) (((q63_t) gcurr * (*pk)) >> 31)); 
+      /* gN-4(n) = kN-4 * fN-5(n) + gN-5(n-1) */ 
+      gnext = __QADD(gcurr, (q31_t) (((q63_t) fcurr * (*pk++)) >> 31)); 
+      /* y(n) += gN-4(n) * vN-4  */ 
+      /* process for gN-8(n) * vN-8, gN-12(n) * vN-12 ... */ 
+      acc += ((q63_t) gnext * *pv++); 
+      /* write gN-4(n) into state for next sample processing */ 
+      *px2++ = gnext; 
+ 
+      tapCnt--; 
+ 
+    } 
+ 
+    fnext = fcurr; 
+ 
+    /* If (numStages - 1) is not a multiple of 4, compute the remaining filter taps one at a time */ 
+    tapCnt = (numStages - 1u) % 0x4u; 
+ 
+    while(tapCnt > 0u) 
+    { 
+      gcurr = *px1++; 
+      /* Process sample for last taps */ 
+      fnext = __QSUB(fcurr, (q31_t) (((q63_t) gcurr * (*pk)) >> 31)); 
+      gnext = __QADD(gcurr, (q31_t) (((q63_t) fnext * (*pk++)) >> 31)); 
+      /* Output samples for last taps */ 
+      acc += ((q63_t) gnext * *pv++); 
+      *px2++ = gnext; 
+      fcurr = fnext; 
+ 
+      tapCnt--; 
+ 
+    } 
+ 
+    /* y(n) += g0(n) * v0 */ 
+    acc += (q63_t) fnext *( 
+  *pv++); 
+ 
+    *px2++ = fnext; 
+ 
+    /* write y(n) into pDst: acc shifted right by 31 and cast to q31_t (no explicit saturation in this statement) */ 
+    *pDst++ = (q31_t) (acc >> 31u); 
+ 
+    /* Advance the state pointer by 1 to process the next sample */ 
+    pState = pState + 1u; 
+    blkCnt--; 
+ 
+  } 
+ 
+  /* Processing is complete. Now copy last S->numStages samples to start of the buffer  
+     for the preparation of next frame process */ 
+ 
+  /* Points to the start of the state buffer */ 
+  pStateCurnt = &S->pState[0]; 
+  pState = &S->pState[blockSize]; 
+ 
+  tapCnt = numStages >> 2u; 
+ 
+  /* copy data, 4 values per iteration */ 
+  while(tapCnt > 0u) 
+  { 
+    *pStateCurnt++ = *pState++; 
+    *pStateCurnt++ = *pState++; 
+    *pStateCurnt++ = *pState++; 
+    *pStateCurnt++ = *pState++; 
+ 
+    /* Decrement the loop counter */ 
+    tapCnt--; 
+ 
+  } 
+ 
+  /* Calculate remaining number of copies */ 
+  tapCnt = (numStages) % 0x4u; 
+ 
+  /* Copy the remaining q31_t data */ 
+  while(tapCnt > 0u) 
+  { 
+    *pStateCurnt++ = *pState++; 
+ 
+    /* Decrement the loop counter */ 
+    tapCnt--; 
+  }; 
+ 
+} 
+ 
+ 
+ 
+ 
+/**  
+ * @} end of IIR_Lattice group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FilteringFunctions/arm_lms_f32.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,330 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:	    arm_lms_f32.c  
+*  
+* Description:	Processing function for the floating-point LMS filter.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupFilters  
+ */ 
+ 
+/**  
+ * @defgroup LMS Least Mean Square (LMS) Filters  
+ *  
+ * LMS filters are a class of adaptive filters that are able to "learn" unknown transfer functions.  
+ * LMS filters use a gradient descent method in which the filter coefficients are updated based on the instantaneous error signal.  
+ * Adaptive filters are often used in communication systems, equalizers, and noise removal.  
+ * The CMSIS DSP Library contains LMS filter functions that operate on Q15, Q31, and floating-point data types.  
+ * The library also contains normalized LMS filters in which the filter coefficient adaptation is independent of the level of the input signal.  
+ *  
+ * An LMS filter consists of two components as shown below.  
+ * The first component is a standard transversal or FIR filter.  
+ * The second component is a coefficient update mechanism.  
+ * The LMS filter has two input signals.  
+ * The "input" feeds the FIR filter while the "reference input" corresponds to the desired output of the FIR filter.  
+ * That is, the FIR filter coefficients are updated so that the output of the FIR filter matches the reference input.  
+ * The filter coefficient update mechanism is based on the difference between the FIR filter output and the reference input.  
+ * This "error signal" tends towards zero as the filter adapts.  
+ * The LMS processing functions accept the input and reference input signals and generate the filter output and error signal.  
+ * \image html LMS.gif "Internal structure of the Least Mean Square filter"  
+ *  
+ * The functions operate on blocks of data and each call to the function processes  
+ * <code>blockSize</code> samples through the filter.  
+ * <code>pSrc</code> points to input signal, <code>pRef</code> points to reference signal,  
+ * <code>pOut</code> points to output signal and <code>pErr</code> points to error signal.  
+ * All arrays contain <code>blockSize</code> values.  
+ *  
+ * The API functions operate on a block-by-block basis.  
+ * Internally, the filter coefficients <code>b[n]</code> are updated on a sample-by-sample basis.  
+ * The convergence of the LMS filter is slower compared to the normalized LMS algorithm.  
+ *  
+ * \par Algorithm:  
+ * The output signal <code>y[n]</code> is computed by a standard FIR filter:  
+ * <pre>  
+ *     y[n] = b[0] * x[n] + b[1] * x[n-1] + b[2] * x[n-2] + ...+ b[numTaps-1] * x[n-numTaps+1]  
+ * </pre>  
+ *  
+ * \par  
+ * The error signal equals the difference between the reference signal <code>d[n]</code> and the filter output:  
+ * <pre>  
+ *     e[n] = d[n] - y[n].  
+ * </pre>  
+ *  
+ * \par  
+ * After each sample of the error signal is computed, the filter coefficients <code>b[k]</code> are updated on a sample-by-sample basis:  
+ * <pre>  
+ *     b[k] = b[k] + e[n] * mu * x[n-k],  for k=0, 1, ..., numTaps-1  
+ * </pre>  
+ * where <code>mu</code> is the step size and controls the rate of coefficient convergence.  
+ *\par  
+ * In the APIs, <code>pCoeffs</code> points to a coefficient array of size <code>numTaps</code>.  
+ * Coefficients are stored in time reversed order.  
+ * \par  
+ * <pre>  
+ *    {b[numTaps-1], b[numTaps-2], b[numTaps-3], ..., b[1], b[0]}  
+ * </pre>  
+ * \par  
+ * <code>pState</code> points to a state array of size <code>numTaps + blockSize - 1</code>.  
+ * Samples in the state buffer are stored oldest first, where <code>x[n]</code> is the first sample of the current block:  
+ * \par  
+ * <pre>  
+ *    {x[n-numTaps+1], x[n-numTaps+2], ..., x[n-1], x[n], x[n+1], ..., x[n+blockSize-1]}  
+ * </pre>  
+ * \par  
+ * Note that the length of the state buffer exceeds the length of the coefficient array by <code>blockSize-1</code> samples.  
+ * The increased state buffer length allows circular addressing, which is traditionally used in FIR filters,  
+ * to be avoided and yields a significant speed improvement.  
+ * The state variables are updated after each block of data is processed.  
+ * \par Instance Structure  
+ * The coefficients and state variables for a filter are stored together in an instance data structure.  
+ * A separate instance structure must be defined for each filter and  
+ * coefficient and state arrays cannot be shared among instances.  
+ * There are separate instance structure declarations for each of the 3 supported data types.  
+ *  
+ * \par Initialization Functions  
+ * There is also an associated initialization function for each data type.  
+ * The initialization function performs the following operations:  
+ * - Sets the values of the internal structure fields.  
+ * - Zeros out the values in the state buffer.  
+ * \par  
+ * Use of the initialization function is optional.  
+ * However, if the initialization function is used, then the instance structure cannot be placed into a const data section.  
+ * To place an instance structure into a const data section, the instance structure must be manually initialized.  
+ * Set the values in the state buffer to zeros before static initialization.  
+ * The code below statically initializes each of the 3 different data type filter instance structures  
+ * <pre>  
+ *    arm_lms_instance_f32 S = {numTaps, pState, pCoeffs, mu};  
+ *    arm_lms_instance_q31 S = {numTaps, pState, pCoeffs, mu, postShift};  
+ *    arm_lms_instance_q15 S = {numTaps, pState, pCoeffs, mu, postShift};  
+ * </pre>  
+ * where <code>numTaps</code> is the number of filter coefficients in the filter; <code>pState</code> is the address of the state buffer;  
+ * <code>pCoeffs</code> is the address of the coefficient buffer; <code>mu</code> is the step size parameter; and <code>postShift</code> is the shift applied to coefficients.  
+ *  
+ * \par Fixed-Point Behavior:  
+ * Care must be taken when using the Q15 and Q31 versions of the LMS filter.  
+ * The following issues must be considered:  
+ * - Scaling of coefficients  
+ * - Overflow and saturation  
+ *  
+ * \par Scaling of Coefficients:  
+ * Filter coefficients are represented as fractional values and  
+ * coefficients are restricted to lie in the range <code>[-1 +1)</code>.  
+ * The fixed-point functions have an additional scaling parameter <code>postShift</code>.  
+ * At the output of the filter's accumulator is a shift register which shifts the result by <code>postShift</code> bits.  
+ * This essentially scales the filter coefficients by <code>2^postShift</code> and  
+ * allows the filter coefficients to exceed the range <code>[-1 +1)</code>.  
+ * The value of <code>postShift</code> is set by the user based on the expected gain through the system being modeled.  
+ *  
+ * \par Overflow and Saturation:  
+ * Overflow and saturation behavior of the fixed-point Q15 and Q31 versions are  
+ * described separately as part of the function specific documentation below.  
+ */ 
+ 
+/**  
+ * @addtogroup LMS  
+ * @{  
+ */ 
+ 
+  /**  
+   * @brief Processing function for floating-point LMS filter.  
+   * @param[in]  *S points to an instance of the floating-point LMS filter structure.  
+   * @param[in]  *pSrc points to the block of input data.  
+   * @param[in]  *pRef points to the block of reference data.  
+   * @param[out] *pOut points to the block of output data.  
+   * @param[out] *pErr points to the block of error data.  
+   * @param[in]  blockSize number of samples to process.  
+   * @return     none.  
+   */ 
+ 
+void arm_lms_f32( 
+  const arm_lms_instance_f32 * S, 
+  float32_t * pSrc, 
+  float32_t * pRef, 
+  float32_t * pOut, 
+  float32_t * pErr, 
+  uint32_t blockSize) 
+{ 
+  float32_t *pState = S->pState;                 /* State pointer */ 
+  float32_t *pCoeffs = S->pCoeffs;               /* Coefficient pointer */ 
+  float32_t *pStateCurnt;                        /* Points to the current sample of the state */ 
+  float32_t *px, *pb;                            /* Temporary pointers for state and coefficient buffers */ 
+  float32_t mu = S->mu;                          /* Adaptive factor (step size) */ 
+  uint32_t numTaps = S->numTaps;                 /* Number of filter coefficients in the filter */ 
+  uint32_t tapCnt, blkCnt;                       /* Loop counters */ 
+  float32_t sum, e, d;                           /* Accumulator, error, reference data sample */ 
+  float32_t w = 0.0f;                            /* Weight factor: e * mu */ 
+ 
+  e = 0.0f; 
+  d = 0.0f; 
+ 
+  /* S->pState points to state array which contains previous frame (numTaps - 1) samples */ 
+  /* pStateCurnt points to the location where the new input data should be written */ 
+  pStateCurnt = &(S->pState[(numTaps - 1u)]); 
+ 
+  blkCnt = blockSize; 
+ 
+  while(blkCnt > 0u) 
+  { 
+    /* Copy the new input sample into the state buffer */ 
+    *pStateCurnt++ = *pSrc++; 
+ 
+    /* Initialize pState pointer */ 
+    px = pState; 
+ 
+    /* Initialize coeff pointer */ 
+    pb = (pCoeffs); 
+ 
+    /* Set the accumulator to zero */ 
+    sum = 0.0f; 
+ 
+    /* Loop unrolling.  Process 4 taps at a time. */ 
+    tapCnt = numTaps >> 2; 
+ 
+    while(tapCnt > 0u) 
+    { 
+      /* Perform the multiply-accumulate */ 
+      sum += (*px++) * (*pb++); 
+      sum += (*px++) * (*pb++); 
+      sum += (*px++) * (*pb++); 
+      sum += (*px++) * (*pb++); 
+ 
+      /* Decrement the loop counter */ 
+      tapCnt--; 
+    } 
+ 
+    /* If the filter length is not a multiple of 4, compute the remaining filter taps */ 
+    tapCnt = numTaps % 0x4u; 
+ 
+    while(tapCnt > 0u) 
+    { 
+      /* Perform the multiply-accumulate */ 
+      sum += (*px++) * (*pb++); 
+ 
+      /* Decrement the loop counter */ 
+      tapCnt--; 
+    } 
+ 
+    /* Store the accumulator result (filter output) in the destination buffer. */ 
+    *pOut++ = sum; 
+ 
+    /* Compute and store error: e = d - y */ 
+    d = (float32_t) (*pRef++); 
+    e = d - sum; 
+    *pErr++ = e; 
+ 
+    /* Calculation of Weighting factor for the updating filter coefficients */ 
+    w = e * mu; 
+ 
+    /* Initialize pState pointer */ 
+    px = pState; 
+ 
+    /* Initialize coeff pointer */ 
+    pb = (pCoeffs); 
+ 
+    /* Loop unrolling.  Process 4 taps at a time. */ 
+    tapCnt = numTaps >> 2; 
+ 
+    /* Update filter coefficients */ 
+    while(tapCnt > 0u) 
+    { 
+      /* Coefficient update: add w times the matching state sample */ 
+      *pb = *pb + (w * (*px++)); 
+      pb++; 
+ 
+      *pb = *pb + (w * (*px++)); 
+      pb++; 
+ 
+      *pb = *pb + (w * (*px++)); 
+      pb++; 
+ 
+      *pb = *pb + (w * (*px++)); 
+      pb++; 
+ 
+      /* Decrement the loop counter */ 
+      tapCnt--; 
+    } 
+ 
+    /* If the filter length is not a multiple of 4, update the remaining coefficients */ 
+    tapCnt = numTaps % 0x4u; 
+ 
+    while(tapCnt > 0u) 
+    { 
+      /* Coefficient update: add w times the matching state sample */ 
+      *pb = *pb + (w * (*px++)); 
+      pb++; 
+ 
+      /* Decrement the loop counter */ 
+      tapCnt--; 
+    } 
+ 
+    /* Advance state pointer by 1 for the next sample */ 
+    pState = pState + 1; 
+ 
+    /* Decrement the loop counter */ 
+    blkCnt--; 
+  } 
+ 
+ 
+  /* Processing is complete. Now copy the last numTaps - 1 samples to the  
+     start of the state buffer. This prepares the state buffer for the  
+     next function call. */ 
+ 
+  /* Points to the start of the pState buffer */ 
+  pStateCurnt = S->pState; 
+ 
+  /* Loop unrolling for (numTaps - 1u) samples copy */ 
+  tapCnt = (numTaps - 1u) >> 2u; 
+ 
+  /* copy data, 4 values per iteration */ 
+  while(tapCnt > 0u) 
+  { 
+    *pStateCurnt++ = *pState++; 
+    *pStateCurnt++ = *pState++; 
+    *pStateCurnt++ = *pState++; 
+    *pStateCurnt++ = *pState++; 
+ 
+    /* Decrement the loop counter */ 
+    tapCnt--; 
+  } 
+ 
+  /* Calculate remaining number of copies */ 
+  tapCnt = (numTaps - 1u) % 0x4u; 
+ 
+  /* Copy the remaining float32_t data */ 
+  while(tapCnt > 0u) 
+  { 
+    *pStateCurnt++ = *pState++; 
+ 
+    /* Decrement the loop counter */ 
+    tapCnt--; 
+  } 
+} 
+ 
+/**  
+   * @} end of LMS group  
+   */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FilteringFunctions/arm_lms_init_f32.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,84 @@
+/*-----------------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:        arm_lms_init_f32.c  
+*  
+* Description:  Floating-point LMS filter initialization function.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* ---------------------------------------------------------------------------*/ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @addtogroup LMS  
+ * @{  
+ */ 
+ 
+  /**  
+   * @brief Initialization function for floating-point LMS filter.  
+   * @param[in] *S points to an instance of the floating-point LMS filter structure.  
+   * @param[in] numTaps  number of filter coefficients.  
+   * @param[in] *pCoeffs points to the coefficient buffer.  
+   * @param[in] *pState points to state buffer.  
+   * @param[in] mu step size that controls filter coefficient updates.  
+   * @param[in] blockSize number of samples to process.  
+   * @return none.  
+   */ 
+ 
+/**  
+ * \par Description:  
+ * <code>pCoeffs</code> points to the array of filter coefficients stored in time reversed order:  
+ * <pre>  
+ *    {b[numTaps-1], b[numTaps-2], b[numTaps-3], ..., b[1], b[0]}
+ * </pre>  
+ * The initial filter coefficients serve as a starting point for the adaptive filter.  
+ * <code>pState</code> points to an array of length <code>numTaps+blockSize-1</code> samples, where <code>blockSize</code> is the number of input samples processed by each call to <code>arm_lms_f32()</code>.  
+ */ 
+ 
+void arm_lms_init_f32(
+  arm_lms_instance_f32 * S,
+  uint16_t numTaps,
+  float32_t * pCoeffs,
+  float32_t * pState,
+  float32_t mu,
+  uint32_t blockSize)
+{
+  /* The state buffer keeps numTaps - 1 samples of history plus one block
+     of new input: numTaps + blockSize - 1 samples in total */
+  uint32_t stateLen = numTaps + (blockSize - 1);
+
+  /* Bind the coefficient array and record how many taps it holds */
+  S->pCoeffs = pCoeffs;
+  S->numTaps = numTaps;
+
+  /* Start from an all-zero delay line */
+  memset(pState, 0, stateLen * sizeof(*pState));
+
+  /* Store the step size and bind the freshly cleared state buffer */
+  S->mu = mu;
+  S->pState = pState;
+}
+ 
+/**  
+ * @} end of LMS group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FilteringFunctions/arm_lms_init_q15.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,94 @@
+/*-----------------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:        arm_lms_init_q15.c  
+*  
+* Description:  Q15 LMS filter initialization function.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* ---------------------------------------------------------------------------*/ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupFilters  
+ */ 
+ 
+/**  
+ * @addtogroup LMS  
+ * @{  
+ */ 
+ 
+/**  
+* @brief Initialization function for the Q15 LMS filter.  
+* @param[in] *S points to an instance of the Q15 LMS filter structure.  
+* @param[in] numTaps  number of filter coefficients.  
+* @param[in] *pCoeffs points to the coefficient buffer.  
+* @param[in] *pState points to the state buffer.  
+* @param[in] mu step size that controls filter coefficient updates.  
+* @param[in] blockSize number of samples to process.  
+* @param[in] postShift bit shift applied to coefficients.  
+* @return    none.  
+*  
+* \par Description:  
+* <code>pCoeffs</code> points to the array of filter coefficients stored in time reversed order:  
+* <pre>  
+*    {b[numTaps-1], b[numTaps-2], b[numTaps-3], ..., b[1], b[0]}
+* </pre>  
+* The initial filter coefficients serve as a starting point for the adaptive filter.  
+* <code>pState</code> points to the array of state variables and size of array is  
+* <code>numTaps+blockSize-1</code> samples, where <code>blockSize</code> is the number of  
+* input samples processed by each call to <code>arm_lms_q15()</code>.  
+*/ 
+ 
+void arm_lms_init_q15(
+  arm_lms_instance_q15 * S,
+  uint16_t numTaps,
+  q15_t * pCoeffs,
+  q15_t * pState,
+  q15_t mu,
+  uint32_t blockSize,
+  uint32_t postShift)
+{
+  /* The state buffer is numTaps + blockSize - 1 samples long */
+  uint32_t stateLen = numTaps + (blockSize - 1u);
+
+  /* Bind the coefficient array and record how many taps it holds */
+  S->pCoeffs = pCoeffs;
+  S->numTaps = numTaps;
+
+  /* Start from an all-zero delay line */
+  memset(pState, 0, stateLen * sizeof(*pState));
+
+  /* Bind the freshly cleared state buffer */
+  S->pState = pState;
+
+  /* Step size used for the coefficient updates */
+  S->mu = mu;
+
+  /* Bit shift applied to the coefficients */
+  S->postShift = postShift;
+}
+ 
+/**  
+ * @} end of LMS group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FilteringFunctions/arm_lms_init_q31.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,94 @@
+/*-----------------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:        arm_lms_init_q31.c  
+*  
+* Description:  Q31 LMS filter initialization function.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* ---------------------------------------------------------------------------*/ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupFilters  
+ */ 
+ 
+/**  
+ * @addtogroup LMS  
+ * @{  
+ */ 
+ 
+  /**  
+   * @brief Initialization function for Q31 LMS filter.  
+   * @param[in] *S points to an instance of the Q31 LMS filter structure.  
+   * @param[in] numTaps  number of filter coefficients.  
+   * @param[in] *pCoeffs points to coefficient buffer.  
+   * @param[in] *pState points to state buffer.  
+   * @param[in] mu step size that controls filter coefficient updates.  
+   * @param[in] blockSize number of samples to process.  
+   * @param[in] postShift bit shift applied to coefficients.  
+   * @return none.  
+ *  
+ * \par Description:  
+ * <code>pCoeffs</code> points to the array of filter coefficients stored in time reversed order:  
+ * <pre>  
+ *    {b[numTaps-1], b[numTaps-2], b[numTaps-3], ..., b[1], b[0]}
+ * </pre>  
+ * The initial filter coefficients serve as a starting point for the adaptive filter.  
+ * <code>pState</code> points to an array of length <code>numTaps+blockSize-1</code> samples,
+ * where <code>blockSize</code> is the number of input samples processed by each call to  
+ * <code>arm_lms_q31()</code>.  
+ */ 
+ 
+void arm_lms_init_q31(
+  arm_lms_instance_q31 * S,
+  uint16_t numTaps,
+  q31_t * pCoeffs,
+  q31_t * pState,
+  q31_t mu,
+  uint32_t blockSize,
+  uint32_t postShift)
+{
+  /* The state buffer is numTaps + blockSize - 1 samples long */
+  uint32_t stateLen = (uint32_t) numTaps + (blockSize - 1u);
+
+  /* Bind the coefficient array and record how many taps it holds */
+  S->pCoeffs = pCoeffs;
+  S->numTaps = numTaps;
+
+  /* Start from an all-zero delay line */
+  memset(pState, 0, stateLen * sizeof(*pState));
+
+  /* Bind the freshly cleared state buffer */
+  S->pState = pState;
+
+  /* Step size used for the coefficient updates */
+  S->mu = mu;
+
+  /* Bit shift applied to the coefficients */
+  S->postShift = postShift;
+}
+ 
+/**  
+ * @} end of LMS group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FilteringFunctions/arm_lms_norm_f32.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,344 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:	    arm_lms_norm_f32.c  
+*  
+* Description:	Processing function for the floating-point Normalised LMS.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupFilters  
+ */ 
+ 
+/**  
+ * @defgroup LMS_NORM Normalized LMS Filters  
+ *  
+ * This set of functions implements a commonly used adaptive filter.  
+ * It is related to the Least Mean Square (LMS) adaptive filter and includes an additional normalization  
+ * factor which increases the adaptation rate of the filter.  
+ * The CMSIS DSP Library contains normalized LMS filter functions that operate on Q15, Q31, and floating-point data types.  
+ *  
+ * A normalized least mean square (NLMS) filter consists of two components as shown below.  
+ * The first component is a standard transversal or FIR filter.  
+ * The second component is a coefficient update mechanism.  
+ * The NLMS filter has two input signals.  
+ * The "input" feeds the FIR filter while the "reference input" corresponds to the desired output of the FIR filter.  
+ * That is, the FIR filter coefficients are updated so that the output of the FIR filter matches the reference input.  
+ * The filter coefficient update mechanism is based on the difference between the FIR filter output and the reference input.  
+ * This "error signal" tends towards zero as the filter adapts.  
+ * The NLMS processing functions accept the input and reference input signals and generate the filter output and error signal.  
+ * \image html LMS.gif "Internal structure of the NLMS adaptive filter"  
+ *  
+ * The functions operate on blocks of data and each call to the function processes  
+ * <code>blockSize</code> samples through the filter.  
+ * <code>pSrc</code> points to input signal, <code>pRef</code> points to reference signal,  
+ * <code>pOut</code> points to output signal and <code>pErr</code> points to error signal.  
+ * All arrays contain <code>blockSize</code> values.  
+ *  
+ * The API functions operate on a block-by-block basis.  
+ * Internally, the filter coefficients <code>b[n]</code> are updated on a sample-by-sample basis.  
+ * The convergence of the LMS filter is slower compared to the normalized LMS algorithm.  
+ *  
+ * \par Algorithm:  
+ * The output signal <code>y[n]</code> is computed by a standard FIR filter:  
+ * <pre>  
+ *     y[n] = b[0] * x[n] + b[1] * x[n-1] + b[2] * x[n-2] + ...+ b[numTaps-1] * x[n-numTaps+1]  
+ * </pre>  
+ *  
+ * \par  
+ * The error signal equals the difference between the reference signal <code>d[n]</code> and the filter output:  
+ * <pre>  
+ *     e[n] = d[n] - y[n].  
+ * </pre>  
+ *  
+ * \par  
+ * After each sample of the error signal is computed, the instantaneous energy of the filter state variables is calculated:
+ * <pre>  
+ *    E = x[n]^2 + x[n-1]^2 + ... + x[n-numTaps+1]^2.  
+ * </pre>  
+ * The filter coefficients <code>b[k]</code> are then updated on a sample-by-sample basis:  
+ * <pre>  
+ *     b[k] = b[k] + e[n] * (mu/E) * x[n-k],  for k=0, 1, ..., numTaps-1  
+ * </pre>  
+ * where <code>mu</code> is the step size and controls the rate of coefficient convergence.  
+ *\par  
+ * In the APIs, <code>pCoeffs</code> points to a coefficient array of size <code>numTaps</code>.  
+ * Coefficients are stored in time reversed order.  
+ * \par  
+ * <pre>  
+ *    {b[numTaps-1], b[numTaps-2], b[numTaps-3], ..., b[1], b[0]}
+ * </pre>  
+ * \par  
+ * <code>pState</code> points to a state array of size <code>numTaps + blockSize - 1</code>.  
+ * Samples in the state buffer are stored in the order:  
+ * \par  
+ * <pre>  
+ *    {x[n-numTaps+1], x[n-numTaps], x[n-numTaps-1], x[n-numTaps-2]....x[0], x[1], ..., x[blockSize-1]}  
+ * </pre>  
+ * \par  
+ * Note that the length of the state buffer exceeds the length of the coefficient array by <code>blockSize-1</code> samples.  
+ * The increased state buffer length allows circular addressing, which is traditionally used in FIR filters,  
+ * to be avoided and yields a significant speed improvement.  
+ * The state variables are updated after each block of data is processed.  
+ * \par Instance Structure  
+ * The coefficients and state variables for a filter are stored together in an instance data structure.  
+ * A separate instance structure must be defined for each filter and  
+ * coefficient and state arrays cannot be shared among instances.  
+ * There are separate instance structure declarations for each of the 3 supported data types.  
+ *  
+ * \par Initialization Functions  
+ * There is also an associated initialization function for each data type.  
+ * The initialization function performs the following operations:  
+ * - Sets the values of the internal structure fields.  
+ * - Zeros out the values in the state buffer.  
+ * \par  
+ * Instance structure cannot be placed into a const data section and it is recommended to use the initialization function.  
+ * \par Fixed-Point Behavior:  
+ * Care must be taken when using the Q15 and Q31 versions of the normalised LMS filter.  
+ * The following issues must be considered:  
+ * - Scaling of coefficients  
+ * - Overflow and saturation  
+ *  
+ * \par Scaling of Coefficients:  
+ * Filter coefficients are represented as fractional values and  
+ * coefficients are restricted to lie in the range <code>[-1 +1)</code>.  
+ * The fixed-point functions have an additional scaling parameter <code>postShift</code>.  
+ * At the output of the filter's accumulator is a shift register which shifts the result by <code>postShift</code> bits.  
+ * This essentially scales the filter coefficients by <code>2^postShift</code> and  
+ * allows the filter coefficients to exceed the range <code>[-1 +1)</code>.
+ * The value of <code>postShift</code> is set by the user based on the expected gain through the system being modeled.  
+ *  
+ * \par Overflow and Saturation:  
+ * Overflow and saturation behavior of the fixed-point Q15 and Q31 versions are  
+ * described separately as part of the function specific documentation below.  
+ */ 
+ 
+ 
+/**  
+ * @addtogroup LMS_NORM  
+ * @{  
+ */ 
+ 
+ 
+  /**  
+   * @brief Processing function for floating-point normalized LMS filter.  
+   * @param[in] *S points to an instance of the floating-point normalized LMS filter structure.  
+   * @param[in] *pSrc points to the block of input data.  
+   * @param[in] *pRef points to the block of reference data.  
+   * @param[out] *pOut points to the block of output data.  
+   * @param[out] *pErr points to the block of error data.  
+   * @param[in] blockSize number of samples to process.  
+   * @return none.  
+   */ 
+ 
+void arm_lms_norm_f32( 
+  arm_lms_norm_instance_f32 * S, 
+  float32_t * pSrc, 
+  float32_t * pRef, 
+  float32_t * pOut, 
+  float32_t * pErr, 
+  uint32_t blockSize) 
+{ 
+  float32_t *pState = S->pState;                 /* Start of the state window for the current sample */ 
+  float32_t *pCoeffs = S->pCoeffs;               /* Coefficient pointer */ 
+  float32_t *pStateCurnt;                        /* Write position for the next input sample in the state buffer */ 
+  float32_t *px, *pb;                            /* Temporary pointers for state and coefficient buffers */ 
+  float32_t mu = S->mu;                          /* Step size scaling the coefficient update */ 
+  uint32_t numTaps = S->numTaps;                 /* Number of filter coefficients in the filter */ 
+  uint32_t tapCnt, blkCnt;                       /* Loop counters */ 
+  float32_t energy;                              /* Running sum of squares, updated once per input sample */ 
+  float32_t sum, e, d;                           /* Accumulator, error, reference data sample */ 
+  float32_t w, x0, in;                           /* Weight factor, sample whose square is removed from energy, new input sample */ 
+ 
+  /* Initialise error, reference sample and weight factor */ 
+  e = 0.0f; 
+  d = 0.0f; 
+  w = 0.0f; 
+ 
+  energy = S->energy;                            /* Resume with the energy saved in the instance */ 
+  x0 = S->x0;                                    /* Resume with the x0 sample saved in the instance */ 
+ 
+  /* S->pState holds the last (numTaps - 1) samples of the previous block */ 
+  /* pStateCurnt points to the location where the new input data should be written */ 
+  pStateCurnt = &(S->pState[(numTaps - 1u)]); 
+ 
+  blkCnt = blockSize; 
+ 
+  while(blkCnt > 0u) 
+  { 
+    /* Copy the new input sample into the state buffer */ 
+    *pStateCurnt++ = *pSrc; 
+ 
+    /* px walks the state window starting at pState */ 
+    px = pState; 
+ 
+    /* pb walks the coefficient array from its first element */ 
+    pb = (pCoeffs); 
+ 
+    /* Read the same input sample again and advance the source pointer */ 
+    in = *pSrc++; 
+ 
+    /* Slide the energy window: remove the x0 term, add the new sample */ 
+    energy -= x0 * x0; 
+    energy += in * in; 
+ 
+    /* Set the accumulator to zero */ 
+    sum = 0.0f; 
+ 
+    /* Loop unrolling.  Process 4 taps at a time. */ 
+    tapCnt = numTaps >> 2; 
+ 
+    while(tapCnt > 0u) 
+    { 
+      /* Perform the multiply-accumulate */ 
+      sum += (*px++) * (*pb++); 
+      sum += (*px++) * (*pb++); 
+      sum += (*px++) * (*pb++); 
+      sum += (*px++) * (*pb++); 
+ 
+      /* Decrement the loop counter */ 
+      tapCnt--; 
+    } 
+ 
+    /* If the filter length is not a multiple of 4, compute the remaining filter taps */ 
+    tapCnt = numTaps % 0x4u; 
+ 
+    while(tapCnt > 0u) 
+    { 
+      /* Perform the multiply-accumulate */ 
+      sum += (*px++) * (*pb++); 
+ 
+      /* Decrement the loop counter */ 
+      tapCnt--; 
+    } 
+ 
+    /* Store the filter output for this sample in the destination buffer */ 
+    *pOut++ = sum; 
+ 
+    /* Compute and store the error; note *pRef is read only after *pOut has been written */ 
+    d = (float32_t) (*pRef++); 
+    e = d - sum; 
+    *pErr++ = e; 
+ 
+    /* Weight factor for the coefficient update: error times step size, divided by the energy */ 
+    /* The small constant 0.000000119209289f keeps the divisor non-zero when the energy is 0 */ 
+    w = (e * mu) / (energy + 0.000000119209289f); 
+ 
+    /* Rewind px to the start of the current state window */ 
+    px = pState; 
+ 
+    /* Rewind pb to the first coefficient */ 
+    pb = (pCoeffs); 
+ 
+    /* Loop unrolling.  Process 4 taps at a time. */ 
+    tapCnt = numTaps >> 2; 
+ 
+    /* Update filter coefficients */ 
+    while(tapCnt > 0u) 
+    { 
+      /* Add w times the matching state sample to each coefficient */ 
+      *pb += w * (*px++); 
+      pb++; 
+ 
+      *pb += w * (*px++); 
+      pb++; 
+ 
+      *pb += w * (*px++); 
+      pb++; 
+ 
+      *pb += w * (*px++); 
+      pb++; 
+ 
+ 
+      /* Decrement the loop counter */ 
+      tapCnt--; 
+    } 
+ 
+    /* If the filter length is not a multiple of 4, update the remaining coefficients */ 
+    tapCnt = numTaps % 0x4u; 
+ 
+    while(tapCnt > 0u) 
+    { 
+      /* Add w times the matching state sample to the coefficient */ 
+      *pb += w * (*px++); 
+      pb++; 
+ 
+      /* Decrement the loop counter */ 
+      tapCnt--; 
+    } 
+ 
+    x0 = *pState;                                /* First sample of this window; its square is subtracted from energy next iteration */ 
+ 
+    /* Advance state pointer by 1 for the next sample */ 
+    pState = pState + 1; 
+ 
+    /* Decrement the loop counter */ 
+    blkCnt--; 
+  } 
+ 
+  S->energy = energy;                            /* Save the running energy for the next call */ 
+  S->x0 = x0;                                    /* Save x0 for the next call */ 
+ 
+  /* Processing is complete. Now copy the last numTaps - 1 samples to the  
+     start of the state buffer. This prepares the state buffer for the  
+     next function call. */ 
+ 
+  /* Points to the start of the pState buffer */ 
+  pStateCurnt = S->pState; 
+ 
+  /* Loop unrolling for (numTaps - 1u)/4 samples copy */ 
+  tapCnt = (numTaps - 1u) >> 2u; 
+ 
+  /* Copy four samples per iteration */ 
+  while(tapCnt > 0u) 
+  { 
+    *pStateCurnt++ = *pState++; 
+    *pStateCurnt++ = *pState++; 
+    *pStateCurnt++ = *pState++; 
+    *pStateCurnt++ = *pState++; 
+ 
+    /* Decrement the loop counter */ 
+    tapCnt--; 
+  } 
+ 
+  /* Calculate remaining number of copies */ 
+  tapCnt = (numTaps - 1u) % 0x4u; 
+ 
+  /* Copy the remaining float32_t samples */ 
+  while(tapCnt > 0u) 
+  { 
+    *pStateCurnt++ = *pState++; 
+ 
+    /* Decrement the loop counter */ 
+    tapCnt--; 
+  } 
+ 
+ 
+} 
+ 
+/**  
+   * @} end of LMS_NORM group  
+   */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FilteringFunctions/arm_lms_norm_init_f32.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,94 @@
+/*-----------------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:        arm_lms_norm_init_f32.c  
+*  
+* Description:  Floating-point NLMS filter initialization function.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* ---------------------------------------------------------------------------*/ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupFilters  
+ */ 
+ 
+/**  
+ * @addtogroup LMS_NORM  
+ * @{  
+ */ 
+ 
+  /**  
+   * @brief Initialization function for floating-point normalized LMS filter.  
+   * @param[in] *S points to an instance of the floating-point normalized LMS filter structure.
+   * @param[in] numTaps  number of filter coefficients.  
+   * @param[in] *pCoeffs points to coefficient buffer.  
+   * @param[in] *pState points to state buffer.  
+   * @param[in] mu step size that controls filter coefficient updates.  
+   * @param[in] blockSize number of samples to process.  
+   * @return none.  
+   *  
+ * \par Description:  
+ * <code>pCoeffs</code> points to the array of filter coefficients stored in time reversed order:  
+ * <pre>  
+ *    {b[numTaps-1], b[numTaps-2], b[numTaps-3], ..., b[1], b[0]}
+ * </pre>  
+ * The initial filter coefficients serve as a starting point for the adaptive filter.  
+ * <code>pState</code> points to an array of length <code>numTaps+blockSize-1</code> samples,  
+ * where <code>blockSize</code> is the number of input samples processed by each call to <code>arm_lms_norm_f32()</code>.  
+ */ 
+ 
+void arm_lms_norm_init_f32(
+  arm_lms_norm_instance_f32 * S,
+  uint16_t numTaps,
+  float32_t * pCoeffs,
+  float32_t * pState,
+  float32_t mu,
+  uint32_t blockSize)
+{
+  /* The state buffer keeps numTaps - 1 samples of history plus one block
+     of new input: numTaps + blockSize - 1 samples in total */
+  uint32_t stateLen = numTaps + (blockSize - 1u);
+
+  /* Bind the coefficient array and record how many taps it holds */
+  S->pCoeffs = pCoeffs;
+  S->numTaps = numTaps;
+
+  /* Start from an all-zero delay line */
+  memset(pState, 0, stateLen * sizeof(*pState));
+
+  /* Bind the freshly cleared state buffer */
+  S->pState = pState;
+
+  /* Step size used for the coefficient updates */
+  S->mu = mu;
+
+  /* Reset the running energy and the x0 sample that arm_lms_norm_f32()
+     removes from it; both match the zeroed state buffer */
+  S->x0 = 0.0f;
+  S->energy = 0.0f;
+}
+ 
+/**  
+ * @} end of LMS_NORM group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FilteringFunctions/arm_lms_norm_init_q15.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,101 @@
+/*-----------------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:        arm_lms_norm_init_q15.c  
+*  
+* Description:  Q15 NLMS initialization function.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* ---------------------------------------------------------------------------*/ 
+ 
+#include "arm_math.h" 
+#include "arm_common_tables.h" 
+ 
+/**  
+ * @addtogroup LMS_NORM  
+ * @{  
+ */ 
+ 
+  /**  
+   * @brief Initialization function for Q15 normalized LMS filter.  
+   * @param[in] *S points to an instance of the Q15 normalized LMS filter structure.  
+   * @param[in] numTaps  number of filter coefficients.  
+   * @param[in] *pCoeffs points to coefficient buffer.  
+   * @param[in] *pState points to state buffer.  
+   * @param[in] mu step size that controls filter coefficient updates.  
+   * @param[in] blockSize number of samples to process.  
+   * @param[in] postShift bit shift applied to coefficients.  
+   * @return none.  
+ *  
+ * <b>Description:</b>  
+ * \par  
+ * <code>pCoeffs</code> points to the array of filter coefficients stored in time reversed order:  
+ * <pre>  
+ *    {b[numTaps-1], b[numTaps-2], b[numTaps-3], ..., b[1], b[0]}
+ * </pre>  
+ * The initial filter coefficients serve as a starting point for the adaptive filter.  
+ * <code>pState</code> points to the array of state variables and size of array is  
+ * <code>numTaps+blockSize-1</code> samples, where <code>blockSize</code> is the number of input samples processed  
+ * by each call to <code>arm_lms_norm_q15()</code>.  
+ */ 
+ 
+void arm_lms_norm_init_q15(
+  arm_lms_norm_instance_q15 * S,
+  uint16_t numTaps,
+  q15_t * pCoeffs,
+  q15_t * pState,
+  q15_t mu,
+  uint32_t blockSize,
+  uint8_t postShift)
+{
+  /* The state buffer is numTaps + blockSize - 1 samples long, where
+     blockSize is the number of samples processed per call */
+  uint32_t stateLen = numTaps + (blockSize - 1u);
+
+  /* Bind the coefficient array and record how many taps it holds */
+  S->pCoeffs = pCoeffs;
+  S->numTaps = numTaps;
+
+  /* Start from an all-zero delay line */
+  memset(pState, 0, stateLen * sizeof(*pState));
+
+  /* Bind the freshly cleared state buffer */
+  S->pState = pState;
+
+  /* Step size used for the coefficient updates */
+  S->mu = mu;
+
+  /* Bit shift applied to the coefficients */
+  S->postShift = postShift;
+
+  /* Point the instance at the Q15 reciprocal lookup table */
+  S->recipTable = (q15_t *) armRecipTableQ15;
+
+  /* Reset the x0 sample and the energy value to zero */
+  S->x0 = 0;
+  S->energy = 0;
+
+}
+ 
+/**  
+ * @} end of LMS_NORM group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FilteringFunctions/arm_lms_norm_init_q31.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,100 @@
+/*-----------------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:        arm_lms_norm_init_q31.c  
+*  
+* Description:  Q31 NLMS initialization function.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* ---------------------------------------------------------------------------*/ 
+ 
+#include "arm_math.h" 
+#include "arm_common_tables.h" 
+ 
+/**  
+ * @addtogroup LMS_NORM  
+ * @{  
+ */ 
+ 
+  /**  
+   * @brief Initialization function for Q31 normalized LMS filter.  
+   * @param[in] *S points to an instance of the Q31 normalized LMS filter structure.  
+   * @param[in] numTaps  number of filter coefficients.  
+   * @param[in] *pCoeffs points to coefficient buffer.  
+   * @param[in] *pState points to state buffer.  
+   * @param[in] mu step size that controls filter coefficient updates.  
+   * @param[in] blockSize number of samples to process.  
+   * @param[in] postShift bit shift applied to coefficients.  
+   * @return none.  
+ *  
+ * <b>Description:</b>  
+ * \par  
+ * <code>pCoeffs</code> points to the array of filter coefficients stored in time reversed order:  
+ * <pre>  
+ *    {b[numTaps-1], b[numTaps-2], b[numTaps-3], ..., b[1], b[0]}
+ * </pre>  
+ * The initial filter coefficients serve as a starting point for the adaptive filter.  
+ * <code>pState</code> points to an array of length <code>numTaps+blockSize-1</code> samples,  
+ * where <code>blockSize</code> is the number of input samples processed by each call to <code>arm_lms_norm_q31()</code>.  
+ */ 
+ 
+void arm_lms_norm_init_q31(
+  arm_lms_norm_instance_q31 * S,
+  uint16_t numTaps,
+  q31_t * pCoeffs,
+  q31_t * pState,
+  q31_t mu,
+  uint32_t blockSize,
+  uint8_t postShift)
+{
+  /* The state buffer is numTaps + blockSize - 1 samples long, where
+     blockSize is the number of samples processed per call */
+  uint32_t stateLen = numTaps + (blockSize - 1u);
+
+  /* Bind the coefficient array and record how many taps it holds */
+  S->pCoeffs = pCoeffs;
+  S->numTaps = numTaps;
+
+  /* Start from an all-zero delay line */
+  memset(pState, 0, stateLen * sizeof(*pState));
+
+  /* Bind the freshly cleared state buffer */
+  S->pState = pState;
+
+  /* Step size used for the coefficient updates */
+  S->mu = mu;
+
+  /* Bit shift applied to the coefficients */
+  S->postShift = postShift;
+
+  /* Point the instance at the Q31 reciprocal lookup table */
+  S->recipTable = (q31_t *) armRecipTableQ31;
+
+  /* Reset the x0 sample and the energy value to zero */
+  S->x0 = 0;
+  S->energy = 0;
+
+}
+ 
+/**  
+ * @} end of LMS_NORM group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FilteringFunctions/arm_lms_norm_q15.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,260 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:	    arm_lms_norm_q15.c  
+*  
+* Description:	Q15 NLMS filter.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupFilters  
+ */ 
+ 
+/**  
+ * @addtogroup LMS_NORM  
+ * @{  
+ */ 
+ 
+/**  
+* @brief Processing function for Q15 normalized LMS filter.  
+* @param[in] *S points to an instance of the Q15 normalized LMS filter structure.  
+* @param[in] *pSrc points to the block of input data.  
+* @param[in] *pRef points to the block of reference data.  
+* @param[out] *pOut points to the block of output data.  
+* @param[out] *pErr points to the block of error data.  
+* @param[in] blockSize number of samples to process.  
+* @return none.  
+*  
+* <b>Scaling and Overflow Behavior:</b>  
+* \par  
+* The function is implemented using a 64-bit internal accumulator.  
+* Both coefficients and state variables are represented in 1.15 format and multiplications yield a 2.30 result.  
+* The 2.30 intermediate results are accumulated in a 64-bit accumulator in 34.30 format.  
+* There is no risk of internal overflow with this approach and the full precision of intermediate multiplications is preserved.  
+* After all additions have been performed, the accumulator is truncated to 34.15 format by discarding low 15 bits.  
+* Lastly, the accumulator is saturated to yield a result in 1.15 format. 
+* 
+* \par 
+* 	In this filter, the filter coefficients are updated for each sample and the coefficient updates are saturated.  
+*  
+ */ 
+ 
+void arm_lms_norm_q15( 
+  arm_lms_norm_instance_q15 * S, 
+  q15_t * pSrc, 
+  q15_t * pRef, 
+  q15_t * pOut, 
+  q15_t * pErr, 
+  uint32_t blockSize) 
+{ 
+  q15_t *pState = S->pState;                     /* Start of the state window used for the current sample */ 
+  q15_t *pCoeffs = S->pCoeffs;                   /* Coefficient pointer */ 
+  q15_t *pStateCurnt;                            /* Write position for the next input sample in the state buffer */ 
+  q15_t *px, *pb;                                /* Temporary pointers for state and coefficient buffers */ 
+  q15_t mu = S->mu;                              /* Adaptive factor (step size) */ 
+  uint32_t numTaps = S->numTaps;                 /* Number of filter coefficients in the filter */ 
+  uint32_t tapCnt, blkCnt;                       /* Loop counters */ 
+  q31_t energy;                                  /* Running energy of the input window */ 
+  q63_t acc;                                     /* Accumulator */ 
+  q15_t e = 0, d = 0;                            /* error, reference data sample */ 
+  q15_t w = 0, in;                               /* weight factor and current input sample */ 
+  q15_t x0;                                      /* sample whose energy is removed when the window slides */ 
+  uint32_t shift = (uint32_t) S->postShift + 1u; /* The accumulator is shifted right by (16 - shift) to form the output */ 
+  q15_t errorXmu, oneByEnergy;                   /* Temporary variables to store error and mu product and reciprocal of energy */ 
+  q15_t postShift;                               /* Post shift to be applied to weight after reciprocal calculation */ 
+  q31_t coef;                                    /* Temporary variable for the updated coefficient before saturation */ 
+ 
+  energy = S->energy;                            /* Resume from the energy stored in the instance */ 
+  x0 = S->x0;                                    /* Resume from the sample stored in the instance */ 
+ 
+  /* S->pState points to buffer which contains previous frame (numTaps - 1) samples */ 
+  /* pStateCurnt points to the location where the new input data should be written */ 
+  pStateCurnt = &(S->pState[(numTaps - 1u)]); 
+ 
+  blkCnt = blockSize; 
+ 
+  while(blkCnt > 0u) 
+  { 
+    /* Copy the new input sample into the state buffer */ 
+    *pStateCurnt++ = *pSrc; 
+ 
+    /* Initialize pState pointer */ 
+    px = pState; 
+ 
+    /* Initialize coeff pointer */ 
+    pb = (pCoeffs); 
+ 
+    /* Read the sample from input buffer */ 
+    in = *pSrc++; 
+ 
+    /* Slide the energy window: remove the x0 * x0 term and add the in * in term (each >> 15) */ 
+    energy -= (((q31_t) x0 * (x0)) >> 15); 
+    energy += (((q31_t) in * (in)) >> 15); 
+ 
+    /* Set the accumulator to zero */ 
+    acc = 0; 
+ 
+    /* Loop unrolling.  Process 4 taps at a time. */ 
+    tapCnt = numTaps >> 2; 
+ 
+    while(tapCnt > 0u) 
+    { 
+ 
+      /* Each __SMLALD call reads one 32-bit word (two q15 values) from the state and coefficient buffers */ 
+      acc = __SMLALD(*__SIMD32(px)++, (*__SIMD32(pb)++), acc); 
+      acc = __SMLALD(*__SIMD32(px)++, (*__SIMD32(pb)++), acc); 
+ 
+      /* Decrement the loop counter */ 
+      tapCnt--; 
+    } 
+ 
+    /* If the filter length is not a multiple of 4, compute the remaining filter taps */ 
+    tapCnt = numTaps % 0x4u; 
+ 
+    while(tapCnt > 0u) 
+    { 
+      /* Perform the multiply-accumulate for a single tap */ 
+      acc += (((q31_t) * px++ * (*pb++))); 
+ 
+      /* Decrement the loop counter */ 
+      tapCnt--; 
+    } 
+ 
+    /* Shift right by 16 - shift (= 15 - postShift) and saturate to 16 bits */ 
+    acc = __SSAT((acc >> (16u - shift)), 16u); 
+ 
+    /* Store the result from accumulator into the destination buffer. */ 
+    *pOut++ = (q15_t) acc; 
+ 
+    /* Compute and store error: reference sample minus filter output */ 
+    d = *pRef++; 
+    e = d - (q15_t) acc; 
+    *pErr++ = e; 
+ 
+    /* Reciprocal of (energy + DELTA_Q15): result goes to oneByEnergy, the return value is used as a shift below */ 
+    postShift = arm_recip_q15((q15_t) energy + DELTA_Q15, 
+                              &oneByEnergy, S->recipTable); 
+ 
+    /* Calculation of e * mu value */ 
+    errorXmu = (q15_t) (((q31_t) e * mu) >> 15); 
+ 
+    /* Calculation of (e * mu) * (1/energy) value */ 
+    acc = (((q31_t) errorXmu * oneByEnergy) >> (15 - postShift)); 
+ 
+    /* Weighting factor for the normalized version, saturated to 16 bits */ 
+    w = (q15_t) __SSAT((q31_t) acc, 16); 
+ 
+    /* Initialize pState pointer */ 
+    px = pState; 
+ 
+    /* Initialize coeff pointer */ 
+    pb = (pCoeffs); 
+ 
+    /* Loop unrolling.  Process 4 taps at a time. */ 
+    tapCnt = numTaps >> 2; 
+ 
+    /* Update filter coefficients: b += (w * x) >> 15, saturated to 16 bits */ 
+    while(tapCnt > 0u) 
+    { 
+      coef = *pb + (((q31_t) w * (*px++)) >> 15); 
+      *pb++ = (q15_t) __SSAT((coef), 16); 
+      coef = *pb + (((q31_t) w * (*px++)) >> 15); 
+      *pb++ = (q15_t) __SSAT((coef), 16); 
+      coef = *pb + (((q31_t) w * (*px++)) >> 15); 
+      *pb++ = (q15_t) __SSAT((coef), 16); 
+      coef = *pb + (((q31_t) w * (*px++)) >> 15); 
+      *pb++ = (q15_t) __SSAT((coef), 16); 
+ 
+      /* Decrement the loop counter */ 
+      tapCnt--; 
+    } 
+ 
+    /* If the filter length is not a multiple of 4, update the remaining coefficients */ 
+    tapCnt = numTaps % 0x4u; 
+ 
+    while(tapCnt > 0u) 
+    { 
+      /* Update a single coefficient with saturation */ 
+      coef = *pb + (((q31_t) w * (*px++)) >> 15); 
+      *pb++ = (q15_t) __SSAT((coef), 16); 
+ 
+      /* Decrement the loop counter */ 
+      tapCnt--; 
+    } 
+ 
+    /* Remember the first sample of this window; its energy is removed on the next iteration */ 
+    x0 = *pState; 
+ 
+    /* Advance state pointer by 1 for the next sample */ 
+    pState = pState + 1u; 
+ 
+    /* Decrement the loop counter */ 
+    blkCnt--; 
+  } 
+ 
+  /* Save energy and x0 values for the next frame (energy is narrowed to q15_t) */ 
+  S->energy = (q15_t) energy; 
+  S->x0 = x0; 
+ 
+  /* Processing is complete. Now copy the last numTaps - 1 samples to the  
+     start of the state buffer. This prepares the state buffer for the  
+     next function call. */ 
+ 
+  /* Points to the start of the pState buffer */ 
+  pStateCurnt = S->pState; 
+ 
+  /* Number of groups of four q15_t samples; each group is copied as two 32-bit words */ 
+  tapCnt = (numTaps - 1u) >> 2; 
+ 
+  while(tapCnt > 0u) 
+  { 
+ 
+    *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++; 
+    *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++; 
+ 
+    tapCnt--; 
+ 
+  } 
+ 
+  /* Calculation of count for remaining q15_t data */ 
+  tapCnt = (numTaps - 1u) % 0x4u; 
+ 
+  /* Copy the remaining samples one q15_t at a time */ 
+  while(tapCnt > 0u) 
+  { 
+    *pStateCurnt++ = *pState++; 
+ 
+    /* Decrement the loop counter */ 
+    tapCnt--; 
+  } 
+ 
+ 
+} 
+ 
+ 
+/**  
+   * @} end of LMS_NORM group  
+   */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FilteringFunctions/arm_lms_norm_q31.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,274 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:	    arm_lms_norm_q31.c  
+*  
+* Description:	Processing function for the Q31 NLMS filter.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupFilters  
+ */ 
+ 
+/**  
+ * @addtogroup LMS_NORM  
+ * @{  
+ */ 
+ 
+/**  
+* @brief Processing function for Q31 normalized LMS filter.  
+* @param[in] *S points to an instance of the Q31 normalized LMS filter structure.  
+* @param[in] *pSrc points to the block of input data.  
+* @param[in] *pRef points to the block of reference data.  
+* @param[out] *pOut points to the block of output data.  
+* @param[out] *pErr points to the block of error data.  
+* @param[in] blockSize number of samples to process.  
+* @return none.  
+*  
+* <b>Scaling and Overflow Behavior:</b>  
+* \par  
+* The function is implemented using an internal 64-bit accumulator.  
+* The accumulator has a 2.62 format and maintains full precision of the intermediate multiplication results but provides only a single guard bit.  
+* Thus, if the accumulator result overflows it wraps around rather than clips.  
+* In order to avoid overflows completely the input signal must be scaled down by log2(numTaps) bits.  
+* The reference signal should not be scaled down.  
+* After all multiply-accumulates are performed, the 2.62 accumulator is truncated to 1.32 format and then saturated to 1.31 format.  
+* The output signal and error signal are in 1.31 format.  
+* 
+* \par 
+* 	In this filter, the filter coefficients are updated for each sample and the coefficient updates are saturated. 
+*  
+*/ 
+ 
+void arm_lms_norm_q31( 
+  arm_lms_norm_instance_q31 * S, 
+  q31_t * pSrc, 
+  q31_t * pRef, 
+  q31_t * pOut, 
+  q31_t * pErr, 
+  uint32_t blockSize) 
+{ 
+  q31_t *pState = S->pState;                     /* Start of the state window used for the current sample */ 
+  q31_t *pCoeffs = S->pCoeffs;                   /* Coefficient pointer */ 
+  q31_t *pStateCurnt;                            /* Write position for the next input sample in the state buffer */ 
+  q31_t *px, *pb;                                /* Temporary pointers for state and coefficient buffers */ 
+  q31_t mu = S->mu;                              /* Adaptive factor (step size) */ 
+  uint32_t numTaps = S->numTaps;                 /* Number of filter coefficients in the filter */ 
+  uint32_t tapCnt, blkCnt;                       /* Loop counters */ 
+  q63_t energy;                                  /* Running energy of the input window */ 
+  q63_t acc;                                     /* Accumulator */ 
+  q31_t e = 0, d = 0;                            /* error, reference data sample */ 
+  q31_t w = 0, in;                               /* weight factor and current input sample */ 
+  q31_t x0;                                      /* sample whose energy is removed when the window slides */ 
+  uint32_t shift = 32u - ((uint32_t) S->postShift + 1u);        /* Right shift applied to the accumulator to form the output */ 
+  q31_t errorXmu, oneByEnergy;                   /* Temporary variables to store error and mu product and reciprocal of energy */ 
+  q31_t postShift;                               /* Post shift to be applied to weight after reciprocal calculation */ 
+  q31_t coef;                                    /* Temporary variable for the coefficient increment */ 
+ 
+  energy = S->energy;                            /* Resume from the energy stored in the instance */ 
+  x0 = S->x0;                                    /* Resume from the sample stored in the instance */ 
+ 
+  /* S->pState points to buffer which contains previous frame (numTaps - 1) samples */ 
+  /* pStateCurnt points to the location where the new input data should be written */ 
+  pStateCurnt = &(S->pState[(numTaps - 1u)]); 
+ 
+  blkCnt = blockSize; 
+ 
+  while(blkCnt > 0u) 
+  { 
+ 
+    /* Copy the new input sample into the state buffer */ 
+    *pStateCurnt++ = *pSrc; 
+ 
+    /* Initialize pState pointer */ 
+    px = pState; 
+ 
+    /* Initialize coeff pointer */ 
+    pb = (pCoeffs); 
+ 
+    /* Read the sample from input buffer */ 
+    in = *pSrc++; 
+ 
+    /* Slide the energy window: subtract x0 * x0 and add in * in; each product is doubled (<< 1) and only bits 63..32 of the result are kept */ 
+    energy = (q31_t) ((((q63_t) energy << 32) - 
+                       (((q63_t) x0 * x0) << 1)) >> 32); 
+    energy = (q31_t) (((((q63_t) in * in) << 1) + (energy << 32)) >> 32); 
+ 
+    /* Set the accumulator to zero */ 
+    acc = 0; 
+ 
+    /* Loop unrolling.  Process 4 taps at a time. */ 
+    tapCnt = numTaps >> 2; 
+ 
+    while(tapCnt > 0u) 
+    { 
+      /* Perform four multiply-accumulates in 64-bit precision */ 
+      acc += ((q63_t) (*px++)) * (*pb++); 
+      acc += ((q63_t) (*px++)) * (*pb++); 
+      acc += ((q63_t) (*px++)) * (*pb++); 
+      acc += ((q63_t) (*px++)) * (*pb++); 
+ 
+      /* Decrement the loop counter */ 
+      tapCnt--; 
+    } 
+ 
+    /* If the filter length is not a multiple of 4, compute the remaining filter taps */ 
+    tapCnt = numTaps % 0x4u; 
+ 
+    while(tapCnt > 0u) 
+    { 
+      /* Perform the multiply-accumulate for a single tap */ 
+      acc += ((q63_t) (*px++)) * (*pb++); 
+ 
+      /* Decrement the loop counter */ 
+      tapCnt--; 
+    } 
+ 
+    /* Shift right by 32 - (postShift + 1) and truncate to 32 bits with a plain cast */ 
+    acc = (q31_t) (acc >> shift); 
+ 
+    /* Store the result from accumulator into the destination buffer. */ 
+    *pOut++ = (q31_t) acc; 
+ 
+    /* Compute and store error: reference sample minus filter output */ 
+    d = *pRef++; 
+    e = d - (q31_t) acc; 
+    *pErr++ = e; 
+ 
+    /* Reciprocal of (energy + DELTA_Q31): result goes to oneByEnergy, the return value is used as a shift below */ 
+    postShift = arm_recip_q31(energy + DELTA_Q31, 
+                              &oneByEnergy, &S->recipTable[0]); 
+ 
+    /* Calculation of product of (e * mu) */ 
+    errorXmu = (q31_t) (((q63_t) e * mu) >> 31); 
+ 
+    /* Weighting factor: (errorXmu * oneByEnergy) >> (31 - postShift), passed through clip_q63_to_q31() */ 
+    w = clip_q63_to_q31(((q63_t) errorXmu * oneByEnergy) >> (31 - postShift)); 
+ 
+    /* Initialize pState pointer */ 
+    px = pState; 
+ 
+    /* Initialize coeff pointer */ 
+    pb = (pCoeffs); 
+ 
+    /* Loop unrolling.  Process 4 taps at a time. */ 
+    tapCnt = numTaps >> 2; 
+ 
+    /* Update filter coefficients */ 
+    while(tapCnt > 0u) 
+    { 
+      /* Compute the increment w * x for this tap */ 
+ 
+      /* coef is in 2.30 format */ 
+      coef = (q31_t) (((q63_t) w * (*px++)) >> (32)); 
+      /* get coef in 1.31 format by left shifting */ 
+      *pb = clip_q63_to_q31((q63_t) *pb + (coef << 1u)); 
+      /* update coefficient buffer to next coefficient */ 
+      pb++; 
+ 
+      coef = (q31_t) (((q63_t) w * (*px++)) >> (32)); 
+      *pb = clip_q63_to_q31((q63_t) *pb + (coef << 1u)); 
+      pb++; 
+ 
+      coef = (q31_t) (((q63_t) w * (*px++)) >> (32)); 
+      *pb = clip_q63_to_q31((q63_t) *pb + (coef << 1u)); 
+      pb++; 
+ 
+      coef = (q31_t) (((q63_t) w * (*px++)) >> (32)); 
+      *pb = clip_q63_to_q31((q63_t) *pb + (coef << 1u)); 
+      pb++; 
+ 
+      /* Decrement the loop counter */ 
+      tapCnt--; 
+    } 
+ 
+    /* If the filter length is not a multiple of 4, update the remaining coefficients */ 
+    tapCnt = numTaps % 0x4u; 
+ 
+    while(tapCnt > 0u) 
+    { 
+      /* Update a single coefficient, same arithmetic as the unrolled loop */ 
+      coef = (q31_t) (((q63_t) w * (*px++)) >> (32)); 
+      *pb = clip_q63_to_q31((q63_t) *pb + (coef << 1u)); 
+      pb++; 
+ 
+      /* Decrement the loop counter */ 
+      tapCnt--; 
+    } 
+ 
+    /* Remember the first sample of this window; its energy is removed on the next iteration */ 
+    x0 = *pState; 
+ 
+    /* Advance state pointer by 1 for the next sample */ 
+    pState = pState + 1; 
+ 
+    /* Decrement the loop counter */ 
+    blkCnt--; 
+  } 
+ 
+  /* Save energy and x0 values for the next frame */ 
+  S->energy = (q31_t) energy; 
+  S->x0 = x0; 
+ 
+  /* Processing is complete. Now copy the last numTaps - 1 samples to the  
+     start of the state buffer. This prepares the state buffer for the  
+     next function call. */ 
+ 
+  /* Points to the start of the pState buffer */ 
+  pStateCurnt = S->pState; 
+ 
+  /* Loop unrolling for (numTaps - 1u) samples copy */ 
+  tapCnt = (numTaps - 1u) >> 2u; 
+ 
+  /* Copy four samples per iteration */ 
+  while(tapCnt > 0u) 
+  { 
+    *pStateCurnt++ = *pState++; 
+    *pStateCurnt++ = *pState++; 
+    *pStateCurnt++ = *pState++; 
+    *pStateCurnt++ = *pState++; 
+ 
+    /* Decrement the loop counter */ 
+    tapCnt--; 
+  } 
+ 
+  /* Calculate remaining number of copies */ 
+  tapCnt = (numTaps - 1u) % 0x4u; 
+ 
+  /* Copy the remaining q31_t data */ 
+  while(tapCnt > 0u) 
+  { 
+    *pStateCurnt++ = *pState++; 
+ 
+    /* Decrement the loop counter */ 
+    tapCnt--; 
+  } 
+ 
+} 
+ 
+/**  
+ * @} end of LMS_NORM group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FilteringFunctions/arm_lms_q15.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,226 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:	    arm_lms_q15.c  
+*  
+* Description:	Processing function for the Q15 LMS filter.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+/**  
+ * @ingroup groupFilters  
+ */ 
+ 
+/**  
+ * @addtogroup LMS  
+ * @{  
+ */ 
+ 
+ /**  
+ * @brief Processing function for Q15 LMS filter.  
+ * @param[in] *S points to an instance of the Q15 LMS filter structure.  
+ * @param[in] *pSrc points to the block of input data.  
+ * @param[in] *pRef points to the block of reference data.  
+ * @param[out] *pOut points to the block of output data.  
+ * @param[out] *pErr points to the block of error data.  
+ * @param[in] blockSize number of samples to process.  
+ * @return none.  
+ *  
+ * \par Scaling and Overflow Behavior:  
+ * The function is implemented using a 64-bit internal accumulator.  
+ * Both coefficients and state variables are represented in 1.15 format and multiplications yield a 2.30 result.  
+ * The 2.30 intermediate results are accumulated in a 64-bit accumulator in 34.30 format.  
+ * There is no risk of internal overflow with this approach and the full precision of intermediate multiplications is preserved.  
+ * After all additions have been performed, the accumulator is truncated to 34.15 format by discarding low 15 bits.  
+ * Lastly, the accumulator is saturated to yield a result in 1.15 format.  
+ * 
+ * \par 
+ * 	In this filter, the filter coefficients are updated for each sample and the coefficient updates are saturated. 
+ *  
+ */ 
+ 
+void arm_lms_q15( 
+  const arm_lms_instance_q15 * S, 
+  q15_t * pSrc, 
+  q15_t * pRef, 
+  q15_t * pOut, 
+  q15_t * pErr, 
+  uint32_t blockSize) 
+{ 
+  q15_t *pState = S->pState;                     /* Start of the state window used for the current sample */ 
+  uint32_t numTaps = S->numTaps;                 /* Number of filter coefficients in the filter */ 
+  q15_t *pCoeffs = S->pCoeffs;                   /* Coefficient pointer */ 
+  q15_t *pStateCurnt;                            /* Write position for the next input sample in the state buffer */ 
+  q15_t mu = S->mu;                              /* Adaptive factor (step size) */ 
+  q15_t *px;                                     /* Temporary pointer for state */ 
+  q15_t *pb;                                     /* Temporary pointer for coefficient buffer */ 
+  uint32_t tapCnt, blkCnt;                       /* Loop counters */ 
+  q63_t acc;                                     /* Accumulator */ 
+  q15_t e = 0;                                   /* error of data sample */ 
+  q15_t alpha;                                   /* Intermediate constant for taps update (e * mu) */ 
+  uint32_t shift = S->postShift + 1u;            /* The accumulator is shifted right by (16 - shift) to form the output */ 
+  q31_t coef;                                    /* Temporary variable for the updated coefficient before saturation */ 
+ 
+  /* S->pState points to buffer which contains previous frame (numTaps - 1) samples */ 
+  /* pStateCurnt points to the location where the new input data should be written */ 
+  pStateCurnt = &(S->pState[(numTaps - 1u)]); 
+ 
+  /* Initializing blkCnt with blockSize */ 
+  blkCnt = blockSize; 
+ 
+  while(blkCnt > 0u) 
+  { 
+    /* Copy the new input sample into the state buffer */ 
+    *pStateCurnt++ = *pSrc++; 
+ 
+    /* Initialize state pointer */ 
+    px = pState; 
+ 
+    /* Initialize coefficient pointer */ 
+    pb = pCoeffs; 
+ 
+    /* Set the accumulator to zero */ 
+    acc = 0; 
+ 
+    /* Loop unrolling.  Process 4 taps at a time. */ 
+    tapCnt = numTaps >> 2u; 
+ 
+    while(tapCnt > 0u) 
+    { 
+      /* acc +=  b[N] * x[n-N] + b[N-1] * x[n-N-1] */ 
+      /* Each __SMLALD call reads one 32-bit word (two q15 values) from the state and coefficient buffers */ 
+      acc = __SMLALD(*__SIMD32(px)++, (*__SIMD32(pb)++), acc); 
+      acc = __SMLALD(*__SIMD32(px)++, (*__SIMD32(pb)++), acc); 
+ 
+      /* Decrement the loop counter */ 
+      tapCnt--; 
+    } 
+ 
+    /* If the filter length is not a multiple of 4, compute the remaining filter taps */ 
+    tapCnt = numTaps % 0x4u; 
+ 
+    while(tapCnt > 0u) 
+    { 
+      /* Perform the multiply-accumulate for a single tap */ 
+      acc += (q63_t) (((q31_t) (*px++) * (*pb++))); 
+ 
+      /* Decrement the loop counter */ 
+      tapCnt--; 
+    } 
+ 
+    /* Shift right by 16 - shift (= 15 - postShift) and saturate to 16 bits */ 
+    acc = __SSAT((acc >> (16 - shift)), 16); 
+ 
+    /* Store the result from accumulator into the destination buffer. */ 
+    *pOut++ = (q15_t) acc; 
+ 
+    /* Compute and store error: reference sample minus filter output */ 
+    e = *pRef++ - (q15_t) acc; 
+ 
+    *pErr++ = (q15_t) e; 
+ 
+    /* Compute alpha i.e. intermediate constant for taps update */ 
+    alpha = (q15_t) (((q31_t) e * (mu)) >> 15); 
+ 
+    /* Initialize state pointer */ 
+    /* Advance state pointer by 1 for the next sample */ 
+    px = pState++; 
+ 
+    /* Initialize coefficient pointer */ 
+    pb = pCoeffs; 
+ 
+    /* Loop unrolling.  Process 4 taps at a time. */ 
+    tapCnt = numTaps >> 2u; 
+ 
+    /* Update filter coefficients: b += (alpha * x) >> 15, saturated to 16 bits */ 
+    while(tapCnt > 0u) 
+    { 
+      coef = (q31_t) *pb + (((q31_t) alpha * (*px++)) >> 15); 
+      *pb++ = (q15_t) __SSAT((coef), 16); 
+      coef = (q31_t) *pb + (((q31_t) alpha * (*px++)) >> 15); 
+      *pb++ = (q15_t) __SSAT((coef), 16); 
+      coef = (q31_t) *pb + (((q31_t) alpha * (*px++)) >> 15); 
+      *pb++ = (q15_t) __SSAT((coef), 16); 
+      coef = (q31_t) *pb + (((q31_t) alpha * (*px++)) >> 15); 
+      *pb++ = (q15_t) __SSAT((coef), 16); 
+ 
+      /* Decrement the loop counter */ 
+      tapCnt--; 
+    } 
+ 
+    /* If the filter length is not a multiple of 4, update the remaining coefficients */ 
+    tapCnt = numTaps % 0x4u; 
+ 
+    while(tapCnt > 0u) 
+    { 
+      /* Update a single coefficient with saturation */ 
+      coef = (q31_t) *pb + (((q31_t) alpha * (*px++)) >> 15); 
+      *pb++ = (q15_t) __SSAT((coef), 16); 
+ 
+      /* Decrement the loop counter */ 
+      tapCnt--; 
+    } 
+ 
+    /* Decrement the loop counter */ 
+    blkCnt--; 
+ 
+  } 
+ 
+  /* Processing is complete. Now copy the last numTaps - 1 samples to the  
+     start of the state buffer. This prepares the state buffer for the  
+     next function call. */ 
+ 
+  /* Points to the start of the pState buffer */ 
+  pStateCurnt = S->pState; 
+ 
+  /* Number of groups of four q15_t samples; each group is copied as two 32-bit words */ 
+  tapCnt = (numTaps - 1u) >> 2; 
+ 
+  while(tapCnt > 0u) 
+  { 
+ 
+    *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++; 
+    *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++; 
+ 
+    tapCnt--; 
+ 
+  } 
+ 
+  /* Calculation of count for remaining q15_t data */ 
+  tapCnt = (numTaps - 1u) % 0x4u; 
+ 
+  /* Copy the remaining samples one q15_t at a time */ 
+  while(tapCnt > 0u) 
+  { 
+    *pStateCurnt++ = *pState++; 
+ 
+    /* Decrement the loop counter */ 
+    tapCnt--; 
+  } 
+ 
+} 
+ 
+/**  
+   * @} end of LMS group  
+   */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/FilteringFunctions/arm_lms_q31.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,246 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:	    arm_lms_q31.c  
+*  
+* Description:	Processing function for the Q31 LMS filter.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+/**  
+ * @ingroup groupFilters  
+ */ 
+ 
+/**  
+ * @addtogroup LMS  
+ * @{  
+ */ 
+ 
+ /**  
+ * @brief Processing function for Q31 LMS filter.  
+ * @param[in]  *S points to an instance of the Q31 LMS filter structure.  
+ * @param[in]  *pSrc points to the block of input data.  
+ * @param[in]  *pRef points to the block of reference data.  
+ * @param[out] *pOut points to the block of output data.  
+ * @param[out] *pErr points to the block of error data.  
+ * @param[in]  blockSize number of samples to process.  
+ * @return     none.  
+ *  
+ * \par Scaling and Overflow Behavior:  
+ * The function is implemented using an internal 64-bit accumulator.  
+ * The accumulator has a 2.62 format and maintains full precision of the intermediate multiplication results but provides only a single guard bit.  
+ * Thus, if the accumulator result overflows it wraps around rather than clips.  
+ * In order to avoid overflows completely the input signal must be scaled down by log2(numTaps) bits.  
+ * The reference signal should not be scaled down.  
+ * After all multiply-accumulates are performed, the 2.62 accumulator is truncated to 1.32 format and then saturated to 1.31 format.  
+ * The output signal and error signal are in 1.31 format.  
+ * 
+ * \par 
+ * 	In this filter, the filter coefficients are updated for each sample and the coefficient updates are saturated. 
+ */ 
+ 
+void arm_lms_q31( 
+  const arm_lms_instance_q31 * S, 
+  q31_t * pSrc, 
+  q31_t * pRef, 
+  q31_t * pOut, 
+  q31_t * pErr, 
+  uint32_t blockSize) 
+{ 
+  q31_t *pState = S->pState;                     /* Start of the state window used for the current sample */ 
+  uint32_t numTaps = S->numTaps;                 /* Number of filter coefficients in the filter */ 
+  q31_t *pCoeffs = S->pCoeffs;                   /* Coefficient pointer */ 
+  q31_t *pStateCurnt;                            /* Write position for the next input sample in the state buffer */ 
+  q31_t mu = S->mu;                              /* Adaptive factor (step size) */ 
+  q31_t *px;                                     /* Temporary pointer for state */ 
+  q31_t *pb;                                     /* Temporary pointer for coefficient buffer */ 
+  uint32_t tapCnt, blkCnt;                       /* Loop counters */ 
+  q63_t acc;                                     /* Accumulator */ 
+  q31_t e = 0;                                   /* error of data sample */ 
+  q31_t alpha;                                   /* Intermediate constant for taps update (e * mu) */ 
+  uint8_t shift = (uint8_t) (32u - (S->postShift + 1u));        /* Right shift applied to the accumulator to form the output */ 
+  q31_t coef;                                    /* Temporary variable for the coefficient increment */ 
+ 
+  /* S->pState points to buffer which contains previous frame (numTaps - 1) samples */ 
+  /* pStateCurnt points to the location where the new input data should be written */ 
+  pStateCurnt = &(S->pState[(numTaps - 1u)]); 
+ 
+  /* Initializing blkCnt with blockSize */ 
+  blkCnt = blockSize; 
+ 
+  while(blkCnt > 0u) 
+  { 
+    /* Copy the new input sample into the state buffer */ 
+    *pStateCurnt++ = *pSrc++; 
+ 
+    /* Initialize state pointer */ 
+    px = pState; 
+ 
+    /* Initialize coefficient pointer */ 
+    pb = pCoeffs; 
+ 
+    /* Set the accumulator to zero */ 
+    acc = 0; 
+ 
+    /* Loop unrolling.  Process 4 taps at a time. */ 
+    tapCnt = numTaps >> 2; 
+ 
+    while(tapCnt > 0u) 
+    { 
+      /* Perform the multiply-accumulate */ 
+      /* acc +=  b[N] * x[n-N] */ 
+      acc += ((q63_t) (*px++)) * (*pb++); 
+ 
+      /* acc +=  b[N-1] * x[n-N-1] */ 
+      acc += ((q63_t) (*px++)) * (*pb++); 
+ 
+      /* acc +=  b[N-2] * x[n-N-2] */ 
+      acc += ((q63_t) (*px++)) * (*pb++); 
+ 
+      /* acc +=  b[N-3] * x[n-N-3] */ 
+      acc += ((q63_t) (*px++)) * (*pb++); 
+ 
+      /* Decrement the loop counter */ 
+      tapCnt--; 
+    } 
+ 
+    /* If the filter length is not a multiple of 4, compute the remaining filter taps */ 
+    tapCnt = numTaps % 0x4u; 
+ 
+    while(tapCnt > 0u) 
+    { 
+      /* Perform the multiply-accumulate for a single tap */ 
+      acc += ((q63_t) (*px++)) * (*pb++); 
+ 
+      /* Decrement the loop counter */ 
+      tapCnt--; 
+    } 
+ 
+    /* Shift right by 32 - (postShift + 1) and truncate to 32 bits with a plain cast */ 
+    /* The truncated value is then stored in the destination buffer. */ 
+    acc = (q31_t) (acc >> shift); 
+ 
+    *pOut++ = (q31_t) acc; 
+ 
+    /* Compute and store error: reference sample minus filter output */ 
+    e = *pRef++ - (q31_t) acc; 
+ 
+    *pErr++ = (q31_t) e; 
+ 
+    /* Compute alpha i.e. intermediate constant for taps update */ 
+    alpha = (q31_t) (((q63_t) e * mu) >> 31); 
+ 
+    /* Initialize state pointer */ 
+    /* Advance state pointer by 1 for the next sample */ 
+    px = pState++; 
+ 
+    /* Initialize coefficient pointer */ 
+    pb = pCoeffs; 
+ 
+    /* Loop unrolling.  Process 4 taps at a time. */ 
+    tapCnt = numTaps >> 2; 
+ 
+    /* Update filter coefficients */ 
+    while(tapCnt > 0u) 
+    { 
+      /* coef is in 2.30 format */ 
+      coef = (q31_t) (((q63_t) alpha * (*px++)) >> (32)); 
+      /* get coef in 1.31 format by left shifting */ 
+      *pb = clip_q63_to_q31((q63_t) *pb + (coef << 1u)); 
+      /* update coefficient buffer to next coefficient */ 
+      pb++; 
+ 
+      coef = (q31_t) (((q63_t) alpha * (*px++)) >> (32)); 
+      *pb = clip_q63_to_q31((q63_t) *pb + (coef << 1u)); 
+      pb++; 
+ 
+      coef = (q31_t) (((q63_t) alpha * (*px++)) >> (32)); 
+      *pb = clip_q63_to_q31((q63_t) *pb + (coef << 1u)); 
+      pb++; 
+ 
+      coef = (q31_t) (((q63_t) alpha * (*px++)) >> (32)); 
+      *pb = clip_q63_to_q31((q63_t) *pb + (coef << 1u)); 
+      pb++; 
+ 
+      /* Decrement the loop counter */ 
+      tapCnt--; 
+    } 
+ 
+    /* If the filter length is not a multiple of 4, update the remaining coefficients */ 
+    tapCnt = numTaps % 0x4u; 
+ 
+    while(tapCnt > 0u) 
+    { 
+      /* Update a single coefficient, same arithmetic as the unrolled loop */ 
+      coef = (q31_t) (((q63_t) alpha * (*px++)) >> (32)); 
+      *pb = clip_q63_to_q31((q63_t) *pb + (coef << 1u)); 
+      pb++; 
+ 
+      /* Decrement the loop counter */ 
+      tapCnt--; 
+    } 
+ 
+    /* Decrement the loop counter */ 
+    blkCnt--; 
+  } 
+ 
+  /* Processing is complete. Now copy the last numTaps - 1 samples to the  
+     start of the state buffer. This prepares the state buffer for the  
+     next function call. */ 
+ 
+  /* Points to the start of the pState buffer */ 
+  pStateCurnt = S->pState; 
+ 
+  /* Loop unrolling for (numTaps - 1u) samples copy */ 
+  tapCnt = (numTaps - 1u) >> 2u; 
+ 
+  /* Copy four samples per iteration */ 
+  while(tapCnt > 0u) 
+  { 
+    *pStateCurnt++ = *pState++; 
+    *pStateCurnt++ = *pState++; 
+    *pStateCurnt++ = *pState++; 
+    *pStateCurnt++ = *pState++; 
+ 
+    /* Decrement the loop counter */ 
+    tapCnt--; 
+  } 
+ 
+  /* Calculate remaining number of copies */ 
+  tapCnt = (numTaps - 1u) % 0x4u; 
+ 
+  /* Copy the remaining q31_t data */ 
+  while(tapCnt > 0u) 
+  { 
+    *pStateCurnt++ = *pState++; 
+ 
+    /* Decrement the loop counter */ 
+    tapCnt--; 
+  } 
+ 
+} 
+ 
+/**  
+   * @} end of LMS group  
+   */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/MatrixFunctions/arm_mat_add_f32.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,136 @@
+/* ----------------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:        arm_mat_add_f32.c  
+*  
+* Description:	Floating-point matrix addition  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.5  2010/04/26   
+*    incorporated review comments and updated with latest CMSIS layer  
+*  
+* Version 0.0.3  2010/03/10   
+*    Initial version  
+* -------------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupMatrix  
+ */ 
+ 
+/**  
+ * @defgroup MatrixAdd Matrix Addition  
+ *  
+ * Adds two matrices.  
+ * \image html MatrixAddition.gif "Addition of two 3 x 3 matrices"  
+ *  
+ * The functions check to make sure that  
+ * <code>pSrcA</code>, <code>pSrcB</code>, and <code>pDst</code> have the same  
+ * number of rows and columns.  
+ */ 
+ 
+/**  
+ * @addtogroup MatrixAdd  
+ * @{  
+ */ 
+ 
+ 
+/**  
+ * @brief Floating-point matrix addition.  
+ * @param[in]       *pSrcA points to the first input matrix structure  
+ * @param[in]       *pSrcB points to the second input matrix structure  
+ * @param[out]      *pDst points to output matrix structure  
+ * @return     		The function returns either  
+ * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.  
+ */ 
+ 
+arm_status arm_mat_add_f32(
+  const arm_matrix_instance_f32 * pSrcA,
+  const arm_matrix_instance_f32 * pSrcB,
+  arm_matrix_instance_f32 * pDst)
+{
+  float32_t *pA = pSrcA->pData;                  /* read cursor over the first operand   */
+  float32_t *pB = pSrcB->pData;                  /* read cursor over the second operand  */
+  float32_t *pSum = pDst->pData;                 /* write cursor over the result         */
+  uint32_t total;                                /* number of elements in each matrix    */
+  uint32_t remaining;                            /* passes left in the current loop      */
+  arm_status status;                             /* outcome reported to the caller       */
+
+#ifdef ARM_MATH_MATRIX_CHECK
+  /* Both operands and the destination must have identical dimensions */
+  if((pSrcA->numRows != pSrcB->numRows) ||
+     (pSrcA->numCols != pSrcB->numCols) ||
+     (pSrcA->numRows != pDst->numRows) || (pSrcA->numCols != pDst->numCols))
+  {
+    /* Dimensions disagree: report it; nothing is written to the
+     ** destination in this case. */
+    status = ARM_MATH_SIZE_MISMATCH;
+  }
+  else
+#endif
+  {
+
+    /* Element count is rows * columns of the first operand; the cast makes
+     ** the multiplication happen in uint32_t arithmetic. */
+    total = (uint32_t) pSrcA->numRows * pSrcA->numCols;
+
+    /* Unrolled part: each pass produces four sums,
+     ** C(m,n) = A(m,n) + B(m,n).
+     ** The 0 to 3 elements it cannot reach are handled by the loop below. */
+    for(remaining = total >> 2u; remaining > 0u; remaining--)
+    {
+      /* Elements are summed in ascending order, one store per element */
+      pSum[0] = pA[0] + pB[0];
+      pSum[1] = pA[1] + pB[1];
+      pSum[2] = pA[2] + pB[2];
+      pSum[3] = pA[3] + pB[3];
+
+      /* Step all three cursors past the four elements just handled */
+      pA += 4;
+      pB += 4;
+      pSum += 4;
+    }
+
+    /* Tail part: one sum per pass for the (element count modulo 4)
+     ** elements that are left. */
+    for(remaining = total % 0x4u; remaining > 0u; remaining--)
+    {
+      /* C(m,n) = A(m,n) + B(m,n) */
+      *pSum = *pA + *pB;
+
+      /* Move on to the next element */
+      pA++;
+      pB++;
+      pSum++;
+    }
+
+    /* The addition itself cannot fail */
+    status = ARM_MATH_SUCCESS;
+
+  }
+
+  /* Return to the application */
+  return (status);
+}
+ 
+/**  
+ * @} end of MatrixAdd group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/MatrixFunctions/arm_mat_add_q15.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,127 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:	    arm_mat_add_q15.c  
+*  
+* Description:	Q15 matrix addition  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.5  2010/04/26   
+*    incorporated review comments and updated with latest CMSIS layer  
+*  
+* Version 0.0.3  2010/03/10   
+*    Initial version  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupMatrix  
+ */ 
+ 
+/**  
+ * @addtogroup MatrixAdd  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief Q15 matrix addition.  
+ * @param[in]       *pSrcA points to the first input matrix structure  
+ * @param[in]       *pSrcB points to the second input matrix structure  
+ * @param[out]      *pDst points to output matrix structure  
+ * @return     		The function returns either  
+ * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.  
+ *  
+ * <b>Scaling and Overflow Behavior:</b>  
+ * \par  
+ * The function uses saturating arithmetic.  
+ * Results outside of the allowable Q15 range [0x8000 0x7FFF] will be saturated.  
+ */ 
+ 
+arm_status arm_mat_add_q15(
+  const arm_matrix_instance_q15 * pSrcA,
+  const arm_matrix_instance_q15 * pSrcB,
+  arm_matrix_instance_q15 * pDst)
+{
+  q15_t *pInA = pSrcA->pData;                    /* input data matrix pointer A  */
+  q15_t *pInB = pSrcB->pData;                    /* input data matrix pointer B */
+  q15_t *pOut = pDst->pData;                     /* output data matrix pointer */
+  uint32_t numSamples;                           /* total number of elements in the matrix (rows * cols needs 32 bits) */
+  uint32_t blkCnt;                               /* loop counters  */
+  arm_status status;                             /* status of matrix addition  */
+
+#ifdef ARM_MATH_MATRIX_CHECK
+  /* Check for matrix mismatch condition: all three matrices must have the same size */
+  if((pSrcA->numRows != pSrcB->numRows) ||
+     (pSrcA->numCols != pSrcB->numCols) ||
+     (pSrcA->numRows != pDst->numRows) || (pSrcA->numCols != pDst->numCols))
+  {
+    /* Set status as ARM_MATH_SIZE_MISMATCH */
+    status = ARM_MATH_SIZE_MISMATCH;
+  }
+  else
+#endif
+  {
+    /* Total number of samples in the input matrix, computed in 32 bits so
+     ** that matrices with more than 65535 elements are not truncated. */
+    numSamples = (uint32_t) pSrcA->numRows * pSrcA->numCols;
+
+    /* Loop unrolling. First part of the processing: compute 4 outputs at a
+     ** time; a second loop below computes the remaining 0 to 3 samples. */
+    blkCnt = numSamples >> 2u;
+
+    while(blkCnt > 0u)
+    {
+      /* C(m,n) = A(m,n) + B(m,n) */
+      /* Add, saturate and store; each statement handles two samples (__SIMD32 / __QADD16 come from arm_math.h). */
+      *__SIMD32(pOut)++ = __QADD16(*__SIMD32(pInA)++, *__SIMD32(pInB)++);
+      *__SIMD32(pOut)++ = __QADD16(*__SIMD32(pInA)++, *__SIMD32(pInB)++);
+
+      /* Decrement the loop counter */
+      blkCnt--;
+    }
+
+    /* If numSamples is not a multiple of 4, compute any remaining output samples here.
+     ** No loop unrolling is used. */
+    blkCnt = numSamples % 0x4u;
+
+    /* pInA, pInB and pOut continue from where the unrolled loop stopped */
+
+    while(blkCnt > 0u)
+    {
+      /* C(m,n) = A(m,n) + B(m,n) */
+      /* Add, saturate and store one sample; the (q15_t) cast keeps only the low part of the __QADD16 result. */
+      *pOut++ = (q15_t) __QADD16(*pInA++, *pInB++);
+
+      /* Decrement the loop counter */
+      blkCnt--;
+    }
+
+    /* set status as ARM_MATH_SUCCESS */
+    status = ARM_MATH_SUCCESS;
+  }
+
+  /* Return to application */
+  return (status);
+}
+ 
+/**  
+ * @} end of MatrixAdd group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/MatrixFunctions/arm_mat_add_q31.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,128 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:	    arm_mat_add_q31.c  
+*  
+* Description:	Q31 matrix addition  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.5  2010/04/26   
+*    incorporated review comments and updated with latest CMSIS layer  
+*  
+* Version 0.0.3  2010/03/10   
+*    Initial version  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupMatrix  
+ */ 
+ 
+/**  
+ * @addtogroup MatrixAdd  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief Q31 matrix addition.  
+ * @param[in]       *pSrcA points to the first input matrix structure  
+ * @param[in]       *pSrcB points to the second input matrix structure  
+ * @param[out]      *pDst points to output matrix structure  
+ * @return     		The function returns either  
+ * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.  
+ *  
+ * <b>Scaling and Overflow Behavior:</b>  
+ * \par  
+ * The function uses saturating arithmetic.  
+ * Results outside of the allowable Q31 range [0x80000000 0x7FFFFFFF] will be saturated.  
+ */ 
+ 
+arm_status arm_mat_add_q31(
+  const arm_matrix_instance_q31 * pSrcA,
+  const arm_matrix_instance_q31 * pSrcB,
+  arm_matrix_instance_q31 * pDst)
+{
+  q31_t *pA = pSrcA->pData;                      /* read cursor over the first operand   */
+  q31_t *pB = pSrcB->pData;                      /* read cursor over the second operand  */
+  q31_t *pSum = pDst->pData;                     /* write cursor over the result         */
+  uint32_t total;                                /* number of elements in each matrix    */
+  uint32_t remaining;                            /* passes left in the current loop      */
+  arm_status status;                             /* outcome reported to the caller       */
+
+#ifdef ARM_MATH_MATRIX_CHECK
+  /* Both operands and the destination must have identical dimensions */
+  if((pSrcA->numRows != pSrcB->numRows) ||
+     (pSrcA->numCols != pSrcB->numCols) ||
+     (pSrcA->numRows != pDst->numRows) || (pSrcA->numCols != pDst->numCols))
+  {
+    /* Dimensions disagree: report it; nothing is written to the
+     ** destination in this case. */
+    status = ARM_MATH_SIZE_MISMATCH;
+  }
+  else
+#endif
+  {
+    /* Element count is rows * columns of the first operand; the cast makes
+     ** the multiplication happen in uint32_t arithmetic. */
+    total = (uint32_t) pSrcA->numRows * pSrcA->numCols;
+
+    /* Unrolled part: each pass produces four saturated sums,
+     ** C(m,n) = A(m,n) + B(m,n).
+     ** The 0 to 3 elements it cannot reach are handled by the loop below. */
+    for(remaining = total >> 2u; remaining > 0u; remaining--)
+    {
+      /* __QADD performs the addition; elements are taken in ascending order */
+      pSum[0] = __QADD(pA[0], pB[0]);
+      pSum[1] = __QADD(pA[1], pB[1]);
+      pSum[2] = __QADD(pA[2], pB[2]);
+      pSum[3] = __QADD(pA[3], pB[3]);
+
+      /* Step all three cursors past the four elements just handled */
+      pA += 4;
+      pB += 4;
+      pSum += 4;
+    }
+
+    /* Tail part: one sum per pass for the (element count modulo 4)
+     ** elements that are left. */
+    for(remaining = total % 0x4u; remaining > 0u; remaining--)
+    {
+      /* C(m,n) = A(m,n) + B(m,n) */
+      *pSum = __QADD(*pA, *pB);
+
+      /* Move on to the next element */
+      pA++;
+      pB++;
+      pSum++;
+    }
+
+    /* The addition itself cannot fail */
+    status = ARM_MATH_SUCCESS;
+  }
+
+  /* Return to the application */
+  return (status);
+}
+ 
+/**  
+ * @} end of MatrixAdd group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/MatrixFunctions/arm_mat_init_f32.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,80 @@
+/* ----------------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:        arm_mat_init_f32.c  
+*  
+* Description:	Floating-point matrix initialization.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.5  2010/04/26   
+*    incorporated review comments and updated with latest CMSIS layer  
+*  
+* Version 0.0.3  2010/03/10   
+*    Initial version  
+* -------------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupMatrix  
+ */ 
+ 
+/**  
+ * @defgroup MatrixInit Matrix Initialization  
+ *  
+ * Initializes the underlying matrix data structure.  
+ * The functions set the <code>numRows</code>,  
+ * <code>numCols</code>, and <code>pData</code> fields  
+ * of the matrix data structure.  
+ */ 
+ 
+/**  
+ * @addtogroup MatrixInit  
+ * @{  
+ */ 
+ 
+/**  
+   * @brief  Floating-point matrix initialization.  
+   * @param[in,out] *S             points to an instance of the floating-point matrix structure.  
+   * @param[in]     nRows          number of rows in the matrix.  
+   * @param[in]     nColumns       number of columns in the matrix.  
+   * @param[in]     *pData	   points to the matrix data array.  
+   * @return        none  
+   */ 
+ 
+void arm_mat_init_f32(
+  arm_matrix_instance_f32 * S,
+  uint16_t nRows,
+  uint16_t nColumns,
+  float32_t * pData)
+{
+  /* Attach the caller's element storage; the pointer is stored, nothing is copied */
+  S->pData = pData;
+
+  /* Record the row count */
+  S->numRows = nRows;
+
+  /* Record the column count */
+  S->numCols = nColumns;
+}
+ 
+/**  
+ * @} end of MatrixInit group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/MatrixFunctions/arm_mat_init_q15.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,72 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:        arm_mat_init_q15.c  
+*  
+* Description:	Q15 matrix initialization.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.5  2010/04/26   
+*    incorporated review comments and updated with latest CMSIS layer  
+*  
+* Version 0.0.3  2010/03/10   
+*    Initial version  
+* -------------------------------------------------------------------------- */ 
+ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupMatrix  
+ */ 
+ 
+/**  
+ * @addtogroup MatrixInit  
+ * @{  
+ */ 
+ 
+  /**  
+   * @brief  Q15 matrix initialization.  
+   * @param[in,out] *S             points to an instance of the Q15 matrix structure.
+   * @param[in]     nRows          number of rows in the matrix.  
+   * @param[in]     nColumns       number of columns in the matrix.  
+   * @param[in]     *pData	   points to the matrix data array.  
+   * @return        none  
+   */ 
+ 
+void arm_mat_init_q15(
+  arm_matrix_instance_q15 * S,
+  uint16_t nRows,
+  uint16_t nColumns,
+  q15_t * pData)
+{
+  /* Attach the caller's element storage; the pointer is stored, nothing is copied */
+  S->pData = pData;
+
+  /* Record the row count */
+  S->numRows = nRows;
+
+  /* Record the column count */
+  S->numCols = nColumns;
+}
+ 
+/**  
+ * @} end of MatrixInit group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/MatrixFunctions/arm_mat_init_q31.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,76 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:        arm_mat_init_q31.c  
+*  
+* Description:	Q31 matrix initialization.  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.5  2010/04/26   
+*    incorporated review comments and updated with latest CMSIS layer  
+*  
+* Version 0.0.3  2010/03/10   
+*    Initial version  
+* -------------------------------------------------------------------------- */ 
+ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupMatrix  
+ */ 
+ 
+/**  
+ * @defgroup MatrixInit Matrix Initialization  
+ *  
+ */ 
+ 
+/**  
+ * @addtogroup MatrixInit  
+ * @{  
+ */ 
+ 
+  /**  
+   * @brief  Q31 matrix initialization.  
+   * @param[in,out] *S             points to an instance of the Q31 matrix structure.
+   * @param[in]     nRows          number of rows in the matrix.  
+   * @param[in]     nColumns       number of columns in the matrix.  
+   * @param[in]     *pData	   points to the matrix data array.  
+   * @return        none  
+   */ 
+ 
+void arm_mat_init_q31(
+  arm_matrix_instance_q31 * S,
+  uint16_t nRows,
+  uint16_t nColumns,
+  q31_t * pData)
+{
+  /* Attach the caller's element storage; the pointer is stored, nothing is copied */
+  S->pData = pData;
+
+  /* Record the row count */
+  S->numRows = nRows;
+
+  /* Record the column count */
+  S->numCols = nColumns;
+}
+ 
+/**  
+ * @} end of MatrixInit group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/MatrixFunctions/arm_mat_inverse_f32.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,403 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:	    arm_mat_inverse_f32.c  
+*  
+* Description:	Floating-point matrix inverse.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupMatrix  
+ */ 
+ 
+/**  
+ * @defgroup MatrixInv Matrix Inverse  
+ *  
+ * Computes the inverse of a matrix.  
+ *  
+ * The inverse is defined only if the input matrix is square and non-singular (the determinant  
+ * is non-zero). The function checks that the input and output matrices are square and of the  
+ * same size.  
+ *  
+ * Matrix inversion is numerically sensitive and the CMSIS DSP library only supports matrix  
+ * inversion of floating-point matrices.  
+ *  
+ * \par Algorithm  
+ * The Gauss-Jordan method is used to find the inverse.  
+ * The algorithm performs a sequence of elementary row-operations till it  
+ * reduces the input matrix to an identity matrix. Applying the same sequence  
+ * of elementary row-operations to an identity matrix yields the inverse matrix.  
+ * If the input matrix is singular, then the algorithm terminates and returns error status  
+ * <code>ARM_MATH_SINGULAR</code>.  
+ * \image html MatrixInverse.gif "Matrix Inverse of a 3 x 3 matrix using Gauss-Jordan Method"  
+ */ 
+ 
+/**  
+ * @addtogroup MatrixInv  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief Floating-point matrix inverse.  
+ * @param[in]       *pSrc points to input matrix structure  
+ * @param[out]      *pDst points to output matrix structure  
+ * @return     		The function returns  
+ * <code>ARM_MATH_SIZE_MISMATCH</code> if the input matrix is not square or if the size  
+ * of the output matrix does not match the size of the input matrix.  
+ * If the input matrix is found to be singular (non-invertible), then the function returns  
+ * <code>ARM_MATH_SINGULAR</code>.  Otherwise, the function returns <code>ARM_MATH_SUCCESS</code>.  
+ */ 
+ 
+arm_status arm_mat_inverse_f32(
+  const arm_matrix_instance_f32 * pSrc,
+  arm_matrix_instance_f32 * pDst)
+{
+  float32_t *pIn = pSrc->pData;                  /* input data; NOTE: overwritten while it is being reduced */
+  float32_t *pOut = pDst->pData;                 /* output data; starts as identity, ends as the inverse */
+  float32_t *pInT1, *pInT2;                      /* Temporary input data matrix pointer */
+  float32_t *pInT3, *pInT4;                      /* Temporary output data matrix pointer */
+  float32_t *pPivotRowIn, *pPRT_in, *pPivotRowDst, *pPRT_pDst;  /* Temporary input and output data matrix pointer */
+  uint32_t numRows = pSrc->numRows;              /* Number of rows in the matrix  */
+  uint32_t numCols = pSrc->numCols;              /* Number of Cols in the matrix  */
+  float32_t Xchg, in = 0.0f, in1;                /* Temporary input values  */
+  uint32_t i, rowCnt, flag = 0u, j, loopCnt, k, l;      /* loop counters; flag = row exchange done for the current column */
+  arm_status status = ARM_MATH_SUCCESS;          /* stays SUCCESS unless a check below fails */
+
+#ifdef ARM_MATH_MATRIX_CHECK
+  /* Check for matrix mismatch condition: both matrices must be square and of the same size */
+  if((pSrc->numRows != pSrc->numCols) || (pDst->numRows != pDst->numCols)
+     || (pSrc->numRows != pDst->numRows))
+  {
+    /* Set status as ARM_MATH_SIZE_MISMATCH */
+    status = ARM_MATH_SIZE_MISMATCH;
+  }
+  else
+#endif
+  {
+
+    /*--------------------------------------------------------------------------------------------------------------
+     * Matrix Inverse can be solved using elementary row operations.
+     *
+     *  Gauss-Jordan Method:
+     *
+     *     1. First combine the identity matrix and the input matrix separated by a bar to form an
+     *        augmented matrix as follows:
+     *             _                     _         _           _
+     *            |  a11  a12 | 1   0   |       |  X11 X12  |
+     *            |           |         |   =   |           |
+     *            |_ a21  a22 | 0   1  _|       |_ X21 X22 _|
+     *
+     *     2. In our implementation, the pDst matrix is used as the identity matrix.
+     *
+     *     3. Begin with the first row. Let i = 1.
+     *
+     *     4. Check to see if the pivot for row i is zero.
+     *        The pivot is the element of the main diagonal that is on the current row.
+     *        For instance, if working with row i, then the pivot element is aii.
+     *        If the pivot is zero, exchange that row with a row below it that does not
+     *        contain a zero in column i. If this is not possible, then an inverse
+     *        to that matrix does not exist.
+     *
+     *     5. Divide every element of row i by the pivot.
+     *
+     *     6. For every row above and below row i, replace that row with the sum of that row and
+     *        a multiple of row i so that each new element in column i outside row i is zero.
+     *
+     *     7. Move to the next row and column and repeat steps 4 through 6 until you have zeros
+     *        for every element below and above the main diagonal.
+     *
+     *     8. Now an identity matrix is formed to the left of the bar (input matrix, pSrc).
+     *        Therefore, the matrix to the right of the bar is our solution (output matrix, pDst).
+     *----------------------------------------------------------------------------------------------------------------*/
+
+    /* Working pointer for destination matrix */
+    pInT2 = pOut;
+
+    /* Loop over the number of rows */
+    rowCnt = numRows;
+
+    /* Making the destination matrix as identity matrix */
+    while(rowCnt > 0u)
+    {
+      /* Writing all zeroes in lower triangle of the destination matrix */
+      j = numRows - rowCnt;
+      while(j > 0u)
+      {
+        *pInT2++ = 0.0f;
+        j--;
+      }
+
+      /* Writing all ones in the diagonal of the destination matrix */
+      *pInT2++ = 1.0f;
+
+      /* Writing all zeroes in upper triangle of the destination matrix */
+      j = rowCnt - 1u;
+      while(j > 0u)
+      {
+        *pInT2++ = 0.0f;
+        j--;
+      }
+
+      /* Decrement the loop counter */
+      rowCnt--;
+    }
+
+    /* Loop over the number of columns of the input matrix.
+       All the elements in each column are processed by the row operations */
+    loopCnt = numCols;
+
+    /* Index of the column (and of the pivot row) being processed */
+    l = 0u;
+
+    while(loopCnt > 0u)
+    {
+      /* Check if the pivot element is zero. If it is, interchange the row
+       * with a row below that has a non zero element in this column; if
+       * there is no such row, the matrix is singular. */
+
+      /* Working pointer for the input matrix that points to the pivot
+       * element (l, l); pIn itself has already been advanced by l columns */
+      pInT1 = pIn + (l * numCols);
+
+      /* Working pointer for the destination matrix that points
+       * to the first element of row l */
+      pInT3 = pOut + (l * numCols);
+
+      /* Temporary variable to hold the pivot value */
+      in = *pInT1;
+
+      /* Offset, in rows below the pivot row, of the exchange candidate */
+      k = 1u;
+      flag = 0u;   /* no exchange done for this column yet; a stale 1 would hide a zero pivot */
+
+      /* Check if the pivot element is zero */
+      if(*pInT1 == 0.0f)
+      {
+        /* Loop over the number rows present below */
+        i = numRows - (l + 1u);
+
+        while(i > 0u)
+        {
+          /* Both candidate pointers step k rows below the pivot row (row l + k) */
+          pInT2 = pInT1 + (numCols * k);
+          pInT4 = pInT3 + (numCols * k);
+
+          /* Check if there is a non zero pivot element to
+           * replace in the rows below */
+          if(*pInT2 != 0.0f)
+          {
+            /* Loop over the columns from the pivot column
+             * to the end of the row */
+            j = numCols - l;
+
+            while(j > 0u)
+            {
+              /* Exchange the row elements of the input matrix */
+              Xchg = *pInT2;
+              *pInT2++ = *pInT1;
+              *pInT1++ = Xchg;
+
+              /* Decrement the loop counter */
+              j--;
+            }
+
+            /* Loop over number of columns of the destination matrix */
+            j = numCols;
+
+            while(j > 0u)
+            {
+              /* Exchange the row elements of the destination matrix */
+              Xchg = *pInT4;
+              *pInT4++ = *pInT3;
+              *pInT3++ = Xchg;
+
+              /* Decrement the loop counter */
+              j--;
+            }
+
+            /* Flag to indicate whether exchange is done or not */
+            flag = 1u;
+
+            /* Break after exchange is done */
+            break;
+          }
+
+          /* Move the candidate one row further down */
+          k++;
+
+          /* Decrement the loop counter */
+          i--;
+        }
+      }
+
+      /* Zero pivot and no row to exchange it with: the matrix is singular */
+      if((flag != 1u) && (in == 0.0f))
+      {
+        status = ARM_MATH_SINGULAR;
+
+        break;
+      }
+
+      /* Points to the pivot row of input and destination matrices */
+      pPivotRowIn = pIn + (l * numCols);
+      pPivotRowDst = pOut + (l * numCols);
+
+      /* Temporary pointers to the pivot row pointers */
+      pInT1 = pPivotRowIn;
+      pInT2 = pPivotRowDst;
+
+      /* Pivot element of the row (re-read: a row exchange may have replaced it) */
+      in = *(pIn + (l * numCols));
+
+      /* Loop over the columns from the pivot column
+       * to the end of the row */
+      j = (numCols - l);
+
+      while(j > 0u)
+      {
+        /* Divide each element of the row of the input matrix
+         * by the pivot element */
+        in1 = *pInT1;
+        *pInT1++ = in1 / in;
+
+        /* Decrement the loop counter */
+        j--;
+      }
+
+      /* Loop over number of columns of the destination matrix */
+      j = numCols;
+
+      while(j > 0u)
+      {
+        /* Divide each element of the row of the destination matrix
+         * by the pivot element */
+        in1 = *pInT2;
+        *pInT2++ = in1 / in;
+
+        /* Decrement the loop counter */
+        j--;
+      }
+
+      /* Replace every other row with the sum of that row and a multiple of row l
+       * so that each new element in column l above and below row l is zero. */
+
+      /* Temporary pointers for input and destination matrices */
+      pInT1 = pIn;
+      pInT2 = pOut;
+
+      /* index used to check for pivot element */
+      i = 0u;
+
+      /* Loop over number of rows */
+      /*  to be replaced by the sum of that row and a multiple of row l */
+      k = numRows;
+
+      while(k > 0u)
+      {
+        /* Check for the pivot element */
+        if(i == l)
+        {
+          /* The pivot row itself is left unchanged:
+             just step both pointers over it */
+          pInT1 += numCols - l;
+
+          pInT2 += numCols;
+        }
+        else
+        {
+          /* Element of this row in the pivot column: the multiple to remove */
+          in = *pInT1;
+
+          /* Working pointers for input and destination pivot rows */
+          pPRT_in = pPivotRowIn;
+          pPRT_pDst = pPivotRowDst;
+
+          /* Loop over the columns from the pivot column to the end of the row,
+             to replace the elements in the input matrix */
+          j = (numCols - l);
+
+          while(j > 0u)
+          {
+            /* Replace the element by the sum of that row
+               and a multiple of the reference row  */
+            in1 = *pInT1;
+            *pInT1++ = in1 - (in * *pPRT_in++);
+
+            /* Decrement the loop counter */
+            j--;
+          }
+
+          /* Loop over the number of columns to
+             replace the elements in the destination matrix */
+          j = numCols;
+
+          while(j > 0u)
+          {
+            /* Replace the element by the sum of that row
+               and a multiple of the reference row  */
+            in1 = *pInT2;
+            *pInT2++ = in1 - (in * *pPRT_pDst++);
+
+            /* Decrement the loop counter */
+            j--;
+          }
+
+        }
+
+        /* Skip the first l columns of the next row of the input matrix */
+        pInT1 = pInT1 + l;
+
+        /* Decrement the loop counter */
+        k--;
+
+        /* Increment the pivot index */
+        i++;
+      }
+
+      /* Increment the input pointer: it now starts at the next column */
+      pIn++;
+
+      /* Decrement the loop counter */
+      loopCnt--;
+
+      /* Increment the index modifier */
+      l++;
+    }
+
+    /* Nothing is left to decide here. status was initialised to
+     * ARM_MATH_SUCCESS and is only changed to ARM_MATH_SINGULAR when the
+     * column loop is abandoned because a zero pivot could not be replaced.
+     * It must not be derived from 'in' at this point: after the last
+     * elimination pass 'in' holds an ordinary matrix element, which is zero
+     * for perfectly invertible inputs such as an identity matrix, so testing
+     * it would report those inputs as singular. */
+
+  }
+
+  /* Return to application */
+  return (status);
+}
+ 
+/**  
+ * @} end of MatrixInv group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/MatrixFunctions/arm_mat_mult_f32.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,190 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:	    arm_mat_mult_f32.c  
+*  
+* Description:  Floating-point matrix multiplication.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.5  2010/04/26   
+*    incorporated review comments and updated with latest CMSIS layer  
+*  
+* Version 0.0.3  2010/03/10   
+*    Initial version  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupMatrix  
+ */ 
+ 
+/**  
+ * @defgroup MatrixMult Matrix Multiplication  
+ *  
+ * Multiplies two matrices.  
+ *  
+ * \image html MatrixMultiplication.gif "Multiplication of two 3 x 3 matrices"  
+  
+ * Matrix multiplication is only defined if the number of columns of the  
+ * first matrix equals the number of rows of the second matrix.  
+ * Multiplying an <code>M x N</code> matrix with an <code>N x P</code> matrix results  
+ * in an <code>M x P</code> matrix.  
+ * When matrix size checking is enabled, the functions check: (1) that the inner dimensions of  
+ * <code>pSrcA</code> and <code>pSrcB</code> are equal; and (2) that the size of the output  
+ * matrix equals the outer dimensions of <code>pSrcA</code> and <code>pSrcB</code>.  
+ */ 
+ 
+ 
+/**  
+ * @addtogroup MatrixMult  
+ * @{  
+ */ 
+ 
+/**
+ * @brief Floating-point matrix multiplication.
+ * @param[in]       *pSrcA points to the first input matrix structure
+ * @param[in]       *pSrcB points to the second input matrix structure
+ * @param[out]      *pDst points to output matrix structure
+ * @return     		The function returns either
+ * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
+ *
+ * @details
+ * Each output element is the dot product of one row of <code>pSrcA</code> with one
+ * column of <code>pSrcB</code>, accumulated in a single-precision variable.
+ *
+ * \par
+ * Size checking is compiled in only when <code>ARM_MATH_MATRIX_CHECK</code> is defined;
+ * without it the function always returns <code>ARM_MATH_SUCCESS</code>.
+ *
+ * \par
+ * The row and column loops are do/while loops whose bodies run before the counter is
+ * tested, so <code>pSrcA->numRows</code> and <code>pSrcB->numCols</code> are expected to be non-zero.
+ * Results are stored while the inputs are still being read, so the output buffer
+ * should not overlap either input buffer.
+ */
+
+arm_status arm_mat_mult_f32(
+  const arm_matrix_instance_f32 * pSrcA,
+  const arm_matrix_instance_f32 * pSrcB,
+  arm_matrix_instance_f32 * pDst)
+{
+  float32_t *pIn1;                               /* walks along the current row of A */
+  float32_t *pIn2;                               /* walks down the current column of B */
+  float32_t *pInA = pSrcA->pData;                /* start of the current row of A */
+  float32_t *pOut = pDst->pData;                 /* output data matrix pointer */
+  float32_t *px;                                 /* Temporary output data matrix pointer */
+  float32_t sum;                                 /* Accumulator */
+  uint16_t numRowsA = pSrcA->numRows;            /* number of rows of input matrix A */
+  uint16_t numColsB = pSrcB->numCols;            /* number of columns of input matrix B */
+  uint16_t numColsA = pSrcA->numCols;            /* number of columns of input matrix A */
+  uint16_t col, j, row = numRowsA, colCnt;       /* loop counters */
+  uint32_t i = 0u;                               /* element offset of the current output row.
+                                                  ** Kept in 32 bits: it grows by numColsB per row
+                                                  ** and a 16-bit value would wrap once the output
+                                                  ** holds more than 65535 elements. */
+  arm_status status;                             /* status of matrix multiplication */
+
+#ifdef ARM_MATH_MATRIX_CHECK
+  /* Check for matrix mismatch condition */
+  if((pSrcA->numCols != pSrcB->numRows) ||
+     (pSrcA->numRows != pDst->numRows) || (pSrcB->numCols != pDst->numCols))
+  {
+
+    /* Set status as ARM_MATH_SIZE_MISMATCH */
+    status = ARM_MATH_SIZE_MISMATCH;
+  }
+  else
+#endif
+  {
+    /* The following loop performs the dot-product of each row in pSrcA with each column in pSrcB */
+    /* row loop */
+    do
+    {
+      /* Output pointer is set to starting address of the row being processed */
+      px = pOut + i;
+
+      /* For every row wise process, the column loop counter is to be initiated */
+      col = numColsB;
+
+      /* For every row wise process, the pIn2 pointer is set
+       ** to the starting address of the pSrcB data */
+      pIn2 = pSrcB->pData;
+
+      j = 0u;
+
+      /* column loop */
+      do
+      {
+        /* Set the variable sum, that acts as accumulator, to zero */
+        sum = 0.0f;
+
+        /* Rewind pIn1 to the start of the row of pSrcA being processed */
+        pIn1 = pInA;
+
+        /* Loop unrolling: four multiply-accumulates per iteration. */
+        colCnt = numColsA >> 2;
+
+        /* matrix multiplication        */
+        while(colCnt > 0u)
+        {
+          /* c(m,n) = a(1,1)*b(1,1) + a(1,2) * b(2,1) + .... + a(m,p)*b(p,n) */
+          /* pIn2 steps by numColsB to move down one row within the same column of B */
+          sum += *pIn1++ * (*pIn2);
+          pIn2 += numColsB;
+          sum += *pIn1++ * (*pIn2);
+          pIn2 += numColsB;
+          sum += *pIn1++ * (*pIn2);
+          pIn2 += numColsB;
+          sum += *pIn1++ * (*pIn2);
+          pIn2 += numColsB;
+
+          /* Decrement the loop count */
+          colCnt--;
+        }
+
+        /* If the columns of pSrcA is not a multiple of 4, compute any remaining MACs here.
+         ** No loop unrolling is used. */
+        colCnt = numColsA % 0x4u;
+
+        while(colCnt > 0u)
+        {
+          /* c(m,n) = a(1,1)*b(1,1) + a(1,2) * b(2,1) + .... + a(m,p)*b(p,n) */
+          sum += *pIn1++ * (*pIn2);
+          pIn2 += numColsB;
+
+          /* Decrement the loop counter */
+          colCnt--;
+        }
+
+        /* Store the result in the destination buffer */
+        *px++ = sum;
+
+        /* Update the pointer pIn2 to point to the  starting address of the next column */
+        j++;
+        pIn2 = pSrcB->pData + j;
+
+        /* Decrement the column loop counter */
+        col--;
+
+      } while(col > 0u);
+
+      /* Advance the output offset by one output row and
+       ** pInA to the starting address of the next row of pSrcA */
+      i = i + numColsB;
+      pInA = pInA + numColsA;
+
+      /* Decrement the row loop counter */
+      row--;
+
+    } while(row > 0u);
+
+    /* Set status as ARM_MATH_SUCCESS */
+    status = ARM_MATH_SUCCESS;
+  }
+
+  /* Return to application */
+  return (status);
+}
+ 
+/**  
+ * @} end of MatrixMult group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/MatrixFunctions/arm_mat_mult_fast_q15.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,243 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:	    arm_mat_mult_fast_q15.c  
+*  
+* Description:	 Q15 matrix multiplication (fast variant)  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupMatrix  
+ */ 
+ 
+/**  
+ * @addtogroup MatrixMult  
+ * @{  
+ */ 
+ 
+ 
+/**
+ * @brief Q15 matrix multiplication (fast variant)
+ * @param[in]       *pSrcA points to the first input matrix structure
+ * @param[in]       *pSrcB points to the second input matrix structure
+ * @param[out]      *pDst points to output matrix structure
+ * @param[in]		*pState points to the array for storing intermediate results.
+ *                  It receives the transpose of <code>pSrcB</code> and therefore needs room for
+ *                  <code>pSrcB->numRows * pSrcB->numCols</code> elements.
+ * @return     		The function returns either
+ * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
+ *
+ * @details
+ * <b>Scaling and Overflow Behavior:</b>
+ *
+ * \par
+ * The difference between the function arm_mat_mult_q15() and this fast variant is that
+ * the fast variant use a 32-bit rather than a 64-bit accumulator.
+ * The result of each 1.15 x 1.15 multiplication is in
+ * 2.30 format. These intermediate results are accumulated in a 32-bit register in 2.30
+ * format. Finally, the accumulator is shifted right by 15 bits and narrowed to a 1.15
+ * result; the code applies no saturation at this step.
+ *
+ * \par
+ * The fast version has the same overflow behavior as the standard version but provides
+ * less precision since it discards the low 16 bits of each multiplication result.
+ * NOTE(review): the code below only shifts the final sum; confirm whether the
+ * "low 16 bits" statement really applies to this function.
+ * In order to avoid overflows completely the input signals must be scaled down.
+ * Scale down one of the input matrices by log2(numColsA) bits to
+ * avoid overflows, as a total of numColsA additions are computed internally for each
+ * output element.
+ *
+ * \par
+ * See <code>arm_mat_mult_q15()</code> for a slower implementation of this function
+ * which uses 64-bit accumulation to provide higher precision.
+ *
+ * \par
+ * Size checking is compiled in only when <code>ARM_MATH_MATRIX_CHECK</code> is defined.
+ * All loops over rows and output columns are do/while loops, so the matrix
+ * dimensions are expected to be non-zero.
+ */
+
+arm_status arm_mat_mult_fast_q15(
+  const arm_matrix_instance_q15 * pSrcA,
+  const arm_matrix_instance_q15 * pSrcB,
+  arm_matrix_instance_q15 * pDst,
+  q15_t * pState)
+{
+  q31_t sum;                                     /* accumulator */
+  q31_t in;                                      /* Temporary variable to hold the input value */
+  q15_t *pSrcBT = pState;                        /* input data matrix pointer for transpose */
+  q15_t *pInA = pSrcA->pData;                    /* input data matrix pointer A of Q15 type */
+  q15_t *pInB = pSrcB->pData;                    /* input data matrix pointer B of Q15 type */
+  q15_t *px;                                     /* Temporary output data matrix pointer */
+  uint16_t numRowsA = pSrcA->numRows;            /* number of rows of input matrix A    */
+  uint16_t numColsB = pSrcB->numCols;            /* number of columns of input matrix B */
+  uint16_t numColsA = pSrcA->numCols;            /* number of columns of input matrix A */
+  uint16_t numRowsB = pSrcB->numRows;            /* number of rows of input matrix B    */
+  uint16_t col, row = numRowsB, colCnt;          /* loop counters */
+  uint32_t i = 0u;                               /* row index during the transpose, then element
+                                                  ** offset of the current row of A. Kept in 32 bits:
+                                                  ** the offset grows by numColsA per row and a 16-bit
+                                                  ** value would wrap once A holds more than 65535
+                                                  ** elements. */
+  arm_status status;                             /* status of matrix multiplication */
+
+#ifdef ARM_MATH_MATRIX_CHECK
+  /* Check for matrix mismatch condition */
+  if((pSrcA->numCols != pSrcB->numRows) ||
+     (pSrcA->numRows != pDst->numRows) || (pSrcB->numCols != pDst->numCols))
+  {
+    /* Set status as ARM_MATH_SIZE_MISMATCH */
+    status = ARM_MATH_SIZE_MISMATCH;
+  }
+  else
+#endif
+  {
+    /* Matrix transpose: copy pSrcB into pState so that each column of B becomes
+     ** a contiguous run of numRowsB elements */
+    do
+    {
+      /* Apply loop unrolling and exchange the columns with row elements */
+      col = numColsB >> 2;
+
+      /* The pointer px is set to starting address of the column being processed */
+      px = pSrcBT + i;
+
+      /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
+       ** a second loop below computes the remaining 1 to 3 samples. */
+      while(col > 0u)
+      {
+        /* Read two elements from the row.
+         ** NOTE(review): __SIMD32 is defined outside this file; presumably a 32-bit
+         ** access through the q15_t pointer - confirm in arm_math.h */
+        in = *__SIMD32(pInB)++;
+
+        /* Unpack and store one element in the destination (low halfword;
+         ** assumes the first element sits in the low half - TODO confirm endianness) */
+        *px = (q15_t) in;
+
+        /* Update the pointer px to point to the next row of the transposed matrix */
+        px += numRowsB;
+
+        /* Unpack and store the second element in the destination */
+        *px = (q15_t) ((in & (q31_t) 0xffff0000) >> 16);
+
+        /* Update the pointer px to point to the next row of the transposed matrix */
+        px += numRowsB;
+
+        /* Read two elements from the row */
+        in = *__SIMD32(pInB)++;
+
+        /* Unpack and store one element in the destination */
+        *px = (q15_t) in;
+
+        /* Update the pointer px to point to the next row of the transposed matrix */
+        px += numRowsB;
+
+        /* Unpack and store the second element in the destination */
+        *px = (q15_t) ((in & (q31_t) 0xffff0000) >> 16);
+
+        /* Update the pointer px to point to the next row of the transposed matrix */
+        px += numRowsB;
+
+        /* Decrement the column loop counter */
+        col--;
+      }
+
+      /* If the columns of pSrcB is not a multiple of 4, compute any remaining output samples here.
+       ** No loop unrolling is used. */
+      col = numColsB % 0x4u;
+
+      while(col > 0u)
+      {
+        /* Read and store the input element in the destination */
+        *px = *pInB++;
+
+        /* Update the pointer px to point to the next row of the transposed matrix */
+        px += numRowsB;
+
+        /* Decrement the column loop counter */
+        col--;
+      }
+
+      /* Next row of pSrcB, i.e. next column of the transposed matrix */
+      i++;
+
+      /* Decrement the row loop counter */
+      row--;
+
+    } while(row > 0u);
+
+    /* Reset the variables for the usage in the following multiplication process */
+    row = numRowsA;
+    i = 0u;
+    px = pDst->pData;
+
+    /* The following loop performs the dot-product of each row in pSrcA with each column in pSrcB */
+    /* row loop */
+    do
+    {
+      /* For every row wise process, the column loop counter is to be initiated */
+      col = numColsB;
+
+      /* For every row wise process, the pInB pointer is set
+       ** to the starting address of the transposed pSrcB data */
+      pInB = pSrcBT;
+
+      /* column loop */
+      do
+      {
+        /* Set the variable sum, that acts as accumulator, to zero */
+        sum = 0;
+
+        /* Apply loop unrolling and compute 2 MACs simultaneously. */
+        colCnt = numColsA >> 1;
+
+        /* Rewind pInA to the start of the row of pSrcA being processed */
+        pInA = pSrcA->pData + i;
+
+        /* matrix multiplication */
+        while(colCnt > 0u)
+        {
+          /* c(m,n) = a(1,1)*b(1,1) + a(1,2) * b(2,1) + .... + a(m,p)*b(p,n) */
+          sum = __SMLAD(*__SIMD32(pInA)++, *__SIMD32(pInB)++, sum);
+
+          /* Decrement the loop counter */
+          colCnt--;
+        }
+
+        /* process odd column samples */
+        if((numColsA & 0x1u) > 0u)
+        {
+          /* c(m,n) = a(1,1)*b(1,1) + a(1,2) * b(2,1) + .... + a(m,p)*b(p,n) */
+          /* pInB is advanced so that it lands on the next transposed column;
+           ** pInA is rewound at the top of the column loop and needs no increment */
+          sum += (q31_t) * pInA * (*pInB++);
+        }
+
+        /* Shift the 2.30 accumulator down to 1.15 and store it in the destination
+         ** buffer (plain narrowing cast, no saturation) */
+        *px = (q15_t) (sum >> 15);
+        px++;
+
+        /* Decrement the column loop counter */
+        col--;
+
+      } while(col > 0u);
+
+      /* Advance the offset to the next row of pSrcA */
+      i = i + numColsA;
+
+      /* Decrement the row loop counter */
+      row--;
+
+    } while(row > 0u);
+
+    /* set status as ARM_MATH_SUCCESS */
+    status = ARM_MATH_SUCCESS;
+  }
+
+  /* Return to application */
+  return (status);
+}
+ 
+/**  
+ * @} end of MatrixMult group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/MatrixFunctions/arm_mat_mult_fast_q31.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,196 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:	    arm_mat_mult_fast_q31.c  
+*  
+* Description:	 Q31 matrix multiplication (fast variant).  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupMatrix  
+ */ 
+ 
+/**  
+ * @addtogroup MatrixMult  
+ * @{  
+ */ 
+ 
+/**
+ * @brief Q31 matrix multiplication (fast variant)
+ * @param[in]       *pSrcA points to the first input matrix structure
+ * @param[in]       *pSrcB points to the second input matrix structure
+ * @param[out]      *pDst points to output matrix structure
+ * @return     		The function returns either
+ * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
+ *
+ * @details
+ * <b>Scaling and Overflow Behavior:</b>
+ *
+ * \par
+ * The difference between the function arm_mat_mult_q31() and this fast variant is that
+ * the fast variant use a 32-bit rather than a 64-bit accumulator.
+ * The result of each 1.31 x 1.31 multiplication is truncated to
+ * 2.30 format. These intermediate results are accumulated in a 32-bit register in 2.30
+ * format. Finally, the accumulator is shifted left by one bit to give a 1.31 result;
+ * the code applies no saturation at this step.
+ *
+ * \par
+ * The fast version has the same overflow behavior as the standard version but provides
+ * less precision since it discards the low 32 bits of each multiplication result.
+ * In order to avoid overflows completely the input signals must be scaled down.
+ * Scale down one of the input matrices by log2(numColsA) bits to
+ * avoid overflows, as a total of numColsA additions are computed internally for each
+ * output element.
+ *
+ * \par
+ * See <code>arm_mat_mult_q31()</code> for a slower implementation of this function
+ * which uses 64-bit accumulation to provide higher precision.
+ *
+ * \par
+ * Size checking is compiled in only when <code>ARM_MATH_MATRIX_CHECK</code> is defined.
+ * The row and column loops are do/while loops, so <code>pSrcA->numRows</code> and
+ * <code>pSrcB->numCols</code> are expected to be non-zero. Results are stored while the
+ * inputs are still being read, so the output buffer should not overlap either input buffer.
+ */
+
+arm_status arm_mat_mult_fast_q31(
+  const arm_matrix_instance_q31 * pSrcA,
+  const arm_matrix_instance_q31 * pSrcB,
+  arm_matrix_instance_q31 * pDst)
+{
+  q31_t *pIn1;                                   /* walks along the current row of A */
+  q31_t *pIn2;                                   /* walks down the current column of B */
+  q31_t *pInA = pSrcA->pData;                    /* start of the current row of A */
+  q31_t *pOut = pDst->pData;                     /* output data matrix pointer */
+  q31_t *px;                                     /* Temporary output data matrix pointer */
+  q31_t sum;                                     /* Accumulator */
+  uint16_t numRowsA = pSrcA->numRows;            /* number of rows of input matrix A    */
+  uint16_t numColsB = pSrcB->numCols;            /* number of columns of input matrix B */
+  uint16_t numColsA = pSrcA->numCols;            /* number of columns of input matrix A */
+  uint16_t col, j, row = numRowsA, colCnt;       /* loop counters */
+  uint32_t i = 0u;                               /* element offset of the current output row.
+                                                  ** Kept in 32 bits: it grows by numColsB per row
+                                                  ** and a 16-bit value would wrap once the output
+                                                  ** holds more than 65535 elements. */
+  arm_status status;                             /* status of matrix multiplication */
+
+
+#ifdef ARM_MATH_MATRIX_CHECK
+  /* Check for matrix mismatch condition */
+  if((pSrcA->numCols != pSrcB->numRows) ||
+     (pSrcA->numRows != pDst->numRows) || (pSrcB->numCols != pDst->numCols))
+  {
+    /* Set status as ARM_MATH_SIZE_MISMATCH */
+    status = ARM_MATH_SIZE_MISMATCH;
+  }
+  else
+#endif
+  {
+    /* The following loop performs the dot-product of each row in pSrcA with each column in pSrcB */
+    /* row loop */
+    do
+    {
+      /* Output pointer is set to starting address of the row being processed */
+      px = pOut + i;
+
+      /* For every row wise process, the column loop counter is to be initiated */
+      col = numColsB;
+
+      /* For every row wise process, the pIn2 pointer is set
+       ** to the starting address of the pSrcB data */
+      pIn2 = pSrcB->pData;
+
+      j = 0u;
+
+      /* column loop */
+      do
+      {
+        /* Set the variable sum, that acts as accumulator, to zero */
+        sum = 0;
+
+        /* Rewind pIn1 to the start of the row of pSrcA being processed */
+        pIn1 = pInA;
+
+        /* Loop unrolling: four multiply-accumulates per iteration. */
+        colCnt = numColsA >> 2;
+
+
+        /* matrix multiplication */
+        while(colCnt > 0u)
+        {
+          /* c(m,n) = a(1,1)*b(1,1) + a(1,2) * b(2,1) + .... + a(m,p)*b(p,n) */
+          /* Each step places sum in the upper word of a 64-bit value, adds the
+           ** 64-bit product and keeps only the upper 32 bits of the total */
+          sum = (q31_t) ((((q63_t) sum << 32) +
+                          ((q63_t) * pIn1++ * (*pIn2))) >> 32);
+          pIn2 += numColsB;
+          sum = (q31_t) ((((q63_t) sum << 32) +
+                          ((q63_t) * pIn1++ * (*pIn2))) >> 32);
+          pIn2 += numColsB;
+          sum = (q31_t) ((((q63_t) sum << 32) +
+                          ((q63_t) * pIn1++ * (*pIn2))) >> 32);
+          pIn2 += numColsB;
+          sum = (q31_t) ((((q63_t) sum << 32) +
+                          ((q63_t) * pIn1++ * (*pIn2))) >> 32);
+          pIn2 += numColsB;
+
+          /* Decrement the loop counter */
+          colCnt--;
+        }
+
+        /* If the columns of pSrcA is not a multiple of 4, compute any remaining output samples here.
+         ** No loop unrolling is used. */
+        colCnt = numColsA % 0x4u;
+
+        while(colCnt > 0u)
+        {
+          /* c(m,n) = a(1,1)*b(1,1) + a(1,2) * b(2,1) + .... + a(m,p)*b(p,n) */
+          /* Perform the multiply-accumulates */
+          sum = (q31_t) ((((q63_t) sum << 32) +
+                          ((q63_t) * pIn1++ * (*pIn2))) >> 32);
+          pIn2 += numColsB;
+
+          /* Decrement the loop counter */
+          colCnt--;
+        }
+
+        /* Convert the result from 2.30 to 1.31 format and store in destination buffer
+         ** (plain shift, no saturation) */
+        *px++ = sum << 1;
+
+        /* Update the pointer pIn2 to point to the  starting address of the next column */
+        j++;
+        pIn2 = pSrcB->pData + j;
+
+        /* Decrement the column loop counter */
+        col--;
+
+      } while(col > 0u);
+
+      /* Advance the output offset by one output row and
+       ** pInA to the starting address of the next row of pSrcA */
+      i = i + numColsB;
+      pInA = pInA + numColsA;
+
+      /* Decrement the row loop counter */
+      row--;
+
+    } while(row > 0u);
+
+    /* set status as ARM_MATH_SUCCESS */
+    status = ARM_MATH_SUCCESS;
+  }
+  /* Return to application */
+  return (status);
+}
+ 
+/**  
+ * @} end of MatrixMult group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/MatrixFunctions/arm_mat_mult_q15.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,243 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:	    arm_mat_mult_q15.c  
+*  
+* Description:	 Q15 matrix multiplication.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.5  2010/04/26   
+*    incorporated review comments and updated with latest CMSIS layer  
+*  
+* Version 0.0.3  2010/03/10   
+*    Initial version  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupMatrix  
+ */ 
+ 
+/**  
+ * @addtogroup MatrixMult  
+ * @{  
+ */ 
+ 
+ 
+/**
+ * @brief Q15 matrix multiplication
+ * @param[in]       *pSrcA points to the first input matrix structure
+ * @param[in]       *pSrcB points to the second input matrix structure
+ * @param[out]      *pDst points to output matrix structure
+ * @param[in]		*pState points to the array for storing intermediate results.
+ *                  It receives the transpose of <code>pSrcB</code> and therefore needs room for
+ *                  <code>pSrcB->numRows * pSrcB->numCols</code> elements.
+ * @return     		The function returns either
+ * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
+ *
+ * @details
+ * <b>Scaling and Overflow Behavior:</b>
+ *
+ * \par
+ * The function is implemented using a 64-bit internal accumulator. The inputs to the
+ * multiplications are in 1.15 format and multiplications yield a 2.30 result.
+ * The 2.30 intermediate
+ * results are accumulated in a 64-bit accumulator in 34.30 format. This approach
+ * provides 33 guard bits and there is no risk of overflow. The 34.30 result is then
+ * truncated to 34.15 format by discarding the low 15 bits and then saturated to
+ * 1.15 format.
+ *
+ * \par
+ * Refer to <code>arm_mat_mult_fast_q15()</code> for a faster but less precise version of this function.
+ *
+ * \par
+ * Size checking is compiled in only when <code>ARM_MATH_MATRIX_CHECK</code> is defined.
+ * All loops over rows and output columns are do/while loops, so the matrix
+ * dimensions are expected to be non-zero.
+ */
+
+arm_status arm_mat_mult_q15(
+  const arm_matrix_instance_q15 * pSrcA,
+  const arm_matrix_instance_q15 * pSrcB,
+  arm_matrix_instance_q15 * pDst,
+  q15_t * pState)
+{
+  q63_t sum;                                     /* accumulator */
+  q31_t in;                                      /* Temporary variable to hold the input value */
+  q15_t *pSrcBT = pState;                        /* input data matrix pointer for transpose */
+  q15_t *pInA = pSrcA->pData;                    /* input data matrix pointer A of Q15 type */
+  q15_t *pInB = pSrcB->pData;                    /* input data matrix pointer B of Q15 type */
+  q15_t *px;                                     /* Temporary output data matrix pointer */
+  uint16_t numRowsA = pSrcA->numRows;            /* number of rows of input matrix A    */
+  uint16_t numColsB = pSrcB->numCols;            /* number of columns of input matrix B */
+  uint16_t numColsA = pSrcA->numCols;            /* number of columns of input matrix A */
+  uint16_t numRowsB = pSrcB->numRows;            /* number of rows of input matrix B    */
+  uint16_t col, row = numRowsB, colCnt;          /* loop counters */
+  uint32_t i = 0u;                               /* row index during the transpose, then element
+                                                  ** offset of the current row of A. Kept in 32 bits:
+                                                  ** the offset grows by numColsA per row and a 16-bit
+                                                  ** value would wrap once A holds more than 65535
+                                                  ** elements. */
+  arm_status status;                             /* status of matrix multiplication */
+
+#ifdef ARM_MATH_MATRIX_CHECK
+  /* Check for matrix mismatch condition */
+  if((pSrcA->numCols != pSrcB->numRows) ||
+     (pSrcA->numRows != pDst->numRows) || (pSrcB->numCols != pDst->numCols))
+  {
+    /* Set status as ARM_MATH_SIZE_MISMATCH */
+    status = ARM_MATH_SIZE_MISMATCH;
+  }
+  else
+#endif
+  {
+    /* Matrix transpose: copy pSrcB into pState so that each column of B becomes
+     ** a contiguous run of numRowsB elements */
+    do
+    {
+      /* Apply loop unrolling and exchange the columns with row elements */
+      col = numColsB >> 2;
+
+      /* The pointer px is set to starting address of the column being processed */
+      px = pSrcBT + i;
+
+      /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
+       ** a second loop below computes the remaining 1 to 3 samples. */
+      while(col > 0u)
+      {
+        /* Read two elements from the row.
+         ** NOTE(review): __SIMD32 is defined outside this file; presumably a 32-bit
+         ** access through the q15_t pointer - confirm in arm_math.h */
+        in = *__SIMD32(pInB)++;
+
+        /* Unpack and store one element in the destination (low halfword;
+         ** assumes the first element sits in the low half - TODO confirm endianness) */
+        *px = (q15_t) in;
+
+        /* Update the pointer px to point to the next row of the transposed matrix */
+        px += numRowsB;
+
+        /* Unpack and store the second element in the destination */
+        *px = (q15_t) ((in & (q31_t) 0xffff0000) >> 16);
+
+        /* Update the pointer px to point to the next row of the transposed matrix */
+        px += numRowsB;
+
+        /* Read two elements from the row */
+        in = *__SIMD32(pInB)++;
+
+        /* Unpack and store one element in the destination */
+        *px = (q15_t) in;
+
+        /* Update the pointer px to point to the next row of the transposed matrix */
+        px += numRowsB;
+
+        /* Unpack and store the second element in the destination */
+        *px = (q15_t) ((in & (q31_t) 0xffff0000) >> 16);
+
+        /* Update the pointer px to point to the next row of the transposed matrix */
+        px += numRowsB;
+
+        /* Decrement the column loop counter */
+        col--;
+      }
+
+      /* If the columns of pSrcB is not a multiple of 4, compute any remaining output samples here.
+       ** No loop unrolling is used. */
+      col = numColsB % 0x4u;
+
+      while(col > 0u)
+      {
+        /* Read and store the input element in the destination */
+        *px = *pInB++;
+
+        /* Update the pointer px to point to the next row of the transposed matrix */
+        px += numRowsB;
+
+        /* Decrement the column loop counter */
+        col--;
+      }
+
+      /* Next row of pSrcB, i.e. next column of the transposed matrix */
+      i++;
+
+      /* Decrement the row loop counter */
+      row--;
+
+    } while(row > 0u);
+
+    /* Reset the variables for the usage in the following multiplication process */
+    row = numRowsA;
+    i = 0u;
+    px = pDst->pData;
+
+    /* The following loop performs the dot-product of each row in pSrcA with each column in pSrcB */
+    /* row loop */
+    do
+    {
+      /* For every row wise process, the column loop counter is to be initiated */
+      col = numColsB;
+
+      /* For every row wise process, the pInB pointer is set
+       ** to the starting address of the transposed pSrcB data */
+      pInB = pSrcBT;
+
+      /* column loop */
+      do
+      {
+        /* Set the variable sum, that acts as accumulator, to zero */
+        sum = 0;
+
+        /* Apply loop unrolling and compute 2 MACs simultaneously. */
+        colCnt = numColsA >> 1;
+
+        /* Rewind pInA to the start of the row of pSrcA being processed */
+        pInA = pSrcA->pData + i;
+
+        /* matrix multiplication */
+        while(colCnt > 0u)
+        {
+          /* c(m,n) = a(1,1)*b(1,1) + a(1,2) * b(2,1) + .... + a(m,p)*b(p,n) */
+          sum = __SMLALD(*__SIMD32(pInA)++, *__SIMD32(pInB)++, sum);
+
+          /* Decrement the loop counter */
+          colCnt--;
+        }
+
+        /* process odd column samples */
+        if((numColsA & 0x1u) > 0u)
+        {
+          /* c(m,n) = a(1,1)*b(1,1) + a(1,2) * b(2,1) + .... + a(m,p)*b(p,n) */
+          /* pInB is advanced so that it lands on the next transposed column;
+           ** pInA is rewound at the top of the column loop and needs no increment */
+          sum += ((q31_t) * pInA * (*pInB++));
+        }
+
+        /* Drop the low 15 bits, saturate to 16 bits and store the result in the destination buffer */
+        *px = (q15_t) (__SSAT((sum >> 15), 16));
+        px++;
+
+        /* Decrement the column loop counter */
+        col--;
+
+      } while(col > 0u);
+
+      /* Advance the offset to the next row of pSrcA */
+      i = i + numColsA;
+
+      /* Decrement the row loop counter */
+      row--;
+
+    } while(row > 0u);
+
+    /* set status as ARM_MATH_SUCCESS */
+    status = ARM_MATH_SUCCESS;
+  }
+
+  /* Return to application */
+  return (status);
+}
+ 
+/**  
+ * @} end of MatrixMult group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/MatrixFunctions/arm_mat_mult_q31.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,195 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:	    arm_mat_mult_q31.c  
+*  
+* Description:	 Q31 matrix multiplication.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.5  2010/04/26   
+*    incorporated review comments and updated with latest CMSIS layer  
+*  
+* Version 0.0.3  2010/03/10   
+*    Initial version  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupMatrix  
+ */ 
+ 
+/**  
+ * @addtogroup MatrixMult  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief Q31 matrix multiplication  
+ * @param[in]       *pSrcA points to the first input matrix structure  
+ * @param[in]       *pSrcB points to the second input matrix structure  
+ * @param[out]      *pDst points to output matrix structure  
+ * @return     		The function returns either  
+ * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.  
+ *  
+ * @details  
+ * <b>Scaling and Overflow Behavior:</b>  
+ *  
+ * \par  
+ * The function is implemented using an internal 64-bit accumulator.  
+ * The accumulator has a 2.62 format and maintains full precision of the intermediate  
+ * multiplication results but provides only a single guard bit. There is no saturation  
+ * on intermediate additions. Thus, if the accumulator overflows it wraps around and  
+ * distorts the result. The input signals should be scaled down to avoid intermediate  
+ * overflows. The input is thus scaled down by log2(numColsA) bits  
+ * to avoid overflows, as a total of numColsA additions are performed internally.  
+ * The 2.62 accumulator is right shifted by 31 bits and saturated to 1.31 format to yield the final result.  
+ *  
+ * \par  
+ * See <code>arm_mat_mult_fast_q31()</code> for a faster but less precise implementation of this function.  
+ *  
+ */ 
+ 
+arm_status arm_mat_mult_q31(
+  const arm_matrix_instance_q31 * pSrcA,
+  const arm_matrix_instance_q31 * pSrcB,
+  arm_matrix_instance_q31 * pDst)
+{
+  q31_t *pInA = pSrcA->pData;                    /* start of the row of A being processed */
+  q31_t *pIn1;                                   /* walks along the current row of A */
+  q31_t *pIn2;                                   /* walks down the current column of B */
+  q31_t *px = pDst->pData;                       /* running output pointer */
+  q63_t sum;                                     /* 64-bit accumulator */
+  uint16_t numRowsA = pSrcA->numRows;            /* number of rows of input matrix A    */
+  uint16_t numColsB = pSrcB->numCols;            /* number of columns of input matrix B */
+  uint16_t numColsA = pSrcA->numCols;            /* number of columns of input matrix A */
+  uint16_t row, col, colCnt;                     /* loop counters */
+  arm_status status;                             /* status of matrix multiplication */
+
+
+#ifdef ARM_MATH_MATRIX_CHECK
+  /* Check for matrix mismatch condition */
+  if((pSrcA->numCols != pSrcB->numRows) ||
+     (pSrcA->numRows != pDst->numRows) || (pSrcB->numCols != pDst->numCols))
+  {
+    /* Set status as ARM_MATH_SIZE_MISMATCH */
+    status = ARM_MATH_SIZE_MISMATCH;
+  }
+  else
+#endif
+  {
+    /* Each output element is the dot-product of one row of pSrcA with one
+     ** column of pSrcB.  Outputs are produced in storage order, so px is
+     ** simply advanced after every store; no per-row element offset is kept,
+     ** because a 16-bit offset would wrap once the destination holds more
+     ** than 65535 elements.
+     ** Counted loops are used so that a matrix with zero rows or zero
+     ** columns results in no memory access at all. */
+
+    /* row loop */
+    for(row = 0u; row < numRowsA; row++)
+    {
+      /* column loop */
+      for(col = 0u; col < numColsB; col++)
+      {
+        /* Set the variable sum, that acts as accumulator, to zero */
+        sum = 0;
+
+        /* pIn1 restarts at the beginning of the current row of A */
+        pIn1 = pInA;
+
+        /* pIn2 starts at the top of column 'col' of B; consecutive
+         ** elements of that column are numColsB samples apart */
+        pIn2 = pSrcB->pData + col;
+
+        /* Apply loop unrolling and compute 4 MACs per iteration. */
+        colCnt = numColsA >> 2;
+
+        /* matrix multiplication */
+        while(colCnt > 0u)
+        {
+          /* c(m,n) = a(1,1)*b(1,1) + a(1,2) * b(2,1) + .... + a(m,p)*b(p,n) */
+          /* Perform the multiply-accumulates */
+          sum += (q63_t) * pIn1++ * *pIn2;
+          pIn2 += numColsB;
+
+          sum += (q63_t) * pIn1++ * *pIn2;
+          pIn2 += numColsB;
+
+          sum += (q63_t) * pIn1++ * *pIn2;
+          pIn2 += numColsB;
+
+          sum += (q63_t) * pIn1++ * *pIn2;
+          pIn2 += numColsB;
+
+          /* Decrement the loop counter */
+          colCnt--;
+        }
+
+        /* If the columns of pSrcA is not a multiple of 4, compute the remaining MACs here.
+         ** No loop unrolling is used. */
+        colCnt = numColsA % 0x4u;
+
+        while(colCnt > 0u)
+        {
+          /* c(m,n) = a(1,1)*b(1,1) + a(1,2) * b(2,1) + .... + a(m,p)*b(p,n) */
+          /* Perform the multiply-accumulates */
+          sum += (q63_t) * pIn1++ * *pIn2;
+          pIn2 += numColsB;
+
+          /* Decrement the loop counter */
+          colCnt--;
+        }
+
+        /* Shift the 2.62 accumulator right by 31 bits and saturate to 1.31
+         ** before storing; a plain cast would wrap for values outside the
+         ** Q31 range (for example (-1.0) * (-1.0)) */
+        *px++ = clip_q63_to_q31(sum >> 31);
+      }
+
+      /* Advance pInA to the starting address of the next row of A */
+      pInA = pInA + numColsA;
+    }
+
+    /* set status as ARM_MATH_SUCCESS */
+    status = ARM_MATH_SUCCESS;
+  }
+  /* Return to application */
+  return (status);
+}
+ 
+/**  
+ * @} end of MatrixMult group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/MatrixFunctions/arm_mat_scale_f32.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,138 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:        arm_mat_scale_f32.c  
+*  
+* Description:	Multiplies a floating-point matrix by a scalar.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.5  2010/04/26   
+*    incorporated review comments and updated with latest CMSIS layer  
+*  
+* Version 0.0.3  2010/03/10   
+*    Initial version  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupMatrix  
+ */ 
+ 
+/**  
+ * @defgroup MatrixScale Matrix Scale  
+ *  
+ * Multiplies a matrix by a scalar.  This is accomplished by multiplying each element in the  
+ * matrix by the scalar.  For example:  
+ * \image html MatrixScale.gif "Matrix Scaling of a 3 x 3 matrix"  
+ *  
+ * The function checks to make sure that the input and output matrices are of the same size.  
+ *  
+ * In the fixed-point Q15 and Q31 functions, <code>scale</code> is represented by  
+ * a fractional multiplication <code>scaleFract</code> and an arithmetic shift <code>shift</code>.  
+ * The shift allows the gain of the scaling operation to exceed 1.0.  
+ * The overall scale factor applied to the fixed-point data is  
+ * <pre>  
+ *     scale = scaleFract * 2^shift.  
+ * </pre>  
+ */ 
+ 
+/**  
+ * @addtogroup MatrixScale  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief Floating-point matrix scaling.  
+ * @param[in]       *pSrc points to input matrix structure  
+ * @param[in]       scale scale factor to be applied   
+ * @param[out]      *pDst points to output matrix structure  
+ * @return     		The function returns either <code>ARM_MATH_SIZE_MISMATCH</code>   
+ * or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.  
+ *  
+ */ 
+ 
+arm_status arm_mat_scale_f32(
+  const arm_matrix_instance_f32 * pSrc,
+  float32_t scale,
+  arm_matrix_instance_f32 * pDst)
+{
+  float32_t *pSrcData = pSrc->pData;             /* read cursor into the input matrix */
+  float32_t *pDstData = pDst->pData;             /* write cursor into the output matrix */
+  uint32_t total;                                /* element count of the input matrix */
+  uint32_t remaining;                            /* loop counter */
+
+#ifdef ARM_MATH_MATRIX_CHECK
+  /* Reject the call when source and destination dimensions differ */
+  if((pSrc->numRows != pDst->numRows) || (pSrc->numCols != pDst->numCols))
+  {
+    return (ARM_MATH_SIZE_MISMATCH);
+  }
+#endif
+
+  /* The matrix is processed as one flat array of numRows * numCols samples */
+  total = (uint32_t) pSrc->numRows * pSrc->numCols;
+
+  /* Unrolled part: four outputs per iteration, C(m,n) = A(m,n) * scale */
+  for(remaining = total >> 2; remaining > 0u; remaining--)
+  {
+    pDstData[0] = pSrcData[0] * scale;
+    pDstData[1] = pSrcData[1] * scale;
+    pDstData[2] = pSrcData[2] * scale;
+    pDstData[3] = pSrcData[3] * scale;
+
+    pSrcData += 4;
+    pDstData += 4;
+  }
+
+  /* Tail: the 0 to 3 samples left over when the count is not a multiple of 4 */
+  for(remaining = total & 0x3u; remaining > 0u; remaining--)
+  {
+    *pDstData = *pSrcData * scale;
+    pSrcData++;
+    pDstData++;
+  }
+
+  /* All samples processed */
+  return (ARM_MATH_SUCCESS);
+}
+ 
+/**  
+ * @} end of MatrixScale group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/MatrixFunctions/arm_mat_scale_q15.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,133 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:	    arm_mat_scale_q15.c  
+*  
+* Description:	Multiplies a Q15 matrix by a scalar.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.5  2010/04/26   
+*    incorporated review comments and updated with latest CMSIS layer  
+*  
+* Version 0.0.3  2010/03/10   
+*    Initial version  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupMatrix  
+ */ 
+ 
+/**  
+ * @addtogroup MatrixScale  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief Q15 matrix scaling.  
+ * @param[in]       *pSrc points to input matrix  
+ * @param[in]       scaleFract fractional portion of the scale factor  
+ * @param[in]       shift number of bits to shift the result by  
+ * @param[out]      *pDst points to output matrix structure  
+ * @return     		The function returns either  
+ * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.  
+ *  
+ * @details  
+ * <b>Scaling and Overflow Behavior:</b>  
+ * \par  
+ * The input data <code>*pSrc</code> and <code>scaleFract</code> are in 1.15 format.  
+ * These are multiplied to yield a 2.30 intermediate result and this is shifted with saturation to 1.15 format.  
+ */ 
+ 
+arm_status arm_mat_scale_q15(
+  const arm_matrix_instance_q15 * pSrc,
+  q15_t scaleFract,
+  int32_t shift,
+  arm_matrix_instance_q15 * pDst)
+{
+  q15_t *pSrcData = pSrc->pData;                 /* read cursor into the input matrix */
+  q15_t *pDstData = pDst->pData;                 /* write cursor into the output matrix */
+  q31_t product;                                 /* sample * scaleFract before shifting */
+  uint32_t total;                                /* element count of the input matrix */
+  uint32_t remaining;                            /* loop counter */
+  /* Right shift applied to every product.  NOTE: the value is used as a
+   ** shift count without any range check on 'shift'. */
+  int32_t postShift = 15 - shift;
+
+#ifdef ARM_MATH_MATRIX_CHECK
+  /* Reject the call when source and destination dimensions differ */
+  if((pSrc->numRows != pDst->numRows) || (pSrc->numCols != pDst->numCols))
+  {
+    return (ARM_MATH_SIZE_MISMATCH);
+  }
+#endif
+
+  /* The matrix is processed as one flat array of numRows * numCols samples */
+  total = (uint32_t) pSrc->numRows * pSrc->numCols;
+
+  /* Unrolled part: four outputs per iteration.
+   ** C(m,n) = A(m,n) * k : multiply, shift, saturate to 16 bits, store. */
+  for(remaining = total >> 2; remaining > 0u; remaining--)
+  {
+    product = (q31_t) (*pSrcData++) * scaleFract;
+    *pDstData++ = (q15_t) (__SSAT(product >> postShift, 16));
+
+    product = (q31_t) (*pSrcData++) * scaleFract;
+    *pDstData++ = (q15_t) (__SSAT(product >> postShift, 16));
+
+    product = (q31_t) (*pSrcData++) * scaleFract;
+    *pDstData++ = (q15_t) (__SSAT(product >> postShift, 16));
+
+    product = (q31_t) (*pSrcData++) * scaleFract;
+    *pDstData++ = (q15_t) (__SSAT(product >> postShift, 16));
+  }
+
+  /* Tail: the 0 to 3 samples left over when the count is not a multiple of 4 */
+  for(remaining = total & 0x3u; remaining > 0u; remaining--)
+  {
+    product = (q31_t) (*pSrcData++) * scaleFract;
+    *pDstData++ = (q15_t) (__SSAT(product >> postShift, 16));
+  }
+
+  /* All samples processed */
+  return (ARM_MATH_SUCCESS);
+}
+ 
+/**  
+ * @} end of MatrixScale group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/MatrixFunctions/arm_mat_scale_q31.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,134 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:	    arm_mat_scale_q31.c  
+*  
+* Description:	Multiplies a Q31 matrix by a scalar.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.5  2010/04/26   
+*    incorporated review comments and updated with latest CMSIS layer  
+*  
+* Version 0.0.3  2010/03/10   
+*    Initial version  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupMatrix  
+ */ 
+ 
+/**  
+ * @addtogroup MatrixScale  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief Q31 matrix scaling.  
+ * @param[in]       *pSrc points to input matrix  
+ * @param[in]       scaleFract fractional portion of the scale factor  
+ * @param[in]       shift number of bits to shift the result by  
+ * @param[out]      *pDst points to output matrix structure  
+ * @return     		The function returns either  
+ * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.  
+ *  
+ * @details  
+ * <b>Scaling and Overflow Behavior:</b>  
+ * \par  
+ * The input data <code>*pSrc</code> and <code>scaleFract</code> are in 1.31 format.  
+ * These are multiplied to yield a 2.62 intermediate result and this is shifted with saturation to 1.31 format.  
+ */ 
+ 
+arm_status arm_mat_scale_q31(
+  const arm_matrix_instance_q31 * pSrc,
+  q31_t scaleFract,
+  int32_t shift,
+  arm_matrix_instance_q31 * pDst)
+{
+  q31_t *pSrcData = pSrc->pData;                 /* read cursor into the input matrix */
+  q31_t *pDstData = pDst->pData;                 /* write cursor into the output matrix */
+  uint32_t total;                                /* element count of the input matrix */
+  uint32_t remaining;                            /* loop counter */
+  /* Right shift applied to every 64-bit product.  NOTE: the value is used
+   ** as a shift count without any range check on 'shift'. */
+  int32_t postShift = 31 - shift;
+
+#ifdef ARM_MATH_MATRIX_CHECK
+  /* Reject the call when source and destination dimensions differ */
+  if((pSrc->numRows != pDst->numRows) || (pSrc->numCols != pDst->numCols))
+  {
+    return (ARM_MATH_SIZE_MISMATCH);
+  }
+#endif
+
+  /* The matrix is processed as one flat array of numRows * numCols samples */
+  total = (uint32_t) pSrc->numRows * pSrc->numCols;
+
+  /* Unrolled part: four outputs per iteration.
+   ** C(m,n) = A(m,n) * k : 64-bit multiply, shift, saturate to Q31, store. */
+  for(remaining = total >> 2u; remaining > 0u; remaining--)
+  {
+    *pDstData++ =
+      clip_q63_to_q31(((q63_t) * pSrcData++ * scaleFract) >> postShift);
+    *pDstData++ =
+      clip_q63_to_q31(((q63_t) * pSrcData++ * scaleFract) >> postShift);
+    *pDstData++ =
+      clip_q63_to_q31(((q63_t) * pSrcData++ * scaleFract) >> postShift);
+    *pDstData++ =
+      clip_q63_to_q31(((q63_t) * pSrcData++ * scaleFract) >> postShift);
+  }
+
+  /* Tail: the 0 to 3 samples left over when the count is not a multiple of 4 */
+  for(remaining = total & 0x3u; remaining > 0u; remaining--)
+  {
+    *pDstData++ =
+      clip_q63_to_q31(((q63_t) * pSrcData++ * scaleFract) >> postShift);
+  }
+
+  /* All samples processed */
+  return (ARM_MATH_SUCCESS);
+}
+ 
+/**  
+ * @} end of MatrixScale group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/MatrixFunctions/arm_mat_sub_f32.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,133 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:	    arm_mat_sub_f32.c  
+*  
+* Description:	Floating-point matrix subtraction.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.5  2010/04/26   
+*    incorporated review comments and updated with latest CMSIS layer  
+*  
+* Version 0.0.3  2010/03/10   
+*    Initial version  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupMatrix  
+ */ 
+ 
+/**  
+ * @defgroup MatrixSub Matrix Subtraction  
+ *  
+ * Subtract two matrices.  
+ * \image html MatrixSubtraction.gif "Subtraction of two 3 x 3 matrices"  
+ *  
+ * The functions check to make sure that  
+ * <code>pSrcA</code>, <code>pSrcB</code>, and <code>pDst</code> have the same  
+ * number of rows and columns.  
+ */ 
+ 
+/**  
+ * @addtogroup MatrixSub  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief Floating-point matrix subtraction  
+ * @param[in]       *pSrcA points to the first input matrix structure  
+ * @param[in]       *pSrcB points to the second input matrix structure  
+ * @param[out]      *pDst points to output matrix structure  
+ * @return     		The function returns either  
+ * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.  
+ */ 
+ 
+arm_status arm_mat_sub_f32(
+  const arm_matrix_instance_f32 * pSrcA,
+  const arm_matrix_instance_f32 * pSrcB,
+  arm_matrix_instance_f32 * pDst)
+{
+  float32_t *pMinuend = pSrcA->pData;            /* read cursor into matrix A */
+  float32_t *pSubtrahend = pSrcB->pData;         /* read cursor into matrix B */
+  float32_t *pResult = pDst->pData;              /* write cursor into the output matrix */
+  uint32_t total;                                /* element count of matrix A */
+  uint32_t remaining;                            /* loop counter */
+
+#ifdef ARM_MATH_MATRIX_CHECK
+  /* All three matrices must share the same dimensions */
+  if((pSrcA->numRows != pSrcB->numRows) ||
+     (pSrcA->numCols != pSrcB->numCols) ||
+     (pSrcA->numRows != pDst->numRows) || (pSrcA->numCols != pDst->numCols))
+  {
+    return (ARM_MATH_SIZE_MISMATCH);
+  }
+#endif
+
+  /* The matrices are processed as flat arrays of numRows * numCols samples */
+  total = (uint32_t) pSrcA->numRows * pSrcA->numCols;
+
+  /* Unrolled part: four outputs per iteration, C(m,n) = A(m,n) - B(m,n) */
+  for(remaining = total >> 2u; remaining > 0u; remaining--)
+  {
+    pResult[0] = pMinuend[0] - pSubtrahend[0];
+    pResult[1] = pMinuend[1] - pSubtrahend[1];
+    pResult[2] = pMinuend[2] - pSubtrahend[2];
+    pResult[3] = pMinuend[3] - pSubtrahend[3];
+
+    pMinuend += 4;
+    pSubtrahend += 4;
+    pResult += 4;
+  }
+
+  /* Tail: the 0 to 3 samples left over when the count is not a multiple of 4 */
+  for(remaining = total & 0x3u; remaining > 0u; remaining--)
+  {
+    *pResult = *pMinuend - *pSubtrahend;
+    pMinuend++;
+    pSubtrahend++;
+    pResult++;
+  }
+
+  /* All samples processed */
+  return (ARM_MATH_SUCCESS);
+}
+ 
+/**  
+ * @} end of MatrixSub group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/MatrixFunctions/arm_mat_sub_q15.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,126 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:	    arm_mat_sub_q15.c  
+*  
+* Description:	Q15 Matrix subtraction  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.5  2010/04/26   
+*    incorporated review comments and updated with latest CMSIS layer  
+*  
+* Version 0.0.3  2010/03/10   
+*    Initial version  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupMatrix  
+ */ 
+ 
+/**  
+ * @addtogroup MatrixSub  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief Q15 matrix subtraction.  
+ * @param[in]       *pSrcA points to the first input matrix structure  
+ * @param[in]       *pSrcB points to the second input matrix structure  
+ * @param[out]      *pDst points to output matrix structure  
+ * @return     		The function returns either  
+ * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.  
+ *  
+ * <b>Scaling and Overflow Behavior:</b>  
+ * \par  
+ * The function uses saturating arithmetic.  
+ * Results outside of the allowable Q15 range [0x8000 0x7FFF] will be saturated.  
+ */ 
+ 
+arm_status arm_mat_sub_q15(
+  const arm_matrix_instance_q15 * pSrcA,
+  const arm_matrix_instance_q15 * pSrcB,
+  arm_matrix_instance_q15 * pDst)
+{
+  q15_t *pMinuend = pSrcA->pData;                /* read cursor into matrix A */
+  q15_t *pSubtrahend = pSrcB->pData;             /* read cursor into matrix B */
+  q15_t *pResult = pDst->pData;                  /* write cursor into the output matrix */
+  q31_t packedA, packedB;                        /* two Q15 samples read as one 32-bit word */
+  uint32_t total;                                /* element count of matrix A */
+  uint32_t remaining;                            /* loop counter */
+
+#ifdef ARM_MATH_MATRIX_CHECK
+  /* All three matrices must share the same dimensions */
+  if((pSrcA->numRows != pSrcB->numRows) ||
+     (pSrcA->numCols != pSrcB->numCols) ||
+     (pSrcA->numRows != pDst->numRows) || (pSrcA->numCols != pDst->numCols))
+  {
+    return (ARM_MATH_SIZE_MISMATCH);
+  }
+#endif
+
+  /* The matrices are processed as flat arrays of numRows * numCols samples */
+  total = (uint32_t) pSrcA->numRows * pSrcA->numCols;
+
+  /* Unrolled part: four outputs per iteration, C(m,n) = A(m,n) - B(m,n).
+   ** Each __QSUB16 call handles a pair of Q15 samples accessed as one word. */
+  for(remaining = total >> 2u; remaining > 0u; remaining--)
+  {
+    /* samples 0 and 1 */
+    packedA = *__SIMD32(pMinuend)++;
+    packedB = *__SIMD32(pSubtrahend)++;
+    *__SIMD32(pResult)++ = __QSUB16(packedA, packedB);
+
+    /* samples 2 and 3 */
+    packedA = *__SIMD32(pMinuend)++;
+    packedB = *__SIMD32(pSubtrahend)++;
+    *__SIMD32(pResult)++ = __QSUB16(packedA, packedB);
+  }
+
+  /* Tail: the 0 to 3 samples left over when numSamples is not a multiple
+   ** of 4, handled one Q15 sample at a time */
+  for(remaining = total & 0x3u; remaining > 0u; remaining--)
+  {
+    *pResult++ = (q15_t) __QSUB16(*pMinuend++, *pSubtrahend++);
+  }
+
+  /* All samples processed */
+  return (ARM_MATH_SUCCESS);
+}
+ 
+/**  
+ * @} end of MatrixSub group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/MatrixFunctions/arm_mat_sub_q31.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,129 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:	    arm_mat_sub_q31.c  
+*  
+* Description:	Q31 matrix subtraction  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.5  2010/04/26   
+*    incorporated review comments and updated with latest CMSIS layer  
+*  
+* Version 0.0.3  2010/03/10   
+*    Initial version  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupMatrix  
+ */ 
+ 
+/**  
+ * @addtogroup MatrixSub  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief Q31 matrix subtraction.  
+ * @param[in]       *pSrcA points to the first input matrix structure  
+ * @param[in]       *pSrcB points to the second input matrix structure  
+ * @param[out]      *pDst points to output matrix structure  
+ * @return     		The function returns either  
+ * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.  
+ *  
+ * <b>Scaling and Overflow Behavior:</b>  
+ * \par  
+ * The function uses saturating arithmetic.  
+ * Results outside of the allowable Q31 range [0x80000000 0x7FFFFFFF] will be saturated.  
+ */ 
+ 
+ 
+arm_status arm_mat_sub_q31(
+  const arm_matrix_instance_q31 * pSrcA,
+  const arm_matrix_instance_q31 * pSrcB,
+  arm_matrix_instance_q31 * pDst)
+{
+  q31_t *pMinuend = pSrcA->pData;                /* read cursor into matrix A */
+  q31_t *pSubtrahend = pSrcB->pData;             /* read cursor into matrix B */
+  q31_t *pResult = pDst->pData;                  /* write cursor into the output matrix */
+  uint32_t total;                                /* element count of matrix A */
+  uint32_t remaining;                            /* loop counter */
+
+#ifdef ARM_MATH_MATRIX_CHECK
+  /* All three matrices must share the same dimensions */
+  if((pSrcA->numRows != pSrcB->numRows) ||
+     (pSrcA->numCols != pSrcB->numCols) ||
+     (pSrcA->numRows != pDst->numRows) || (pSrcA->numCols != pDst->numCols))
+  {
+    return (ARM_MATH_SIZE_MISMATCH);
+  }
+#endif
+
+  /* The matrices are processed as flat arrays of numRows * numCols samples */
+  total = (uint32_t) pSrcA->numRows * pSrcA->numCols;
+
+  /* Unrolled part: four outputs per iteration.
+   ** C(m,n) = A(m,n) - B(m,n), computed with __QSUB. */
+  for(remaining = total >> 2u; remaining > 0u; remaining--)
+  {
+    pResult[0] = __QSUB(pMinuend[0], pSubtrahend[0]);
+    pResult[1] = __QSUB(pMinuend[1], pSubtrahend[1]);
+    pResult[2] = __QSUB(pMinuend[2], pSubtrahend[2]);
+    pResult[3] = __QSUB(pMinuend[3], pSubtrahend[3]);
+
+    pMinuend += 4;
+    pSubtrahend += 4;
+    pResult += 4;
+  }
+
+  /* Tail: the 0 to 3 samples left over when the count is not a multiple of 4 */
+  for(remaining = total & 0x3u; remaining > 0u; remaining--)
+  {
+    *pResult = __QSUB(*pMinuend, *pSubtrahend);
+    pMinuend++;
+    pSubtrahend++;
+    pResult++;
+  }
+
+  /* All samples processed */
+  return (ARM_MATH_SUCCESS);
+}
+ 
+/**  
+ * @} end of MatrixSub group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/MatrixFunctions/arm_mat_trans_f32.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,158 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:	    arm_mat_trans_f32.c  
+*  
+* Description:	Floating-point matrix transpose.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.5  2010/04/26   
+*    incorporated review comments and updated with latest CMSIS layer  
+*  
+* Version 0.0.3  2010/03/10   
+*    Initial version  
+* -------------------------------------------------------------------- */ 
+ 
+/**  
+ * @defgroup MatrixTrans Matrix Transpose  
+ *  
+ * Transposes a matrix.  
+ * Transposing an <code>M x N</code> matrix flips it around the center diagonal and results in an <code>N x M</code> matrix.  
+ * \image html MatrixTranspose.gif "Transpose of a 3 x 3 matrix"  
+ */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupMatrix  
+ */ 
+ 
+/**  
+ * @addtogroup MatrixTrans  
+ * @{  
+ */ 
+ 
+/**  
+  * @brief Floating-point matrix transpose.  
+  * @param[in]  *pSrc points to the input matrix  
+  * @param[out] *pDst points to the output matrix  
+  * @return 	The function returns either  <code>ARM_MATH_SIZE_MISMATCH</code>  
+  * or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.  
+  * A source matrix with zero rows is treated as empty: nothing is read or written.  
+  */ 
+ 
+arm_status arm_mat_trans_f32( 
+  const arm_matrix_instance_f32 * pSrc, 
+  arm_matrix_instance_f32 * pDst) 
+{ 
+  float32_t *pIn = pSrc->pData;                  /* input data matrix pointer, advanced sequentially */ 
+  float32_t *pOut = pDst->pData;                 /* output data matrix pointer */ 
+  float32_t *px;                                 /* Temporary output data matrix pointer */ 
+  uint16_t nRows = pSrc->numRows;                /* number of source rows; also the step between output writes */ 
+  uint16_t nColumns = pSrc->numCols;             /* number of columns */ 
+  uint16_t blkCnt, i = 0u, row = nRows;          /* loop counters */ 
+  arm_status status;                             /* status of matrix transpose  */ 
+ 
+ 
+#ifdef ARM_MATH_MATRIX_CHECK 
+  /* The destination must have the transposed dimensions of the source */ 
+  if((pSrc->numRows != pDst->numCols) || (pSrc->numCols != pDst->numRows)) 
+  { 
+    /* Set status as ARM_MATH_SIZE_MISMATCH */ 
+    status = ARM_MATH_SIZE_MISMATCH; 
+  } 
+  else 
+#endif 
+  { 
+    /* Each source row is written out as one column of the destination. */ 
+    /* Pre-tested row loop: a zero-row matrix must not wrap the 16-bit counter. */ 
+    while(row > 0u) 
+    { 
+      /* Loop Unrolling */ 
+      blkCnt = nColumns >> 2; 
+ 
+      /* The pointer px is set to starting address of the column being processed */ 
+      px = pOut + i; 
+ 
+      /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.  
+       ** a second loop below computes the remaining 0 to 3 samples. */ 
+      while(blkCnt > 0u)        /* column loop */ 
+      { 
+        /* Read and store the input element in the destination */ 
+        *px = *pIn++; 
+ 
+        /* Update the pointer px to point to the next row of the transposed matrix */ 
+        px += nRows; 
+ 
+        /* Read and store the input element in the destination */ 
+        *px = *pIn++; 
+ 
+        /* Update the pointer px to point to the next row of the transposed matrix */ 
+        px += nRows; 
+ 
+        /* Read and store the input element in the destination */ 
+        *px = *pIn++; 
+ 
+        /* Update the pointer px to point to the next row of the transposed matrix */ 
+        px += nRows; 
+ 
+        /* Read and store the input element in the destination */ 
+        *px = *pIn++; 
+ 
+        /* Update the pointer px to point to the next row of the transposed matrix */ 
+        px += nRows; 
+ 
+        /* Decrement the column loop counter */ 
+        blkCnt--; 
+      } 
+ 
+      /* Transpose the 0 to 3 elements left when nColumns is not a multiple of 4. */ 
+      blkCnt = nColumns % 0x4u; 
+ 
+      while(blkCnt > 0u) 
+      { 
+        /* Read and store the input element in the destination */ 
+        *px = *pIn++; 
+ 
+        /* Update the pointer px to point to the next row of the transposed matrix */ 
+        px += nRows; 
+ 
+        /* Decrement the column loop counter */ 
+        blkCnt--; 
+      } 
+ 
+      i++; 
+ 
+      /* Decrement the row loop counter */ 
+      row--; 
+ 
+    }                           /* row loop end  */ 
+ 
+    /* Set status as ARM_MATH_SUCCESS */ 
+    status = ARM_MATH_SUCCESS; 
+  } 
+ 
+  /* Return to application */ 
+  return (status); 
+} 
+ 
+/**  
+ * @} end of MatrixTrans group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/MatrixFunctions/arm_mat_trans_q15.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,155 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:	    arm_mat_trans_q15.c  
+*  
+* Description:	Q15 matrix transpose.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.5  2010/04/26   
+*    incorporated review comments and updated with latest CMSIS layer  
+*  
+* Version 0.0.3  2010/03/10   
+*    Initial version  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupMatrix  
+ */ 
+ 
+/**  
+ * @addtogroup MatrixTrans  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief Q15 matrix transpose.  
+ * @param[in]  *pSrc points to the input matrix  
+ * @param[out] *pDst points to the output matrix  
+ * @return 	The function returns either  <code>ARM_MATH_SIZE_MISMATCH</code>  
+ * or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.  
+ * A source matrix with zero rows is treated as empty: nothing is read or written.  
+ */ 
+ 
+arm_status arm_mat_trans_q15( 
+  const arm_matrix_instance_q15 * pSrc, 
+  arm_matrix_instance_q15 * pDst) 
+{ 
+  q15_t *pSrcA = pSrc->pData;                    /* input data matrix pointer, advanced sequentially */ 
+  q15_t *pOut = pDst->pData;                     /* output data matrix pointer */ 
+  uint16_t nRows = pSrc->numRows;                /* number of source rows; also the step between output writes */ 
+  uint16_t nColumns = pSrc->numCols;             /* number of nColumns */ 
+  uint16_t col, row = nRows, i = 0u;             /* row and column loop counters */ 
+  q31_t in;                                      /* holds two packed Q15 elements read in one access */ 
+  arm_status status;                             /* status of matrix transpose */ 
+ 
+ 
+#ifdef ARM_MATH_MATRIX_CHECK 
+  /* The destination must have the transposed dimensions of the source */ 
+  if((pSrc->numRows != pDst->numCols) || (pSrc->numCols != pDst->numRows)) 
+  { 
+    /* Set status as ARM_MATH_SIZE_MISMATCH */ 
+    status = ARM_MATH_SIZE_MISMATCH; 
+  } 
+  else 
+#endif 
+  { 
+    /* Each source row is written out as one column of the destination. */ 
+    /* Pre-tested row loop: a zero-row matrix must not wrap the 16-bit counter. */ 
+    while(row > 0u) 
+    { 
+      /* Apply loop unrolling and exchange the columns with row elements */ 
+      col = nColumns >> 2u; 
+ 
+      /* The pointer pOut is set to starting address of the column being processed */ 
+      pOut = pDst->pData + i; 
+ 
+      /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.  
+       ** a second loop below computes the remaining 0 to 3 samples. */ 
+      while(col > 0u) 
+      { 
+        /* Read two elements from the row */ 
+        in = *__SIMD32(pSrcA)++; 
+ 
+        /* Store the low halfword first - NOTE(review): source order only on a little-endian target, confirm */ 
+        *pOut = (q15_t) in; 
+ 
+        /* Update the pointer pOut to point to the next row of the transposed matrix */ 
+        pOut += nRows; 
+ 
+        /* Unpack and store the second element (high halfword) in the destination */ 
+        *pOut = (q15_t) ((in & (q31_t) 0xffff0000) >> 16); 
+ 
+        /* Update the pointer pOut to point to the next row of the transposed matrix */ 
+        pOut += nRows; 
+ 
+        /* Read two elements from the row */ 
+        in = *__SIMD32(pSrcA)++; 
+ 
+        /* Unpack and store one element (low halfword) in the destination */ 
+        *pOut = (q15_t) in; 
+ 
+        /* Update the pointer pOut to point to the next row of the transposed matrix */ 
+        pOut += nRows; 
+ 
+        /* Unpack and store the second element (high halfword) in the destination */ 
+        *pOut = (q15_t) ((in & (q31_t) 0xffff0000) >> 16); 
+ 
+        /* Update the pointer pOut to point to the next row of the transposed matrix */ 
+        pOut += nRows; 
+ 
+        /* Decrement the column loop counter */ 
+        col--; 
+      } 
+ 
+      /* Transpose the 0 to 3 elements left when nColumns is not a multiple of 4. */ 
+      col = nColumns % 0x4u; 
+ 
+      while(col > 0u) 
+      { 
+        /* Read and store the input element in the destination */ 
+        *pOut = *pSrcA++; 
+ 
+        /* Update the pointer pOut to point to the next row of the transposed matrix */ 
+        pOut += nRows; 
+ 
+        /* Decrement the column loop counter */ 
+        col--; 
+      } 
+ 
+      i++; 
+ 
+      /* Decrement the row loop counter */ 
+      row--; 
+ 
+    }                           /* row loop end */ 
+ 
+    /* set status as ARM_MATH_SUCCESS */ 
+    status = ARM_MATH_SUCCESS; 
+  } 
+  /* Return to application */ 
+  return (status); 
+} 
+ 
+/**  
+ * @} end of MatrixTrans group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/MatrixFunctions/arm_mat_trans_q31.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,150 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:	    arm_mat_trans_q31.c  
+*  
+* Description:	Q31 matrix transpose.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.5  2010/04/26   
+*    incorporated review comments and updated with latest CMSIS layer  
+*  
+* Version 0.0.3  2010/03/10   
+*    Initial version  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupMatrix  
+ */ 
+ 
+/**  
+ * @addtogroup MatrixTrans  
+ * @{  
+ */ 
+ 
+/**  
+  * @brief Q31 matrix transpose.  
+  * @param[in]  *pSrc points to the input matrix  
+  * @param[out] *pDst points to the output matrix  
+  * @return 	The function returns either  <code>ARM_MATH_SIZE_MISMATCH</code>  
+  * or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.  
+  * A source matrix with zero rows is treated as empty: nothing is read or written.  
+ */ 
+ 
+arm_status arm_mat_trans_q31( 
+  const arm_matrix_instance_q31 * pSrc, 
+  arm_matrix_instance_q31 * pDst) 
+{ 
+  q31_t *pIn = pSrc->pData;                      /* input data matrix pointer, advanced sequentially */ 
+  q31_t *pOut = pDst->pData;                     /* output data matrix pointer  */ 
+  q31_t *px;                                     /* Temporary output data matrix pointer */ 
+  uint16_t nRows = pSrc->numRows;                /* number of source rows; also the step between output writes */ 
+  uint16_t nColumns = pSrc->numCols;             /* number of nColumns  */ 
+  uint16_t blkCnt, i = 0u, row = nRows;          /* loop counters */ 
+  arm_status status;                             /* status of matrix transpose */ 
+ 
+ 
+#ifdef ARM_MATH_MATRIX_CHECK 
+  /* The destination must have the transposed dimensions of the source */ 
+  if((pSrc->numRows != pDst->numCols) || (pSrc->numCols != pDst->numRows)) 
+  { 
+    /* Set status as ARM_MATH_SIZE_MISMATCH */ 
+    status = ARM_MATH_SIZE_MISMATCH; 
+  } 
+  else 
+#endif 
+  { 
+    /* Each source row is written out as one column of the destination. */ 
+    /* Pre-tested row loop: a zero-row matrix must not wrap the 16-bit counter. */ 
+    while(row > 0u) 
+    { 
+      /* Apply loop unrolling and exchange the columns with row elements */ 
+      blkCnt = nColumns >> 2u; 
+ 
+      /* The pointer px is set to starting address of the column being processed */ 
+      px = pOut + i; 
+ 
+      /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.  
+       ** a second loop below computes the remaining 0 to 3 samples. */ 
+      while(blkCnt > 0u) 
+      { 
+        /* Read and store the input element in the destination */ 
+        *px = *pIn++; 
+ 
+        /* Update the pointer px to point to the next row of the transposed matrix */ 
+        px += nRows; 
+ 
+        /* Read and store the input element in the destination */ 
+        *px = *pIn++; 
+ 
+        /* Update the pointer px to point to the next row of the transposed matrix */ 
+        px += nRows; 
+ 
+        /* Read and store the input element in the destination */ 
+        *px = *pIn++; 
+ 
+        /* Update the pointer px to point to the next row of the transposed matrix */ 
+        px += nRows; 
+ 
+        /* Read and store the input element in the destination */ 
+        *px = *pIn++; 
+ 
+        /* Update the pointer px to point to the next row of the transposed matrix */ 
+        px += nRows; 
+ 
+        /* Decrement the column loop counter */ 
+        blkCnt--; 
+      } 
+ 
+      /* Transpose the 0 to 3 elements left when nColumns is not a multiple of 4. */ 
+      blkCnt = nColumns % 0x4u; 
+ 
+      while(blkCnt > 0u) 
+      { 
+        /* Read and store the input element in the destination */ 
+        *px = *pIn++; 
+ 
+        /* Update the pointer px to point to the next row of the transposed matrix */ 
+        px += nRows; 
+ 
+        /* Decrement the column loop counter */ 
+        blkCnt--; 
+      } 
+ 
+      i++; 
+ 
+      /* Decrement the row loop counter */ 
+      row--; 
+ 
+    }                           /* row loop end */ 
+ 
+    /* set status as ARM_MATH_SUCCESS */ 
+    status = ARM_MATH_SUCCESS; 
+  } 
+ 
+  /* Return to application */ 
+  return (status); 
+} 
+ 
+/**  
+ * @} end of MatrixTrans group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/StatisticsFunctions/arm_max_f32.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,97 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_max_f32.c  
+*  
+* Description:	Maximum value of a floating-point vector.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+* ---------------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupStats  
+ */ 
+ 
+/**  
+ * @defgroup Max Maximum  
+ *  
+ * Computes the maximum value of an array of data.   
+ * The function returns both the maximum value and its position within the array.   
+ * There are separate functions for floating-point, Q31, Q15, and Q7 data types.  
+ */ 
+ 
+/**  
+ * @addtogroup Max  
+ * @{  
+ */ 
+ 
+ 
+/**  
+ * @brief Maximum value of a floating-point vector.  
+ * @param[in]       *pSrc points to the input vector  
+ * @param[in]       blockSize length of the input vector; must be at least 1  
+ * @param[out]      *pResult maximum value returned here  
+ * @param[out]      *pIndex index of maximum value returned here (first occurrence on ties)  
+ * @return none.  
+ */ 
+ 
+void arm_max_f32( 
+  float32_t * pSrc, 
+  uint32_t blockSize, 
+  float32_t * pResult, 
+  uint32_t * pIndex) 
+{ 
+  float32_t maxVal, out;                         /* candidate sample and running maximum */ 
+  uint32_t blkCnt, outIndex;                     /* samples left to compare, index of running maximum */ 
+ 
+  /* Initialise the index value to zero. */ 
+  outIndex = 0u; 
+  /* Load first input value that acts as reference value for comparison */ 
+  out = *pSrc++; 
+ 
+  /* The remaining blockSize - 1 values are compared against the reference */ 
+  blkCnt = (blockSize - 1u); 
+ 
+  /* Pre-tested loop: a one-element vector has nothing left to compare, so the  
+   ** body must not run (a post-tested loop would read past the input here). */ 
+  while(blkCnt > 0u) 
+  { 
+    /* Initialize maxVal to the next consecutive values one by one */ 
+    maxVal = *pSrc++; 
+    /* compare for the maximum value */ 
+    if(out < maxVal) 
+    { 
+      /* Update the maximum value and its index */ 
+      out = maxVal; 
+      outIndex = blockSize - blkCnt; 
+    } 
+ 
+    blkCnt--; 
+  } 
+ 
+  /* Store the maximum value and its index into destination pointers */ 
+  *pResult = out; 
+  *pIndex = outIndex; 
+} 
+ 
+/**  
+ * @} end of Max group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/StatisticsFunctions/arm_max_q15.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,89 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_max_q15.c  
+*  
+* Description:	Maximum value of a Q15 vector.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+* ---------------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupStats  
+ */ 
+ 
+/**  
+ * @addtogroup Max  
+ * @{  
+ */ 
+ 
+ 
+/**  
+ * @brief Maximum value of a Q15 vector.  
+ * @param[in]       *pSrc points to the input vector  
+ * @param[in]       blockSize length of the input vector; must be at least 1  
+ * @param[out]      *pResult maximum value returned here  
+ * @param[out]      *pIndex index of maximum value returned here (first occurrence on ties)  
+ * @return none.  
+ */ 
+ 
+void arm_max_q15( 
+  q15_t * pSrc, 
+  uint32_t blockSize, 
+  q15_t * pResult, 
+  uint32_t * pIndex) 
+{ 
+  q15_t maxVal, out;                             /* candidate sample and running maximum */ 
+  uint32_t blkCnt, outIndex;                     /* samples left to compare, index of running maximum */ 
+ 
+  /* Initialise the index value to zero. */ 
+  outIndex = 0u; 
+  /* Load first input value that acts as reference value for comparison */ 
+  out = *pSrc++; 
+ 
+  /* The remaining blockSize - 1 values are compared against the reference */ 
+  blkCnt = (blockSize - 1u); 
+ 
+  /* Pre-tested loop: a one-element vector has nothing left to compare, so the  
+   ** body must not run (a post-tested loop would read past the input here). */ 
+  while(blkCnt > 0u) 
+  { 
+    /* Initialize maxVal to the next consecutive values one by one */ 
+    maxVal = *pSrc++; 
+    /* compare for the maximum value */ 
+    if(out < maxVal) 
+    { 
+      /* Update the maximum value and its index */ 
+      out = maxVal; 
+      outIndex = blockSize - blkCnt; 
+    } 
+ 
+    blkCnt--; 
+  } 
+ 
+  /* Store the maximum value and its index into destination pointers */ 
+  *pResult = out; 
+  *pIndex = outIndex; 
+} 
+ 
+/**  
+ * @} end of Max group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/StatisticsFunctions/arm_max_q31.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,89 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_max_q31.c  
+*  
+* Description:	Maximum value of a Q31 vector.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+* ---------------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupStats  
+ */ 
+ 
+/**  
+ * @addtogroup Max  
+ * @{  
+ */ 
+ 
+ 
+/**  
+ * @brief Maximum value of a Q31 vector.  
+ * @param[in]       *pSrc points to the input vector  
+ * @param[in]       blockSize length of the input vector; must be at least 1  
+ * @param[out]      *pResult maximum value returned here  
+ * @param[out]      *pIndex index of maximum value returned here (first occurrence on ties)  
+ * @return none.  
+ */ 
+ 
+void arm_max_q31( 
+  q31_t * pSrc, 
+  uint32_t blockSize, 
+  q31_t * pResult, 
+  uint32_t * pIndex) 
+{ 
+  q31_t maxVal, out;                             /* candidate sample and running maximum */ 
+  uint32_t blkCnt, outIndex;                     /* samples left to compare, index of running maximum */ 
+ 
+  /* Initialise the index value to zero. */ 
+  outIndex = 0u; 
+  /* Load first input value that acts as reference value for comparison */ 
+  out = *pSrc++; 
+ 
+  /* The remaining blockSize - 1 values are compared against the reference */ 
+  blkCnt = (blockSize - 1u); 
+ 
+  /* Pre-tested loop: a one-element vector has nothing left to compare, so the  
+   ** body must not run (a post-tested loop would read past the input here). */ 
+  while(blkCnt > 0u) 
+  { 
+    /* Initialize maxVal to the next consecutive values one by one */ 
+    maxVal = *pSrc++; 
+    /* compare for the maximum value */ 
+    if(out < maxVal) 
+    { 
+      /* Update the maximum value and its index */ 
+      out = maxVal; 
+      outIndex = blockSize - blkCnt; 
+    } 
+ 
+    blkCnt--; 
+  } 
+ 
+  /* Store the maximum value and its index into destination pointers */ 
+  *pResult = out; 
+  *pIndex = outIndex; 
+} 
+ 
+/**  
+ * @} end of Max group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/StatisticsFunctions/arm_max_q7.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,155 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_max_q7.c  
+*  
+* Description:	Maximum value of a Q7 vector.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+* ---------------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupStats  
+ */ 
+ 
+/**  
+ * @addtogroup Max  
+ * @{  
+ */ 
+ 
+ 
+/**  
+ * @brief Maximum value of a Q7 vector.  
+ * @param[in]       *pSrc points to the input vector  
+ * @param[in]       blockSize length of the input vector; must be at least 1  
+ * @param[out]      *pResult maximum value returned here  
+ * @param[out]      *pIndex index of maximum value returned here (first occurrence on ties)  
+ * @return none.  
+ */ 
+ 
+void arm_max_q7( 
+  q7_t * pSrc, 
+  uint32_t blockSize, 
+  q7_t * pResult, 
+  uint32_t * pIndex) 
+{ 
+  q7_t res, maxVal, x0, x1, maxVal2, maxVal1;    /* running maximum and per-group temporaries */ 
+  uint32_t blkCnt, index1, index2, index3, indx, indxMod;       /* loop counter and index bookkeeping */ 
+ 
+  /* Initialise the index value to zero. */ 
+  indx = 0u; 
+ 
+  /* Load first input value that acts as reference value for comparison */ 
+  res = *pSrc++; 
+ 
+  /* Index of the first sample of the next group of four; pSrc[0] is already consumed */ 
+  indxMod = 1u; 
+ 
+  /* Number of complete groups of four among the remaining blockSize - 1 values */ 
+  blkCnt = (blockSize - 1u) >> 2u; 
+ 
+  while(blkCnt > 0u) 
+  { 
+    /* Load two input values for comparison */ 
+    x0 = *pSrc++; 
+    x1 = *pSrc++; 
+ 
+    if(x0 < x1) 
+    { 
+      /* Second value of the pair is larger */ 
+      maxVal1 = x1; 
+      index1 = indxMod + 1u; 
+    } 
+    else 
+    { 
+      /* First value wins; on a tie the earlier index is kept */ 
+      maxVal1 = x0; 
+      index1 = indxMod; 
+    } 
+ 
+    /* Load two input values for comparison */ 
+    x0 = *pSrc++; 
+    x1 = *pSrc++; 
+ 
+    if(x0 < x1) 
+    { 
+      /* Second value of the pair is larger */ 
+      maxVal2 = x1; 
+      index2 = indxMod + 3u; 
+    } 
+    else 
+    { 
+      /* First value wins; on a tie the earlier index is kept */ 
+      maxVal2 = x0; 
+      index2 = indxMod + 2u; 
+    } 
+ 
+    if(maxVal1 < maxVal2) 
+    { 
+      /* The second pair holds the maximum of the group */ 
+      maxVal = maxVal2; 
+      index3 = index2; 
+    } 
+    else 
+    { 
+      /* The first pair holds the maximum of the group (or ties with the second) */ 
+      maxVal = maxVal1; 
+      index3 = index1; 
+    } 
+ 
+    if(res < maxVal) 
+    { 
+      /* Update the maximum value and its index */ 
+      res = maxVal; 
+      indx = index3; 
+    } 
+ 
+    /* Advance to the next group of four and decrement the loop counter */ 
+    indxMod += 4u; 
+    blkCnt--; 
+  } 
+ 
+  /* The 0 to 3 values that do not fill a complete group are handled one by one */ 
+  blkCnt = (blockSize - 1u) % 0x04u; 
+ 
+  while(blkCnt > 0u) 
+  { 
+    /* Fetch the next value and keep it if it beats the running maximum */ 
+    maxVal = *pSrc++; 
+ 
+    if(res < maxVal) 
+    { 
+      /* Update the maximum value and its index */ 
+      res = maxVal; 
+      indx = blockSize - blkCnt; 
+    } 
+ 
+    blkCnt--; 
+  } 
+ 
+  /* Store the maximum value and its index into destination pointers */ 
+  *pResult = res; 
+  *pIndex = indx; 
+} 
+ 
+/**  
+ * @} end of Max group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/StatisticsFunctions/arm_mean_f32.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,106 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_mean_f32.c  
+*  
+* Description:	Mean value of a floating-point vector.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+* ---------------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupStats  
+ */ 
+ 
+/**  
+ * @defgroup mean Mean  
+ *  
+ * Calculates the mean of the input vector. Mean is defined as the average of the elements in the vector.  
+ * The following algorithm is used:  
+ *  
+ * <pre>  
+ * 	Result = (pSrc[0] + pSrc[1] + pSrc[2] + ... + pSrc[blockSize-1]) / blockSize;  
+ * </pre>  
+ *  
+ * There are separate functions for floating-point, Q31, Q15, and Q7 data types.  
+ */ 
+ 
+/**  
+ * @addtogroup mean  
+ * @{  
+ */ 
+ 
+ 
+/**  
+ * @brief Mean value of a floating-point vector.  
+ * @param[in]       *pSrc points to the input vector  
+ * @param[in]       blockSize length of the input vector  
+ * @param[out]      *pResult mean value returned here  
+ * @return none.  
+ *  
+ * @details  
+ * The samples are added in storage order into a single-precision accumulator,  
+ * and the total is then divided by blockSize.  
+ * NOTE(review): blockSize is the divisor of the final step; a zero length  
+ * is not handled here.  
+ */ 
+ 
+ 
+void arm_mean_f32( 
+  float32_t * pSrc, 
+  uint32_t blockSize, 
+  float32_t * pResult) 
+{ 
+  float32_t acc = 0.0f;                          /* running total of the samples */ 
+  uint32_t groups = blockSize / 4u;              /* number of complete groups of four samples */ 
+  uint32_t tail = blockSize & 0x3u;              /* samples left over after the groups */ 
+ 
+  /* Unrolled part: four samples are accumulated per iteration.  
+   ** Each sample is added on its own so the order of additions stays  
+   ** strictly sequential. */ 
+  for(; groups > 0u; groups--) 
+  { 
+    /* acc = A[0] + A[1] + A[2] + ... */ 
+    acc += pSrc[0]; 
+    acc += pSrc[1]; 
+    acc += pSrc[2]; 
+    acc += pSrc[3]; 
+ 
+    /* Step over the group just consumed */ 
+    pSrc += 4; 
+  } 
+ 
+  /* If blockSize is not a multiple of 4, the remaining 1 to 3 samples  
+   ** are accumulated here without unrolling. */ 
+  for(; tail > 0u; tail--) 
+  { 
+    /* acc = ... + A[blockSize-1] */ 
+    acc += *pSrc++; 
+  } 
+ 
+  /* Mean = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize */ 
+  /* Store the result to the destination */ 
+  *pResult = acc / (float32_t) blockSize; 
+} 
+ 
+/**  
+ * @} end of mean group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/StatisticsFunctions/arm_mean_q15.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,103 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_mean_q15.c  
+*  
+* Description:	Mean value of a Q15 vector.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupStats  
+ */ 
+ 
+/**  
+ * @addtogroup mean  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief Mean value of a Q15 vector.  
+ * @param[in]       *pSrc points to the input vector  
+ * @param[in]       blockSize length of the input vector; must be non-zero  
+ * @param[out]      *pResult mean value returned here  
+ * @return none.  
+ *  
+ * @details  
+ * <b>Scaling and Overflow Behavior:</b>  
+ * \par  
+ * The function is implemented using a 32-bit internal accumulator.  
+ * The input is represented in 1.15 format and is accumulated in a 32-bit   
+ * accumulator in 17.15 format.   
+ * The accumulator cannot overflow for block sizes of up to 65536 samples, and the   
+ * full precision of intermediate result is preserved.   
+ * Finally, the accumulator is divided by blockSize using signed arithmetic  
+ * and narrowed to 1.15 format; the quotient is truncated towards zero.  
+ */ 
+ 
+ 
+void arm_mean_q15( 
+  q15_t * pSrc, 
+  uint32_t blockSize, 
+  q15_t * pResult) 
+{ 
+  q31_t sum = 0;                                 /* Temporary result storage */ 
+  uint32_t blkCnt;                               /* loop counter */ 
+ 
+  /*loop Unrolling */ 
+  blkCnt = blockSize >> 2u; 
+ 
+  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.  
+   ** a second loop below computes the remaining 1 to 3 samples. */ 
+  while(blkCnt > 0u) 
+  { 
+    /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */ 
+    sum += *pSrc++; 
+    sum += *pSrc++; 
+    sum += *pSrc++; 
+    sum += *pSrc++; 
+ 
+    /* Decrement the loop counter */ 
+    blkCnt--; 
+  } 
+ 
+  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.  
+   ** No loop unrolling is used. */ 
+  blkCnt = blockSize % 0x4u; 
+ 
+  while(blkCnt > 0u) 
+  { 
+    /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */ 
+    sum += *pSrc++; 
+ 
+    /* Decrement the loop counter */ 
+    blkCnt--; 
+  } 
+ 
+  /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize  */ 
+  /* Divide as signed: an unsigned divisor would convert a negative sum to unsigned first */ 
+  *pResult = (q15_t) (sum / (q31_t) blockSize); 
+} 
+ 
+/**  
+ * @} end of mean group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/StatisticsFunctions/arm_mean_q31.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,103 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_mean_q31.c  
+*  
+* Description:	Mean value of a Q31 vector.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupStats  
+ */ 
+ 
+/**  
+ * @addtogroup mean  
+ * @{  
+ */ 
+ 
+/**
+ * @brief Mean value of a Q31 vector.
+ * @param[in]       *pSrc points to the input vector
+ * @param[in]       blockSize number of samples in the input vector; used as the divisor, so it must be non-zero
+ * @param[out]      *pResult mean value returned here
+ * @return none.
+ *
+ * @details
+ * <b>Scaling and Overflow Behavior:</b>
+ *\par
+ * The input is represented in 1.31 format and is summed in a 64-bit
+ * accumulator in 33.31 format.
+ * The sum is then divided by blockSize and the quotient is cast to 1.31 format.
+ *
+ */
+
+
+void arm_mean_q31(
+  q31_t * pSrc,
+  uint32_t blockSize,
+  q31_t * pResult)
+{
+  q63_t acc = 0;                                 /* 64-bit running sum of the samples */
+  uint32_t groups = blockSize >> 2u;             /* number of complete groups of four samples */
+  uint32_t tail = blockSize % 0x4u;              /* samples left over after the groups */
+
+  /* Unrolled part: add four consecutive samples per pass */
+  for(; groups > 0u; groups--)
+  {
+    acc += pSrc[0];
+    acc += pSrc[1];
+    acc += pSrc[2];
+    acc += pSrc[3];
+    pSrc += 4;
+  }
+
+  /* Remainder: add the last 0 to 3 samples one at a time */
+  for(; tail > 0u; tail--)
+  {
+    acc += *pSrc++;
+  }
+
+  /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize  */
+  *pResult = (q31_t) (acc / (int32_t) blockSize);
+}
+ 
+/**  
+ * @} end of mean group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/StatisticsFunctions/arm_mean_q7.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,103 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_mean_q7.c  
+*  
+* Description:	Mean value of a Q7 vector.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupStats  
+ */ 
+ 
+/**  
+ * @addtogroup mean  
+ * @{  
+ */ 
+ 
+/**
+ * @brief Mean value of a Q7 vector.
+ * @param[in]       *pSrc points to the input vector
+ * @param[in]       blockSize number of samples in the input vector; used as the divisor, so it must be non-zero
+ * @param[out]      *pResult mean value returned here
+ * @return none.
+ *
+ * @details
+ * <b>Scaling and Overflow Behavior:</b>
+ * \par
+ * The input is represented in 1.7 format and is summed in a 32-bit
+ * accumulator in 25.7 format.
+ * The sum is then divided by blockSize and the quotient is cast to 1.7 format.
+ *
+ */
+
+
+void arm_mean_q7(
+  q7_t * pSrc,
+  uint32_t blockSize,
+  q7_t * pResult)
+{
+  q31_t acc = 0;                                 /* 32-bit running sum of the samples */
+  uint32_t groups = blockSize >> 2u;             /* number of complete groups of four samples */
+  uint32_t tail = blockSize % 0x4u;              /* samples left over after the groups */
+
+  /* Unrolled part: add four consecutive samples per pass */
+  for(; groups > 0u; groups--)
+  {
+    acc += pSrc[0];
+    acc += pSrc[1];
+    acc += pSrc[2];
+    acc += pSrc[3];
+    pSrc += 4;
+  }
+
+  /* Remainder: add the last 0 to 3 samples one at a time */
+  for(; tail > 0u; tail--)
+  {
+    acc += *pSrc++;
+  }
+
+  /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize  */
+  *pResult = (q7_t) (acc / (int32_t) blockSize);
+}
+ 
+/**  
+ * @} end of mean group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/StatisticsFunctions/arm_min_f32.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,98 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_min_f32.c  
+*  
+* Description:	Minimum value of a floating-point vector.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+* ---------------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupStats  
+ */ 
+ 
+/**  
+ * @defgroup Min Minimum  
+ *  
+ * Computes the minimum value of an array of data.   
+ * The function returns both the minimum value and its position within the array.   
+ * There are separate functions for floating-point, Q31, Q15, and Q7 data types.  
+ */ 
+ 
+/**  
+ * @addtogroup Min  
+ * @{  
+ */ 
+ 
+ 
+/**
+ * @brief Minimum value of a floating-point vector.
+ * @param[in]       *pSrc points to the input vector
+ * @param[in]       blockSize length of the input vector; must be at least 1
+ * @param[out]      *pResult minimum value returned here
+ * @param[out]      *pIndex index of minimum value returned here
+ * @return none.
+ *
+ * @details
+ * A later sample replaces the current minimum only when it is strictly smaller,
+ * so for equal minima the index of the first occurrence is returned.
+ *
+ */
+
+void arm_min_f32(
+  float32_t * pSrc,
+  uint32_t blockSize,
+  float32_t * pResult,
+  uint32_t * pIndex)
+{
+  float32_t minVal, out;                         /* Temporary variables to store the output value. */
+  uint32_t blkCnt, outIndex;                     /* loop counter and index of the current minimum */
+
+  /* Initialise the index value to zero. */
+  outIndex = 0u;
+  /* Load first input value that acts as reference value for comparison */
+  out = *pSrc++;
+
+  /* The remaining blockSize - 1 values are compared against the reference */
+  blkCnt = (blockSize - 1u);
+
+  /* A pre-tested loop is required here: with blockSize == 1 there is nothing
+   ** left to compare, and a do/while would read past the end of the vector and
+   ** then wrap the counter below zero. */
+  while(blkCnt > 0u)
+  {
+    /* Load the next value of the vector */
+    minVal = *pSrc++;
+
+    /* compare for the minimum value */
+    if(out > minVal)
+    {
+      /* Update the minimum value and its index */
+      out = minVal;
+      outIndex = blockSize - blkCnt;
+    }
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* Store the minimum value and its index into destination pointers */
+  *pResult = out;
+  *pIndex = outIndex;
+}
+ 
+/**  
+ * @} end of Min group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/StatisticsFunctions/arm_min_q15.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,91 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_min_q15.c  
+*  
+* Description:	Minimum value of a Q15 vector.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+* ---------------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupStats  
+ */ 
+ 
+ 
+/**  
+ * @addtogroup Min  
+ * @{  
+ */ 
+ 
+ 
+/**
+ * @brief Minimum value of a Q15 vector.
+ * @param[in]       *pSrc points to the input vector
+ * @param[in]       blockSize length of the input vector; must be at least 1
+ * @param[out]      *pResult minimum value returned here
+ * @param[out]      *pIndex index of minimum value returned here
+ * @return none.
+ *
+ * @details
+ * A later sample replaces the current minimum only when it is strictly smaller,
+ * so for equal minima the index of the first occurrence is returned.
+ *
+ */
+
+void arm_min_q15(
+  q15_t * pSrc,
+  uint32_t blockSize,
+  q15_t * pResult,
+  uint32_t * pIndex)
+{
+  q15_t minVal, out;                             /* Temporary variables to store the output value. */
+  uint32_t blkCnt, outIndex;                     /* loop counter and index of the current minimum */
+
+  /* Initialise the index value to zero. */
+  outIndex = 0u;
+  /* Load first input value that acts as reference value for comparison */
+  out = *pSrc++;
+
+  /* The remaining blockSize - 1 values are compared against the reference */
+  blkCnt = (blockSize - 1u);
+
+  /* A pre-tested loop is required here: with blockSize == 1 there is nothing
+   ** left to compare, and a do/while would read past the end of the vector and
+   ** then wrap the counter below zero. */
+  while(blkCnt > 0u)
+  {
+    /* Load the next value of the vector */
+    minVal = *pSrc++;
+
+    /* compare for the minimum value */
+    if(out > minVal)
+    {
+      /* Update the minimum value and its index */
+      out = minVal;
+      outIndex = blockSize - blkCnt;
+    }
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* Store the minimum value and its index into destination pointers */
+  *pResult = out;
+  *pIndex = outIndex;
+}
+ 
+/**  
+ * @} end of Min group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/StatisticsFunctions/arm_min_q31.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,91 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_min_q31.c  
+*  
+* Description:	Minimum value of a Q31 vector.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+* ---------------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupStats  
+ */ 
+ 
+ 
+/**  
+ * @addtogroup Min  
+ * @{  
+ */ 
+ 
+ 
+/**
+ * @brief Minimum value of a Q31 vector.
+ * @param[in]       *pSrc points to the input vector
+ * @param[in]       blockSize length of the input vector; must be at least 1
+ * @param[out]      *pResult minimum value returned here
+ * @param[out]      *pIndex index of minimum value returned here
+ * @return none.
+ *
+ * @details
+ * A later sample replaces the current minimum only when it is strictly smaller,
+ * so for equal minima the index of the first occurrence is returned.
+ *
+ */
+
+void arm_min_q31(
+  q31_t * pSrc,
+  uint32_t blockSize,
+  q31_t * pResult,
+  uint32_t * pIndex)
+{
+  q31_t minVal, out;                             /* Temporary variables to store the output value. */
+  uint32_t blkCnt, outIndex;                     /* loop counter and index of the current minimum */
+
+  /* Initialise the index value to zero. */
+  outIndex = 0u;
+  /* Load first input value that acts as reference value for comparison */
+  out = *pSrc++;
+
+  /* The remaining blockSize - 1 values are compared against the reference */
+  blkCnt = (blockSize - 1u);
+
+  /* A pre-tested loop is required here: with blockSize == 1 there is nothing
+   ** left to compare, and a do/while would read past the end of the vector and
+   ** then wrap the counter below zero. */
+  while(blkCnt > 0u)
+  {
+    /* Load the next value of the vector */
+    minVal = *pSrc++;
+
+    /* compare for the minimum value */
+    if(out > minVal)
+    {
+      /* Update the minimum value and its index */
+      out = minVal;
+      outIndex = blockSize - blkCnt;
+    }
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* Store the minimum value and its index into destination pointers */
+  *pResult = out;
+  *pIndex = outIndex;
+}
+ 
+/**  
+ * @} end of Min group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/StatisticsFunctions/arm_min_q7.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,156 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_min_q7.c  
+*  
+* Description:	Processing function for the Q7 Minimum.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+* ---------------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupStats  
+ */ 
+ 
+/**  
+ * @addtogroup Min  
+ * @{  
+ */ 
+ 
+ 
+/**
+ * @brief Minimum value of a Q7 vector.
+ * @param[in]       *pSrc points to the input vector
+ * @param[in]       blockSize length of the input vector; must be at least 1
+ * @param[out]      *pResult minimum value returned here
+ * @param[out]      *pIndex index of minimum value returned here
+ * @return none.
+ *
+ * @details
+ * The first sample is taken as the reference.  The following samples are
+ * examined in groups of four, and the 0 to 3 samples left over are examined
+ * one by one.  Every comparison keeps the earlier sample on a tie, so for equal
+ * minima the index of the first occurrence is returned.
+ *
+ */
+
+void arm_min_q7(
+  q7_t * pSrc,
+  uint32_t blockSize,
+  q7_t * pResult,
+  uint32_t * pIndex)
+{
+  q7_t minVal, minVal1, minVal2, res, x0, x1;    /* Temporary variables to store the output value. */
+  uint32_t blkCnt, indx, index1, index2, index3, indxMod;       /* loop counter and index bookkeeping */
+
+  /* Initialise the index value to zero. */
+  indx = 0u;
+
+  /* Load first input value that acts as reference value for comparison */
+  res = *pSrc++;
+
+  /* Number of complete groups of four among the remaining blockSize - 1 values */
+  blkCnt = (blockSize - 1u) >> 2u;
+
+  /* Index of the first sample of the current group.  The reference occupies
+   ** index 0, so the first group starts at index 1.  The index is tracked
+   ** explicitly: deriving it from blockSize and blkCnt is only correct when
+   ** blockSize - 1 is a multiple of 4. */
+  indxMod = 1u;
+
+  while(blkCnt > 0u)
+  {
+    /* Load two input values for comparison */
+    x0 = *pSrc++;
+    x1 = *pSrc++;
+
+    if(x0 > x1)
+    {
+      /* Second value of the pair is the smaller one */
+      minVal1 = x1;
+      index1 = indxMod + 1u;
+    }
+    else
+    {
+      /* First value of the pair is the smaller one (or both are equal) */
+      minVal1 = x0;
+      index1 = indxMod;
+    }
+
+    /* Load two input values for comparison */
+    x0 = *pSrc++;
+    x1 = *pSrc++;
+
+    if(x0 > x1)
+    {
+      /* Second value of the pair is the smaller one */
+      minVal2 = x1;
+      index2 = indxMod + 3u;
+    }
+    else
+    {
+      /* First value of the pair is the smaller one (or both are equal) */
+      minVal2 = x0;
+      index2 = indxMod + 2u;
+    }
+
+    /* Minimum of the group of four and its index */
+    if(minVal1 > minVal2)
+    {
+      minVal = minVal2;
+      index3 = index2;
+    }
+    else
+    {
+      minVal = minVal1;
+      index3 = index1;
+    }
+
+    if(res > minVal)
+    {
+      /* Update the minimum value and its index */
+      res = minVal;
+      indx = index3;
+    }
+
+    /* Advance to the next group of four samples */
+    indxMod += 4u;
+
+    /* Decrement the loop counter */
+    blkCnt--;
+
+  }
+
+  /* Samples left over after the groups of four; they are the last blkCnt
+   ** samples of the vector */
+  blkCnt = (blockSize - 1u) % 0x04u;
+
+  while(blkCnt > 0u)
+  {
+    /* Load the next value of the vector */
+    minVal = *pSrc++;
+
+    /* compare for the minimum value */
+    if(res > minVal)
+    {
+      /* Update the minimum value and its index */
+      res = minVal;
+      indx = blockSize - blkCnt;
+    }
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* Store the minimum value and its index into destination pointers */
+  *pResult = res;
+  *pIndex = indx;
+}
+ 
+/**  
+ * @} end of Min group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/StatisticsFunctions/arm_power_f32.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,117 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_power_f32.c  
+*  
+* Description:	sum of the square of the elements in a floating point array  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* ---------------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupStats  
+ */ 
+ 
+/**  
+ * @defgroup power Power  
+ *  
+ * Calculates the sum of the squares of the elements in the input vector.  
+ * The following algorithm is used:  
+ *  
+ * <pre>  
+ * 	Result = pSrc[0] * pSrc[0] + pSrc[1] * pSrc[1] + pSrc[2] * pSrc[2] + ... + pSrc[blockSize-1] * pSrc[blockSize-1];  
+ * </pre>  
+ * 
+ * There are separate functions for floating point, Q31, Q15, and Q7 data types.   
+ */ 
+ 
+/**  
+ * @addtogroup power  
+ * @{  
+ */ 
+ 
+ 
+/**
+ * @brief Sum of the squares of the elements of a floating-point vector.
+ * @param[in]       *pSrc points to the input vector
+ * @param[in]       blockSize number of samples in the input vector
+ * @param[out]      *pResult sum of the squares returned here
+ * @return none.
+ *
+ */
+
+
+void arm_power_f32(
+  float32_t * pSrc,
+  uint32_t blockSize,
+  float32_t * pResult)
+{
+  float32_t acc = 0.0f;                          /* running sum of squares */
+  float32_t x;                                   /* current input sample */
+  uint32_t groups = blockSize >> 2u;             /* number of complete groups of four samples */
+  uint32_t tail = blockSize % 0x4u;              /* samples left over after the groups */
+
+  /* Unrolled part: square and accumulate four consecutive samples per pass,
+   ** in the order in which they appear in the vector */
+  for(; groups > 0u; groups--)
+  {
+    /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
+    x = pSrc[0];
+    acc += x * x;
+    x = pSrc[1];
+    acc += x * x;
+    x = pSrc[2];
+    acc += x * x;
+    x = pSrc[3];
+    acc += x * x;
+    pSrc += 4;
+  }
+
+  /* Remainder: square and accumulate the last 0 to 3 samples one at a time */
+  for(; tail > 0u; tail--)
+  {
+    x = *pSrc++;
+    acc += x * x;
+  }
+
+  /* Store the result to the destination */
+  *pResult = acc;
+}
+ 
+/**  
+ * @} end of power group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/StatisticsFunctions/arm_power_q15.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,109 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_power_q15.c  
+*  
+* Description:	sum of the square of the elements in an array of Q15 type  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupStats  
+ */ 
+ 
+/**  
+ * @addtogroup power  
+ * @{  
+ */ 
+ 
+/**
+ * @brief Sum of the squares of the elements of a Q15 vector.
+ * @param[in]       *pSrc points to the input vector
+ * @param[in]       blockSize length of the input vector
+ * @param[out]      *pResult sum of the squares value returned here
+ * @return none.
+ *
+ * @details
+ * <b>Scaling and Overflow Behavior:</b>
+ *
+ * \par
+ * The function is implemented using a 64-bit internal accumulator.
+ * The input is represented in 1.15 format.
+ * Intermediate multiplication yields a 2.30 format, and this
+ * result is added without saturation to a 64-bit accumulator in 34.30 format.
+ * With 33 guard bits in the accumulator, there is no risk of overflow, and the
+ * full precision of the intermediate multiplication is preserved.
+ * Finally, the return result is in 34.30 format.
+ *
+ */
+
+void arm_power_q15(
+  q15_t * pSrc,
+  uint32_t blockSize,
+  q63_t * pResult)
+{
+  q63_t sum = 0;                                 /* Temporary result storage */
+  q31_t in32;                                    /* Two packed Q15 input samples */
+  q15_t in16;                                    /* Single Q15 input sample */
+  uint32_t blkCnt;                               /* loop counter */
+
+
+  /* loop Unrolling */
+  blkCnt = blockSize >> 2u;
+
+  /* First part of the processing with loop unrolling.  Process 4 samples at a time.
+   ** The second loop below processes the remaining 0 to 3 samples. */
+  while(blkCnt > 0u)
+  {
+    /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
+    /* Read two samples as one 32-bit word and accumulate both squares.
+     ** NOTE(review): __SIMD32 and __SMLALD are declared outside this file; this
+     ** assumes __SMLALD is the dual signed 16x16 multiply with 64-bit accumulate. */
+    in32 = *__SIMD32(pSrc)++;
+    sum = __SMLALD(in32, in32, sum);
+    in32 = *__SIMD32(pSrc)++;
+    sum = __SMLALD(in32, in32, sum);
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* If the blockSize is not a multiple of 4, process the remaining samples here.
+   ** No loop unrolling is used. */
+  blkCnt = blockSize % 0x4u;
+
+  while(blkCnt > 0u)
+  {
+    /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
+    /* A single sample is squared with an ordinary multiply.  Passing one
+     ** sign-extended sample to the dual multiply would also square the upper
+     ** halfword (0xFFFF, i.e. -1, for a negative sample) and add 1 to the sum. */
+    in16 = *pSrc++;
+    sum += (q63_t) ((q31_t) in16 * in16);
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* Store the results in 34.30 format  */
+  *pResult = sum;
+}
+ 
+/**  
+ * @} end of power group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/StatisticsFunctions/arm_power_q31.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,116 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_power_q31.c  
+*  
+* Description:	sum of the square of the elements in an array of Q31 type  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupStats  
+ */ 
+ 
+/**  
+ * @addtogroup power  
+ * @{  
+ */ 
+ 
+/**
+ * @brief Sum of the squares of the elements of a Q31 vector.
+ * @param[in]       *pSrc points to the input vector
+ * @param[in]       blockSize number of samples in the input vector
+ * @param[out]      *pResult sum of the squares returned here
+ * @return none.
+ *
+ * @details
+ * <b>Scaling and Overflow Behavior:</b>
+ *
+ * \par
+ * The input is represented in 1.31 format.
+ * Each square is computed as a 64-bit product in 2.62 format and shifted right
+ * by 14 bits, which discards the 14 least significant bits and gives 2.48 format.
+ * The shifted squares are added without saturation to a 64-bit accumulator in
+ * 16.48 format, and the result is returned in 16.48 format.
+ *
+ */
+
+void arm_power_q31(
+  q31_t * pSrc,
+  uint32_t blockSize,
+  q63_t * pResult)
+{
+  q63_t acc = 0;                                 /* 64-bit running sum of squares */
+  q31_t x;                                       /* current input sample */
+  uint32_t groups = blockSize >> 2u;             /* number of complete groups of four samples */
+  uint32_t tail = blockSize % 0x4u;              /* samples left over after the groups */
+
+  /* Unrolled part: square, shift by 14 bits and accumulate four samples per pass */
+  for(; groups > 0u; groups--)
+  {
+    /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
+    x = pSrc[0];
+    acc += ((q63_t) x * x) >> 14u;
+
+    x = pSrc[1];
+    acc += ((q63_t) x * x) >> 14u;
+
+    x = pSrc[2];
+    acc += ((q63_t) x * x) >> 14u;
+
+    x = pSrc[3];
+    acc += ((q63_t) x * x) >> 14u;
+
+    pSrc += 4;
+  }
+
+  /* Remainder: process the last 0 to 3 samples one at a time */
+  for(; tail > 0u; tail--)
+  {
+    x = *pSrc++;
+    acc += ((q63_t) x * x) >> 14u;
+  }
+
+  /* Store the results in 16.48 format  */
+  *pResult = acc;
+}
+ 
+/**  
+ * @} end of power group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/StatisticsFunctions/arm_power_q7.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,121 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_power_q7.c  
+*  
+* Description:	sum of the square of the elements in an array of Q7 type  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupStats  
+ */ 
+ 
+/**  
+ * @addtogroup power  
+ * @{  
+ */ 
+ 
+/**
+ * @brief Sum of the squares of the elements of a Q7 vector.
+ * @param[in]       *pSrc points to the input vector
+ * @param[in]       blockSize length of the input vector
+ * @param[out]      *pResult sum of the squares value returned here
+ * @return none.
+ *
+ * @details
+ * <b>Scaling and Overflow Behavior:</b>
+ *
+ * \par
+ * The function is implemented using a 32-bit internal accumulator.
+ * The input is represented in 1.7 format.
+ * Intermediate multiplication yields a 2.14 format, and this
+ * result is added without saturation to an accumulator in 18.14 format.
+ * With 17 guard bits in the accumulator, there is no risk of overflow, and the
+ * full precision of the intermediate multiplication is preserved.
+ * Finally, the return result is in 18.14 format.
+ *
+ */
+
+void arm_power_q7(
+  q7_t * pSrc,
+  uint32_t blockSize,
+  q31_t * pResult)
+{
+  q31_t sum = 0;                                 /* Temporary result storage */
+  q31_t input1;                                  /* Two samples packed into one 32-bit word */
+  q15_t in1, in2;                                /* Input samples widened to 16 bits */
+  q7_t in;                                       /* Single input sample */
+  uint32_t blkCnt;                               /* loop counter */
+
+
+  /*loop Unrolling */
+  blkCnt = blockSize >> 2u;
+
+  /* First part of the processing with loop unrolling.  Process 4 samples at a time.
+   ** The second loop below processes the remaining 0 to 3 samples. */
+  while(blkCnt > 0u)
+  {
+    /* Read two samples and pack them as the low and high halfword of one word.
+     ** The packing is done on unsigned values because left-shifting a negative
+     ** signed value is undefined in C; the resulting bit pattern is unchanged. */
+    in1 = (q15_t) * pSrc++;
+    in2 = (q15_t) * pSrc++;
+    input1 = (q31_t) ((uint32_t) (uint16_t) in1 | ((uint32_t) (uint16_t) in2 << 16));
+
+    /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
+    /* Accumulate the squares of both packed samples.
+     ** NOTE(review): __SMLAD is declared outside this file; this assumes it is
+     ** the dual signed 16x16 multiply-accumulate. */
+    sum = __SMLAD(input1, input1, sum);
+
+    /* Read and pack the next two samples in the same way */
+    in1 = (q15_t) * pSrc++;
+    in2 = (q15_t) * pSrc++;
+    input1 = (q31_t) ((uint32_t) (uint16_t) in1 | ((uint32_t) (uint16_t) in2 << 16));
+
+    /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
+    sum = __SMLAD(input1, input1, sum);
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* If the blockSize is not a multiple of 4, process the remaining samples here.
+   ** No loop unrolling is used. */
+  blkCnt = blockSize % 0x4u;
+
+  while(blkCnt > 0u)
+  {
+    /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
+    /* Square a single sample and add it to the accumulator. */
+    in = *pSrc++;
+    sum += ((q15_t) in * in);
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* Store the result in 18.14 format  */
+  *pResult = sum;
+}
+ 
+/**  
+ * @} end of power group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/StatisticsFunctions/arm_rms_f32.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,114 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_rms_f32.c  
+*  
+* Description:	Root mean square value of an array of F32 type  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+* ---------------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupStats  
+ */ 
+ 
+/**  
+ * @defgroup RMS Root mean square (RMS)  
+ *  
+ *   
+ * Calculates the Root Mean Square of the elements in the input vector.  
+ * The underlying algorithm is:  
+ *  
+ * <pre>  
+ * 	Result = sqrt(((pSrc[0] * pSrc[0] + pSrc[1] * pSrc[1] + ... + pSrc[blockSize-1] * pSrc[blockSize-1]) / blockSize));  
+ * </pre>  
+ * 
+ * There are separate functions for floating point, Q31, and Q15 data types.   
+ */ 
+ 
+/**  
+ * @addtogroup RMS  
+ * @{  
+ */ 
+ 
+ 
+/**
+ * @brief Root Mean Square of the elements of a floating-point vector.
+ * @param[in]       *pSrc points to the input vector
+ * @param[in]       blockSize length of the input vector
+ * @param[out]      *pResult rms value returned here
+ * @return none.
+ *
+ * @details
+ * The squares of the samples are added, in index order, to a single
+ * float32_t accumulator.  The accumulated value is divided by blockSize and
+ * the quotient is handed to arm_sqrt_f32(), which writes *pResult.
+ * blockSize is used as the divisor and is not checked against zero here.
+ */
+
+void arm_rms_f32(
+  float32_t * pSrc,
+  uint32_t blockSize,
+  float32_t * pResult)
+{
+  float32_t sumSq = 0.0f;                        /* Running sum of squares */
+  float32_t sample;                              /* Current input sample */
+  uint32_t quads = blockSize >> 2u;              /* Number of complete groups of four samples */
+  uint32_t tail = blockSize % 0x4u;              /* Samples left over after the groups of four */
+
+  /* Unrolled part: four samples per iteration, squared and added in index order. */
+  for(; quads > 0u; quads--)
+  {
+    sample = pSrc[0];
+    sumSq += sample * sample;
+    sample = pSrc[1];
+    sumSq += sample * sample;
+    sample = pSrc[2];
+    sumSq += sample * sample;
+    sample = pSrc[3];
+    sumSq += sample * sample;
+
+    /* Step past the group just consumed */
+    pSrc += 4;
+  }
+
+  /* Remaining 0 to 3 samples, one at a time. */
+  for(; tail > 0u; tail--)
+  {
+    sample = *pSrc++;
+    sumSq += sample * sample;
+  }
+
+  /* Mean of the squares, then square root into the destination */
+  arm_sqrt_f32(sumSq / (float32_t) blockSize, pResult);
+}
+ 
+/**  
+ * @} end of RMS group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/StatisticsFunctions/arm_rms_q15.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,110 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_rms_q15.c  
+*  
+* Description:	Processing function for the Q15 RMS  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+* ---------------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @addtogroup RMS  
+ * @{  
+ */ 
+ 
+/**
+ * @brief Root Mean Square of the elements of a Q15 vector.
+ * @param[in]       *pSrc points to the input vector
+ * @param[in]       blockSize length of the input vector
+ * @param[out]      *pResult rms value returned here
+ * @return none.
+ *
+ * @details
+ * <b>Scaling and Overflow Behavior:</b>
+ *
+ * \par
+ * The function is implemented using a 64-bit internal accumulator.
+ * The input is represented in 1.15 format.
+ * Intermediate multiplication yields a 2.30 format, and this
+ * result is added without saturation to a 64-bit accumulator in 34.30 format.
+ * With 33 guard bits in the accumulator, there is no risk of overflow, and the
+ * full precision of the intermediate multiplication is preserved.
+ * The 34.30 sum of squares is first divided by blockSize, giving the mean of
+ * the squares in 2.30 format.  Only then is the value truncated to 1.15 by
+ * discarding the lower 15 bits and saturated to 16 bits, so a sum of squares
+ * larger than 1.0 does not clip the mean.
+ *
+ * \par
+ * If blockSize is 0 there is nothing to average; 0 is stored in *pResult.
+ */
+
+void arm_rms_q15(
+  q15_t * pSrc,
+  uint32_t blockSize,
+  q15_t * pResult)
+{
+  q63_t sum = 0;                                 /* accumulator: sum of squares, 34.30 format */
+  q31_t in;                                      /* two packed 1.15 input samples */
+  q15_t in1;                                     /* single input sample / final mean of squares */
+  uint32_t blkCnt;                               /* loop counter */
+
+  /* Avoid an integer division by zero for an empty vector */
+  if(blockSize == 0u)
+  {
+    *pResult = 0;
+    return;
+  }
+
+  /* loop Unrolling */
+  blkCnt = blockSize >> 2u;
+
+  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
+   ** a second loop below computes the remaining 1 to 3 samples. */
+  while(blkCnt > 0u)
+  {
+    /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
+    /* Each 32-bit read holds two samples; __SMLALD squares both halves and accumulates. */
+    in = *__SIMD32(pSrc)++;
+    sum = __SMLALD(in, in, sum);
+    in = *__SIMD32(pSrc)++;
+    sum = __SMLALD(in, in, sum);
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
+   ** No loop unrolling is used. */
+  blkCnt = blockSize % 0x4u;
+
+  while(blkCnt > 0u)
+  {
+    /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
+    /* A plain multiply is used for a single sample: passing a sign-extended
+     * scalar to the dual-halfword __SMLALD would also square its upper
+     * halfword (0xFFFF for negative samples) and add a spurious 1. */
+    in1 = *pSrc++;
+    sum += (q63_t) ((q31_t) in1 * in1);
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* Mean of the squares in 2.30, truncated to 1.15 and saturated to 16 bits */
+  in1 = (q15_t) __SSAT((q31_t) ((sum / (q63_t) blockSize) >> 15), 16);
+
+  /* Store the result in the destination */
+  arm_sqrt_q15(in1, pResult);
+}
+ 
+/**  
+ * @} end of RMS group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/StatisticsFunctions/arm_rms_q31.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,115 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_rms_q31.c  
+*  
+* Description:	root mean square value of an array of Q31 type  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+* ---------------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @addtogroup RMS  
+ * @{  
+ */ 
+ 
+ 
+/**
+ * @brief Root Mean Square of the elements of a Q31 vector.
+ * @param[in]       *pSrc points to the input vector
+ * @param[in]       blockSize length of the input vector
+ * @param[out]      *pResult rms value returned here
+ * @return none.
+ *
+ * @details
+ * <b>Scaling and Overflow Behavior:</b>
+ *
+ *\par
+ * The function is implemented using an internal 64-bit accumulator.
+ * The input is represented in 1.31 format, and intermediate multiplication
+ * yields a 2.62 format.
+ * The accumulator maintains full precision of the intermediate multiplication results,
+ * but provides only a single guard bit.
+ * There is no saturation on intermediate additions.
+ * If the accumulator overflows, it wraps around and distorts the result.
+ * In order to avoid overflows completely, the input signal must be scaled down by
+ * log2(blockSize) bits, as a total of blockSize additions are performed internally.
+ * Finally, the 2.62 accumulator is divided by blockSize, right shifted by 31 bits
+ * and clamped to the largest positive 1.31 value before the square root is taken.
+ *
+ *\par
+ * If blockSize is 0 there is nothing to average; 0 is stored in *pResult.
+ */
+
+void arm_rms_q31(
+  q31_t * pSrc,
+  uint32_t blockSize,
+  q31_t * pResult)
+{
+  q63_t sum = 0;                                 /* accumulator: sum of squares, 2.62 format */
+  q31_t in;                                      /* Temporary variable to store the input */
+  uint32_t blkCnt;                               /* loop counter */
+
+  /* Avoid an integer division by zero for an empty vector */
+  if(blockSize == 0u)
+  {
+    *pResult = 0;
+    return;
+  }
+
+  /*loop Unrolling */
+  blkCnt = blockSize >> 2u;
+
+  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
+   ** a second loop below computes the remaining 1 to 3 samples. */
+  while(blkCnt > 0u)
+  {
+    /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
+    /* Compute sum of the squares and then store the result in a temporary variable, sum */
+    in = *pSrc++;
+    sum += (q63_t) in * in;
+    in = *pSrc++;
+    sum += (q63_t) in * in;
+    in = *pSrc++;
+    sum += (q63_t) in * in;
+    in = *pSrc++;
+    sum += (q63_t) in * in;
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
+   ** No loop unrolling is used. */
+  blkCnt = blockSize % 0x4u;
+
+  while(blkCnt > 0u)
+  {
+    /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
+    /* Compute sum of the squares and then store the results in a temporary variable, sum */
+    in = *pSrc++;
+    sum += (q63_t) in * in;
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* Divide in 64 bits first (keeps the low-order bits and avoids a signed
+   * 32-bit view of blockSize), then convert 2.62 to 1.31 by 31 right shifts */
+  sum = (sum / (q63_t) blockSize) >> 31;
+
+  /* A mean square of exactly 1.0 (all samples at -1.0) is 2^31, which does not
+   * fit in q31_t; clamp it to the largest positive 1.31 value */
+  if(sum > (q63_t) 0x7FFFFFFF)
+  {
+    sum = (q63_t) 0x7FFFFFFF;
+  }
+
+  /* Compute Rms and store the result in the destination vector */
+  arm_sqrt_q31((q31_t) sum, pResult);
+}
+ 
+/**  
+ * @} end of RMS group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/StatisticsFunctions/arm_std_f32.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,173 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_std_f32.c  
+*  
+* Description:	Standard deviation of an array of F32 type.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+* ---------------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupStats  
+ */ 
+ 
+/**  
+ * @defgroup STD Standard deviation  
+ *  
+ * Calculates the standard deviation of the elements in the input vector.   
+ * The underlying algorithm is:  
+ * 
+ * <pre>  
+ * 	Result = sqrt((sumOfSquares - sum<sup>2</sup> / blockSize) / (blockSize - 1)) 
+ * 
+ *	   where, sumOfSquares = pSrc[0] * pSrc[0] + pSrc[1] * pSrc[1] + ... + pSrc[blockSize-1] * pSrc[blockSize-1] 
+ * 
+ *	                   sum = pSrc[0] + pSrc[1] + pSrc[2] + ... + pSrc[blockSize-1] 
+ * </pre> 
+ *  
+ * There are separate functions for floating point, Q31, and Q15 data types.  
+ */ 
+ 
+/**  
+ * @addtogroup STD  
+ * @{  
+ */ 
+ 
+ 
+/**
+ * @brief Standard deviation of the elements of a floating-point vector.
+ * @param[in]       *pSrc points to the input vector
+ * @param[in]       blockSize length of the input vector
+ * @param[out]      *pResult standard deviation value returned here
+ * @return none.
+ *
+ * @details
+ * The sum of the samples and the sum of their squares are gathered in a
+ * single pass over the input.  The result is
+ * sqrt(sumOfSquares / (blockSize - 1) - mean * mean * blockSize / (blockSize - 1)),
+ * evaluated by arm_sqrt_f32().
+ *
+ * \par
+ * The formula divides by (blockSize - 1), so it is undefined for fewer than
+ * two samples; for blockSize of 0 or 1 the function stores 0 in *pResult.
+ */
+
+
+void arm_std_f32(
+  float32_t * pSrc,
+  uint32_t blockSize,
+  float32_t * pResult)
+{
+  float32_t sumOfSquares = 0.0f;                   /* Accumulator for the squared samples */
+  float32_t sum = 0.0f;                            /* Accumulator for the samples */
+  float32_t meanOfSquares, mean, in, squareOfMean;
+  uint32_t blkCnt;                                 /* loop counter */
+
+  /* Fewer than two samples: (blockSize - 1) would be a zero divisor */
+  if(blockSize <= 1u)
+  {
+    *pResult = 0.0f;
+    return;
+  }
+
+  /*loop Unrolling */
+  blkCnt = blockSize >> 2u;
+
+  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
+   ** a second loop below computes the remaining 1 to 3 samples. */
+  while(blkCnt > 0u)
+  {
+    /* sumOfSquares = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
+    /* sum          = A[0] + A[1] + ... + A[blockSize-1] */
+    in = *pSrc++;
+    sumOfSquares += in * in;
+    sum += in;
+    in = *pSrc++;
+    sumOfSquares += in * in;
+    sum += in;
+    in = *pSrc++;
+    sumOfSquares += in * in;
+    sum += in;
+    in = *pSrc++;
+    sumOfSquares += in * in;
+    sum += in;
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
+   ** No loop unrolling is used. */
+  blkCnt = blockSize % 0x4u;
+
+  while(blkCnt > 0u)
+  {
+    in = *pSrc++;
+    sumOfSquares += in * in;
+    sum += in;
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* Sum of squares scaled by 1 / (blockSize - 1) */
+  meanOfSquares = sumOfSquares / ((float32_t) blockSize - 1.0f);
+
+  /* Compute mean of all input values */
+  mean = sum / (float32_t) blockSize;
+
+  /* Square of the mean, scaled by blockSize / (blockSize - 1) */
+  squareOfMean = (mean * mean) * (((float32_t) blockSize) /
+                                  ((float32_t) blockSize - 1.0f));
+
+  /* Compute standard deviation and then store the result to the destination */
+  arm_sqrt_f32((meanOfSquares - squareOfMean), pResult);
+
+}
+ 
+/**  
+ * @} end of STD group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/StatisticsFunctions/arm_std_q15.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,171 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_std_q15.c  
+*  
+* Description:	Standard deviation of an array of Q15 type.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupStats  
+ */ 
+ 
+/**  
+ * @addtogroup STD  
+ * @{  
+ */ 
+ 
+/**
+ * @brief Standard deviation of the elements of a Q15 vector.
+ * @param[in]       *pSrc points to the input vector
+ * @param[in]       blockSize length of the input vector
+ * @param[out]      *pResult standard deviation value returned here
+ * @return none.
+ *
+ * @details
+ * <b>Scaling and Overflow Behavior:</b>
+ *
+ * \par
+ * The function is implemented using 64-bit internal accumulators.
+ * The input is represented in 1.15 format.
+ * Intermediate multiplication yields a 2.30 format, and this
+ * result is added without saturation to a 64-bit accumulator in 34.30 format.
+ * The plain sum of the samples is kept in a second 64-bit accumulator.
+ * The variance (sumOfSquares - sum * sum / blockSize) / (blockSize - 1) is
+ * evaluated entirely in 64-bit arithmetic in 2.30 format; only the final
+ * variance is truncated to 1.15 by discarding the lower 15 bits and saturated
+ * to 16 bits before the square root is taken.
+ *
+ * \par
+ * sum * sum / blockSize is evaluated as q * sum + (r * sum) / blockSize with
+ * q = sum / blockSize and r = sum % blockSize, which keeps every intermediate
+ * product inside 64 bits for block sizes below 2^24.
+ *
+ * \par
+ * The formula divides by (blockSize - 1), so it is undefined for fewer than
+ * two samples; for blockSize of 0 or 1 the function stores 0 in *pResult.
+ */
+
+void arm_std_q15(
+  q15_t * pSrc,
+  uint32_t blockSize,
+  q15_t * pResult)
+{
+  q63_t sumOfSquares = 0;                        /* Sum of squared samples, 34.30 format */
+  q63_t sum = 0;                                 /* Sum of samples, 15 fractional bits */
+  q63_t count;                                   /* blockSize as a signed 64-bit value */
+  q63_t squareOfSum;                             /* sum * sum / blockSize, 30 fractional bits */
+  q63_t variance;                                /* Variance, 30 fractional bits */
+  q31_t in;                                      /* two packed 1.15 input samples */
+  q15_t in1;                                     /* single input sample / final variance */
+  uint32_t blkCnt;                               /* loop counter */
+  q15_t *pIn;                                    /* Start of the input, kept for the second pass */
+
+  /* Fewer than two samples: (blockSize - 1) would be a zero divisor */
+  if(blockSize <= 1u)
+  {
+    *pResult = 0;
+    return;
+  }
+
+  pIn = pSrc;
+
+  /*loop Unrolling */
+  blkCnt = blockSize >> 2u;
+
+  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
+   ** a second loop below computes the remaining 1 to 3 samples. */
+  while(blkCnt > 0u)
+  {
+    /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1])  */
+    /* Each 32-bit read holds two samples; __SMLALD squares both halves and accumulates. */
+    in = *__SIMD32(pSrc)++;
+    sumOfSquares = __SMLALD(in, in, sumOfSquares);
+    in = *__SIMD32(pSrc)++;
+    sumOfSquares = __SMLALD(in, in, sumOfSquares);
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
+   ** No loop unrolling is used. */
+  blkCnt = blockSize % 0x4u;
+
+  while(blkCnt > 0u)
+  {
+    /* A plain multiply is used for a single sample: passing a sign-extended
+     * scalar to the dual-halfword __SMLALD would also square its upper
+     * halfword (0xFFFF for negative samples) and add a spurious 1. */
+    in1 = *pSrc++;
+    sumOfSquares += (q63_t) ((q31_t) in1 * in1);
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* Second pass: plain sum of the samples */
+  pSrc = pIn;
+
+  /*loop Unrolling */
+  blkCnt = blockSize >> 2u;
+
+  while(blkCnt > 0u)
+  {
+    /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
+    sum += *pSrc++;
+    sum += *pSrc++;
+    sum += *pSrc++;
+    sum += *pSrc++;
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  blkCnt = blockSize % 0x4u;
+
+  while(blkCnt > 0u)
+  {
+    /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
+    sum += *pSrc++;
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  count = (q63_t) blockSize;
+
+  /* sum * sum / blockSize, split into quotient and remainder parts so the
+   * intermediate products stay within 64 bits */
+  squareOfSum = (sum / count) * sum + ((sum % count) * sum) / count;
+
+  /* Variance with 30 fractional bits; the difference cannot be negative
+   * because squareOfSum is rounded down */
+  variance = (sumOfSquares - squareOfSum) / (count - 1);
+
+  /* Truncate to 1.15 and saturate to 16 bits */
+  in1 = (q15_t) __SSAT((q31_t) (variance >> 15), 16u);
+
+  /* Compute standard deviation and store the result to the destination */
+  arm_sqrt_q15(in1, pResult);
+}
+ 
+/**  
+ * @} end of STD group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/StatisticsFunctions/arm_std_q31.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,176 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_std_q31.c  
+*  
+* Description:	Standard deviation of an array of Q31 type.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupStats  
+ */ 
+ 
+/**  
+ * @addtogroup STD  
+ * @{  
+ */ 
+ 
+ 
+/**
+ * @brief Standard deviation of the elements of a Q31 vector.
+ * @param[in]       *pSrc points to the input vector
+ * @param[in]       blockSize length of the input vector
+ * @param[out]      *pResult standard deviation value returned here
+ * @return none.
+ * @details
+ * <b>Scaling and Overflow Behavior:</b>
+ *
+ *\par
+ * The function is implemented using internal 64-bit accumulators.
+ * The input is represented in 1.31 format, and intermediate multiplication
+ * yields a 2.62 format.
+ * The accumulator maintains full precision of the intermediate multiplication results,
+ * but provides only a single guard bit.
+ * There is no saturation on intermediate additions.
+ * If the accumulator overflows it wraps around and distorts the result.
+ * In order to avoid overflows completely the input signal must be scaled down by
+ * log2(blockSize) bits, as a total of blockSize additions are performed internally.
+ * The variance (sumOfSquares - sum * sum / blockSize) / (blockSize - 1) is
+ * evaluated in 64-bit integer arithmetic in 2.62 format, right shifted by
+ * 31 bits to yield a 1.31 format value and clamped to the largest positive
+ * 1.31 value before the square root is taken.
+ *
+ *\par
+ * sum * sum / blockSize is evaluated as q * sum + (r * sum) / blockSize with
+ * q = sum / blockSize and r = sum % blockSize.  With the input scaled as
+ * described above, every intermediate product stays inside 64 bits.
+ *
+ *\par
+ * The formula divides by (blockSize - 1), so it is undefined for fewer than
+ * two samples; for blockSize of 0 or 1 the function stores 0 in *pResult.
+ */
+
+
+void arm_std_q31(
+  q31_t * pSrc,
+  uint32_t blockSize,
+  q31_t * pResult)
+{
+  q63_t sumOfSquares = 0;                        /* Sum of squared samples, 2.62 format */
+  q63_t sum = 0;                                 /* Sum of samples, 31 fractional bits */
+  q63_t count;                                   /* blockSize as a signed 64-bit value */
+  q63_t squareOfSum;                             /* sum * sum / blockSize, 62 fractional bits */
+  q63_t variance;                                /* Variance, reduced to 31 fractional bits */
+  q31_t in;                                      /* input value */
+  uint32_t blkCnt;                               /* loop counter */
+
+  /* Fewer than two samples: (blockSize - 1) would be a zero divisor */
+  if(blockSize <= 1u)
+  {
+    *pResult = 0;
+    return;
+  }
+
+  /*loop Unrolling */
+  blkCnt = blockSize >> 2u;
+
+  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
+   ** a second loop below computes the remaining 1 to 3 samples. */
+  while(blkCnt > 0u)
+  {
+    /* sumOfSquares = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
+    /* sum          = A[0] + A[1] + ... + A[blockSize-1] */
+    in = *pSrc++;
+    sumOfSquares += ((q63_t) (in) * (in));
+    sum += in;
+    in = *pSrc++;
+    sumOfSquares += ((q63_t) (in) * (in));
+    sum += in;
+    in = *pSrc++;
+    sumOfSquares += ((q63_t) (in) * (in));
+    sum += in;
+    in = *pSrc++;
+    sumOfSquares += ((q63_t) (in) * (in));
+    sum += in;
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
+   ** No loop unrolling is used. */
+  blkCnt = blockSize % 0x4u;
+
+  while(blkCnt > 0u)
+  {
+    in = *pSrc++;
+    sumOfSquares += ((q63_t) (in) * (in));
+    sum += in;
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  count = (q63_t) blockSize;
+
+  /* sum * sum / blockSize, split into quotient and remainder parts so the
+   * full 64-bit sum is used without forming sum * sum directly */
+  squareOfSum = (sum / count) * sum + ((sum % count) * sum) / count;
+
+  /* Variance in 2.62, then converted to 1.31 by 31 right shifts */
+  variance = ((sumOfSquares - squareOfSum) / (count - 1)) >> 31;
+
+  /* The sample variance can reach or exceed 1.0; clamp it to the largest
+   * positive 1.31 value so the cast below cannot wrap */
+  if(variance > (q63_t) 0x7FFFFFFF)
+  {
+    variance = (q63_t) 0x7FFFFFFF;
+  }
+
+  /* Compute standard deviation and then store the result to the destination */
+  arm_sqrt_q31((q31_t) variance, pResult);
+
+}
+ 
+/**  
+ * @} end of STD group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/StatisticsFunctions/arm_var_f32.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,173 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_var_f32.c  
+*  
+* Description:	Variance of an array of F32 type.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+* ---------------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupStats  
+ */ 
+ 
+/**  
+ * @defgroup variance  Variance  
+ *  
+ * Calculates the variance of the elements in the input vector.  
+ * The underlying algorithm is:  
+ *  
+ * <pre>  
+ * 	Result = (sumOfSquares - sum<sup>2</sup> / blockSize) / (blockSize - 1) 
+ * 
+ *	   where, sumOfSquares = pSrc[0] * pSrc[0] + pSrc[1] * pSrc[1] + ... + pSrc[blockSize-1] * pSrc[blockSize-1] 
+ * 
+ *	                   sum = pSrc[0] + pSrc[1] + pSrc[2] + ... + pSrc[blockSize-1] 
+ * </pre> 
+ *  
+ * There are separate functions for floating point, Q31, and Q15 data types.  
+ */ 
+ 
+/**  
+ * @addtogroup variance  
+ * @{  
+ */ 
+ 
+ 
+/**
+ * @brief Variance of the elements of a floating-point vector.
+ * @param[in]       *pSrc points to the input vector
+ * @param[in]       blockSize length of the input vector
+ * @param[out]      *pResult variance value returned here
+ * @return none.
+ *
+ * @details
+ * The sum of the samples and the sum of their squares are gathered in a
+ * single pass over the input.  The result is
+ * sumOfSquares / (blockSize - 1) - mean * mean * blockSize / (blockSize - 1).
+ *
+ * \par
+ * The formula divides by (blockSize - 1), so it is undefined for fewer than
+ * two samples; for blockSize of 0 or 1 the function stores 0 in *pResult.
+ */
+
+
+void arm_var_f32(
+  float32_t * pSrc,
+  uint32_t blockSize,
+  float32_t * pResult)
+{
+  float32_t sumOfSquares = 0.0f;                 /* Accumulator for the squared samples */
+  float32_t sum = 0.0f;                          /* Accumulator for the samples */
+  float32_t meanOfSquares, mean, in, squareOfMean;      /* Temporary variables */
+  uint32_t blkCnt;                               /* loop counter */
+
+  /* Fewer than two samples: (blockSize - 1) would be a zero divisor */
+  if(blockSize <= 1u)
+  {
+    *pResult = 0.0f;
+    return;
+  }
+
+  /*loop Unrolling */
+  blkCnt = blockSize >> 2u;
+
+  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
+   ** a second loop below computes the remaining 1 to 3 samples. */
+  while(blkCnt > 0u)
+  {
+    /* sumOfSquares = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
+    /* sum          = A[0] + A[1] + ... + A[blockSize-1] */
+    in = *pSrc++;
+    sumOfSquares += in * in;
+    sum += in;
+    in = *pSrc++;
+    sumOfSquares += in * in;
+    sum += in;
+    in = *pSrc++;
+    sumOfSquares += in * in;
+    sum += in;
+    in = *pSrc++;
+    sumOfSquares += in * in;
+    sum += in;
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
+   ** No loop unrolling is used. */
+  blkCnt = blockSize % 0x4u;
+
+  while(blkCnt > 0u)
+  {
+    in = *pSrc++;
+    sumOfSquares += in * in;
+    sum += in;
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* Sum of squares scaled by 1 / (blockSize - 1) */
+  meanOfSquares = sumOfSquares / ((float32_t) blockSize - 1.0f);
+
+  /* Compute mean of all input values */
+  mean = sum / (float32_t) blockSize;
+
+  /* Square of the mean, scaled by blockSize / (blockSize - 1) */
+  squareOfMean = (mean * mean) * (((float32_t) blockSize) /
+                                  ((float32_t) blockSize - 1.0f));
+
+  /* Compute variance and then store the result to the destination */
+  *pResult = meanOfSquares - squareOfMean;
+
+}
+ 
+/**  
+ * @} end of variance group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/StatisticsFunctions/arm_var_q15.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,172 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_var_q15.c  
+*  
+* Description:	Variance of an array of Q15 type.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupStats  
+ */ 
+ 
+/**  
+ * @addtogroup variance  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief Variance of the elements of a Q15 vector.  
+ * @param[in]       *pSrc points to the input vector  
+ * @param[in]       blockSize length of the input vector  
+ * @param[out]      *pResult variance value returned here  
+ * @return none.  
+ *  
+ * @details  
+ * <b>Scaling and Overflow Behavior:</b>  
+ *  
+ * \par  
+ * The function is implemented using a 64-bit internal accumulator.  
+ * The input is represented in 1.15 format. 
+ * Intermediate multiplication yields a 2.30 format, and this  
+ * result is added without saturation to a 64-bit accumulator in 34.30 format.  
+ * With 33 guard bits in the accumulator, there is no risk of overflow, and the  
+ * full precision of the intermediate multiplication is preserved.  
+ * Finally, the 34.30 result is truncated to 34.15 format by discarding the lower   
+ * 15 bits, and then saturated to yield a result in 1.15 format.  
+ *  
+ */ 
+ 
+ 
+void arm_var_q15(
+  q15_t * pSrc,
+  uint32_t blockSize,
+  q31_t * pResult)
+{
+  /*
+   * Unbiased variance of a Q15 vector:
+   *
+   *   var = sum(x^2) / (N - 1)  -  (sum(x))^2 / (N * (N - 1))
+   *
+   * Both sums are kept in 64-bit accumulators and the two divisions are
+   * done in exact integer arithmetic, so no precision is lost before the
+   * final conversion.  The result is written in Q15 scaling, saturated to
+   * the 1.15 range, into the q31_t pointed to by pResult.
+   *
+   * For blockSize of 0 or 1 the estimator is undefined (division by zero);
+   * 0 is returned in that case.
+   *
+   * The square of the sample sum is formed in 64 bits, which is free of
+   * overflow for block sizes up to 65535 samples.
+   */
+  q63_t sumOfSquares = 0;                        /* Sum of squared samples, 34.30 format */
+  q63_t sum = 0;                                 /* Sum of the samples, Q15 scaling */
+  q63_t meanOfSquares, squareOfMean;             /* The two variance terms, Q30 scaling */
+  q63_t variance;                                /* Variance in Q15 scaling, before saturation */
+  q31_t in;                                      /* Two Q15 samples packed into one word */
+  q15_t in1;                                     /* Single Q15 sample */
+  uint32_t blkCnt;                               /* loop counter */
+  q15_t *pIn;                                    /* Start of the input vector */
+
+  /* Fewer than two samples: (blockSize - 1) would be zero or wrap around */
+  if(blockSize <= 1u)
+  {
+    *pResult = 0;
+    return;
+  }
+
+  pIn = pSrc;
+
+  /*loop Unrolling */
+  blkCnt = blockSize >> 2u;
+
+  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
+   ** a second loop below computes the remaining 1 to 3 samples. */
+  while(blkCnt > 0u)
+  {
+    /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1])  */
+    /* Each word read holds two samples; __SMLALD squares and accumulates both. */
+    in = *__SIMD32(pSrc)++;
+    sumOfSquares = __SMLALD(in, in, sumOfSquares);
+    in = *__SIMD32(pSrc)++;
+    sumOfSquares = __SMLALD(in, in, sumOfSquares);
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
+   ** No loop unrolling is used. */
+  blkCnt = blockSize % 0x4u;
+
+  while(blkCnt > 0u)
+  {
+    /* A plain multiply is used for single samples: feeding a sign-extended
+     * halfword to the dual multiply-accumulate would also square the
+     * sign-extension bits held in the upper halfword. */
+    in1 = *pSrc++;
+    sumOfSquares += (q63_t) in1 * in1;
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* Second pass over the same data: plain sum of the samples */
+  pSrc = pIn;
+
+  /*loop Unrolling */
+  blkCnt = blockSize >> 2u;
+
+  while(blkCnt > 0u)
+  {
+    /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
+    sum += *pSrc++;
+    sum += *pSrc++;
+    sum += *pSrc++;
+    sum += *pSrc++;
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  blkCnt = blockSize % 0x4u;
+
+  while(blkCnt > 0u)
+  {
+    /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
+    sum += *pSrc++;
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* Mean of squares, corrected for the unbiased estimator (Q30) */
+  meanOfSquares = sumOfSquares / (q63_t) (blockSize - 1u);
+
+  /* Square of mean scaled by N / (N - 1), i.e. sum^2 / (N * (N - 1)) (Q30).
+   * The divisor is formed in 64 bits so that it cannot wrap. */
+  squareOfMean = (sum * sum) / ((q63_t) blockSize * (q63_t) (blockSize - 1u));
+
+  /* Difference is never negative; drop 15 fractional bits to reach Q15 */
+  variance = (meanOfSquares - squareOfMean) >> 15;
+
+  /* Saturate to the 1.15 range and store the result to the destination */
+  *pResult = __SSAT((q31_t) variance, 16u);
+
+}
+ 
+/**  
+ * @} end of variance group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/StatisticsFunctions/arm_var_q31.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,176 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_var_q31.c  
+*  
+* Description:	Variance of an array of Q31 type.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupStats  
+ */ 
+ 
+/**  
+ * @addtogroup variance  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief Variance of the elements of a Q31 vector.  
+ * @param[in]       *pSrc points to the input vector  
+ * @param[in]       blockSize length of the input vector  
+ * @param[out]      *pResult variance value returned here  
+ * @return none.  
+ *  
+ * @details  
+ * <b>Scaling and Overflow Behavior:</b>  
+ *  
+ *\par  
+ * The function is implemented using an internal 64-bit accumulator.  
+ * The input is represented in 1.31 format, and intermediate multiplication  
+ * yields a 2.62 format.  
+ * The accumulator maintains full precision of the intermediate multiplication results,   
+ * but provides only a single guard bit.  
+ * There is no saturation on intermediate additions.  
+ * If the accumulator overflows it wraps around and distorts the result.  
+ * In order to avoid overflows completely the input signal must be scaled down by   
+ * log2(blockSize) bits, as a total of blockSize additions are performed internally.   
+ * Finally, the 2.62 accumulator is right shifted by 31 bits to yield a 1.31 format value.  
+ *  
+ */ 
+ 
+ 
+void arm_var_q31(
+  q31_t * pSrc,
+  uint32_t blockSize,
+  q63_t * pResult)
+{
+  /*
+   * Unbiased variance of a Q31 vector:
+   *
+   *   var = sum(x^2) / (N - 1)  -  mean^2 * N / (N - 1),   mean = sum(x) / N
+   *
+   * The sum of squares is accumulated in 2.62 format and the sum of the
+   * samples in a separate 64-bit accumulator.  The mean is obtained by a
+   * real 64-bit division, so it always fits in 32 bits; the sample sum is
+   * never truncated.  The result is written in Q31 scaling into the q63_t
+   * pointed to by pResult.
+   *
+   * For blockSize of 0 or 1 the estimator is undefined (division by zero);
+   * 0 is returned in that case.
+   *
+   * As before, the 2.62 accumulator has a single guard bit: the input must
+   * be scaled down by log2(blockSize) bits to rule out overflow.
+   */
+  q63_t sumOfSquares = 0;                        /* Sum of squared samples, 2.62 format */
+  q63_t sum = 0;                                 /* Sum of the samples, Q31 scaling */
+  q63_t meanOfSquares, squareOfMean;             /* The two variance terms, Q31 scaling */
+  q31_t mean;                                    /* Mean of the samples, Q31 */
+  q31_t in;                                      /* Input variable */
+  uint32_t blkCnt;                               /* loop counter */
+
+  /* Fewer than two samples: (blockSize - 1) would be zero or wrap around */
+  if(blockSize <= 1u)
+  {
+    *pResult = 0;
+    return;
+  }
+
+  /*loop Unrolling */
+  blkCnt = blockSize >> 2u;
+
+  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
+   ** a second loop below computes the remaining 1 to 3 samples. */
+  while(blkCnt > 0u)
+  {
+    /* Accumulate A[i] * A[i] and A[i] in a single pass over the data */
+    in = *pSrc++;
+    sumOfSquares += ((q63_t) (in) * (in));
+    sum += in;
+    in = *pSrc++;
+    sumOfSquares += ((q63_t) (in) * (in));
+    sum += in;
+    in = *pSrc++;
+    sumOfSquares += ((q63_t) (in) * (in));
+    sum += in;
+    in = *pSrc++;
+    sumOfSquares += ((q63_t) (in) * (in));
+    sum += in;
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
+   ** No loop unrolling is used. */
+  blkCnt = blockSize % 0x4u;
+
+  while(blkCnt > 0u)
+  {
+    in = *pSrc++;
+    sumOfSquares += ((q63_t) (in) * (in));
+    sum += in;
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* Mean of squares, corrected for the unbiased estimator: 2.62 -> Q31 */
+  meanOfSquares = (sumOfSquares / (q63_t) (blockSize - 1u)) >> 31;
+
+  /* Mean of the samples; the quotient is bounded by the Q31 input range */
+  mean = (q31_t) (sum / (q63_t) blockSize);
+
+  /* Square of mean (Q31), then scaled by N / (N - 1) */
+  squareOfMean = ((q63_t) mean * mean) >> 31;
+  squareOfMean = (squareOfMean * (q63_t) blockSize) / (q63_t) (blockSize - 1u);
+
+  /* Compute variance and then store the result to the destination */
+  *pResult = meanOfSquares - squareOfMean;
+
+}
+ 
+/**  
+ * @} end of variance group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/SupportFunctions/arm_copy_f32.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,105 @@
+/* ----------------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_copy_f32.c  
+*  
+* Description:	Processing function for the floating-point copy  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* ---------------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupSupport  
+ */ 
+ 
+/**  
+ * @defgroup copy Vector Copy  
+ *  
+ * Copies sample by sample from source vector to destination vector.  
+ *  
+ * <pre>  
+ * 	pDst[n] = pSrc[n];   0 <= n < blockSize.  
+ * </pre>  
+ * 
+ * There are separate functions for floating point, Q31, Q15, and Q7 data types.   
+ */ 
+ 
+/**  
+ * @addtogroup copy  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief Copies the elements of a floating-point vector.   
+ * @param[in]       *pSrc points to input vector  
+ * @param[out]      *pDst points to output vector  
+ * @param[in]       blockSize length of the input vector 
+ * @return none.  
+ *  
+ */ 
+ 
+ 
+void arm_copy_f32(
+  float32_t * pSrc,
+  float32_t * pDst,
+  uint32_t blockSize)
+{
+  uint32_t quads = blockSize >> 2u;              /* number of complete groups of 4 samples */
+  uint32_t tail = blockSize & 0x3u;              /* samples left over after the groups */
+
+  /* Bulk part: four samples per iteration, copied in ascending order */
+  for(; quads != 0u; quads--)
+  {
+    pDst[0] = pSrc[0];
+    pDst[1] = pSrc[1];
+    pDst[2] = pSrc[2];
+    pDst[3] = pSrc[3];
+
+    pSrc += 4;
+    pDst += 4;
+  }
+
+  /* Remainder: 0 to 3 samples, one at a time */
+  for(; tail != 0u; tail--)
+  {
+    *pDst++ = *pSrc++;
+  }
+}
+ 
+/**  
+ * @} end of copy group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/SupportFunctions/arm_copy_q15.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,97 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_copy_q15.c  
+*  
+* Description:	Processing function for the Q15 copy  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupSupport  
+ */ 
+ 
+/**  
+ * @addtogroup copy  
+ * @{  
+ */ 
+/**  
+ * @brief Copies the elements of a Q15 vector.   
+ * @param[in]       *pSrc points to input vector  
+ * @param[out]      *pDst points to output vector  
+ * @param[in]       blockSize length of the input vector 
+ * @return none.  
+ *  
+ */ 
+ 
+void arm_copy_q15(
+  q15_t * pSrc,
+  q15_t * pDst,
+  uint32_t blockSize)
+{
+  /*
+   * Copies blockSize Q15 samples from pSrc to pDst.
+   *
+   * The bulk of the vector is moved one 32-bit word (two samples) at a
+   * time.  The word is copied as read, without unpacking and re-packing
+   * the two halfwords, so the order of the samples in memory does not
+   * depend on the byte order of the target.  This is the same scheme
+   * arm_copy_q7 uses for its word copies.
+   */
+  uint32_t blkCnt;                               /* loop counter */
+
+  /*loop Unrolling */
+  blkCnt = blockSize >> 2u;
+
+  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
+   ** a second loop below computes the remaining 1 to 3 samples. */
+  while(blkCnt > 0u)
+  {
+    /* C = A */
+    /* Two word copies move four samples */
+    *__SIMD32(pDst)++ = *__SIMD32(pSrc)++;
+    *__SIMD32(pDst)++ = *__SIMD32(pSrc)++;
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
+   ** No loop unrolling is used. */
+  blkCnt = blockSize % 0x4u;
+
+  while(blkCnt > 0u)
+  {
+    /* C = A */
+    /* Copy and then store the value in the destination buffer */
+    *pDst++ = *pSrc++;
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+}
+ 
+/**  
+ * @} end of copy group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/SupportFunctions/arm_copy_q31.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,93 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_copy_q31.c  
+*  
+* Description:	Processing function for the Q31 copy  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupSupport  
+ */ 
+ 
+/**  
+ * @addtogroup copy  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief Copies the elements of a Q31 vector.   
+ * @param[in]       *pSrc points to input vector  
+ * @param[out]      *pDst points to output vector  
+ * @param[in]       blockSize length of the input vector 
+ * @return none.  
+ *  
+ */ 
+ 
+void arm_copy_q31(
+  q31_t * pSrc,
+  q31_t * pDst,
+  uint32_t blockSize)
+{
+  uint32_t groups;                               /* remaining groups of 4 samples */
+  uint32_t leftover;                             /* remaining single samples */
+
+  /* Unrolled section: every pass transfers four consecutive samples */
+  for(groups = blockSize >> 2u; groups > 0u; groups--)
+  {
+    pDst[0] = pSrc[0];
+    pDst[1] = pSrc[1];
+    pDst[2] = pSrc[2];
+    pDst[3] = pSrc[3];
+
+    pDst += 4;
+    pSrc += 4;
+  }
+
+  /* Whatever does not fill a group of four (0 to 3 samples) */
+  for(leftover = blockSize & 0x3u; leftover > 0u; leftover--)
+  {
+    *pDst++ = *pSrc++;
+  }
+}
+ 
+/**  
+ * @} end of copy group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/SupportFunctions/arm_copy_q7.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,90 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_copy_q7.c  
+*  
+* Description:	Processing function for the Q7 copy  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupSupport  
+ */ 
+ 
+/**  
+ * @addtogroup copy  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief Copies the elements of a Q7 vector.  
+ * @param[in]       *pSrc points to input vector  
+ * @param[out]      *pDst points to output vector  
+ * @param[in]       blockSize length of the input vector 
+ * @return none.  
+ *  
+ */ 
+ 
+void arm_copy_q7(
+  q7_t * pSrc,
+  q7_t * pDst,
+  uint32_t blockSize)
+{
+  uint32_t words = blockSize >> 2u;              /* number of 4-sample words to move */
+  uint32_t bytes = blockSize & 0x3u;             /* trailing samples moved one by one */
+
+  /* Word-wise section: one 32-bit transfer carries four Q7 samples */
+  for(; words != 0u; words--)
+  {
+    *__SIMD32(pDst)++ = *__SIMD32(pSrc)++;
+  }
+
+  /* Trailing 0 to 3 samples */
+  for(; bytes != 0u; bytes--)
+  {
+    *pDst++ = *pSrc++;
+  }
+}
+ 
+/**  
+ * @} end of copy group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/SupportFunctions/arm_fill_f32.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,105 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_fill_f32.c  
+*  
+* Description:	Processing function for the floating point Fill  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* ---------------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupSupport  
+ */ 
+ 
+/**  
+ * @defgroup Fill Vector Fill  
+ *  
+ * Fills the destination vector with a constant value.  
+ *  
+ * <pre>  
+ * 	pDst[n] = value;   0 <= n < blockSize.  
+ * </pre>  
+ * 
+ * There are separate functions for floating point, Q31, Q15, and Q7 data types.   
+ */ 
+ 
+/**  
+ * @addtogroup Fill  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief Fills a constant value into a floating-point vector.   
+ * @param[in]       value input value to be filled 
+ * @param[out]      *pDst points to output vector  
+ * @param[in]       blockSize length of the output vector 
+ * @return none.  
+ *  
+ */ 
+ 
+ 
+void arm_fill_f32(
+  float32_t value,
+  float32_t * pDst,
+  uint32_t blockSize)
+{
+  uint32_t quads = blockSize >> 2u;              /* number of complete groups of 4 outputs */
+  uint32_t tail = blockSize & 0x3u;              /* outputs left over after the groups */
+
+  /* Bulk part: four outputs written per iteration */
+  for(; quads != 0u; quads--)
+  {
+    pDst[0] = value;
+    pDst[1] = value;
+    pDst[2] = value;
+    pDst[3] = value;
+
+    pDst += 4;
+  }
+
+  /* Remainder: 0 to 3 outputs, one at a time */
+  for(; tail != 0u; tail--)
+  {
+    *pDst++ = value;
+  }
+}
+ 
+/**  
+ * @} end of Fill group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/SupportFunctions/arm_fill_q15.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,95 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_fill_q15.c  
+*  
+* Description:	Processing function for the Q15 fill  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupSupport  
+ */ 
+ 
+/**  
+ * @addtogroup Fill  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief Fills a constant value into a Q15 vector.  
+ * @param[in]       value input value to be filled 
+ * @param[out]      *pDst points to output vector  
+ * @param[in]       blockSize length of the output vector 
+ * @return none.  
+ *  
+ */ 
+ 
+void arm_fill_q15(
+  q15_t value,
+  q15_t * pDst,
+  uint32_t blockSize)
+{
+  uint32_t groups;                               /* remaining groups of 4 outputs */
+  uint32_t leftover;                             /* remaining single outputs */
+  q31_t pair;                                    /* the fill value in both halfwords */
+
+  /* Build the 32-bit pattern once so two outputs can be written per store */
+  pair = __PKHBT(value, value, 16u);
+
+  /* Unrolled section: two word stores produce four outputs */
+  for(groups = blockSize >> 2u; groups > 0u; groups--)
+  {
+    *__SIMD32(pDst)++ = pair;
+    *__SIMD32(pDst)++ = pair;
+  }
+
+  /* Whatever does not fill a group of four (0 to 3 outputs) */
+  for(leftover = blockSize & 0x3u; leftover > 0u; leftover--)
+  {
+    *pDst++ = value;
+  }
+}
+ 
+/**  
+ * @} end of Fill group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/SupportFunctions/arm_fill_q31.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,93 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_fill_q31.c  
+*  
+* Description:	Processing function for the Q31 fill  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupSupport  
+ */ 
+ 
+/**  
+ * @addtogroup Fill  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief Fills a constant value into a Q31 vector.  
+ * @param[in]       value input value to be filled 
+ * @param[out]      *pDst points to output vector  
+ * @param[in]       blockSize length of the output vector 
+ * @return none.  
+ *  
+ */ 
+ 
+void arm_fill_q31(
+  q31_t value,
+  q31_t * pDst,
+  uint32_t blockSize)
+{
+  uint32_t groups;                               /* remaining groups of 4 outputs */
+  uint32_t leftover;                             /* remaining single outputs */
+
+  /* Unrolled section: every pass writes four consecutive outputs */
+  for(groups = blockSize >> 2u; groups > 0u; groups--)
+  {
+    pDst[0] = value;
+    pDst[1] = value;
+    pDst[2] = value;
+    pDst[3] = value;
+
+    pDst += 4;
+  }
+
+  /* Whatever does not fill a group of four (0 to 3 outputs) */
+  for(leftover = blockSize & 0x3u; leftover > 0u; leftover--)
+  {
+    *pDst++ = value;
+  }
+}
+ 
+/**  
+ * @} end of Fill group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/SupportFunctions/arm_fill_q7.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,93 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_fill_q7.c  
+*  
+* Description:	Processing function for the Q7 fill  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupSupport  
+ */ 
+ 
+/**  
+ * @addtogroup Fill  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief Fills a constant value into a Q7 vector.  
+ * @param[in]       value input value to be filled 
+ * @param[out]      *pDst points to output vector  
+ * @param[in]       blockSize length of the output vector 
+ * @return none.  
+ *  
+ */ 
+ 
+void arm_fill_q7(
+  q7_t value,
+  q7_t * pDst,
+  uint32_t blockSize)
+{
+  uint32_t words = blockSize >> 2u;              /* number of 4-output words to write */
+  uint32_t bytes = blockSize & 0x3u;             /* trailing outputs written one by one */
+  q31_t quad;                                    /* the fill value replicated four times */
+
+  /* Build the 32-bit pattern once so four outputs can be written per store */
+  quad = __PACKq7(value, value, value, value);
+
+  /* Word-wise section: one 32-bit store produces four Q7 outputs */
+  for(; words != 0u; words--)
+  {
+    *__SIMD32(pDst)++ = quad;
+  }
+
+  /* Trailing 0 to 3 outputs */
+  for(; bytes != 0u; bytes--)
+  {
+    *pDst++ = value;
+  }
+}
+ 
+/**  
+ * @} end of Fill group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/SupportFunctions/arm_float_to_q15.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,152 @@
+/* ----------------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_float_to_q15.c  
+*  
+* Description:	Processing function for the Conversion from float to Q15  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+* ---------------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupSupport  
+ */ 
+ 
+/**  
+ * @addtogroup float_to_x  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief Converts the elements of the floating-point vector to Q15 vector.  
+ * @param[in]       *pSrc points to the floating-point input vector  
+ * @param[out]      *pDst points to the Q15 output vector 
+ * @param[in]       blockSize length of the input vector  
+ * @return none.  
+ *  
+ * \par Description:  
+ * \par 
+ * The equation used for the conversion process is:  
+ * <pre>  
+ * 	pDst[n] = (q15_t)(pSrc[n] * 32768);   0 <= n < blockSize.  
+ * </pre>  
+ * \par Scaling and Overflow Behavior:  
+ * \par  
+ * The function uses saturating arithmetic.  
+ * Results outside of the allowable Q15 range [0x8000 0x7FFF] will be saturated.  
+ * \note 
+ * In order to apply rounding, the library should be rebuilt with the ARM_MATH_ROUNDING macro
+ * defined in the preprocessor section of project options.
+ *  
+ */ 
+ 
+ 
+void arm_float_to_q15(
+  float32_t * pSrc,
+  q15_t * pDst,
+  uint32_t blockSize)
+{
+  float32_t *pIn = pSrc;                         /* read cursor over the input vector */
+  uint32_t blkCnt;                               /* loop counter, reused by both loops */
+
+#ifdef ARM_MATH_ROUNDING
+
+  float32_t in;                                  /* scaled sample awaiting rounding */
+
+#endif
+
+  /* Number of whole groups of four samples (blockSize / 4) */
+  blkCnt = blockSize >> 2u;
+
+  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
+   ** A second loop below computes the remaining 0 to 3 samples. */
+  while(blkCnt > 0u)
+  {
+
+#ifdef ARM_MATH_ROUNDING
+    /* C = A * 32768, rounded half away from zero, then saturated to 16 bits */
+    /* Single-precision constants (0.5f) keep the rounding step out of double arithmetic */
+    in = *pIn++;
+    in = (in * 32768.0f);
+    in += (in > 0.0f) ? 0.5f : -0.5f;
+    *pDst++ = (q15_t) (__SSAT((q31_t) (in), 16));
+
+    in = *pIn++;
+    in = (in * 32768.0f);
+    in += (in > 0.0f) ? 0.5f : -0.5f;
+    *pDst++ = (q15_t) (__SSAT((q31_t) (in), 16));
+
+    in = *pIn++;
+    in = (in * 32768.0f);
+    in += (in > 0.0f) ? 0.5f : -0.5f;
+    *pDst++ = (q15_t) (__SSAT((q31_t) (in), 16));
+
+    in = *pIn++;
+    in = (in * 32768.0f);
+    in += (in > 0.0f) ? 0.5f : -0.5f;
+    *pDst++ = (q15_t) (__SSAT((q31_t) (in), 16));
+
+#else
+
+    /* C = A * 32768, truncated toward zero by the integer cast */
+    /* convert from float to q15, saturate to 16 bits and store in the destination buffer */
+    *pDst++ = (q15_t) __SSAT((q31_t) (*pIn++ * 32768.0f), 16);
+    *pDst++ = (q15_t) __SSAT((q31_t) (*pIn++ * 32768.0f), 16);
+    *pDst++ = (q15_t) __SSAT((q31_t) (*pIn++ * 32768.0f), 16);
+    *pDst++ = (q15_t) __SSAT((q31_t) (*pIn++ * 32768.0f), 16);
+
+#endif
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
+   ** No loop unrolling is used. */
+  blkCnt = blockSize % 0x4u;
+
+  while(blkCnt > 0u)
+  {
+
+#ifdef ARM_MATH_ROUNDING
+    /* C = A * 32768, rounded half away from zero, then saturated to 16 bits */
+    /* Same float scale factor as the unrolled loop (was the integer literal 32768LL) */
+    in = *pIn++;
+    in = (in * 32768.0f);
+    in += (in > 0.0f) ? 0.5f : -0.5f;
+    *pDst++ = (q15_t) (__SSAT((q31_t) (in), 16));
+
+#else
+
+    /* C = A * 32768, truncated toward zero by the integer cast */
+    /* convert from float to q15, saturate to 16 bits and store in the destination buffer */
+    *pDst++ = (q15_t) __SSAT((q31_t) (*pIn++ * 32768.0f), 16);
+
+#endif
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+}
+ 
+/**  
+ * @} end of float_to_x group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/SupportFunctions/arm_float_to_q31.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,156 @@
+/* ----------------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_float_to_q31.c  
+*  
+* Description:	Processing function for the Conversion from float to Q31  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+* ---------------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupSupport  
+ */ 
+ 
+/**  
+ * @defgroup float_to_x  Convert 32-bit floating point value  
+ */ 
+ 
+/**  
+ * @addtogroup float_to_x  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief Converts the elements of the floating-point vector to Q31 vector.  
+ * @param[in]       *pSrc points to the floating-point input vector  
+ * @param[out]      *pDst points to the Q31 output vector 
+ * @param[in]       blockSize length of the input vector  
+ * @return none.  
+ *  
+ *\par Description:  
+ * \par 
+ * The equation used for the conversion process is:  
+ * 
+ * <pre>  
+ * 	pDst[n] = (q31_t)(pSrc[n] * 2147483648);   0 <= n < blockSize.  
+ * </pre>  
+ * <b>Scaling and Overflow Behavior:</b>  
+ * \par  
+ * The function uses saturating arithmetic.  
+ * Results outside of the allowable Q31 range[0x80000000 0x7FFFFFFF] will be saturated.  
+ * 
+ * \note In order to apply rounding, the library should be rebuilt with the ARM_MATH_ROUNDING macro
+ * defined in the preprocessor section of project options.
+ */ 
+ 
+ 
+void arm_float_to_q31(
+  float32_t * pSrc,
+  q31_t * pDst,
+  uint32_t blockSize)
+{
+  float32_t *pIn = pSrc;                         /* read cursor over the input vector */
+  uint32_t blkCnt;                               /* loop counter, reused by both loops */
+
+#ifdef ARM_MATH_ROUNDING
+
+  float32_t in;                                  /* scaled sample awaiting rounding */
+
+#endif
+
+
+  /* Number of whole groups of four samples (blockSize / 4) */
+  blkCnt = blockSize >> 2u;
+
+  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
+   ** A second loop below computes the remaining 0 to 3 samples. */
+  while(blkCnt > 0u)
+  {
+#ifdef ARM_MATH_ROUNDING
+    /* C = A * 2147483648, rounded half away from zero, then clipped to 32 bits */
+    /* Single-precision constants (0.5f) keep the rounding step out of double arithmetic */
+    in = *pIn++;
+    in = (in * 2147483648.0f);
+    in += (in > 0.0f) ? 0.5f : -0.5f;
+    *pDst++ = clip_q63_to_q31((q63_t) (in));
+
+    in = *pIn++;
+    in = (in * 2147483648.0f);
+    in += (in > 0.0f) ? 0.5f : -0.5f;
+    *pDst++ = clip_q63_to_q31((q63_t) (in));
+
+    in = *pIn++;
+    in = (in * 2147483648.0f);
+    in += (in > 0.0f) ? 0.5f : -0.5f;
+    *pDst++ = clip_q63_to_q31((q63_t) (in));
+
+    in = *pIn++;
+    in = (in * 2147483648.0f);
+    in += (in > 0.0f) ? 0.5f : -0.5f;
+    *pDst++ = clip_q63_to_q31((q63_t) (in));
+
+#else
+
+    /* C = A * 2147483648, truncated toward zero by the integer cast */
+    /* widen to 64 bits first so the clip helper sees out-of-range products */
+    *pDst++ = clip_q63_to_q31((q63_t) (*pIn++ * 2147483648.0f));
+    *pDst++ = clip_q63_to_q31((q63_t) (*pIn++ * 2147483648.0f));
+    *pDst++ = clip_q63_to_q31((q63_t) (*pIn++ * 2147483648.0f));
+    *pDst++ = clip_q63_to_q31((q63_t) (*pIn++ * 2147483648.0f));
+
+#endif
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
+   ** No loop unrolling is used. */
+  blkCnt = blockSize % 0x4u;
+
+  while(blkCnt > 0u)
+  {
+#ifdef ARM_MATH_ROUNDING
+
+    /* C = A * 2147483648, rounded half away from zero, then clipped to 32 bits */
+    /* convert from float to Q31 and then store the results in the destination buffer */
+    in = *pIn++;
+    in = (in * 2147483648.0f);
+    in += (in > 0.0f) ? 0.5f : -0.5f;
+    *pDst++ = clip_q63_to_q31((q63_t) (in));
+
+#else
+
+    /* C = A * 2147483648, truncated toward zero by the integer cast */
+    /* convert from float to Q31 and then store the results in the destination buffer */
+    *pDst++ = clip_q63_to_q31((q63_t) (*pIn++ * 2147483648.0f));
+
+#endif
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+}
+ 
+/**  
+ * @} end of float_to_x group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/SupportFunctions/arm_float_to_q7.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,149 @@
+/* ----------------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_float_to_q7.c  
+*  
+* Description:	Processing function for the Conversion from float to Q7  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+* ---------------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupSupport  
+ */ 
+ 
+/**  
+ * @addtogroup float_to_x  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief Converts the elements of the floating-point vector to Q7 vector.  
+ * @param[in]       *pSrc points to the floating-point input vector  
+ * @param[out]      *pDst points to the Q7 output vector 
+ * @param[in]       blockSize length of the input vector  
+ * @return none.  
+ *  
+ *\par Description:  
+ * \par 
+ * The equation used for the conversion process is:  
+ * <pre>  
+ * 	pDst[n] = (q7_t)(pSrc[n] * 128);   0 <= n < blockSize.  
+ * </pre>  
+ * \par Scaling and Overflow Behavior:  
+ * \par  
+ * The function uses saturating arithmetic.  
+ * Results outside of the allowable Q7 range [0x80 0x7F] will be saturated.  
+ * \note 
+ * In order to apply rounding, the library should be rebuilt with the ARM_MATH_ROUNDING macro
+ * defined in the preprocessor section of project options.
+ */ 
+ 
+ 
+void arm_float_to_q7(
+  float32_t * pSrc,
+  q7_t * pDst,
+  uint32_t blockSize)
+{
+  float32_t *pIn = pSrc;                         /* read cursor over the input vector */
+  uint32_t blkCnt;                               /* loop counter, reused by both loops */
+
+#ifdef ARM_MATH_ROUNDING
+
+  float32_t in;                                  /* scaled sample awaiting rounding */
+
+#endif
+
+  /* Number of whole groups of four samples (blockSize / 4) */
+  blkCnt = blockSize >> 2u;
+
+  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
+   ** A second loop below computes the remaining 0 to 3 samples. */
+  while(blkCnt > 0u)
+  {
+#ifdef ARM_MATH_ROUNDING
+    /* C = A * 128, rounded half away from zero, then saturated to 8 bits */
+    /* Convert through q31_t (not q15_t) so products beyond 16 bits still reach __SSAT intact */
+    in = *pIn++;
+    in = (in * 128.0f);
+    in += (in > 0.0f) ? 0.5f : -0.5f;
+    *pDst++ = (q7_t) (__SSAT((q31_t) (in), 8));
+
+    in = *pIn++;
+    in = (in * 128.0f);
+    in += (in > 0.0f) ? 0.5f : -0.5f;
+    *pDst++ = (q7_t) (__SSAT((q31_t) (in), 8));
+
+    in = *pIn++;
+    in = (in * 128.0f);
+    in += (in > 0.0f) ? 0.5f : -0.5f;
+    *pDst++ = (q7_t) (__SSAT((q31_t) (in), 8));
+
+    in = *pIn++;
+    in = (in * 128.0f);
+    in += (in > 0.0f) ? 0.5f : -0.5f;
+    *pDst++ = (q7_t) (__SSAT((q31_t) (in), 8));
+
+#else
+
+    /* C = A * 128, truncated toward zero by the integer cast */
+    /* convert from float to q7, saturate to 8 bits and store in the destination buffer */
+    *pDst++ = (q7_t) __SSAT((q31_t) (*pIn++ * 128.0f), 8);
+    *pDst++ = (q7_t) __SSAT((q31_t) (*pIn++ * 128.0f), 8);
+    *pDst++ = (q7_t) __SSAT((q31_t) (*pIn++ * 128.0f), 8);
+    *pDst++ = (q7_t) __SSAT((q31_t) (*pIn++ * 128.0f), 8);
+
+#endif
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
+   ** No loop unrolling is used. */
+  blkCnt = blockSize % 0x4u;
+
+  while(blkCnt > 0u)
+  {
+#ifdef ARM_MATH_ROUNDING
+    /* C = A * 128, rounded half away from zero, then saturated to 8 bits */
+    /* Convert through q31_t (not q15_t) so products beyond 16 bits still reach __SSAT intact */
+    in = *pIn++;
+    in = (in * 128.0f);
+    in += (in > 0.0f) ? 0.5f : -0.5f;
+    *pDst++ = (q7_t) (__SSAT((q31_t) (in), 8));
+
+#else
+
+    /* C = A * 128, truncated toward zero by the integer cast */
+    /* convert from float to q7, saturate to 8 bits and store in the destination buffer */
+    *pDst++ = (q7_t) __SSAT((q31_t) (*pIn++ * 128.0f), 8);
+
+#endif
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+}
+ 
+/**  
+ * @} end of float_to_x group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/SupportFunctions/arm_q15_to_float.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,107 @@
+/* ----------------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_q15_to_float.c  
+*  
+* Description:	Processing function for the Conversion from Q15 to float  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+* ---------------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupSupport  
+ */ 
+ 
+/**  
+ * @defgroup q15_to_x  Convert 16-bit Integer value  
+ */ 
+ 
+/**  
+ * @addtogroup q15_to_x  
+ * @{  
+ */ 
+ 
+ 
+ 
+ 
+/**  
+ * @brief  Converts the elements of the Q15 vector to floating-point vector.   
+ * @param[in]       *pSrc points to the Q15 input vector  
+ * @param[out]      *pDst points to the floating-point output vector 
+ * @param[in]       blockSize length of the input vector  
+ * @return none.  
+ *  
+ * \par Description:  
+ *  
+ * The equation used for the conversion process is:  
+ * 
+ * <pre>  
+ * 	pDst[n] = (float32_t) pSrc[n] / 32768;   0 <= n < blockSize.  
+ * </pre>  
+ * 
+ */ 
+ 
+ 
+void arm_q15_to_float(
+  q15_t * pSrc,
+  float32_t * pDst,
+  uint32_t blockSize)
+{
+  /* Every sample is converted to float and divided by this Q15 scale factor */
+  const float32_t fullScale = 32768.0f;
+
+  q15_t *src = pSrc;                             /* read cursor over the Q15 input */
+  float32_t *dst = pDst;                         /* write cursor over the float output */
+
+  uint32_t groups = blockSize >> 2u;             /* whole groups of four samples */
+  uint32_t leftover = blockSize % 0x4u;          /* 0 to 3 trailing samples */
+
+  /* Unrolled section: each pass converts four consecutive samples, reading
+   ** and writing them one at a time in ascending order. */
+  for (; groups > 0u; groups--)
+  {
+    /* C = (float32_t) A / 32768 */
+    dst[0] = (float32_t) src[0] / fullScale;
+    dst[1] = (float32_t) src[1] / fullScale;
+    dst[2] = (float32_t) src[2] / fullScale;
+    dst[3] = (float32_t) src[3] / fullScale;
+
+    /* Step both cursors past the group just converted */
+    src += 4;
+    dst += 4;
+  }
+
+  /* Tail section: convert whatever is left when blockSize is not a
+   ** multiple of four, one sample per pass. */
+  for (; leftover > 0u; leftover--)
+  {
+    /* C = (float32_t) A / 32768 */
+    *dst = (float32_t) *src / fullScale;
+
+    /* Advance to the next sample */
+    src++;
+    dst++;
+  }
+}
+ 
+/**  
+ * @} end of q15_to_x group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/SupportFunctions/arm_q15_to_q31.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,100 @@
+/* ----------------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_q15_to_q31.c  
+*  
+* Description:	Processing function for the Conversion from Q15 to Q31  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+* ---------------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupSupport  
+ */ 
+ 
+/**  
+ * @addtogroup q15_to_x  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief Converts the elements of the Q15 vector to Q31 vector.   
+ * @param[in]       *pSrc points to the Q15 input vector  
+ * @param[out]      *pDst points to the Q31 output vector 
+ * @param[in]       blockSize length of the input vector  
+ * @return none.  
+ *  
+ * \par Description:  
+ *  
+ * The equation used for the conversion process is: 
+ * 
+ * <pre>  
+ * 	pDst[n] = (q31_t) pSrc[n] << 16;   0 <= n < blockSize.  
+ * </pre>  
+ * 
+ */ 
+ 
+ 
+void arm_q15_to_q31(
+  q15_t * pSrc,
+  q31_t * pDst,
+  uint32_t blockSize)
+{
+  q15_t *pIn = pSrc;                             /* read cursor over the Q15 input */
+  uint32_t blkCnt;                               /* loop counter, reused by both loops */
+
+  /* Number of whole groups of four samples (blockSize / 4) */
+  blkCnt = blockSize >> 2u;
+
+  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
+   ** A second loop below computes the remaining 0 to 3 samples. */
+  while(blkCnt > 0u)
+  {
+    /* C = (q31_t) A << 16, written as a multiply by 2^16: left-shifting a negative */
+    /* signed value is undefined in C, while this product always fits in 32 bits */
+    *pDst++ = (q31_t) (*pIn++) * 65536;
+    *pDst++ = (q31_t) (*pIn++) * 65536;
+    *pDst++ = (q31_t) (*pIn++) * 65536;
+    *pDst++ = (q31_t) (*pIn++) * 65536;
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
+   ** No loop unrolling is used. */
+  blkCnt = blockSize % 0x4u;
+
+  while(blkCnt > 0u)
+  {
+    /* C = (q31_t) A << 16, expressed as an exact multiply by 2^16 (see above) */
+    /* convert from q15 to q31 and then store the results in the destination buffer */
+    *pDst++ = (q31_t) (*pIn++) * 65536;
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+}
+ 
+/**  
+ * @} end of q15_to_x group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/SupportFunctions/arm_q15_to_q7.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,101 @@
+/* ----------------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_q15_to_q7.c  
+*  
+* Description:	Processing function for the Conversion from Q15 to Q7  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+* ---------------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupSupport  
+ */ 
+ 
+/**  
+ * @addtogroup q15_to_x  
+ * @{  
+ */ 
+ 
+ 
+/**  
+ * @brief Converts the elements of the Q15 vector to Q7 vector.   
+ * @param[in]       *pSrc points to the Q15 input vector  
+ * @param[out]      *pDst points to the Q7 output vector 
+ * @param[in]       blockSize length of the input vector  
+ * @return none.  
+ *  
+ * \par Description:  
+ *  
+ * The equation used for the conversion process is:  
+ * 
+ * <pre>  
+ * 	pDst[n] = (q7_t) (pSrc[n] >> 8);   0 <= n < blockSize.
+ * </pre> 
+ * 
+ */ 
+ 
+ 
+void arm_q15_to_q7(
+  q15_t * pSrc,
+  q7_t * pDst,
+  uint32_t blockSize)
+{
+  q15_t *src = pSrc;                             /* read cursor over the Q15 input */
+  q7_t *dst = pDst;                              /* write cursor over the Q7 output */
+  uint32_t passes;                               /* iterations left in the current loop */
+
+  /* Unrolled section: blockSize / 4 passes, four samples per pass.
+   ** Samples are read and written one at a time in ascending order. */
+  passes = blockSize >> 2u;
+
+  while (passes != 0u)
+  {
+    /* C = (q7_t) (A >> 8) */
+    dst[0] = (q7_t) (src[0] >> 8);
+    dst[1] = (q7_t) (src[1] >> 8);
+    dst[2] = (q7_t) (src[2] >> 8);
+    dst[3] = (q7_t) (src[3] >> 8);
+
+    /* Step both cursors past the group and count the pass */
+    src += 4;
+    dst += 4;
+    passes--;
+  }
+
+  /* Tail section: blockSize % 4 passes, one sample per pass. */
+  passes = blockSize % 0x4u;
+
+  while (passes != 0u)
+  {
+    /* C = (q7_t) (A >> 8) */
+    *dst = (q7_t) (*src >> 8);
+
+    /* Move on to the next sample and count the pass */
+    src++;
+    dst++;
+    passes--;
+  }
+}
+ 
+/**  
+ * @} end of q15_to_x group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/SupportFunctions/arm_q31_to_float.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,104 @@
+/* ----------------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_q31_to_float.c  
+*  
+* Description:	Processing function for the Conversion from Q31 to float  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+* ---------------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupSupport  
+ */ 
+ 
+/**  
+ * @defgroup q31_to_x  Convert 32-bit Integer value  
+ */ 
+ 
+/**  
+ * @addtogroup q31_to_x  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief Converts the elements of the Q31 vector to floating-point vector.  
+ * @param[in]       *pSrc points to the Q31 input vector  
+ * @param[out]      *pDst points to the floating-point output vector 
+ * @param[in]       blockSize length of the input vector  
+ * @return none.  
+ *  
+ * \par Description:  
+ *  
+ * The equation used for the conversion process is:  
+ * 
+ * <pre>  
+ * 	pDst[n] = (float32_t) pSrc[n] / 2147483648;   0 <= n < blockSize.  
+ * </pre>  
+ * 
+ */ 
+ 
+ 
+void arm_q31_to_float(
+  q31_t * pSrc,
+  float32_t * pDst,
+  uint32_t blockSize)
+{
+  /* Every sample is converted to float and divided by this Q31 scale factor */
+  const float32_t fullScale = 2147483648.0f;
+
+  uint32_t idx = 0u;                             /* index of the next sample to convert */
+  uint32_t unrolledEnd;                          /* first index left to the tail loop */
+
+  /* Largest multiple of four that does not exceed blockSize; the unrolled
+   ** loop covers indices below it and the tail loop covers the rest. */
+  unrolledEnd = (blockSize >> 2u) << 2u;
+
+  /* Unrolled section: four samples per pass, converted one at a time
+   ** in ascending index order. */
+  while (idx < unrolledEnd)
+  {
+    /* C = (float32_t) A / 2147483648 */
+    /* convert from q31 to float and store the result in the destination buffer */
+    pDst[idx] = (float32_t) pSrc[idx] / fullScale;
+    pDst[idx + 1u] = (float32_t) pSrc[idx + 1u] / fullScale;
+    pDst[idx + 2u] = (float32_t) pSrc[idx + 2u] / fullScale;
+    pDst[idx + 3u] = (float32_t) pSrc[idx + 3u] / fullScale;
+
+    /* Move on to the next group of four */
+    idx += 4u;
+  }
+
+  /* Tail section: the remaining 0 to 3 samples when blockSize is not a
+   ** multiple of four, one sample per pass. */
+  while (idx < blockSize)
+  {
+    /* C = (float32_t) A / 2147483648 */
+    pDst[idx] = (float32_t) pSrc[idx] / fullScale;
+
+    /* Move on to the next sample */
+    idx++;
+  }
+}
+ 
+/**  
+ * @} end of q31_to_x group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/SupportFunctions/arm_q31_to_q15.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,100 @@
+/* ----------------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_q31_to_q15.c  
+*  
+* Description:	Processing function for the Conversion from Q31 to Q15  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+* ---------------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupSupport  
+ */ 
+ 
+/**  
+ * @addtogroup q31_to_x  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief Converts the elements of the Q31 vector to Q15 vector.  
+ * @param[in]       *pSrc points to the Q31 input vector  
+ * @param[out]      *pDst points to the Q15 output vector 
+ * @param[in]       blockSize length of the input vector  
+ * @return none.  
+ *   
+ * \par Description:  
+ *  
+ * The equation used for the conversion process is:  
+ * 
+ * <pre>  
+ * 	pDst[n] = (q15_t) (pSrc[n] >> 16);   0 <= n < blockSize.
+ * </pre>  
+ * 
+ */ 
+ 
+ 
+void arm_q31_to_q15(
+  q31_t * pSrc,
+  q15_t * pDst,
+  uint32_t blockSize)
+{
+  /* Each output is the input shifted right by 16 bits and then cast to
+   ** q15_t; no rounding step is applied. */
+
+  q31_t *src = pSrc;                             /* read cursor over the Q31 input */
+  q15_t *dst = pDst;                             /* write cursor over the Q15 output */
+
+  uint32_t groups = blockSize >> 2u;             /* whole groups of four samples */
+  uint32_t leftover = blockSize % 0x4u;          /* 0 to 3 trailing samples */
+
+  /* Unrolled section: each pass converts four consecutive samples, reading
+   ** and writing them one at a time in ascending order. */
+  for (; groups > 0u; groups--)
+  {
+    /* C = (q15_t) (A >> 16) */
+    dst[0] = (q15_t) (src[0] >> 16);
+    dst[1] = (q15_t) (src[1] >> 16);
+    dst[2] = (q15_t) (src[2] >> 16);
+    dst[3] = (q15_t) (src[3] >> 16);
+
+    /* Step both cursors past the group just converted */
+    src += 4;
+    dst += 4;
+  }
+
+  /* Tail section: convert whatever is left when blockSize is not a
+   ** multiple of four, one sample per pass. */
+  for (; leftover > 0u; leftover--)
+  {
+    /* C = (q15_t) (A >> 16) */
+    *dst = (q15_t) (*src >> 16);
+
+    /* Advance to the next sample */
+    src++;
+    dst++;
+  }
+}
+ 
+/**  
+ * @} end of q31_to_x group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/SupportFunctions/arm_q31_to_q7.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,100 @@
+/* ----------------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_q31_to_q7.c  
+*  
+* Description:	Processing function for the Conversion from Q31 to Q7  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+* ---------------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupSupport  
+ */ 
+ 
+/**  
+ * @addtogroup q31_to_x  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief Converts the elements of the Q31 vector to Q7 vector.  
+ * @param[in]       *pSrc points to the Q31 input vector  
+ * @param[out]      *pDst points to the Q7 output vector 
+ * @param[in]       blockSize length of the input vector  
+ * @return none.  
+ *  
+ * \par Description:  
+ *  
+ * The equation used for the conversion process is:  
+ * 
+ * <pre>  
+ * 	pDst[n] = (q7_t) (pSrc[n] >> 24);   0 <= n < blockSize.
+ * </pre>  
+ * 
+ */ 
+ 
+ 
+void arm_q31_to_q7(
+  q31_t * pSrc,
+  q7_t * pDst,
+  uint32_t blockSize)
+{
+  q31_t *src = pSrc;                             /* read cursor over the Q31 input */
+  q7_t *dst = pDst;                              /* write cursor over the Q7 output */
+  uint32_t passes;                               /* iterations left in the current loop */
+
+  /* Unrolled section: blockSize / 4 passes, four samples per pass.
+   ** Samples are read and written one at a time in ascending order. */
+  passes = blockSize >> 2u;
+
+  while (passes != 0u)
+  {
+    /* C = (q7_t) (A >> 24) */
+    dst[0] = (q7_t) (src[0] >> 24);
+    dst[1] = (q7_t) (src[1] >> 24);
+    dst[2] = (q7_t) (src[2] >> 24);
+    dst[3] = (q7_t) (src[3] >> 24);
+
+    /* Step both cursors past the group and count the pass */
+    src += 4;
+    dst += 4;
+    passes--;
+  }
+
+  /* Tail section: blockSize % 4 passes, one sample per pass. */
+  passes = blockSize % 0x4u;
+
+  while (passes != 0u)
+  {
+    /* C = (q7_t) (A >> 24) */
+    *dst = (q7_t) (*src >> 24);
+
+    /* Move on to the next sample and count the pass */
+    src++;
+    dst++;
+    passes--;
+  }
+}
+ 
+/**  
+ * @} end of q31_to_x group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/SupportFunctions/arm_q7_to_float.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,104 @@
+/* ----------------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_q7_to_float.c  
+*  
+* Description:	Processing function for the Conversion from Q7 to float  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+* ---------------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupSupport  
+ */ 
+ 
+/**  
+ * @defgroup q7_to_x  Convert 8-bit Integer value  
+ */ 
+ 
+/**  
+ * @addtogroup q7_to_x  
+ * @{  
+ */ 
+ 
+/**
+ * @brief Converts the elements of the Q7 vector to floating-point vector.
+ * @param[in]       *pSrc points to the Q7 input vector
+ * @param[out]      *pDst points to the floating-point output vector
+ * @param[in]       blockSize number of samples to convert
+ * @return none.
+ *
+ * \par Description:
+ *
+ * Every output sample is the matching input sample divided by 128:
+ *
+ * <pre>
+ * 	pDst[n] = (float32_t) pSrc[n] / 128;   0 <= n < blockSize.
+ * </pre>
+ *
+ */
+
+
+void arm_q7_to_float(
+  q7_t * pSrc,
+  float32_t * pDst,
+  uint32_t blockSize)
+{
+  q7_t *src = pSrc;                              /* read cursor */
+  float32_t *dst = pDst;                         /* write cursor */
+  uint32_t quads = blockSize >> 2u;              /* complete groups of 4 samples */
+  uint32_t tail = blockSize & 0x3u;              /* samples left after the groups */
+
+  /* Unrolled part: four conversions per pass, performed in sample order. */
+  for (; quads > 0u; quads--)
+  {
+    dst[0] = ((float32_t) src[0] / 128.0f);
+    dst[1] = ((float32_t) src[1] / 128.0f);
+    dst[2] = ((float32_t) src[2] / 128.0f);
+    dst[3] = ((float32_t) src[3] / 128.0f);
+
+    src += 4;
+    dst += 4;
+  }
+
+  /* Remaining 0 to 3 samples, converted one at a time. */
+  for (; tail > 0u; tail--)
+  {
+    *dst++ = ((float32_t) *src++ / 128.0f);
+  }
+}
+ 
+/**  
+ * @} end of q7_to_x group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/SupportFunctions/arm_q7_to_q15.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,103 @@
+/* ----------------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_q7_to_q15.c  
+*  
+* Description:	Processing function for the Conversion from Q7 to Q15  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+* ---------------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupSupport  
+ */ 
+ 
+/**  
+ * @addtogroup q7_to_x  
+ * @{  
+ */ 
+ 
+ 
+ 
+ 
+/**
+ * @brief Converts the elements of the Q7 vector to Q15 vector.
+ * @param[in]       *pSrc points to the Q7 input vector
+ * @param[out]      *pDst points to the Q15 output vector
+ * @param[in]       blockSize length of the input vector
+ * @return none.
+ *
+ * \par Description:
+ *
+ * The equation used for the conversion process is:
+ *
+ * <pre>
+ * 	pDst[n] = (q15_t) pSrc[n] << 8;   0 <= n < blockSize.
+ * </pre>
+ *
+ * \par
+ * The scaling is implemented as a multiplication by 2^8 rather than a left
+ * shift: left-shifting a negative signed value is undefined behaviour in C,
+ * whereas the product always fits the Q15 range (-32768 .. 32512).
+ *
+ */
+
+
+void arm_q7_to_q15(
+  q7_t * pSrc,
+  q15_t * pDst,
+  uint32_t blockSize)
+{
+  q7_t *pIn = pSrc;                              /* Src pointer */
+  uint32_t blkCnt;                               /* loop counter */
+
+  /*loop Unrolling */
+  blkCnt = blockSize >> 2u;
+
+  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
+   ** a second loop below computes the remaining 1 to 3 samples. */
+  while(blkCnt > 0u)
+  {
+    /* C = (q15_t) A * 2^8 */
+    /* convert from q7 to q15 and then store the results in the destination buffer */
+    *pDst++ = (q15_t) (*pIn++ * (1 << 8));
+    *pDst++ = (q15_t) (*pIn++ * (1 << 8));
+    *pDst++ = (q15_t) (*pIn++ * (1 << 8));
+    *pDst++ = (q15_t) (*pIn++ * (1 << 8));
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
+   ** No loop unrolling is used. */
+  blkCnt = blockSize % 0x4u;
+
+  while(blkCnt > 0u)
+  {
+    /* C = (q15_t) A * 2^8 */
+    /* convert from q7 to q15 and then store the results in the destination buffer */
+    *pDst++ = (q15_t) (*pIn++ * (1 << 8));
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+}
+ 
+/**  
+ * @} end of q7_to_x group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/SupportFunctions/arm_q7_to_q31.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,100 @@
+/* ----------------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:		arm_q7_to_q31.c  
+*  
+* Description:	Processing function for the Conversion from Q7 to Q31  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+* ---------------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupSupport  
+ */ 
+ 
+/**  
+ * @addtogroup q7_to_x  
+ * @{  
+ */ 
+ 
+/**
+ * @brief Converts the elements of the Q7 vector to Q31 vector.
+ * @param[in]       *pSrc points to the Q7 input vector
+ * @param[out]      *pDst points to the Q31 output vector
+ * @param[in]       blockSize length of the input vector
+ * @return none.
+ *
+ * \par Description:
+ *
+ * The equation used for the conversion process is:
+ *
+ * <pre>
+ * 	pDst[n] = (q31_t) pSrc[n] << 24;   0 <= n < blockSize.
+ * </pre>
+ *
+ * \par
+ * The scaling is implemented as a multiplication by 2^24 rather than a left
+ * shift: left-shifting a negative signed value is undefined behaviour in C,
+ * whereas the product never overflows (-128 * 2^24 is exactly the most
+ * negative Q31 value).
+ *
+ */
+
+
+void arm_q7_to_q31(
+  q7_t * pSrc,
+  q31_t * pDst,
+  uint32_t blockSize)
+{
+  q7_t *pIn = pSrc;                              /* Src pointer */
+  uint32_t blkCnt;                               /* loop counter */
+
+  /*loop Unrolling */
+  blkCnt = blockSize >> 2u;
+
+  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
+   ** a second loop below computes the remaining 1 to 3 samples. */
+  while(blkCnt > 0u)
+  {
+    /* C = (q31_t) A * 2^24 */
+    /* convert from q7 to q31 and then store the results in the destination buffer */
+    *pDst++ = (q31_t) *pIn++ * ((q31_t) 1 << 24);
+    *pDst++ = (q31_t) *pIn++ * ((q31_t) 1 << 24);
+    *pDst++ = (q31_t) *pIn++ * ((q31_t) 1 << 24);
+    *pDst++ = (q31_t) *pIn++ * ((q31_t) 1 << 24);
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
+   ** No loop unrolling is used. */
+  blkCnt = blockSize % 0x4u;
+
+  while(blkCnt > 0u)
+  {
+    /* C = (q31_t) A * 2^24 */
+    /* convert from q7 to q31 and then store the results in the destination buffer */
+    *pDst++ = (q31_t) *pIn++ * ((q31_t) 1 << 24);
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+}
+ 
+/**  
+ * @} end of q7_to_x group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/TransformFunctions/arm_cfft_radix4_f32.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,918 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:	    arm_cfft_radix4_f32.c  
+*  
+* Description:	Radix-4 Decimation in Frequency CFFT & CIFFT Floating point processing function  
+*  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.5  2010/04/26   
+* 	 incorporated review comments and updated with latest CMSIS layer  
+*  
+* Version 0.0.3  2010/03/10   
+*    Initial version  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupTransforms  
+ */ 
+ 
+/**  
+ * @defgroup CFFT_CIFFT Complex FFT Functions  
+ *  
+ * \par  
+ * Complex Fast Fourier Transform(CFFT) and Complex Inverse Fast Fourier Transform(CIFFT) is an efficient algorithm to compute Discrete Fourier Transform(DFT) and Inverse Discrete Fourier Transform(IDFT).  
+ * Computational complexity of CFFT reduces drastically when compared to DFT.  
+ * \par  
+ * This set of functions implements CFFT/CIFFT  
+ * for Q15, Q31, and floating-point data types.  The functions operate in-place: the same buffer is used for both input and output.  
+ * Complex input is stored in input buffer in an interleaved fashion.  
+ *  
+ * \par  
+ * The functions operate on blocks of input and output data and each call to the function processes  
+ * <code>2*fftLen</code> samples through the transform.  <code>pSrc</code>  points to In-place arrays containing <code>2*fftLen</code> values.  
+ * \par 
+ * The <code>pSrc</code> points to the array of in-place buffer of size <code>2*fftLen</code> and inputs and outputs are stored in an interleaved fashion as shown below.  
+ * <pre> {real[0], imag[0], real[1], imag[1],..} </pre>  
+ *  
+ * \par Lengths supported by the transform: 
+ * \par  
+ * Internally, the functions utilize a radix-4 decimation in frequency(DIF) algorithm  
+ * and the supported FFT lengths are [16, 64, 256, 1024]. 
+ *   
+ *  
+ * \par Algorithm:  
+ *  
+ * <b>Complex Fast Fourier Transform:</b>  
+ * \par   
+ * Input real and imaginary data:  
+ * <pre>  
+ * x(n) = xa + j * ya  
+ * x(n+N/4 ) = xb + j * yb  
+ * x(n+N/2 ) = xc + j * yc  
+ * x(n+3N/4) = xd + j * yd  
+ * </pre>  
+ * where N is length of FFT  
+ * \par  
+ * Output real and imaginary data:  
+ * <pre>  
+ * X(4r) = xa'+ j * ya'  
+ * X(4r+1) = xb'+ j * yb'  
+ * X(4r+2) = xc'+ j * yc'  
+ * X(4r+3) = xd'+ j * yd'  
+ * </pre>  
+ * \par  
+ * Twiddle factors for radix-4 FFT:  
+ * <pre>  
+ * Wn = co1 + j * (- si1)  
+ * W2n = co2 + j * (- si2)  
+ * W3n = co3 + j * (- si3)  
+ * </pre>  
+ *  
+ * \par  
+ * \image html CFFT.gif "Radix-4 Decimation-in Frequency Complex Fast Fourier Transform"  
+ *  
+ * \par  
+ * The output of the radix-4 CFFT is in digit-reversed order. Interchanging the middle two branches of every butterfly yields bit-reversed output.  
+ * \par  
+ * <b> Butterfly CFFT equations:</b>  
+ * <pre>  
+ * xa' = xa + xb + xc + xd  
+ * ya' = ya + yb + yc + yd  
+ * xc' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1)  
+ * yc' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1)  
+ * xb' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2)  
+ * yb' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2)  
+ * xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3)  
+ * yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3)  
+ * </pre>  
+ *  
+ *  
+ * <b>Complex Inverse Fast Fourier Transform:</b>  
+ * \par  
+ * CIFFT uses same twiddle factor table as CFFT with modifications in the design equation as shown below.  
+ *  
+ * \par  
+ * <b> Modified Butterfly CIFFT equations:</b>  
+ * <pre>  
+ * xa' = xa + xb + xc + xd  
+ * ya' = ya + yb + yc + yd  
+ * xc' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1)  
+ * yc' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1)  
+ * xb' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2)  
+ * yb' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2)  
+ * xd' = (xa+yb-xc-yd)* co3 - (ya-xb-yc+xd)* (si3)  
+ * yd' = (ya-xb-yc+xd)* co3 + (xa+yb-xc-yd)* (si3)  
+ * </pre>  
+ *  
+ * \par Instance Structure  
+ * A separate instance structure must be defined for each Instance but the twiddle factors and bit reversal tables can be reused.  
+ * There are separate instance structure declarations for each of the 3 supported data types.  
+ *  
+ * \par Initialization Functions  
+ * There is also an associated initialization function for each data type.  
+ * The initialization function performs the following operations:  
+ * - Sets the values of the internal structure fields.  
+ * - Initializes twiddle factor table and bit reversal table pointers  
+ * \par  
+ * Use of the initialization function is optional.  
+ * However, if the initialization function is used, then the instance structure cannot be placed into a const data section.  
+ * To place an instance structure into a const data section, the instance structure must be manually initialized.  
+ * Manually initialize the instance structure as follows:  
+ * <pre>  
+ *arm_cfft_radix4_instance_f32 S = {fftLen, ifftFlag, bitReverseFlag, pTwiddle, pBitRevTable, twidCoefModifier, bitRevFactor, onebyfftLen};  
+ *arm_cfft_radix4_instance_q31 S = {fftLen, ifftFlag, bitReverseFlag, pTwiddle, pBitRevTable, twidCoefModifier, bitRevFactor};  
+ *arm_cfft_radix4_instance_q15 S = {fftLen, ifftFlag, bitReverseFlag, pTwiddle, pBitRevTable, twidCoefModifier, bitRevFactor};  
+ * </pre>  
+ * \par  
+ * where <code>fftLen</code> length of CFFT/CIFFT; <code>ifftFlag</code> Flag for selection of CFFT or CIFFT(Set ifftFlag to calculate CIFFT otherwise calculates CFFT);  
+ * <code>bitReverseFlag</code> Flag for selection of output order(Set bitReverseFlag to output in normal order otherwise output in bit reversed order);   
+ * <code>pTwiddle</code> points to array of twiddle coefficients; <code>pBitRevTable</code> points to the array of bit reversal table.  
+ * <code>twidCoefModifier</code> modifier for twiddle factor table which supports all FFT lengths with same table;   
+ * <code>bitRevFactor</code> modifier for bit reversal table which supports all FFT lengths with same table.  
+ * <code>onebyfftLen</code> value of 1/fftLen to calculate CIFFT;  
+ * 
+ * \par Fixed-Point Behavior  
+ * Care must be taken when using the fixed-point versions of the CFFT/CIFFT function.  
+ * Refer to the function specific documentation below for usage guidelines.  
+ */ 
+ 
+ 
+/**  
+ * @addtogroup CFFT_CIFFT  
+ * @{  
+ */ 
+ 
+/**
+ * @details
+ * @brief Processing function for the floating-point CFFT/CIFFT.
+ * @param[in]      *S    points to an instance of the floating-point CFFT/CIFFT structure.
+ * @param[in, out] *pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place.
+ * @return none.
+ *
+ * The butterfly routine is chosen from <code>S->ifftFlag</code> (1 selects the
+ * inverse transform); the bit reversal pass runs afterwards only when
+ * <code>S->bitReverseFlag</code> is 1.
+ */
+
+void arm_cfft_radix4_f32(
+  const arm_cfft_radix4_instance_f32 * S,
+  float32_t * pSrc)
+{
+  /* Butterfly pass: forward transform unless the inverse flag is set. */
+  if(S->ifftFlag != 1u)
+  {
+    arm_radix4_butterfly_f32(pSrc, S->fftLen, S->pTwiddle,
+                             S->twidCoefModifier);
+  }
+  else
+  {
+    arm_radix4_butterfly_inverse_f32(pSrc, S->fftLen, S->pTwiddle,
+                                     S->twidCoefModifier, S->onebyfftLen);
+  }
+
+  /* Nothing more to do when the caller did not request reordering. */
+  if(S->bitReverseFlag != 1u)
+  {
+    return;
+  }
+
+  /* Reorder the butterfly output through the bit reversal table. */
+  arm_bitreversal_f32(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable);
+}
+ 
+ 
+/**  
+ * @} end of CFFT_CIFFT group  
+ */ 
+ 
+ 
+ 
+/* ----------------------------------------------------------------------  
+** Internal helper function used by the FFTs  
+** ------------------------------------------------------------------- */ 
+ 
+/*  
+ * @brief  Core function for the floating-point CFFT butterfly process. 
+ * @param[in, out] *pSrc            points to the in-place buffer of floating-point data type. 
+ * @param[in]      fftLen           length of the FFT. 
+ * @param[in]      *pCoef           points to the twiddle coefficient buffer. 
+ * @param[in]      twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. 
+ * @return none. 
+ * 
+ * The data is complex and interleaved: sample i is read and written as 
+ * pSrc[2*i] (real part) and pSrc[2*i + 1] (imaginary part).  The twiddle table 
+ * is indexed the same way: pCoef[2*ia] is used as the "co" factor and 
+ * pCoef[2*ia + 1] as the "si" factor. 
+ * 
+ * The work is split into three parts: 
+ * - a first stage over fftLen/4 butterflies, each with its own twiddle factors; 
+ * - the middle stages, where the butterfly span n2 is divided by 4 per stage; 
+ * - a last stage whose results are stored without any twiddle multiplication. 
+ * twidCoefModifier is multiplied by 4 after the first stage and after every 
+ * middle stage. 
+ */ 
+ 
+void arm_radix4_butterfly_f32( 
+  float32_t * pSrc, 
+  uint16_t fftLen, 
+  float32_t * pCoef, 
+  uint16_t twidCoefModifier) 
+{ 
+ 
+  float32_t co1, co2, co3, si1, si2, si3; 
+  float32_t t1, t2, r1, r2, s1, s2; 
+  uint32_t ia1, ia2, ia3; 
+  uint32_t i0, i1, i2, i3; 
+  uint32_t n1, n2, j, k; 
+ 
+  /*  Initializations for the first stage */ 
+  n2 = fftLen; 
+  n1 = n2; 
+ 
+  /* n2 = fftLen/4 */ 
+  n2 >>= 2u; 
+  i0 = 0u; 
+  ia1 = 0u; 
+ 
+  j = n2; 
+ 
+  /*  Calculation of first stage: one butterfly per iteration, fftLen/4 iterations */ 
+  do 
+  { 
+    /*  index calculation for the input as, */ 
+    /*  pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */ 
+    i1 = i0 + n2; 
+    i2 = i1 + n2; 
+    i3 = i2 + n2; 
+ 
+    /*  Butterfly implementation */ 
+ 
+    /* xa + xc */ 
+    r1 = pSrc[(2u * i0)] + pSrc[(2u * i2)]; 
+ 
+    /* xa - xc */ 
+    r2 = pSrc[2u * i0] - pSrc[2u * i2]; 
+ 
+    /* ya + yc */ 
+    s1 = pSrc[(2u * i0) + 1u] + pSrc[(2u * i2) + 1u]; 
+ 
+    /* ya - yc */ 
+    s2 = pSrc[(2u * i0) + 1u] - pSrc[(2u * i2) + 1u]; 
+ 
+    /* xb + xd */ 
+    t1 = pSrc[2u * i1] + pSrc[2u * i3]; 
+ 
+    /* xa' = xa + xb + xc + xd */ 
+    pSrc[2u * i0] = r1 + t1; 
+ 
+    /* (xa + xc) - (xb + xd) */ 
+    r1 = r1 - t1; 
+ 
+    /* yb + yd */ 
+    t2 = pSrc[(2u * i1) + 1u] + pSrc[(2u * i3) + 1u]; 
+ 
+    /* ya' = ya + yb + yc + yd */ 
+    pSrc[(2u * i0) + 1u] = s1 + t2; 
+ 
+    /* (ya + yc) - (yb + yd) */ 
+    s1 = s1 - t2; 
+ 
+    /* yb - yd */ 
+    t1 = pSrc[(2u * i1) + 1u] - pSrc[(2u * i3) + 1u]; 
+ 
+    /* xb - xd */ 
+    t2 = pSrc[2u * i1] - pSrc[2u * i3]; 
+ 
+    /*  index calculation for the coefficients: ia2 = 2 * ia1 */ 
+    ia2 = ia1 + ia1; 
+    co2 = pCoef[ia2 * 2u]; 
+    si2 = pCoef[(ia2 * 2u) + 1u]; 
+ 
+    /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2), stored at index i1 */ 
+    pSrc[2u * i1] = (r1 * co2) + (s1 * si2); 
+ 
+    /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2), stored at index i1 */ 
+    pSrc[(2u * i1) + 1u] = (s1 * co2) - (r1 * si2); 
+ 
+    /* (xa - xc) + (yb - yd) */ 
+    r1 = r2 + t1; 
+ 
+    /* (xa - xc) - (yb - yd) */ 
+    r2 = r2 - t1; 
+ 
+    /* (ya - yc) - (xb - xd) */ 
+    s1 = s2 - t2; 
+ 
+    /* (ya - yc) + (xb - xd) */ 
+    s2 = s2 + t2; 
+ 
+    co1 = pCoef[ia1 * 2u]; 
+    si1 = pCoef[(ia1 * 2u) + 1u]; 
+ 
+    /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1), stored at index i2 */ 
+    pSrc[2u * i2] = (r1 * co1) + (s1 * si1); 
+ 
+    /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1), stored at index i2 */ 
+    pSrc[(2u * i2) + 1u] = (s1 * co1) - (r1 * si1); 
+ 
+    /*  index calculation for the coefficients: ia3 = 3 * ia1 */ 
+    ia3 = ia2 + ia1; 
+    co3 = pCoef[ia3 * 2u]; 
+    si3 = pCoef[(ia3 * 2u) + 1u]; 
+ 
+ 
+    /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */ 
+    pSrc[2u * i3] = (r2 * co3) + (s2 * si3); 
+ 
+    /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */ 
+    pSrc[(2u * i3) + 1u] = (s2 * co3) - (r2 * si3); 
+ 
+    /*  Twiddle coefficients index modifier */ 
+    ia1 = ia1 + twidCoefModifier; 
+ 
+    /*  Updating input index */ 
+    i0 = i0 + 1u; 
+ 
+  } 
+  while(--j); 
+ 
+  /*  The next stage steps through the twiddle table 4 times faster */ 
+  twidCoefModifier <<= 2u; 
+ 
+  /*  Calculation of the second stage up to, but excluding, the last stage */ 
+  for (k = fftLen / 4; k > 4u; k >>= 2u) 
+  { 
+    /*  Initializations for the current stage: n1 is the previous span, n2 a quarter of it */ 
+    n1 = n2; 
+    n2 >>= 2u; 
+    ia1 = 0u; 
+ 
+    /*  Loop over the n2 distinct twiddle factor sets of the current stage */ 
+    for (j = 0u; j <= (n2 - 1u); j++) 
+    { 
+      /*  index calculation for the coefficients */ 
+      ia2 = ia1 + ia1; 
+      ia3 = ia2 + ia1; 
+      co1 = pCoef[ia1 * 2u]; 
+      si1 = pCoef[(ia1 * 2u) + 1u]; 
+      co2 = pCoef[ia2 * 2u]; 
+      si2 = pCoef[(ia2 * 2u) + 1u]; 
+      co3 = pCoef[ia3 * 2u]; 
+      si3 = pCoef[(ia3 * 2u) + 1u]; 
+ 
+      /*  Twiddle coefficients index modifier */ 
+      ia1 = ia1 + twidCoefModifier; 
+ 
+      for (i0 = j; i0 < fftLen; i0 += n1) 
+      { 
+        /*  index calculation for the input as, */ 
+        /*  pSrc[i0 + 0], pSrc[i0 + n2], pSrc[i0 + 2*n2], pSrc[i0 + 3*n2] */ 
+        i1 = i0 + n2; 
+        i2 = i1 + n2; 
+        i3 = i2 + n2; 
+ 
+        /* xa + xc */ 
+        r1 = pSrc[(2u * i0)] + pSrc[(2u * i2)]; 
+ 
+        /* xa - xc */ 
+        r2 = pSrc[(2u * i0)] - pSrc[(2u * i2)]; 
+ 
+        /* ya + yc */ 
+        s1 = pSrc[(2u * i0) + 1u] + pSrc[(2u * i2) + 1u]; 
+ 
+        /* ya - yc */ 
+        s2 = pSrc[(2u * i0) + 1u] - pSrc[(2u * i2) + 1u]; 
+ 
+        /* xb + xd */ 
+        t1 = pSrc[2u * i1] + pSrc[2u * i3]; 
+ 
+        /* xa' = xa + xb + xc + xd */ 
+        pSrc[2u * i0] = r1 + t1; 
+ 
+        /* (xa + xc) - (xb + xd) */ 
+        r1 = r1 - t1; 
+ 
+        /* yb + yd */ 
+        t2 = pSrc[(2u * i1) + 1u] + pSrc[(2u * i3) + 1u]; 
+ 
+        /* ya' = ya + yb + yc + yd */ 
+        pSrc[(2u * i0) + 1u] = s1 + t2; 
+ 
+        /* (ya + yc) - (yb + yd) */ 
+        s1 = s1 - t2; 
+ 
+        /* (yb - yd) */ 
+        t1 = pSrc[(2u * i1) + 1u] - pSrc[(2u * i3) + 1u]; 
+ 
+        /* (xb - xd) */ 
+        t2 = pSrc[2u * i1] - pSrc[2u * i3]; 
+ 
+        /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2), stored at index i1 */ 
+        pSrc[2u * i1] = (r1 * co2) + (s1 * si2); 
+ 
+        /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2), stored at index i1 */ 
+        pSrc[(2u * i1) + 1u] = (s1 * co2) - (r1 * si2); 
+ 
+        /* (xa - xc) + (yb - yd) */ 
+        r1 = r2 + t1; 
+ 
+        /* (xa - xc) - (yb - yd) */ 
+        r2 = r2 - t1; 
+ 
+        /* (ya - yc) -  (xb - xd) */ 
+        s1 = s2 - t2; 
+ 
+        /* (ya - yc) +  (xb - xd) */ 
+        s2 = s2 + t2; 
+ 
+        /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1), stored at index i2 */ 
+        pSrc[2u * i2] = (r1 * co1) + (s1 * si1); 
+ 
+        /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1), stored at index i2 */ 
+        pSrc[(2u * i2) + 1u] = (s1 * co1) - (r1 * si1); 
+ 
+        /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */ 
+        pSrc[2u * i3] = (r2 * co3) + (s2 * si3); 
+ 
+        /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */ 
+        pSrc[(2u * i3) + 1u] = (s2 * co3) - (r2 * si3); 
+      } 
+    } 
+    /*  The next stage steps through the twiddle table 4 times faster */ 
+    twidCoefModifier <<= 2u; 
+  } 
+ 
+  /*  Initializations of last stage */ 
+  n1 = n2; 
+  n2 >>= 2u; 
+ 
+  /*  Calculations of last stage: results are stored without twiddle multiplication */ 
+  for (i0 = 0u; i0 <= (fftLen - n1); i0 += n1) 
+  { 
+    /*  index calculation for the input as, */ 
+    /*  pSrc[i0 + 0], pSrc[i0 + n2], pSrc[i0 + 2*n2], pSrc[i0 + 3*n2] */ 
+    i1 = i0 + n2; 
+    i2 = i1 + n2; 
+    i3 = i2 + n2; 
+ 
+    /*  Butterfly implementation */ 
+ 
+    /* xa + xc */ 
+    r1 = pSrc[2u * i0] + pSrc[2u * i2]; 
+ 
+    /* xa - xc */ 
+    r2 = pSrc[2u * i0] - pSrc[2u * i2]; 
+ 
+    /* ya + yc */ 
+    s1 = pSrc[(2u * i0) + 1u] + pSrc[(2u * i2) + 1u]; 
+ 
+    /* ya - yc */ 
+    s2 = pSrc[(2u * i0) + 1u] - pSrc[(2u * i2) + 1u]; 
+ 
+    /* xb + xd */ 
+    t1 = pSrc[2u * i1] + pSrc[2u * i3]; 
+ 
+    /* xa' = xa + xb + xc + xd */ 
+    pSrc[2u * i0] = r1 + t1; 
+ 
+    /* (xa + xc) - (xb + xd) */ 
+    r1 = r1 - t1; 
+ 
+    /* yb + yd */ 
+    t2 = pSrc[(2u * i1) + 1u] + pSrc[(2u * i3) + 1u]; 
+ 
+    /* ya' = ya + yb + yc + yd */ 
+    pSrc[(2u * i0) + 1u] = s1 + t2; 
+ 
+    /* (ya + yc) - (yb + yd) */ 
+    s1 = s1 - t2; 
+ 
+    /* (yb-yd) */ 
+    t1 = pSrc[(2u * i1) + 1u] - pSrc[(2u * i3) + 1u]; 
+ 
+    /* (xb-xd) */ 
+    t2 = pSrc[2u * i1] - pSrc[2u * i3]; 
+ 
+    /* xc' = (xa-xb+xc-xd), stored at index i1 without twiddle multiplication */ 
+    pSrc[2u * i1] = r1; 
+ 
+    /* yc' = (ya-yb+yc-yd), stored at index i1 without twiddle multiplication */ 
+    pSrc[(2u * i1) + 1u] = s1; 
+ 
+    /* (xa+yb-xc-yd) */ 
+    r1 = r2 + t1; 
+ 
+    /* (xa-yb-xc+yd) */ 
+    r2 = r2 - t1; 
+ 
+    /* (ya-xb-yc+xd) */ 
+    s1 = s2 - t2; 
+ 
+    /* (ya+xb-yc-xd) */ 
+    s2 = s2 + t2; 
+ 
+    /* xb' = (xa+yb-xc-yd), stored at index i2 without twiddle multiplication */ 
+    pSrc[2u * i2] = r1; 
+ 
+    /* yb' = (ya-xb-yc+xd), stored at index i2 without twiddle multiplication */ 
+    pSrc[(2u * i2) + 1u] = s1; 
+ 
+    /* xd' = (xa-yb-xc+yd), stored without twiddle multiplication */ 
+    pSrc[2u * i3] = r2; 
+ 
+    /* yd' = (ya+xb-yc-xd), stored without twiddle multiplication */ 
+    pSrc[(2u * i3) + 1u] = s2; 
+  } 
+} 
+ 
+/*  
+ * @brief  Core function for the floating-point CIFFT butterfly process. 
+ * @param[in, out] *pSrc            points to the in-place buffer of floating-point data type. 
+ * @param[in]      fftLen           length of the FFT. 
+ * @param[in]      *pCoef           points to twiddle coefficient buffer. 
+ * @param[in]      twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. 
+ * @param[in]      onebyfftLen      value of 1/fftLen. 
+ * @return none. 
+ */ 
+ 
+void arm_radix4_butterfly_inverse_f32( 
+  float32_t * pSrc, 
+  uint16_t fftLen, 
+  float32_t * pCoef, 
+  uint16_t twidCoefModifier, 
+  float32_t onebyfftLen) 
+{ 
+  float32_t co1, co2, co3, si1, si2, si3; 
+  float32_t t1, t2, r1, r2, s1, s2; 
+  uint32_t ia1, ia2, ia3; 
+  uint32_t i0, i1, i2, i3; 
+  uint32_t n1, n2, j, k; 
+ 
+  /*  Initializations for the first stage */ 
+  n2 = fftLen; 
+  n1 = n2; 
+ 
+  /* n2 = fftLen/4 */ 
+  n2 >>= 2u; 
+  i0 = 0u; 
+  ia1 = 0u; 
+ 
+  j = n2; 
+ 
+  /*  Calculation of first stage */ 
+  do 
+  { 
+    /*  index calculation for the input as, */ 
+    /*  pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */ 
+    i1 = i0 + n2; 
+    i2 = i1 + n2; 
+    i3 = i2 + n2; 
+ 
+    /*  Butterfly implementation */ 
+    /* xa + xc */ 
+    r1 = pSrc[(2u * i0)] + pSrc[(2u * i2)]; 
+ 
+    /* xa - xc */ 
+    r2 = pSrc[2u * i0] - pSrc[2u * i2]; 
+ 
+    /* ya + yc */ 
+    s1 = pSrc[(2u * i0) + 1u] + pSrc[(2u * i2) + 1u]; 
+ 
+    /* ya - yc */ 
+    s2 = pSrc[(2u * i0) + 1u] - pSrc[(2u * i2) + 1u]; 
+ 
+    /* xb + xd */ 
+    t1 = pSrc[2u * i1] + pSrc[2u * i3]; 
+ 
+    /* xa' = xa + xb + xc + xd */ 
+    pSrc[2u * i0] = r1 + t1; 
+ 
+    /* (xa + xc) - (xb + xd) */ 
+    r1 = r1 - t1; 
+ 
+    /* yb + yd */ 
+    t2 = pSrc[(2u * i1) + 1u] + pSrc[(2u * i3) + 1u]; 
+ 
+    /* ya' = ya + yb + yc + yd */ 
+    pSrc[(2u * i0) + 1u] = s1 + t2; 
+ 
+    /* (ya + yc) - (yb + yd) */ 
+    s1 = s1 - t2; 
+ 
+    /* yb - yd */ 
+    t1 = pSrc[(2u * i1) + 1u] - pSrc[(2u * i3) + 1u]; 
+ 
+    /* xb - xd */ 
+    t2 = pSrc[2u * i1] - pSrc[2u * i3]; 
+ 
+    /*  index calculation for the coefficients */ 
+    ia2 = ia1 + ia1; 
+    co2 = pCoef[ia2 * 2u]; 
+    si2 = pCoef[(ia2 * 2u) + 1u]; 
+ 
+    /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */ 
+    pSrc[2u * i1] = (r1 * co2) - (s1 * si2); 
+ 
+    /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */ 
+    pSrc[(2u * i1) + 1u] = (s1 * co2) + (r1 * si2); 
+ 
+    /* (xa - xc) - (yb - yd) */ 
+    r1 = r2 - t1; 
+ 
+    /* (xa - xc) + (yb - yd) */ 
+    r2 = r2 + t1; 
+ 
+    /* (ya - yc) + (xb - xd) */ 
+    s1 = s2 + t2; 
+ 
+    /* (ya - yc) - (xb - xd) */ 
+    s2 = s2 - t2; 
+ 
+    co1 = pCoef[ia1 * 2u]; 
+    si1 = pCoef[(ia1 * 2u) + 1u]; 
+ 
+    /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */ 
+    pSrc[2u * i2] = (r1 * co1) - (s1 * si1); 
+ 
+    /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */ 
+    pSrc[(2u * i2) + 1u] = (s1 * co1) + (r1 * si1); 
+ 
+    /*  index calculation for the coefficients */ 
+    ia3 = ia2 + ia1; 
+    co3 = pCoef[ia3 * 2u]; 
+    si3 = pCoef[(ia3 * 2u) + 1u]; 
+ 
+    /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */ 
+    pSrc[2u * i3] = (r2 * co3) - (s2 * si3); 
+ 
+    /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */ 
+    pSrc[(2u * i3) + 1u] = (s2 * co3) + (r2 * si3); 
+ 
+    /*  Twiddle coefficients index modifier */ 
+    ia1 = ia1 + twidCoefModifier; 
+ 
+    /*  Updating input index */ 
+    i0 = i0 + 1u; 
+ 
+  } 
+  while(--j); 
+ 
+  twidCoefModifier <<= 2u; 
+ 
+  /*  Calculation of second stage to excluding last stage */ 
+  for (k = fftLen / 4; k > 4u; k >>= 2u) 
+  { 
+    /*  Initializations for the first stage */ 
+    n1 = n2; 
+    n2 >>= 2u; 
+    ia1 = 0u; 
+ 
+    /*  Calculation of first stage */ 
+    for (j = 0u; j <= (n2 - 1u); j++) 
+    { 
+      /*  index calculation for the coefficients */ 
+      ia2 = ia1 + ia1; 
+      ia3 = ia2 + ia1; 
+      co1 = pCoef[ia1 * 2u]; 
+      si1 = pCoef[(ia1 * 2u) + 1u]; 
+      co2 = pCoef[ia2 * 2u]; 
+      si2 = pCoef[(ia2 * 2u) + 1u]; 
+      co3 = pCoef[ia3 * 2u]; 
+      si3 = pCoef[(ia3 * 2u) + 1u]; 
+ 
+      /*  Twiddle coefficients index modifier */ 
+      ia1 = ia1 + twidCoefModifier; 
+ 
+      for (i0 = j; i0 < fftLen; i0 += n1) 
+      { 
+        /*  index calculation for the input as, */ 
+        /*  pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */ 
+        i1 = i0 + n2; 
+        i2 = i1 + n2; 
+        i3 = i2 + n2; 
+ 
+        /* xa + xc */ 
+        r1 = pSrc[(2u * i0)] + pSrc[(2u * i2)]; 
+ 
+        /* xa - xc */ 
+        r2 = pSrc[(2u * i0)] - pSrc[(2u * i2)]; 
+ 
+        /* ya + yc */ 
+        s1 = pSrc[(2u * i0) + 1u] + pSrc[(2u * i2) + 1u]; 
+ 
+        /* ya - yc */ 
+        s2 = pSrc[(2u * i0) + 1u] - pSrc[(2u * i2) + 1u]; 
+ 
+        /* xb + xd */ 
+        t1 = pSrc[2u * i1] + pSrc[2u * i3]; 
+ 
+        /* xa' = xa + xb + xc + xd */ 
+        pSrc[2u * i0] = r1 + t1; 
+ 
+        /* xa + xc -(xb + xd) */ 
+        r1 = r1 - t1; 
+ 
+        /* yb + yd */ 
+        t2 = pSrc[(2u * i1) + 1u] + pSrc[(2u * i3) + 1u]; 
+ 
+        /* ya' = ya + yb + yc + yd */ 
+        pSrc[(2u * i0) + 1u] = s1 + t2; 
+ 
+        /* (ya + yc) - (yb + yd) */ 
+        s1 = s1 - t2; 
+ 
+        /* (yb - yd) */ 
+        t1 = pSrc[(2u * i1) + 1u] - pSrc[(2u * i3) + 1u]; 
+ 
+        /* (xb - xd) */ 
+        t2 = pSrc[2u * i1] - pSrc[2u * i3]; 
+ 
+        /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */ 
+        pSrc[2u * i1] = (r1 * co2) - (s1 * si2); 
+ 
+        /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */ 
+        pSrc[(2u * i1) + 1u] = (s1 * co2) + (r1 * si2); 
+ 
+        /* (xa - xc) - (yb - yd) */ 
+        r1 = r2 - t1; 
+ 
+        /* (xa - xc) + (yb - yd) */ 
+        r2 = r2 + t1; 
+ 
+        /* (ya - yc) +  (xb - xd) */ 
+        s1 = s2 + t2; 
+ 
+        /* (ya - yc) -  (xb - xd) */ 
+        s2 = s2 - t2; 
+ 
+        /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */ 
+        pSrc[2u * i2] = (r1 * co1) - (s1 * si1); 
+ 
+        /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */ 
+        pSrc[(2u * i2) + 1u] = (s1 * co1) + (r1 * si1); 
+ 
+        /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */ 
+        pSrc[2u * i3] = (r2 * co3) - (s2 * si3); 
+ 
+        /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */ 
+        pSrc[(2u * i3) + 1u] = (s2 * co3) + (r2 * si3); 
+      } 
+    } 
+    twidCoefModifier <<= 2u; 
+  } 
+ 
+  /*  Initializations of last stage */ 
+  n1 = n2; 
+  n2 >>= 2u; 
+ 
+  /*  Calculations of last stage */ 
+  for (i0 = 0u; i0 <= (fftLen - n1); i0 += n1) 
+  { 
+    /*  index calculation for the input as, */ 
+    /*  pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */ 
+    i1 = i0 + n2; 
+    i2 = i1 + n2; 
+    i3 = i2 + n2; 
+ 
+    /*  Butterfly implementation */ 
+    /* xa + xc */ 
+    r1 = pSrc[2u * i0] + pSrc[2u * i2]; 
+ 
+    /* xa - xc */ 
+    r2 = pSrc[2u * i0] - pSrc[2u * i2]; 
+ 
+    /* ya + yc */ 
+    s1 = pSrc[(2u * i0) + 1u] + pSrc[(2u * i2) + 1u]; 
+ 
+    /* ya - yc */ 
+    s2 = pSrc[(2u * i0) + 1u] - pSrc[(2u * i2) + 1u]; 
+ 
+    /* xc + xd */ 
+    t1 = pSrc[2u * i1] + pSrc[2u * i3]; 
+ 
+    /* xa' = xa + xb + xc + xd */ 
+    pSrc[2u * i0] = (r1 + t1) * onebyfftLen; 
+ 
+    /* (xa + xb) - (xc + xd) */ 
+    r1 = r1 - t1; 
+ 
+    /* yb + yd */ 
+    t2 = pSrc[(2u * i1) + 1u] + pSrc[(2u * i3) + 1u]; 
+ 
+    /* ya' = ya + yb + yc + yd */ 
+    pSrc[(2u * i0) + 1u] = (s1 + t2) * onebyfftLen; 
+ 
+    /* (ya + yc) - (yb + yd) */ 
+    s1 = s1 - t2; 
+ 
+    /* (yb-yd) */ 
+    t1 = pSrc[(2u * i1) + 1u] - pSrc[(2u * i3) + 1u]; 
+ 
+    /* (xb-xd) */ 
+    t2 = pSrc[2u * i1] - pSrc[2u * i3]; 
+ 
+    /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */ 
+    pSrc[2u * i1] = r1 * onebyfftLen; 
+ 
+    /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */ 
+    pSrc[(2u * i1) + 1u] = s1 * onebyfftLen; 
+ 
+ 
+    /* (xa - xc) - (yb-yd) */ 
+    r1 = r2 - t1; 
+ 
+    /* (xa - xc) + (yb-yd) */ 
+    r2 = r2 + t1; 
+ 
+    /* (ya - yc) + (xb-xd) */ 
+    s1 = s2 + t2; 
+ 
+    /* (ya - yc) - (xb-xd) */ 
+    s2 = s2 - t2; 
+ 
+    /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */ 
+    pSrc[2u * i2] = r1 * onebyfftLen; 
+ 
+    /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */ 
+    pSrc[(2u * i2) + 1u] = s1 * onebyfftLen; 
+ 
+    /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */ 
+    pSrc[2u * i3] = r2 * onebyfftLen; 
+ 
+    /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */ 
+    pSrc[(2u * i3) + 1u] = s2 * onebyfftLen; 
+  } 
+} 
+ 
+/*  
+ * @brief  In-place bit reversal function: reorders pSrc by swapping elements with partners read from pBitRevTab. 
+ * @param[in, out] *pSrc        points to the in-place floating-point buffer, accessed as pairs pSrc[2k], pSrc[2k+1] (presumably real/imaginary parts - confirm against caller). 
+ * @param[in]      fftSize      length of the FFT. NOTE(review): for values below 4 the loop bound (fftLenBy2 - 2u) wraps in unsigned arithmetic - confirm callers never pass that. 
+ * @param[in]      bitRevFactor number of table entries pBitRevTab is advanced by per iteration, so different size FFTs can share the same bit reversal table. 
+ * @param[in]      *pBitRevTab  points to the bit reversal table; one entry is read per loop iteration to obtain the next swap index j. 
+ * @return none. 
+ */ 
+ 
+void arm_bitreversal_f32( 
+  float32_t * pSrc, 
+  uint16_t fftSize, 
+  uint16_t bitRevFactor, 
+  uint16_t * pBitRevTab) 
+{ 
+  uint16_t fftLenBy2, fftLenBy2p1; 
+  uint16_t i, j; 
+  float32_t in; 
+ 
+  /*  Initializations: j starts at 0; fftLenBy2 = fftSize/2 and fftLenBy2p1 = fftSize/2 + 1 are element offsets used below */ 
+  j = 0u; 
+  fftLenBy2 = fftSize >> 1u; 
+  fftLenBy2p1 = (fftSize >> 1u) + 1u; 
+ 
+  /* Bit Reversal Implementation: i visits the even element indices 0, 2, ..., fftLenBy2 - 2 */ 
+  for (i = 0u; i <= (fftLenBy2 - 2u); i += 2u) 
+  { 
+    if(i < j) 
+    { 
+      /*  element i <-> element j, first half of the pair: pSrc[2*i] <-> pSrc[2*j] */ 
+      in = pSrc[2u * i]; 
+      pSrc[2u * i] = pSrc[2u * j]; 
+      pSrc[2u * j] = in; 
+ 
+      /*  element i <-> element j, second half of the pair: pSrc[2*i+1] <-> pSrc[2*j+1] */ 
+      in = pSrc[(2u * i) + 1u]; 
+      pSrc[(2u * i) + 1u] = pSrc[(2u * j) + 1u]; 
+      pSrc[(2u * j) + 1u] = in; 
+ 
+      /*  element (i+fftLenBy2p1) <-> element (j+fftLenBy2p1), first half of the pair */ 
+      in = pSrc[2u * (i + fftLenBy2p1)]; 
+      pSrc[2u * (i + fftLenBy2p1)] = pSrc[2u * (j + fftLenBy2p1)]; 
+      pSrc[2u * (j + fftLenBy2p1)] = in; 
+ 
+      /*  element (i+fftLenBy2p1) <-> element (j+fftLenBy2p1), second half of the pair */ 
+      in = pSrc[(2u * (i + fftLenBy2p1)) + 1u]; 
+      pSrc[(2u * (i + fftLenBy2p1)) + 1u] = 
+        pSrc[(2u * (j + fftLenBy2p1)) + 1u]; 
+      pSrc[(2u * (j + fftLenBy2p1)) + 1u] = in; 
+ 
+    } 
+ 
+    /*  Unconditional swap: element (i+1) <-> element (j+fftLenBy2), first half of the pair */ 
+    in = pSrc[2u * (i + 1u)]; 
+    pSrc[2u * (i + 1u)] = pSrc[2u * (j + fftLenBy2)]; 
+    pSrc[2u * (j + fftLenBy2)] = in; 
+ 
+    /*  Unconditional swap: element (i+1) <-> element (j+fftLenBy2), second half of the pair */ 
+    in = pSrc[(2u * (i + 1u)) + 1u]; 
+    pSrc[(2u * (i + 1u)) + 1u] = pSrc[(2u * (j + fftLenBy2)) + 1u]; 
+    pSrc[(2u * (j + fftLenBy2)) + 1u] = in; 
+ 
+    /*  Reading the swap index j for the next iteration from the bit reversal table */ 
+    j = *pBitRevTab; 
+ 
+    /*  Advancing the table pointer by bitRevFactor entries so a shorter FFT skips through the shared table  */ 
+    pBitRevTab += bitRevFactor; 
+  } 
+}

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/TransformFunctions/arm_cfft_radix4_init_f32.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,1190 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:	    arm_cfft_radix4_init_f32.c  
+*  
+* Description:	Radix-4 Decimation in Frequency Floating-point CFFT & CIFFT Initialization function  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.5  2010/04/26   
+* 	 incorporated review comments and updated with latest CMSIS layer  
+*  
+* Version 0.0.3  2010/03/10   
+*    Initial version  
+* -------------------------------------------------------------------- */ 
+ 
+ 
+#include "arm_math.h" 
+#include "arm_common_tables.h" 
+ 
+/**  
+ * @ingroup groupTransforms  
+ */ 
+ 
+/**  
+ * @addtogroup CFFT_CIFFT  
+ * @{  
+ */ 
+ 
+/*  
+* @brief  Floating-point Twiddle factors Table Generation  
+*/ 
+ 
+ 
+/**  
+* \par  
+* Example code for Floating-point Twiddle factors Generation:  
+* \par  
+* <pre>for(i = 0; i< N; i++)  
+* {  
+*	twiddleCoef[2*i]= cos(i * 2*PI/(float)N);  
+*	twiddleCoef[2*i+1]= sin(i * 2*PI/(float)N);  
+* } </pre>  
+* \par  
+* where N = 1024	and PI = 3.14159265358979  
+* \par  
+* Cos and Sin values are stored interleaved: twiddleCoef[2*i] holds the cosine and twiddleCoef[2*i+1] the sine  
+*   
+*/ 
+ 
+static const float32_t twiddleCoef[2048] = { 
+  1.000000000000000000f, 0.000000000000000000f, 
+  0.999981175282601110f, 0.006135884649154475f, 
+  0.999924701839144500f, 0.012271538285719925f, 
+  0.999830581795823400f, 0.018406729905804820f, 
+  0.999698818696204250f, 0.024541228522912288f, 
+  0.999529417501093140f, 0.030674803176636626f, 
+  0.999322384588349540f, 0.036807222941358832f, 
+  0.999077727752645360f, 0.042938256934940820f, 
+  0.998795456205172410f, 0.049067674327418015f, 
+  0.998475580573294770f, 0.055195244349689934f, 
+  0.998118112900149180f, 0.061320736302208578f, 
+  0.997723066644191640f, 0.067443919563664051f, 
+  0.997290456678690210f, 0.073564563599667426f, 
+  0.996820299291165670f, 0.079682437971430126f, 
+  0.996312612182778000f, 0.085797312344439894f, 
+  0.995767414467659820f, 0.091908956497132724f, 
+  0.995184726672196930f, 0.098017140329560604f, 
+  0.994564570734255420f, 0.104121633872054590f, 
+  0.993906970002356060f, 0.110222207293883060f, 
+  0.993211949234794500f, 0.116318630911904750f, 
+  0.992479534598709970f, 0.122410675199216200f, 
+  0.991709753669099530f, 0.128498110793793170f, 
+  0.990902635427780010f, 0.134580708507126170f, 
+  0.990058210262297120f, 0.140658239332849210f, 
+  0.989176509964781010f, 0.146730474455361750f, 
+  0.988257567730749460f, 0.152797185258443440f, 
+  0.987301418157858430f, 0.158858143333861450f, 
+  0.986308097244598670f, 0.164913120489969890f, 
+  0.985277642388941220f, 0.170961888760301220f, 
+  0.984210092386929030f, 0.177004220412148750f, 
+  0.983105487431216290f, 0.183039887955140950f, 
+  0.981963869109555240f, 0.189068664149806190f, 
+  0.980785280403230430f, 0.195090322016128250f, 
+  0.979569765685440520f, 0.201104634842091900f, 
+  0.978317370719627650f, 0.207111376192218560f, 
+  0.977028142657754390f, 0.213110319916091360f, 
+  0.975702130038528570f, 0.219101240156869800f, 
+  0.974339382785575860f, 0.225083911359792830f, 
+  0.972939952205560180f, 0.231058108280671110f, 
+  0.971503890986251780f, 0.237023605994367200f, 
+  0.970031253194543970f, 0.242980179903263870f, 
+  0.968522094274417380f, 0.248927605745720150f, 
+  0.966976471044852070f, 0.254865659604514570f, 
+  0.965394441697689400f, 0.260794117915275510f, 
+  0.963776065795439840f, 0.266712757474898370f, 
+  0.962121404269041580f, 0.272621355449948980f, 
+  0.960430519415565790f, 0.278519689385053060f, 
+  0.958703474895871600f, 0.284407537211271880f, 
+  0.956940335732208820f, 0.290284677254462330f, 
+  0.955141168305770780f, 0.296150888243623790f, 
+  0.953306040354193860f, 0.302005949319228080f, 
+  0.951435020969008340f, 0.307849640041534870f, 
+  0.949528180593036670f, 0.313681740398891520f, 
+  0.947585591017741090f, 0.319502030816015690f, 
+  0.945607325380521280f, 0.325310292162262930f, 
+  0.943593458161960390f, 0.331106305759876430f, 
+  0.941544065183020810f, 0.336889853392220050f, 
+  0.939459223602189920f, 0.342660717311994380f, 
+  0.937339011912574960f, 0.348418680249434560f, 
+  0.935183509938947610f, 0.354163525420490340f, 
+  0.932992798834738960f, 0.359895036534988110f, 
+  0.930766961078983710f, 0.365612997804773850f, 
+  0.928506080473215590f, 0.371317193951837540f, 
+  0.926210242138311380f, 0.377007410216418260f, 
+  0.923879532511286740f, 0.382683432365089780f, 
+  0.921514039342042010f, 0.388345046698826250f, 
+  0.919113851690057770f, 0.393992040061048100f, 
+  0.916679059921042700f, 0.399624199845646790f, 
+  0.914209755703530690f, 0.405241314004989860f, 
+  0.911706032005429880f, 0.410843171057903910f, 
+  0.909167983090522380f, 0.416429560097637150f, 
+  0.906595704514915330f, 0.422000270799799680f, 
+  0.903989293123443340f, 0.427555093430282080f, 
+  0.901348847046022030f, 0.433093818853151960f, 
+  0.898674465693953820f, 0.438616238538527660f, 
+  0.895966249756185220f, 0.444122144570429200f, 
+  0.893224301195515320f, 0.449611329654606540f, 
+  0.890448723244757880f, 0.455083587126343840f, 
+  0.887639620402853930f, 0.460538710958240010f, 
+  0.884797098430937790f, 0.465976495767966180f, 
+  0.881921264348355050f, 0.471396736825997640f, 
+  0.879012226428633530f, 0.476799230063322090f, 
+  0.876070094195406600f, 0.482183772079122720f, 
+  0.873094978418290090f, 0.487550160148436000f, 
+  0.870086991108711460f, 0.492898192229784040f, 
+  0.867046245515692650f, 0.498227666972781870f, 
+  0.863972856121586810f, 0.503538383725717580f, 
+  0.860866938637767310f, 0.508830142543106990f, 
+  0.857728610000272120f, 0.514102744193221660f, 
+  0.854557988365400530f, 0.519355990165589640f, 
+  0.851355193105265200f, 0.524589682678468950f, 
+  0.848120344803297230f, 0.529803624686294610f, 
+  0.844853565249707120f, 0.534997619887097150f, 
+  0.841554977436898440f, 0.540171472729892850f, 
+  0.838224705554838080f, 0.545324988422046460f, 
+  0.834862874986380010f, 0.550457972936604810f, 
+  0.831469612302545240f, 0.555570233019602180f, 
+  0.828045045257755800f, 0.560661576197336030f, 
+  0.824589302785025290f, 0.565731810783613120f, 
+  0.821102514991104650f, 0.570780745886967260f, 
+  0.817584813151583710f, 0.575808191417845340f, 
+  0.814036329705948410f, 0.580813958095764530f, 
+  0.810457198252594770f, 0.585797857456438860f, 
+  0.806847553543799330f, 0.590759701858874160f, 
+  0.803207531480644940f, 0.595699304492433360f, 
+  0.799537269107905010f, 0.600616479383868970f, 
+  0.795836904608883570f, 0.605511041404325550f, 
+  0.792106577300212390f, 0.610382806276309480f, 
+  0.788346427626606340f, 0.615231590580626820f, 
+  0.784556597155575240f, 0.620057211763289100f, 
+  0.780737228572094490f, 0.624859488142386340f, 
+  0.776888465673232440f, 0.629638238914926980f, 
+  0.773010453362736990f, 0.634393284163645490f, 
+  0.769103337645579700f, 0.639124444863775730f, 
+  0.765167265622458960f, 0.643831542889791390f, 
+  0.761202385484261780f, 0.648514401022112440f, 
+  0.757208846506484570f, 0.653172842953776760f, 
+  0.753186799043612520f, 0.657806693297078640f, 
+  0.749136394523459370f, 0.662415777590171780f, 
+  0.745057785441466060f, 0.666999922303637470f, 
+  0.740951125354959110f, 0.671558954847018330f, 
+  0.736816568877369900f, 0.676092703575315920f, 
+  0.732654271672412820f, 0.680600997795453020f, 
+  0.728464390448225200f, 0.685083667772700360f, 
+  0.724247082951467000f, 0.689540544737066830f, 
+  0.720002507961381650f, 0.693971460889654000f, 
+  0.715730825283818590f, 0.698376249408972920f, 
+  0.711432195745216430f, 0.702754744457225300f, 
+  0.707106781186547570f, 0.707106781186547460f, 
+  0.702754744457225300f, 0.711432195745216430f, 
+  0.698376249408972920f, 0.715730825283818590f, 
+  0.693971460889654000f, 0.720002507961381650f, 
+  0.689540544737066940f, 0.724247082951466890f, 
+  0.685083667772700360f, 0.728464390448225200f, 
+  0.680600997795453130f, 0.732654271672412820f, 
+  0.676092703575316030f, 0.736816568877369790f, 
+  0.671558954847018330f, 0.740951125354959110f, 
+  0.666999922303637470f, 0.745057785441465950f, 
+  0.662415777590171780f, 0.749136394523459260f, 
+  0.657806693297078640f, 0.753186799043612410f, 
+  0.653172842953776760f, 0.757208846506484460f, 
+  0.648514401022112550f, 0.761202385484261780f, 
+  0.643831542889791500f, 0.765167265622458960f, 
+  0.639124444863775730f, 0.769103337645579590f, 
+  0.634393284163645490f, 0.773010453362736990f, 
+  0.629638238914927100f, 0.776888465673232440f, 
+  0.624859488142386450f, 0.780737228572094380f, 
+  0.620057211763289210f, 0.784556597155575240f, 
+  0.615231590580626820f, 0.788346427626606230f, 
+  0.610382806276309480f, 0.792106577300212390f, 
+  0.605511041404325550f, 0.795836904608883460f, 
+  0.600616479383868970f, 0.799537269107905010f, 
+  0.595699304492433470f, 0.803207531480644830f, 
+  0.590759701858874280f, 0.806847553543799220f, 
+  0.585797857456438860f, 0.810457198252594770f, 
+  0.580813958095764530f, 0.814036329705948300f, 
+  0.575808191417845340f, 0.817584813151583710f, 
+  0.570780745886967370f, 0.821102514991104650f, 
+  0.565731810783613230f, 0.824589302785025290f, 
+  0.560661576197336030f, 0.828045045257755800f, 
+  0.555570233019602290f, 0.831469612302545240f, 
+  0.550457972936604810f, 0.834862874986380010f, 
+  0.545324988422046460f, 0.838224705554837970f, 
+  0.540171472729892970f, 0.841554977436898330f, 
+  0.534997619887097260f, 0.844853565249707010f, 
+  0.529803624686294830f, 0.848120344803297120f, 
+  0.524589682678468840f, 0.851355193105265200f, 
+  0.519355990165589530f, 0.854557988365400530f, 
+  0.514102744193221660f, 0.857728610000272120f, 
+  0.508830142543106990f, 0.860866938637767310f, 
+  0.503538383725717580f, 0.863972856121586700f, 
+  0.498227666972781870f, 0.867046245515692650f, 
+  0.492898192229784090f, 0.870086991108711350f, 
+  0.487550160148436050f, 0.873094978418290090f, 
+  0.482183772079122830f, 0.876070094195406600f, 
+  0.476799230063322250f, 0.879012226428633410f, 
+  0.471396736825997810f, 0.881921264348354940f, 
+  0.465976495767966130f, 0.884797098430937790f, 
+  0.460538710958240010f, 0.887639620402853930f, 
+  0.455083587126343840f, 0.890448723244757880f, 
+  0.449611329654606600f, 0.893224301195515320f, 
+  0.444122144570429260f, 0.895966249756185110f, 
+  0.438616238538527710f, 0.898674465693953820f, 
+  0.433093818853152010f, 0.901348847046022030f, 
+  0.427555093430282200f, 0.903989293123443340f, 
+  0.422000270799799790f, 0.906595704514915330f, 
+  0.416429560097637320f, 0.909167983090522270f, 
+  0.410843171057903910f, 0.911706032005429880f, 
+  0.405241314004989860f, 0.914209755703530690f, 
+  0.399624199845646790f, 0.916679059921042700f, 
+  0.393992040061048100f, 0.919113851690057770f, 
+  0.388345046698826300f, 0.921514039342041900f, 
+  0.382683432365089840f, 0.923879532511286740f, 
+  0.377007410216418310f, 0.926210242138311270f, 
+  0.371317193951837600f, 0.928506080473215480f, 
+  0.365612997804773960f, 0.930766961078983710f, 
+  0.359895036534988280f, 0.932992798834738850f, 
+  0.354163525420490510f, 0.935183509938947500f, 
+  0.348418680249434510f, 0.937339011912574960f, 
+  0.342660717311994380f, 0.939459223602189920f, 
+  0.336889853392220050f, 0.941544065183020810f, 
+  0.331106305759876430f, 0.943593458161960390f, 
+  0.325310292162262980f, 0.945607325380521280f, 
+  0.319502030816015750f, 0.947585591017741090f, 
+  0.313681740398891570f, 0.949528180593036670f, 
+  0.307849640041534980f, 0.951435020969008340f, 
+  0.302005949319228200f, 0.953306040354193750f, 
+  0.296150888243623960f, 0.955141168305770670f, 
+  0.290284677254462330f, 0.956940335732208940f, 
+  0.284407537211271820f, 0.958703474895871600f, 
+  0.278519689385053060f, 0.960430519415565790f, 
+  0.272621355449948980f, 0.962121404269041580f, 
+  0.266712757474898420f, 0.963776065795439840f, 
+  0.260794117915275570f, 0.965394441697689400f, 
+  0.254865659604514630f, 0.966976471044852070f, 
+  0.248927605745720260f, 0.968522094274417270f, 
+  0.242980179903263980f, 0.970031253194543970f, 
+  0.237023605994367340f, 0.971503890986251780f, 
+  0.231058108280671280f, 0.972939952205560070f, 
+  0.225083911359792780f, 0.974339382785575860f, 
+  0.219101240156869770f, 0.975702130038528570f, 
+  0.213110319916091360f, 0.977028142657754390f, 
+  0.207111376192218560f, 0.978317370719627650f, 
+  0.201104634842091960f, 0.979569765685440520f, 
+  0.195090322016128330f, 0.980785280403230430f, 
+  0.189068664149806280f, 0.981963869109555240f, 
+  0.183039887955141060f, 0.983105487431216290f, 
+  0.177004220412148860f, 0.984210092386929030f, 
+  0.170961888760301360f, 0.985277642388941220f, 
+  0.164913120489970090f, 0.986308097244598670f, 
+  0.158858143333861390f, 0.987301418157858430f, 
+  0.152797185258443410f, 0.988257567730749460f, 
+  0.146730474455361750f, 0.989176509964781010f, 
+  0.140658239332849240f, 0.990058210262297120f, 
+  0.134580708507126220f, 0.990902635427780010f, 
+  0.128498110793793220f, 0.991709753669099530f, 
+  0.122410675199216280f, 0.992479534598709970f, 
+  0.116318630911904880f, 0.993211949234794500f, 
+  0.110222207293883180f, 0.993906970002356060f, 
+  0.104121633872054730f, 0.994564570734255420f, 
+  0.098017140329560770f, 0.995184726672196820f, 
+  0.091908956497132696f, 0.995767414467659820f, 
+  0.085797312344439880f, 0.996312612182778000f, 
+  0.079682437971430126f, 0.996820299291165670f, 
+  0.073564563599667454f, 0.997290456678690210f, 
+  0.067443919563664106f, 0.997723066644191640f, 
+  0.061320736302208648f, 0.998118112900149180f, 
+  0.055195244349690031f, 0.998475580573294770f, 
+  0.049067674327418126f, 0.998795456205172410f, 
+  0.042938256934940959f, 0.999077727752645360f, 
+  0.036807222941358991f, 0.999322384588349540f, 
+  0.030674803176636581f, 0.999529417501093140f, 
+  0.024541228522912264f, 0.999698818696204250f, 
+  0.018406729905804820f, 0.999830581795823400f, 
+  0.012271538285719944f, 0.999924701839144500f, 
+  0.006135884649154515f, 0.999981175282601110f, 
+  0.000000000000000061f, 1.000000000000000000f, 
+  -0.006135884649154393f, 0.999981175282601110f, 
+  -0.012271538285719823f, 0.999924701839144500f, 
+  -0.018406729905804695f, 0.999830581795823400f, 
+  -0.024541228522912142f, 0.999698818696204250f, 
+  -0.030674803176636459f, 0.999529417501093140f, 
+  -0.036807222941358866f, 0.999322384588349540f, 
+  -0.042938256934940834f, 0.999077727752645360f, 
+  -0.049067674327418008f, 0.998795456205172410f, 
+  -0.055195244349689913f, 0.998475580573294770f, 
+  -0.061320736302208530f, 0.998118112900149180f, 
+  -0.067443919563663982f, 0.997723066644191640f, 
+  -0.073564563599667329f, 0.997290456678690210f, 
+  -0.079682437971430015f, 0.996820299291165780f, 
+  -0.085797312344439755f, 0.996312612182778000f, 
+  -0.091908956497132571f, 0.995767414467659820f, 
+  -0.098017140329560645f, 0.995184726672196930f, 
+  -0.104121633872054600f, 0.994564570734255420f, 
+  -0.110222207293883060f, 0.993906970002356060f, 
+  -0.116318630911904750f, 0.993211949234794500f, 
+  -0.122410675199216150f, 0.992479534598709970f, 
+  -0.128498110793793110f, 0.991709753669099530f, 
+  -0.134580708507126110f, 0.990902635427780010f, 
+  -0.140658239332849130f, 0.990058210262297120f, 
+  -0.146730474455361640f, 0.989176509964781010f, 
+  -0.152797185258443300f, 0.988257567730749460f, 
+  -0.158858143333861280f, 0.987301418157858430f, 
+  -0.164913120489969950f, 0.986308097244598670f, 
+  -0.170961888760301240f, 0.985277642388941220f, 
+  -0.177004220412148750f, 0.984210092386929030f, 
+  -0.183039887955140920f, 0.983105487431216290f, 
+  -0.189068664149806160f, 0.981963869109555240f, 
+  -0.195090322016128190f, 0.980785280403230430f, 
+  -0.201104634842091820f, 0.979569765685440520f, 
+  -0.207111376192218450f, 0.978317370719627650f, 
+  -0.213110319916091250f, 0.977028142657754390f, 
+  -0.219101240156869660f, 0.975702130038528570f, 
+  -0.225083911359792670f, 0.974339382785575860f, 
+  -0.231058108280671140f, 0.972939952205560180f, 
+  -0.237023605994367230f, 0.971503890986251780f, 
+  -0.242980179903263870f, 0.970031253194543970f, 
+  -0.248927605745720120f, 0.968522094274417380f, 
+  -0.254865659604514520f, 0.966976471044852070f, 
+  -0.260794117915275460f, 0.965394441697689400f, 
+  -0.266712757474898310f, 0.963776065795439840f, 
+  -0.272621355449948870f, 0.962121404269041580f, 
+  -0.278519689385052950f, 0.960430519415565900f, 
+  -0.284407537211271710f, 0.958703474895871600f, 
+  -0.290284677254462160f, 0.956940335732208940f, 
+  -0.296150888243623840f, 0.955141168305770670f, 
+  -0.302005949319228080f, 0.953306040354193860f, 
+  -0.307849640041534870f, 0.951435020969008340f, 
+  -0.313681740398891410f, 0.949528180593036670f, 
+  -0.319502030816015640f, 0.947585591017741200f, 
+  -0.325310292162262870f, 0.945607325380521390f, 
+  -0.331106305759876320f, 0.943593458161960390f, 
+  -0.336889853392219940f, 0.941544065183020810f, 
+  -0.342660717311994270f, 0.939459223602189920f, 
+  -0.348418680249434400f, 0.937339011912574960f, 
+  -0.354163525420490400f, 0.935183509938947610f, 
+  -0.359895036534988170f, 0.932992798834738850f, 
+  -0.365612997804773850f, 0.930766961078983710f, 
+  -0.371317193951837490f, 0.928506080473215590f, 
+  -0.377007410216418200f, 0.926210242138311380f, 
+  -0.382683432365089730f, 0.923879532511286740f, 
+  -0.388345046698826190f, 0.921514039342042010f, 
+  -0.393992040061047990f, 0.919113851690057770f, 
+  -0.399624199845646680f, 0.916679059921042700f, 
+  -0.405241314004989750f, 0.914209755703530690f, 
+  -0.410843171057903800f, 0.911706032005429880f, 
+  -0.416429560097636990f, 0.909167983090522490f, 
+  -0.422000270799799680f, 0.906595704514915330f, 
+  -0.427555093430281860f, 0.903989293123443450f, 
+  -0.433093818853151900f, 0.901348847046022030f, 
+  -0.438616238538527380f, 0.898674465693953930f, 
+  -0.444122144570429140f, 0.895966249756185220f, 
+  -0.449611329654606710f, 0.893224301195515210f, 
+  -0.455083587126343720f, 0.890448723244757990f, 
+  -0.460538710958240060f, 0.887639620402853930f, 
+  -0.465976495767966010f, 0.884797098430937900f, 
+  -0.471396736825997700f, 0.881921264348355050f, 
+  -0.476799230063321920f, 0.879012226428633530f, 
+  -0.482183772079122720f, 0.876070094195406600f, 
+  -0.487550160148435720f, 0.873094978418290200f, 
+  -0.492898192229783980f, 0.870086991108711460f, 
+  -0.498227666972781590f, 0.867046245515692760f, 
+  -0.503538383725717460f, 0.863972856121586810f, 
+  -0.508830142543107100f, 0.860866938637767200f, 
+  -0.514102744193221660f, 0.857728610000272120f, 
+  -0.519355990165589640f, 0.854557988365400530f, 
+  -0.524589682678468730f, 0.851355193105265200f, 
+  -0.529803624686294720f, 0.848120344803297230f, 
+  -0.534997619887097040f, 0.844853565249707230f, 
+  -0.540171472729892850f, 0.841554977436898440f, 
+  -0.545324988422046240f, 0.838224705554838190f, 
+  -0.550457972936604700f, 0.834862874986380120f, 
+  -0.555570233019601960f, 0.831469612302545460f, 
+  -0.560661576197335920f, 0.828045045257755800f, 
+  -0.565731810783613230f, 0.824589302785025180f, 
+  -0.570780745886967140f, 0.821102514991104760f, 
+  -0.575808191417845340f, 0.817584813151583710f, 
+  -0.580813958095764420f, 0.814036329705948520f, 
+  -0.585797857456438860f, 0.810457198252594770f, 
+  -0.590759701858874050f, 0.806847553543799450f, 
+  -0.595699304492433360f, 0.803207531480644940f, 
+  -0.600616479383868750f, 0.799537269107905240f, 
+  -0.605511041404325430f, 0.795836904608883570f, 
+  -0.610382806276309590f, 0.792106577300212280f, 
+  -0.615231590580626710f, 0.788346427626606340f, 
+  -0.620057211763289210f, 0.784556597155575130f, 
+  -0.624859488142386230f, 0.780737228572094600f, 
+  -0.629638238914927100f, 0.776888465673232440f, 
+  -0.634393284163645380f, 0.773010453362737100f, 
+  -0.639124444863775730f, 0.769103337645579590f, 
+  -0.643831542889791280f, 0.765167265622459070f, 
+  -0.648514401022112440f, 0.761202385484261890f, 
+  -0.653172842953776530f, 0.757208846506484680f, 
+  -0.657806693297078640f, 0.753186799043612520f, 
+  -0.662415777590171890f, 0.749136394523459260f, 
+  -0.666999922303637360f, 0.745057785441466060f, 
+  -0.671558954847018440f, 0.740951125354958990f, 
+  -0.676092703575315810f, 0.736816568877370020f, 
+  -0.680600997795453020f, 0.732654271672412820f, 
+  -0.685083667772700240f, 0.728464390448225310f, 
+  -0.689540544737066940f, 0.724247082951466890f, 
+  -0.693971460889653780f, 0.720002507961381770f, 
+  -0.698376249408972800f, 0.715730825283818710f, 
+  -0.702754744457225080f, 0.711432195745216660f, 
+  -0.707106781186547460f, 0.707106781186547570f, 
+  -0.711432195745216540f, 0.702754744457225190f, 
+  -0.715730825283818590f, 0.698376249408972920f, 
+  -0.720002507961381650f, 0.693971460889654000f, 
+  -0.724247082951466780f, 0.689540544737067050f, 
+  -0.728464390448225200f, 0.685083667772700360f, 
+  -0.732654271672412700f, 0.680600997795453240f, 
+  -0.736816568877369900f, 0.676092703575315920f, 
+  -0.740951125354958880f, 0.671558954847018550f, 
+  -0.745057785441465950f, 0.666999922303637580f, 
+  -0.749136394523459150f, 0.662415777590172010f, 
+  -0.753186799043612410f, 0.657806693297078750f, 
+  -0.757208846506484570f, 0.653172842953776640f, 
+  -0.761202385484261670f, 0.648514401022112550f, 
+  -0.765167265622458960f, 0.643831542889791390f, 
+  -0.769103337645579480f, 0.639124444863775840f, 
+  -0.773010453362736990f, 0.634393284163645490f, 
+  -0.776888465673232330f, 0.629638238914927210f, 
+  -0.780737228572094490f, 0.624859488142386340f, 
+  -0.784556597155575020f, 0.620057211763289430f, 
+  -0.788346427626606230f, 0.615231590580626930f, 
+  -0.792106577300212170f, 0.610382806276309700f, 
+  -0.795836904608883460f, 0.605511041404325660f, 
+  -0.799537269107905120f, 0.600616479383868860f, 
+  -0.803207531480644830f, 0.595699304492433470f, 
+  -0.806847553543799330f, 0.590759701858874160f, 
+  -0.810457198252594660f, 0.585797857456438980f, 
+  -0.814036329705948410f, 0.580813958095764530f, 
+  -0.817584813151583600f, 0.575808191417845450f, 
+  -0.821102514991104650f, 0.570780745886967260f, 
+  -0.824589302785025070f, 0.565731810783613450f, 
+  -0.828045045257755690f, 0.560661576197336140f, 
+  -0.831469612302545350f, 0.555570233019602180f, 
+  -0.834862874986380010f, 0.550457972936604920f, 
+  -0.838224705554838080f, 0.545324988422046350f, 
+  -0.841554977436898330f, 0.540171472729892970f, 
+  -0.844853565249707120f, 0.534997619887097150f, 
+  -0.848120344803297120f, 0.529803624686294830f, 
+  -0.851355193105265200f, 0.524589682678468950f, 
+  -0.854557988365400420f, 0.519355990165589750f, 
+  -0.857728610000272010f, 0.514102744193221770f, 
+  -0.860866938637767090f, 0.508830142543107320f, 
+  -0.863972856121586700f, 0.503538383725717690f, 
+  -0.867046245515692760f, 0.498227666972781760f, 
+  -0.870086991108711350f, 0.492898192229784150f, 
+  -0.873094978418290090f, 0.487550160148435880f, 
+  -0.876070094195406490f, 0.482183772079122890f, 
+  -0.879012226428633530f, 0.476799230063322090f, 
+  -0.881921264348354940f, 0.471396736825997860f, 
+  -0.884797098430937790f, 0.465976495767966180f, 
+  -0.887639620402853820f, 0.460538710958240230f, 
+  -0.890448723244757880f, 0.455083587126343890f, 
+  -0.893224301195515210f, 0.449611329654606870f, 
+  -0.895966249756185110f, 0.444122144570429310f, 
+  -0.898674465693953930f, 0.438616238538527550f, 
+  -0.901348847046021920f, 0.433093818853152070f, 
+  -0.903989293123443340f, 0.427555093430282030f, 
+  -0.906595704514915330f, 0.422000270799799850f, 
+  -0.909167983090522380f, 0.416429560097637150f, 
+  -0.911706032005429770f, 0.410843171057904130f, 
+  -0.914209755703530690f, 0.405241314004989920f, 
+  -0.916679059921042590f, 0.399624199845647070f, 
+  -0.919113851690057770f, 0.393992040061048150f, 
+  -0.921514039342041790f, 0.388345046698826580f, 
+  -0.923879532511286740f, 0.382683432365089890f, 
+  -0.926210242138311380f, 0.377007410216418150f, 
+  -0.928506080473215480f, 0.371317193951837710f, 
+  -0.930766961078983710f, 0.365612997804773800f, 
+  -0.932992798834738850f, 0.359895036534988330f, 
+  -0.935183509938947610f, 0.354163525420490400f, 
+  -0.937339011912574850f, 0.348418680249434790f, 
+  -0.939459223602189920f, 0.342660717311994430f, 
+  -0.941544065183020700f, 0.336889853392220330f, 
+  -0.943593458161960390f, 0.331106305759876480f, 
+  -0.945607325380521170f, 0.325310292162263260f, 
+  -0.947585591017741090f, 0.319502030816015800f, 
+  -0.949528180593036670f, 0.313681740398891410f, 
+  -0.951435020969008340f, 0.307849640041535030f, 
+  -0.953306040354193860f, 0.302005949319228030f, 
+  -0.955141168305770670f, 0.296150888243624010f, 
+  -0.956940335732208820f, 0.290284677254462390f, 
+  -0.958703474895871490f, 0.284407537211272100f, 
+  -0.960430519415565790f, 0.278519689385053170f, 
+  -0.962121404269041470f, 0.272621355449949250f, 
+  -0.963776065795439840f, 0.266712757474898480f, 
+  -0.965394441697689290f, 0.260794117915275850f, 
+  -0.966976471044852070f, 0.254865659604514680f, 
+  -0.968522094274417380f, 0.248927605745720090f, 
+  -0.970031253194543970f, 0.242980179903264070f, 
+  -0.971503890986251780f, 0.237023605994367170f, 
+  -0.972939952205560070f, 0.231058108280671330f, 
+  -0.974339382785575860f, 0.225083911359792830f, 
+  -0.975702130038528460f, 0.219101240156870050f, 
+  -0.977028142657754390f, 0.213110319916091420f, 
+  -0.978317370719627540f, 0.207111376192218840f, 
+  -0.979569765685440520f, 0.201104634842092010f, 
+  -0.980785280403230430f, 0.195090322016128610f, 
+  -0.981963869109555240f, 0.189068664149806360f, 
+  -0.983105487431216290f, 0.183039887955140900f, 
+  -0.984210092386929030f, 0.177004220412148940f, 
+  -0.985277642388941220f, 0.170961888760301220f, 
+  -0.986308097244598560f, 0.164913120489970140f, 
+  -0.987301418157858430f, 0.158858143333861470f, 
+  -0.988257567730749460f, 0.152797185258443690f, 
+  -0.989176509964781010f, 0.146730474455361800f, 
+  -0.990058210262297010f, 0.140658239332849540f, 
+  -0.990902635427780010f, 0.134580708507126280f, 
+  -0.991709753669099530f, 0.128498110793793090f, 
+  -0.992479534598709970f, 0.122410675199216350f, 
+  -0.993211949234794500f, 0.116318630911904710f, 
+  -0.993906970002356060f, 0.110222207293883240f, 
+  -0.994564570734255420f, 0.104121633872054570f, 
+  -0.995184726672196820f, 0.098017140329560826f, 
+  -0.995767414467659820f, 0.091908956497132752f, 
+  -0.996312612182778000f, 0.085797312344440158f, 
+  -0.996820299291165670f, 0.079682437971430195f, 
+  -0.997290456678690210f, 0.073564563599667732f, 
+  -0.997723066644191640f, 0.067443919563664176f, 
+  -0.998118112900149180f, 0.061320736302208488f, 
+  -0.998475580573294770f, 0.055195244349690094f, 
+  -0.998795456205172410f, 0.049067674327417966f, 
+  -0.999077727752645360f, 0.042938256934941021f, 
+  -0.999322384588349540f, 0.036807222941358832f, 
+  -0.999529417501093140f, 0.030674803176636865f, 
+  -0.999698818696204250f, 0.024541228522912326f, 
+  -0.999830581795823400f, 0.018406729905805101f, 
+  -0.999924701839144500f, 0.012271538285720007f, 
+  -0.999981175282601110f, 0.006135884649154799f, 
+  -1.000000000000000000f, 0.000000000000000122f, 
+  -0.999981175282601110f, -0.006135884649154554f, 
+  -0.999924701839144500f, -0.012271538285719762f, 
+  -0.999830581795823400f, -0.018406729905804858f, 
+  -0.999698818696204250f, -0.024541228522912080f, 
+  -0.999529417501093140f, -0.030674803176636619f, 
+  -0.999322384588349540f, -0.036807222941358582f, 
+  -0.999077727752645360f, -0.042938256934940779f, 
+  -0.998795456205172410f, -0.049067674327417724f, 
+  -0.998475580573294770f, -0.055195244349689851f, 
+  -0.998118112900149180f, -0.061320736302208245f, 
+  -0.997723066644191640f, -0.067443919563663926f, 
+  -0.997290456678690210f, -0.073564563599667496f, 
+  -0.996820299291165780f, -0.079682437971429945f, 
+  -0.996312612182778000f, -0.085797312344439922f, 
+  -0.995767414467659820f, -0.091908956497132516f, 
+  -0.995184726672196930f, -0.098017140329560590f, 
+  -0.994564570734255530f, -0.104121633872054320f, 
+  -0.993906970002356060f, -0.110222207293883000f, 
+  -0.993211949234794610f, -0.116318630911904470f, 
+  -0.992479534598709970f, -0.122410675199216100f, 
+  -0.991709753669099530f, -0.128498110793792840f, 
+  -0.990902635427780010f, -0.134580708507126060f, 
+  -0.990058210262297120f, -0.140658239332849290f, 
+  -0.989176509964781010f, -0.146730474455361580f, 
+  -0.988257567730749460f, -0.152797185258443440f, 
+  -0.987301418157858430f, -0.158858143333861220f, 
+  -0.986308097244598670f, -0.164913120489969890f, 
+  -0.985277642388941330f, -0.170961888760300970f, 
+  -0.984210092386929140f, -0.177004220412148690f, 
+  -0.983105487431216400f, -0.183039887955140650f, 
+  -0.981963869109555240f, -0.189068664149806110f, 
+  -0.980785280403230430f, -0.195090322016128360f, 
+  -0.979569765685440520f, -0.201104634842091760f, 
+  -0.978317370719627650f, -0.207111376192218590f, 
+  -0.977028142657754390f, -0.213110319916091200f, 
+  -0.975702130038528570f, -0.219101240156869800f, 
+  -0.974339382785575860f, -0.225083911359792610f, 
+  -0.972939952205560180f, -0.231058108280671080f, 
+  -0.971503890986251890f, -0.237023605994366950f, 
+  -0.970031253194543970f, -0.242980179903263820f, 
+  -0.968522094274417380f, -0.248927605745719870f, 
+  -0.966976471044852180f, -0.254865659604514460f, 
+  -0.965394441697689400f, -0.260794117915275630f, 
+  -0.963776065795439950f, -0.266712757474898250f, 
+  -0.962121404269041580f, -0.272621355449949030f, 
+  -0.960430519415565900f, -0.278519689385052890f, 
+  -0.958703474895871600f, -0.284407537211271820f, 
+  -0.956940335732208940f, -0.290284677254462110f, 
+  -0.955141168305770780f, -0.296150888243623790f, 
+  -0.953306040354193970f, -0.302005949319227810f, 
+  -0.951435020969008450f, -0.307849640041534810f, 
+  -0.949528180593036790f, -0.313681740398891180f, 
+  -0.947585591017741200f, -0.319502030816015580f, 
+  -0.945607325380521280f, -0.325310292162262980f, 
+  -0.943593458161960390f, -0.331106305759876260f, 
+  -0.941544065183020810f, -0.336889853392220110f, 
+  -0.939459223602190030f, -0.342660717311994210f, 
+  -0.937339011912574960f, -0.348418680249434560f, 
+  -0.935183509938947720f, -0.354163525420490120f, 
+  -0.932992798834738960f, -0.359895036534988110f, 
+  -0.930766961078983820f, -0.365612997804773580f, 
+  -0.928506080473215590f, -0.371317193951837430f, 
+  -0.926210242138311490f, -0.377007410216417930f, 
+  -0.923879532511286850f, -0.382683432365089670f, 
+  -0.921514039342041900f, -0.388345046698826360f, 
+  -0.919113851690057770f, -0.393992040061047930f, 
+  -0.916679059921042700f, -0.399624199845646840f, 
+  -0.914209755703530690f, -0.405241314004989690f, 
+  -0.911706032005429880f, -0.410843171057903910f, 
+  -0.909167983090522490f, -0.416429560097636930f, 
+  -0.906595704514915450f, -0.422000270799799630f, 
+  -0.903989293123443450f, -0.427555093430281810f, 
+  -0.901348847046022030f, -0.433093818853151850f, 
+  -0.898674465693954040f, -0.438616238538527330f, 
+  -0.895966249756185220f, -0.444122144570429090f, 
+  -0.893224301195515320f, -0.449611329654606650f, 
+  -0.890448723244757990f, -0.455083587126343670f, 
+  -0.887639620402853930f, -0.460538710958240060f, 
+  -0.884797098430937900f, -0.465976495767965960f, 
+  -0.881921264348355050f, -0.471396736825997640f, 
+  -0.879012226428633640f, -0.476799230063321870f, 
+  -0.876070094195406600f, -0.482183772079122660f, 
+  -0.873094978418290200f, -0.487550160148435660f, 
+  -0.870086991108711460f, -0.492898192229783930f, 
+  -0.867046245515692870f, -0.498227666972781540f, 
+  -0.863972856121586810f, -0.503538383725717460f, 
+  -0.860866938637767310f, -0.508830142543107100f, 
+  -0.857728610000272120f, -0.514102744193221550f, 
+  -0.854557988365400530f, -0.519355990165589640f, 
+  -0.851355193105265310f, -0.524589682678468730f, 
+  -0.848120344803297230f, -0.529803624686294610f, 
+  -0.844853565249707230f, -0.534997619887096930f, 
+  -0.841554977436898440f, -0.540171472729892850f, 
+  -0.838224705554838190f, -0.545324988422046130f, 
+  -0.834862874986380120f, -0.550457972936604700f, 
+  -0.831469612302545460f, -0.555570233019601960f, 
+  -0.828045045257755800f, -0.560661576197335920f, 
+  -0.824589302785025290f, -0.565731810783613230f, 
+  -0.821102514991104760f, -0.570780745886967140f, 
+  -0.817584813151583710f, -0.575808191417845340f, 
+  -0.814036329705948520f, -0.580813958095764300f, 
+  -0.810457198252594770f, -0.585797857456438860f, 
+  -0.806847553543799450f, -0.590759701858873940f, 
+  -0.803207531480644940f, -0.595699304492433250f, 
+  -0.799537269107905240f, -0.600616479383868640f, 
+  -0.795836904608883570f, -0.605511041404325430f, 
+  -0.792106577300212280f, -0.610382806276309480f, 
+  -0.788346427626606340f, -0.615231590580626710f, 
+  -0.784556597155575240f, -0.620057211763289210f, 
+  -0.780737228572094600f, -0.624859488142386230f, 
+  -0.776888465673232440f, -0.629638238914926980f, 
+  -0.773010453362737100f, -0.634393284163645270f, 
+  -0.769103337645579700f, -0.639124444863775730f, 
+  -0.765167265622459070f, -0.643831542889791280f, 
+  -0.761202385484261890f, -0.648514401022112330f, 
+  -0.757208846506484790f, -0.653172842953776530f, 
+  -0.753186799043612630f, -0.657806693297078530f, 
+  -0.749136394523459260f, -0.662415777590171780f, 
+  -0.745057785441466060f, -0.666999922303637360f, 
+  -0.740951125354959110f, -0.671558954847018440f, 
+  -0.736816568877370020f, -0.676092703575315810f, 
+  -0.732654271672412820f, -0.680600997795453020f, 
+  -0.728464390448225420f, -0.685083667772700130f, 
+  -0.724247082951467000f, -0.689540544737066830f, 
+  -0.720002507961381880f, -0.693971460889653780f, 
+  -0.715730825283818710f, -0.698376249408972800f, 
+  -0.711432195745216660f, -0.702754744457225080f, 
+  -0.707106781186547680f, -0.707106781186547460f, 
+  -0.702754744457225300f, -0.711432195745216430f, 
+  -0.698376249408973030f, -0.715730825283818480f, 
+  -0.693971460889654000f, -0.720002507961381650f, 
+  -0.689540544737067050f, -0.724247082951466780f, 
+  -0.685083667772700360f, -0.728464390448225200f, 
+  -0.680600997795453240f, -0.732654271672412590f, 
+  -0.676092703575316030f, -0.736816568877369790f, 
+  -0.671558954847018660f, -0.740951125354958880f, 
+  -0.666999922303637580f, -0.745057785441465840f, 
+  -0.662415777590172010f, -0.749136394523459040f, 
+  -0.657806693297078750f, -0.753186799043612410f, 
+  -0.653172842953777090f, -0.757208846506484230f, 
+  -0.648514401022112220f, -0.761202385484262000f, 
+  -0.643831542889791500f, -0.765167265622458960f, 
+  -0.639124444863775950f, -0.769103337645579480f, 
+  -0.634393284163645930f, -0.773010453362736660f, 
+  -0.629638238914926870f, -0.776888465673232550f, 
+  -0.624859488142386450f, -0.780737228572094380f, 
+  -0.620057211763289430f, -0.784556597155575020f, 
+  -0.615231590580627260f, -0.788346427626605890f, 
+  -0.610382806276309360f, -0.792106577300212390f, 
+  -0.605511041404325660f, -0.795836904608883460f, 
+  -0.600616479383869310f, -0.799537269107904790f, 
+  -0.595699304492433130f, -0.803207531480645050f, 
+  -0.590759701858874280f, -0.806847553543799220f, 
+  -0.585797857456439090f, -0.810457198252594660f, 
+  -0.580813958095764970f, -0.814036329705948080f, 
+  -0.575808191417845230f, -0.817584813151583820f, 
+  -0.570780745886967370f, -0.821102514991104650f, 
+  -0.565731810783613450f, -0.824589302785025070f, 
+  -0.560661576197336480f, -0.828045045257755460f, 
+  -0.555570233019602180f, -0.831469612302545240f, 
+  -0.550457972936604920f, -0.834862874986380010f, 
+  -0.545324988422046800f, -0.838224705554837860f, 
+  -0.540171472729892740f, -0.841554977436898550f, 
+  -0.534997619887097260f, -0.844853565249707010f, 
+  -0.529803624686294940f, -0.848120344803297120f, 
+  -0.524589682678469390f, -0.851355193105264860f, 
+  -0.519355990165589420f, -0.854557988365400640f, 
+  -0.514102744193221770f, -0.857728610000272010f, 
+  -0.508830142543107320f, -0.860866938637767090f, 
+  -0.503538383725718020f, -0.863972856121586470f, 
+  -0.498227666972781810f, -0.867046245515692650f, 
+  -0.492898192229784200f, -0.870086991108711350f, 
+  -0.487550160148436330f, -0.873094978418289870f, 
+  -0.482183772079122550f, -0.876070094195406710f, 
+  -0.476799230063322140f, -0.879012226428633410f, 
+  -0.471396736825997860f, -0.881921264348354940f, 
+  -0.465976495767966630f, -0.884797098430937570f, 
+  -0.460538710958239890f, -0.887639620402854050f, 
+  -0.455083587126343950f, -0.890448723244757880f, 
+  -0.449611329654606930f, -0.893224301195515210f, 
+  -0.444122144570429760f, -0.895966249756184880f, 
+  -0.438616238538527600f, -0.898674465693953820f, 
+  -0.433093818853152120f, -0.901348847046021920f, 
+  -0.427555093430282470f, -0.903989293123443120f, 
+  -0.422000270799799520f, -0.906595704514915450f, 
+  -0.416429560097637210f, -0.909167983090522380f, 
+  -0.410843171057904190f, -0.911706032005429770f, 
+  -0.405241314004990360f, -0.914209755703530470f, 
+  -0.399624199845646730f, -0.916679059921042700f, 
+  -0.393992040061048210f, -0.919113851690057660f, 
+  -0.388345046698826630f, -0.921514039342041790f, 
+  -0.382683432365090340f, -0.923879532511286520f, 
+  -0.377007410216418200f, -0.926210242138311380f, 
+  -0.371317193951837770f, -0.928506080473215480f, 
+  -0.365612997804774300f, -0.930766961078983600f, 
+  -0.359895036534987940f, -0.932992798834738960f, 
+  -0.354163525420490450f, -0.935183509938947610f, 
+  -0.348418680249434840f, -0.937339011912574850f, 
+  -0.342660717311994880f, -0.939459223602189700f, 
+  -0.336889853392219940f, -0.941544065183020810f, 
+  -0.331106305759876540f, -0.943593458161960270f, 
+  -0.325310292162263310f, -0.945607325380521170f, 
+  -0.319502030816015410f, -0.947585591017741200f, 
+  -0.313681740398891460f, -0.949528180593036670f, 
+  -0.307849640041535090f, -0.951435020969008340f, 
+  -0.302005949319228530f, -0.953306040354193750f, 
+  -0.296150888243623680f, -0.955141168305770780f, 
+  -0.290284677254462440f, -0.956940335732208820f, 
+  -0.284407537211272150f, -0.958703474895871490f, 
+  -0.278519689385053610f, -0.960430519415565680f, 
+  -0.272621355449948870f, -0.962121404269041580f, 
+  -0.266712757474898530f, -0.963776065795439840f, 
+  -0.260794117915275900f, -0.965394441697689290f, 
+  -0.254865659604514350f, -0.966976471044852180f, 
+  -0.248927605745720150f, -0.968522094274417270f, 
+  -0.242980179903264120f, -0.970031253194543970f, 
+  -0.237023605994367670f, -0.971503890986251670f, 
+  -0.231058108280670940f, -0.972939952205560180f, 
+  -0.225083911359792920f, -0.974339382785575860f, 
+  -0.219101240156870100f, -0.975702130038528460f, 
+  -0.213110319916091920f, -0.977028142657754280f, 
+  -0.207111376192218480f, -0.978317370719627650f, 
+  -0.201104634842092070f, -0.979569765685440520f, 
+  -0.195090322016128660f, -0.980785280403230320f, 
+  -0.189068664149805970f, -0.981963869109555350f, 
+  -0.183039887955140950f, -0.983105487431216290f, 
+  -0.177004220412149000f, -0.984210092386929030f, 
+  -0.170961888760301690f, -0.985277642388941110f, 
+  -0.164913120489969760f, -0.986308097244598670f, 
+  -0.158858143333861530f, -0.987301418157858320f, 
+  -0.152797185258443740f, -0.988257567730749460f, 
+  -0.146730474455362300f, -0.989176509964780900f, 
+  -0.140658239332849160f, -0.990058210262297120f, 
+  -0.134580708507126360f, -0.990902635427780010f, 
+  -0.128498110793793590f, -0.991709753669099530f, 
+  -0.122410675199215960f, -0.992479534598710080f, 
+  -0.116318630911904770f, -0.993211949234794500f, 
+  -0.110222207293883310f, -0.993906970002356060f, 
+  -0.104121633872055070f, -0.994564570734255420f, 
+  -0.098017140329560451f, -0.995184726672196930f, 
+  -0.091908956497132821f, -0.995767414467659820f, 
+  -0.085797312344440227f, -0.996312612182778000f, 
+  -0.079682437971430695f, -0.996820299291165670f, 
+  -0.073564563599667357f, -0.997290456678690210f, 
+  -0.067443919563664231f, -0.997723066644191640f, 
+  -0.061320736302208995f, -0.998118112900149180f, 
+  -0.055195244349689712f, -0.998475580573294770f, 
+  -0.049067674327418029f, -0.998795456205172410f, 
+  -0.042938256934941084f, -0.999077727752645360f, 
+  -0.036807222941359331f, -0.999322384588349430f, 
+  -0.030674803176636484f, -0.999529417501093140f, 
+  -0.024541228522912389f, -0.999698818696204250f, 
+  -0.018406729905805164f, -0.999830581795823400f, 
+  -0.012271538285720512f, -0.999924701839144500f, 
+  -0.006135884649154416f, -0.999981175282601110f, 
+  -0.000000000000000184f, -1.000000000000000000f, 
+  0.006135884649154049f, -0.999981175282601110f, 
+  0.012271538285720144f, -0.999924701839144500f, 
+  0.018406729905804796f, -0.999830581795823400f, 
+  0.024541228522912021f, -0.999698818696204250f, 
+  0.030674803176636116f, -0.999529417501093140f, 
+  0.036807222941358964f, -0.999322384588349540f, 
+  0.042938256934940716f, -0.999077727752645360f, 
+  0.049067674327417661f, -0.998795456205172410f, 
+  0.055195244349689344f, -0.998475580573294770f, 
+  0.061320736302208627f, -0.998118112900149180f, 
+  0.067443919563663871f, -0.997723066644191640f, 
+  0.073564563599666982f, -0.997290456678690210f, 
+  0.079682437971430334f, -0.996820299291165670f, 
+  0.085797312344439852f, -0.996312612182778000f, 
+  0.091908956497132446f, -0.995767414467659820f, 
+  0.098017140329560090f, -0.995184726672196930f, 
+  0.104121633872054700f, -0.994564570734255420f, 
+  0.110222207293882930f, -0.993906970002356060f, 
+  0.116318630911904410f, -0.993211949234794610f, 
+  0.122410675199215600f, -0.992479534598710080f, 
+  0.128498110793793220f, -0.991709753669099530f, 
+  0.134580708507125970f, -0.990902635427780010f, 
+  0.140658239332848790f, -0.990058210262297120f, 
+  0.146730474455361940f, -0.989176509964780900f, 
+  0.152797185258443380f, -0.988257567730749460f, 
+  0.158858143333861170f, -0.987301418157858430f, 
+  0.164913120489969390f, -0.986308097244598780f, 
+  0.170961888760301330f, -0.985277642388941220f, 
+  0.177004220412148640f, -0.984210092386929140f, 
+  0.183039887955140590f, -0.983105487431216400f, 
+  0.189068664149805610f, -0.981963869109555350f, 
+  0.195090322016128300f, -0.980785280403230430f, 
+  0.201104634842091710f, -0.979569765685440630f, 
+  0.207111376192218120f, -0.978317370719627770f, 
+  0.213110319916091560f, -0.977028142657754280f, 
+  0.219101240156869740f, -0.975702130038528570f, 
+  0.225083911359792550f, -0.974339382785575970f, 
+  0.231058108280670580f, -0.972939952205560290f, 
+  0.237023605994367310f, -0.971503890986251780f, 
+  0.242980179903263760f, -0.970031253194543970f, 
+  0.248927605745719790f, -0.968522094274417380f, 
+  0.254865659604513960f, -0.966976471044852290f, 
+  0.260794117915275510f, -0.965394441697689400f, 
+  0.266712757474898200f, -0.963776065795439950f, 
+  0.272621355449948530f, -0.962121404269041690f, 
+  0.278519689385053280f, -0.960430519415565790f, 
+  0.284407537211271770f, -0.958703474895871600f, 
+  0.290284677254462050f, -0.956940335732208940f, 
+  0.296150888243623290f, -0.955141168305770890f, 
+  0.302005949319228140f, -0.953306040354193860f, 
+  0.307849640041534760f, -0.951435020969008450f, 
+  0.313681740398891130f, -0.949528180593036790f, 
+  0.319502030816015080f, -0.947585591017741310f, 
+  0.325310292162262930f, -0.945607325380521280f, 
+  0.331106305759876210f, -0.943593458161960390f, 
+  0.336889853392219610f, -0.941544065183020920f, 
+  0.342660717311994540f, -0.939459223602189810f, 
+  0.348418680249434510f, -0.937339011912574960f, 
+  0.354163525420490070f, -0.935183509938947720f, 
+  0.359895036534987610f, -0.932992798834739070f, 
+  0.365612997804773960f, -0.930766961078983710f, 
+  0.371317193951837380f, -0.928506080473215590f, 
+  0.377007410216417870f, -0.926210242138311490f, 
+  0.382683432365090000f, -0.923879532511286630f, 
+  0.388345046698826300f, -0.921514039342041900f, 
+  0.393992040061047880f, -0.919113851690057880f, 
+  0.399624199845646400f, -0.916679059921042820f, 
+  0.405241314004990030f, -0.914209755703530580f, 
+  0.410843171057903860f, -0.911706032005429880f, 
+  0.416429560097636870f, -0.909167983090522490f, 
+  0.422000270799799180f, -0.906595704514915560f, 
+  0.427555093430282140f, -0.903989293123443340f, 
+  0.433093818853151790f, -0.901348847046022140f, 
+  0.438616238538527270f, -0.898674465693954040f, 
+  0.444122144570429420f, -0.895966249756185000f, 
+  0.449611329654606600f, -0.893224301195515320f, 
+  0.455083587126343610f, -0.890448723244757990f, 
+  0.460538710958239560f, -0.887639620402854160f, 
+  0.465976495767966290f, -0.884797098430937680f, 
+  0.471396736825997590f, -0.881921264348355050f, 
+  0.476799230063321870f, -0.879012226428633640f, 
+  0.482183772079122220f, -0.876070094195406930f, 
+  0.487550160148436000f, -0.873094978418290090f, 
+  0.492898192229783870f, -0.870086991108711460f, 
+  0.498227666972781480f, -0.867046245515692870f, 
+  0.503538383725717800f, -0.863972856121586590f, 
+  0.508830142543106990f, -0.860866938637767310f, 
+  0.514102744193221550f, -0.857728610000272230f, 
+  0.519355990165589200f, -0.854557988365400760f, 
+  0.524589682678469060f, -0.851355193105265080f, 
+  0.529803624686294610f, -0.848120344803297340f, 
+  0.534997619887096930f, -0.844853565249707230f, 
+  0.540171472729892410f, -0.841554977436898780f, 
+  0.545324988422046460f, -0.838224705554837970f, 
+  0.550457972936604700f, -0.834862874986380120f, 
+  0.555570233019601840f, -0.831469612302545460f, 
+  0.560661576197336250f, -0.828045045257755690f, 
+  0.565731810783613120f, -0.824589302785025290f, 
+  0.570780745886967030f, -0.821102514991104870f, 
+  0.575808191417844890f, -0.817584813151584040f, 
+  0.580813958095764640f, -0.814036329705948300f, 
+  0.585797857456438750f, -0.810457198252594880f, 
+  0.590759701858873940f, -0.806847553543799450f, 
+  0.595699304492432910f, -0.803207531480645280f, 
+  0.600616479383868970f, -0.799537269107905010f, 
+  0.605511041404325320f, -0.795836904608883680f, 
+  0.610382806276309140f, -0.792106577300212610f, 
+  0.615231590580627040f, -0.788346427626606120f, 
+  0.620057211763289100f, -0.784556597155575240f, 
+  0.624859488142386120f, -0.780737228572094600f, 
+  0.629638238914926650f, -0.776888465673232780f, 
+  0.634393284163645600f, -0.773010453362736880f, 
+  0.639124444863775620f, -0.769103337645579700f, 
+  0.643831542889791160f, -0.765167265622459180f, 
+  0.648514401022112000f, -0.761202385484262220f, 
+  0.653172842953776760f, -0.757208846506484570f, 
+  0.657806693297078530f, -0.753186799043612630f, 
+  0.662415777590171450f, -0.749136394523459590f, 
+  0.666999922303637690f, -0.745057785441465840f, 
+  0.671558954847018330f, -0.740951125354959110f, 
+  0.676092703575315700f, -0.736816568877370020f, 
+  0.680600997795452690f, -0.732654271672413150f, 
+  0.685083667772700470f, -0.728464390448225090f, 
+  0.689540544737066830f, -0.724247082951467000f, 
+  0.693971460889653780f, -0.720002507961381880f, 
+  0.698376249408972360f, -0.715730825283819040f, 
+  0.702754744457225300f, -0.711432195745216430f, 
+  0.707106781186547350f, -0.707106781186547680f, 
+  0.711432195745216100f, -0.702754744457225630f, 
+  0.715730825283818820f, -0.698376249408972690f, 
+  0.720002507961381540f, -0.693971460889654000f, 
+  0.724247082951466670f, -0.689540544737067160f, 
+  0.728464390448224860f, -0.685083667772700800f, 
+  0.732654271672412930f, -0.680600997795453020f, 
+  0.736816568877369790f, -0.676092703575316030f, 
+  0.740951125354958880f, -0.671558954847018660f, 
+  0.745057785441465500f, -0.666999922303638030f, 
+  0.749136394523459370f, -0.662415777590171780f, 
+  0.753186799043612300f, -0.657806693297078860f, 
+  0.757208846506484230f, -0.653172842953777090f, 
+  0.761202385484261890f, -0.648514401022112330f, 
+  0.765167265622458850f, -0.643831542889791500f, 
+  0.769103337645579480f, -0.639124444863775950f, 
+  0.773010453362736660f, -0.634393284163645930f, 
+  0.776888465673232550f, -0.629638238914926980f, 
+  0.780737228572094380f, -0.624859488142386450f, 
+  0.784556597155575020f, -0.620057211763289540f, 
+  0.788346427626605890f, -0.615231590580627370f, 
+  0.792106577300212390f, -0.610382806276309480f, 
+  0.795836904608883340f, -0.605511041404325660f, 
+  0.799537269107904790f, -0.600616479383869310f, 
+  0.803207531480645050f, -0.595699304492433250f, 
+  0.806847553543799220f, -0.590759701858874280f, 
+  0.810457198252594660f, -0.585797857456439090f, 
+  0.814036329705948080f, -0.580813958095764970f, 
+  0.817584813151583710f, -0.575808191417845230f, 
+  0.821102514991104540f, -0.570780745886967370f, 
+  0.824589302785025070f, -0.565731810783613560f, 
+  0.828045045257755350f, -0.560661576197336590f, 
+  0.831469612302545240f, -0.555570233019602180f, 
+  0.834862874986379900f, -0.550457972936605030f, 
+  0.838224705554837750f, -0.545324988422046800f, 
+  0.841554977436898440f, -0.540171472729892740f, 
+  0.844853565249707010f, -0.534997619887097260f, 
+  0.848120344803297120f, -0.529803624686294940f, 
+  0.851355193105264860f, -0.524589682678469390f, 
+  0.854557988365400530f, -0.519355990165589530f, 
+  0.857728610000272010f, -0.514102744193221880f, 
+  0.860866938637767090f, -0.508830142543107430f, 
+  0.863972856121586360f, -0.503538383725718130f, 
+  0.867046245515692650f, -0.498227666972781870f, 
+  0.870086991108711350f, -0.492898192229784260f, 
+  0.873094978418289870f, -0.487550160148436380f, 
+  0.876070094195406710f, -0.482183772079122610f, 
+  0.879012226428633410f, -0.476799230063322200f, 
+  0.881921264348354830f, -0.471396736825997920f, 
+  0.884797098430937460f, -0.465976495767966680f, 
+  0.887639620402853930f, -0.460538710958239950f, 
+  0.890448723244757770f, -0.455083587126344000f, 
+  0.893224301195515100f, -0.449611329654606980f, 
+  0.895966249756184880f, -0.444122144570429810f, 
+  0.898674465693953820f, -0.438616238538527660f, 
+  0.901348847046021920f, -0.433093818853152180f, 
+  0.903989293123443120f, -0.427555093430282530f, 
+  0.906595704514915450f, -0.422000270799799570f, 
+  0.909167983090522380f, -0.416429560097637260f, 
+  0.911706032005429660f, -0.410843171057904240f, 
+  0.914209755703530470f, -0.405241314004990420f, 
+  0.916679059921042700f, -0.399624199845646790f, 
+  0.919113851690057660f, -0.393992040061048270f, 
+  0.921514039342041790f, -0.388345046698826690f, 
+  0.923879532511286520f, -0.382683432365090390f, 
+  0.926210242138311380f, -0.377007410216418260f, 
+  0.928506080473215480f, -0.371317193951837820f, 
+  0.930766961078983490f, -0.365612997804774350f, 
+  0.932992798834738960f, -0.359895036534988000f, 
+  0.935183509938947500f, -0.354163525420490510f, 
+  0.937339011912574850f, -0.348418680249434900f, 
+  0.939459223602189700f, -0.342660717311994930f, 
+  0.941544065183020810f, -0.336889853392220000f, 
+  0.943593458161960270f, -0.331106305759876600f, 
+  0.945607325380521170f, -0.325310292162263370f, 
+  0.947585591017741200f, -0.319502030816015470f, 
+  0.949528180593036670f, -0.313681740398891520f, 
+  0.951435020969008340f, -0.307849640041535140f, 
+  0.953306040354193640f, -0.302005949319228580f, 
+  0.955141168305770780f, -0.296150888243623730f, 
+  0.956940335732208820f, -0.290284677254462500f, 
+  0.958703474895871490f, -0.284407537211272210f, 
+  0.960430519415565680f, -0.278519689385053670f, 
+  0.962121404269041580f, -0.272621355449948980f, 
+  0.963776065795439840f, -0.266712757474898590f, 
+  0.965394441697689290f, -0.260794117915275960f, 
+  0.966976471044852180f, -0.254865659604514410f, 
+  0.968522094274417270f, -0.248927605745720200f, 
+  0.970031253194543970f, -0.242980179903264180f, 
+  0.971503890986251670f, -0.237023605994367730f, 
+  0.972939952205560180f, -0.231058108280671000f, 
+  0.974339382785575860f, -0.225083911359792970f, 
+  0.975702130038528460f, -0.219101240156870160f, 
+  0.977028142657754170f, -0.213110319916091970f, 
+  0.978317370719627650f, -0.207111376192218530f, 
+  0.979569765685440520f, -0.201104634842092120f, 
+  0.980785280403230320f, -0.195090322016128720f, 
+  0.981963869109555350f, -0.189068664149806030f, 
+  0.983105487431216290f, -0.183039887955141010f, 
+  0.984210092386929030f, -0.177004220412149050f, 
+  0.985277642388941110f, -0.170961888760301770f, 
+  0.986308097244598670f, -0.164913120489969810f, 
+  0.987301418157858320f, -0.158858143333861580f, 
+  0.988257567730749460f, -0.152797185258443800f, 
+  0.989176509964780900f, -0.146730474455362390f, 
+  0.990058210262297120f, -0.140658239332849210f, 
+  0.990902635427780010f, -0.134580708507126420f, 
+  0.991709753669099410f, -0.128498110793793640f, 
+  0.992479534598709970f, -0.122410675199216030f, 
+  0.993211949234794500f, -0.116318630911904840f, 
+  0.993906970002356060f, -0.110222207293883360f, 
+  0.994564570734255420f, -0.104121633872055130f, 
+  0.995184726672196930f, -0.098017140329560506f, 
+  0.995767414467659820f, -0.091908956497132877f, 
+  0.996312612182778000f, -0.085797312344440282f, 
+  0.996820299291165670f, -0.079682437971430750f, 
+  0.997290456678690210f, -0.073564563599667412f, 
+  0.997723066644191640f, -0.067443919563664287f, 
+  0.998118112900149180f, -0.061320736302209057f, 
+  0.998475580573294770f, -0.055195244349689775f, 
+  0.998795456205172410f, -0.049067674327418091f, 
+  0.999077727752645360f, -0.042938256934941139f, 
+  0.999322384588349430f, -0.036807222941359394f, 
+  0.999529417501093140f, -0.030674803176636543f, 
+  0.999698818696204250f, -0.024541228522912448f, 
+  0.999830581795823400f, -0.018406729905805226f, 
+  0.999924701839144500f, -0.012271538285720572f, 
+  0.999981175282601110f, -0.006135884649154477f 
+}; 
+ 
+/**
+* @brief  Set up an instance structure for the floating-point radix-4 CFFT/CIFFT.
+* @param[in,out] *S             instance structure to be filled in.
+* @param[in]     fftLen         number of points in the transform.
+* @param[in]     ifftFlag       0 selects the forward transform (CFFT), 1 selects the inverse transform (CIFFT).
+* @param[in]     bitReverseFlag 1 requests bit reversal of the output (normal order), 0 leaves the output in bit reversed order.
+* @return        ARM_MATH_SUCCESS when <code>fftLen</code> is one of the supported lengths, ARM_MATH_ARGUMENT_ERROR otherwise.
+*
+* \par Description:
+* \par
+* <code>ifftFlag</code> is stored in the instance and selects between CFFT (0) and CIFFT (1).
+* \par
+* <code>bitReverseFlag</code> is stored in the instance; when set (=1) the output is in normal order,
+* otherwise the output is in bit reversed order.
+* \par
+* Supported values of <code>fftLen</code> are 16, 64, 256 and 1024.
+* \par
+* The twiddle factor table pointer is always written; the bit reversal table pointer,
+* the twiddle coefficient modifier, the bit reversal factor and the 1/fftLen value are
+* written only for a supported length.
+* \par
+* Note that <code>fftLen</code>, the twiddle pointer and both flags are stored in <code>*S</code>
+* even when ARM_MATH_ARGUMENT_ERROR is returned.
+*/
+
+arm_status arm_cfft_radix4_init_f32(
+  arm_cfft_radix4_instance_f32 * S,
+  uint16_t fftLen,
+  uint8_t ifftFlag,
+  uint8_t bitReverseFlag)
+{
+  /*  Members that do not depend on the transform length:
+   *  length, twiddle coefficient pointer, CFFT/CIFFT selector, bit reversal selector */
+  S->fftLen = fftLen;
+  S->pTwiddle = (float32_t *) twiddleCoef;
+  S->ifftFlag = ifftFlag;
+  S->bitReverseFlag = bitReverseFlag;
+
+  /*  Members that depend on the transform length; each supported length
+   *  fills in its own set of values and reports success straight away */
+
+  if (S->fftLen == 1024u)
+  {
+    /*  1024 point FFT */
+    S->twidCoefModifier = 1u;                           /* twiddle coef modifier value */
+    S->bitRevFactor = 1u;                               /* bit reversal table modifier */
+    S->pBitRevTable = (uint16_t*)armBitRevTable;        /* bit reversal table pointer  */
+    S->onebyfftLen = 0.0009765625f;                     /* 1/fftLen                    */
+    return (ARM_MATH_SUCCESS);
+  }
+
+  if (S->fftLen == 256u)
+  {
+    /*  256 point FFT */
+    S->twidCoefModifier = 4u;
+    S->bitRevFactor = 4u;
+    S->pBitRevTable = (uint16_t*)&armBitRevTable[3];
+    S->onebyfftLen = 0.00390625f;
+    return (ARM_MATH_SUCCESS);
+  }
+
+  if (S->fftLen == 64u)
+  {
+    /*  64 point FFT */
+    S->twidCoefModifier = 16u;
+    S->bitRevFactor = 16u;
+    S->pBitRevTable = (uint16_t*)&armBitRevTable[15];
+    S->onebyfftLen = 0.015625f;
+    return (ARM_MATH_SUCCESS);
+  }
+
+  if (S->fftLen == 16u)
+  {
+    /*  16 point FFT */
+    S->twidCoefModifier = 64u;
+    S->bitRevFactor = 64u;
+    S->pBitRevTable = (uint16_t*)&armBitRevTable[63];
+    S->onebyfftLen = 0.0625f;
+    return (ARM_MATH_SUCCESS);
+  }
+
+  /*  fftLen is not one of the supported sizes: report an argument error */
+  return (ARM_MATH_ARGUMENT_ERROR);
+}
+ 
+/**  
+ * @} end of CFFT_CIFFT group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/TransformFunctions/arm_cfft_radix4_init_q15.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,412 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:	    arm_cfft_radix4_init_q15.c  
+*  
+* Description:	Radix-4 Decimation in Frequency Q15 FFT & IFFT initialization function  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.5  2010/04/26   
+* 	 incorporated review comments and updated with latest CMSIS layer  
+*  
+* Version 0.0.3  2010/03/10   
+*    Initial version  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+#include "arm_common_tables.h" 
+ 
+/**  
+ * @ingroup groupTransforms  
+ */ 
+ 
+ 
+/**  
+ * @addtogroup CFFT_CIFFT  
+ * @{  
+ */ 
+ 
+/*  
+* @brief  Twiddle factors Table  
+*/ 
+ 
+/** 
+* \par  
+* Example code for Q15 twiddle factor generation:  
+* \par  
+* <pre>for(i = 0; i< N; i++)  
+* {  
+*	twiddleCoefQ15[2*i]= cos(i * 2*PI/(float)N);  
+*	twiddleCoefQ15[2*i+1]= sin(i * 2*PI/(float)N);  
+* } </pre>  
+* \par  
+* where N = 1024	and PI = 3.14159265358979  
+* \par  
+* Cos and Sin values are stored in interleaved fashion  
+* \par  
+* Convert Floating point to Q15(Fixed point 1.15):  
+*	round(twiddleCoefQ15(i) * pow(2, 15))  
+*  
+*/ 
+ 
+static const q15_t twiddleCoefQ15[2048] = { 
+  0x7fff, 0x0, 0x7fff, 0xc9, 0x7ffe, 0x192, 0x7ffa, 0x25b, 
+  0x7ff6, 0x324, 0x7ff1, 0x3ed, 0x7fea, 0x4b6, 0x7fe2, 0x57f, 
+  0x7fd9, 0x648, 0x7fce, 0x711, 0x7fc2, 0x7d9, 0x7fb5, 0x8a2, 
+  0x7fa7, 0x96b, 0x7f98, 0xa33, 0x7f87, 0xafb, 0x7f75, 0xbc4, 
+  0x7f62, 0xc8c, 0x7f4e, 0xd54, 0x7f38, 0xe1c, 0x7f22, 0xee4, 
+  0x7f0a, 0xfab, 0x7ef0, 0x1073, 0x7ed6, 0x113a, 0x7eba, 0x1201, 
+  0x7e9d, 0x12c8, 0x7e7f, 0x138f, 0x7e60, 0x1455, 0x7e3f, 0x151c, 
+  0x7e1e, 0x15e2, 0x7dfb, 0x16a8, 0x7dd6, 0x176e, 0x7db1, 0x1833, 
+  0x7d8a, 0x18f9, 0x7d63, 0x19be, 0x7d3a, 0x1a83, 0x7d0f, 0x1b47, 
+  0x7ce4, 0x1c0c, 0x7cb7, 0x1cd0, 0x7c89, 0x1d93, 0x7c5a, 0x1e57, 
+  0x7c2a, 0x1f1a, 0x7bf9, 0x1fdd, 0x7bc6, 0x209f, 0x7b92, 0x2162, 
+  0x7b5d, 0x2224, 0x7b27, 0x22e5, 0x7aef, 0x23a7, 0x7ab7, 0x2467, 
+  0x7a7d, 0x2528, 0x7a42, 0x25e8, 0x7a06, 0x26a8, 0x79c9, 0x2768, 
+  0x798a, 0x2827, 0x794a, 0x28e5, 0x790a, 0x29a4, 0x78c8, 0x2a62, 
+  0x7885, 0x2b1f, 0x7840, 0x2bdc, 0x77fb, 0x2c99, 0x77b4, 0x2d55, 
+  0x776c, 0x2e11, 0x7723, 0x2ecc, 0x76d9, 0x2f87, 0x768e, 0x3042, 
+  0x7642, 0x30fc, 0x75f4, 0x31b5, 0x75a6, 0x326e, 0x7556, 0x3327, 
+  0x7505, 0x33df, 0x74b3, 0x3497, 0x7460, 0x354e, 0x740b, 0x3604, 
+  0x73b6, 0x36ba, 0x735f, 0x3770, 0x7308, 0x3825, 0x72af, 0x38d9, 
+  0x7255, 0x398d, 0x71fa, 0x3a40, 0x719e, 0x3af3, 0x7141, 0x3ba5, 
+  0x70e3, 0x3c57, 0x7083, 0x3d08, 0x7023, 0x3db8, 0x6fc2, 0x3e68, 
+  0x6f5f, 0x3f17, 0x6efb, 0x3fc6, 0x6e97, 0x4074, 0x6e31, 0x4121, 
+  0x6dca, 0x41ce, 0x6d62, 0x427a, 0x6cf9, 0x4326, 0x6c8f, 0x43d1, 
+  0x6c24, 0x447b, 0x6bb8, 0x4524, 0x6b4b, 0x45cd, 0x6add, 0x4675, 
+  0x6a6e, 0x471d, 0x69fd, 0x47c4, 0x698c, 0x486a, 0x691a, 0x490f, 
+  0x68a7, 0x49b4, 0x6832, 0x4a58, 0x67bd, 0x4afb, 0x6747, 0x4b9e, 
+  0x66d0, 0x4c40, 0x6657, 0x4ce1, 0x65de, 0x4d81, 0x6564, 0x4e21, 
+  0x64e9, 0x4ec0, 0x646c, 0x4f5e, 0x63ef, 0x4ffb, 0x6371, 0x5098, 
+  0x62f2, 0x5134, 0x6272, 0x51cf, 0x61f1, 0x5269, 0x616f, 0x5303, 
+  0x60ec, 0x539b, 0x6068, 0x5433, 0x5fe4, 0x54ca, 0x5f5e, 0x5560, 
+  0x5ed7, 0x55f6, 0x5e50, 0x568a, 0x5dc8, 0x571e, 0x5d3e, 0x57b1, 
+  0x5cb4, 0x5843, 0x5c29, 0x58d4, 0x5b9d, 0x5964, 0x5b10, 0x59f4, 
+  0x5a82, 0x5a82, 0x59f4, 0x5b10, 0x5964, 0x5b9d, 0x58d4, 0x5c29, 
+  0x5843, 0x5cb4, 0x57b1, 0x5d3e, 0x571e, 0x5dc8, 0x568a, 0x5e50, 
+  0x55f6, 0x5ed7, 0x5560, 0x5f5e, 0x54ca, 0x5fe4, 0x5433, 0x6068, 
+  0x539b, 0x60ec, 0x5303, 0x616f, 0x5269, 0x61f1, 0x51cf, 0x6272, 
+  0x5134, 0x62f2, 0x5098, 0x6371, 0x4ffb, 0x63ef, 0x4f5e, 0x646c, 
+  0x4ec0, 0x64e9, 0x4e21, 0x6564, 0x4d81, 0x65de, 0x4ce1, 0x6657, 
+  0x4c40, 0x66d0, 0x4b9e, 0x6747, 0x4afb, 0x67bd, 0x4a58, 0x6832, 
+  0x49b4, 0x68a7, 0x490f, 0x691a, 0x486a, 0x698c, 0x47c4, 0x69fd, 
+  0x471d, 0x6a6e, 0x4675, 0x6add, 0x45cd, 0x6b4b, 0x4524, 0x6bb8, 
+  0x447b, 0x6c24, 0x43d1, 0x6c8f, 0x4326, 0x6cf9, 0x427a, 0x6d62, 
+  0x41ce, 0x6dca, 0x4121, 0x6e31, 0x4074, 0x6e97, 0x3fc6, 0x6efb, 
+  0x3f17, 0x6f5f, 0x3e68, 0x6fc2, 0x3db8, 0x7023, 0x3d08, 0x7083, 
+  0x3c57, 0x70e3, 0x3ba5, 0x7141, 0x3af3, 0x719e, 0x3a40, 0x71fa, 
+  0x398d, 0x7255, 0x38d9, 0x72af, 0x3825, 0x7308, 0x3770, 0x735f, 
+  0x36ba, 0x73b6, 0x3604, 0x740b, 0x354e, 0x7460, 0x3497, 0x74b3, 
+  0x33df, 0x7505, 0x3327, 0x7556, 0x326e, 0x75a6, 0x31b5, 0x75f4, 
+  0x30fc, 0x7642, 0x3042, 0x768e, 0x2f87, 0x76d9, 0x2ecc, 0x7723, 
+  0x2e11, 0x776c, 0x2d55, 0x77b4, 0x2c99, 0x77fb, 0x2bdc, 0x7840, 
+  0x2b1f, 0x7885, 0x2a62, 0x78c8, 0x29a4, 0x790a, 0x28e5, 0x794a, 
+  0x2827, 0x798a, 0x2768, 0x79c9, 0x26a8, 0x7a06, 0x25e8, 0x7a42, 
+  0x2528, 0x7a7d, 0x2467, 0x7ab7, 0x23a7, 0x7aef, 0x22e5, 0x7b27, 
+  0x2224, 0x7b5d, 0x2162, 0x7b92, 0x209f, 0x7bc6, 0x1fdd, 0x7bf9, 
+  0x1f1a, 0x7c2a, 0x1e57, 0x7c5a, 0x1d93, 0x7c89, 0x1cd0, 0x7cb7, 
+  0x1c0c, 0x7ce4, 0x1b47, 0x7d0f, 0x1a83, 0x7d3a, 0x19be, 0x7d63, 
+  0x18f9, 0x7d8a, 0x1833, 0x7db1, 0x176e, 0x7dd6, 0x16a8, 0x7dfb, 
+  0x15e2, 0x7e1e, 0x151c, 0x7e3f, 0x1455, 0x7e60, 0x138f, 0x7e7f, 
+  0x12c8, 0x7e9d, 0x1201, 0x7eba, 0x113a, 0x7ed6, 0x1073, 0x7ef0, 
+  0xfab, 0x7f0a, 0xee4, 0x7f22, 0xe1c, 0x7f38, 0xd54, 0x7f4e, 
+  0xc8c, 0x7f62, 0xbc4, 0x7f75, 0xafb, 0x7f87, 0xa33, 0x7f98, 
+  0x96b, 0x7fa7, 0x8a2, 0x7fb5, 0x7d9, 0x7fc2, 0x711, 0x7fce, 
+  0x648, 0x7fd9, 0x57f, 0x7fe2, 0x4b6, 0x7fea, 0x3ed, 0x7ff1, 
+  0x324, 0x7ff6, 0x25b, 0x7ffa, 0x192, 0x7ffe, 0xc9, 0x7fff, 
+  0x0, 0x7fff, 0xff37, 0x7fff, 0xfe6e, 0x7ffe, 0xfda5, 0x7ffa, 
+  0xfcdc, 0x7ff6, 0xfc13, 0x7ff1, 0xfb4a, 0x7fea, 0xfa81, 0x7fe2, 
+  0xf9b8, 0x7fd9, 0xf8ef, 0x7fce, 0xf827, 0x7fc2, 0xf75e, 0x7fb5, 
+  0xf695, 0x7fa7, 0xf5cd, 0x7f98, 0xf505, 0x7f87, 0xf43c, 0x7f75, 
+  0xf374, 0x7f62, 0xf2ac, 0x7f4e, 0xf1e4, 0x7f38, 0xf11c, 0x7f22, 
+  0xf055, 0x7f0a, 0xef8d, 0x7ef0, 0xeec6, 0x7ed6, 0xedff, 0x7eba, 
+  0xed38, 0x7e9d, 0xec71, 0x7e7f, 0xebab, 0x7e60, 0xeae4, 0x7e3f, 
+  0xea1e, 0x7e1e, 0xe958, 0x7dfb, 0xe892, 0x7dd6, 0xe7cd, 0x7db1, 
+  0xe707, 0x7d8a, 0xe642, 0x7d63, 0xe57d, 0x7d3a, 0xe4b9, 0x7d0f, 
+  0xe3f4, 0x7ce4, 0xe330, 0x7cb7, 0xe26d, 0x7c89, 0xe1a9, 0x7c5a, 
+  0xe0e6, 0x7c2a, 0xe023, 0x7bf9, 0xdf61, 0x7bc6, 0xde9e, 0x7b92, 
+  0xdddc, 0x7b5d, 0xdd1b, 0x7b27, 0xdc59, 0x7aef, 0xdb99, 0x7ab7, 
+  0xdad8, 0x7a7d, 0xda18, 0x7a42, 0xd958, 0x7a06, 0xd898, 0x79c9, 
+  0xd7d9, 0x798a, 0xd71b, 0x794a, 0xd65c, 0x790a, 0xd59e, 0x78c8, 
+  0xd4e1, 0x7885, 0xd424, 0x7840, 0xd367, 0x77fb, 0xd2ab, 0x77b4, 
+  0xd1ef, 0x776c, 0xd134, 0x7723, 0xd079, 0x76d9, 0xcfbe, 0x768e, 
+  0xcf04, 0x7642, 0xce4b, 0x75f4, 0xcd92, 0x75a6, 0xccd9, 0x7556, 
+  0xcc21, 0x7505, 0xcb69, 0x74b3, 0xcab2, 0x7460, 0xc9fc, 0x740b, 
+  0xc946, 0x73b6, 0xc890, 0x735f, 0xc7db, 0x7308, 0xc727, 0x72af, 
+  0xc673, 0x7255, 0xc5c0, 0x71fa, 0xc50d, 0x719e, 0xc45b, 0x7141, 
+  0xc3a9, 0x70e3, 0xc2f8, 0x7083, 0xc248, 0x7023, 0xc198, 0x6fc2, 
+  0xc0e9, 0x6f5f, 0xc03a, 0x6efb, 0xbf8c, 0x6e97, 0xbedf, 0x6e31, 
+  0xbe32, 0x6dca, 0xbd86, 0x6d62, 0xbcda, 0x6cf9, 0xbc2f, 0x6c8f, 
+  0xbb85, 0x6c24, 0xbadc, 0x6bb8, 0xba33, 0x6b4b, 0xb98b, 0x6add, 
+  0xb8e3, 0x6a6e, 0xb83c, 0x69fd, 0xb796, 0x698c, 0xb6f1, 0x691a, 
+  0xb64c, 0x68a7, 0xb5a8, 0x6832, 0xb505, 0x67bd, 0xb462, 0x6747, 
+  0xb3c0, 0x66d0, 0xb31f, 0x6657, 0xb27f, 0x65de, 0xb1df, 0x6564, 
+  0xb140, 0x64e9, 0xb0a2, 0x646c, 0xb005, 0x63ef, 0xaf68, 0x6371, 
+  0xaecc, 0x62f2, 0xae31, 0x6272, 0xad97, 0x61f1, 0xacfd, 0x616f, 
+  0xac65, 0x60ec, 0xabcd, 0x6068, 0xab36, 0x5fe4, 0xaaa0, 0x5f5e, 
+  0xaa0a, 0x5ed7, 0xa976, 0x5e50, 0xa8e2, 0x5dc8, 0xa84f, 0x5d3e, 
+  0xa7bd, 0x5cb4, 0xa72c, 0x5c29, 0xa69c, 0x5b9d, 0xa60c, 0x5b10, 
+  0xa57e, 0x5a82, 0xa4f0, 0x59f4, 0xa463, 0x5964, 0xa3d7, 0x58d4, 
+  0xa34c, 0x5843, 0xa2c2, 0x57b1, 0xa238, 0x571e, 0xa1b0, 0x568a, 
+  0xa129, 0x55f6, 0xa0a2, 0x5560, 0xa01c, 0x54ca, 0x9f98, 0x5433, 
+  0x9f14, 0x539b, 0x9e91, 0x5303, 0x9e0f, 0x5269, 0x9d8e, 0x51cf, 
+  0x9d0e, 0x5134, 0x9c8f, 0x5098, 0x9c11, 0x4ffb, 0x9b94, 0x4f5e, 
+  0x9b17, 0x4ec0, 0x9a9c, 0x4e21, 0x9a22, 0x4d81, 0x99a9, 0x4ce1, 
+  0x9930, 0x4c40, 0x98b9, 0x4b9e, 0x9843, 0x4afb, 0x97ce, 0x4a58, 
+  0x9759, 0x49b4, 0x96e6, 0x490f, 0x9674, 0x486a, 0x9603, 0x47c4, 
+  0x9592, 0x471d, 0x9523, 0x4675, 0x94b5, 0x45cd, 0x9448, 0x4524, 
+  0x93dc, 0x447b, 0x9371, 0x43d1, 0x9307, 0x4326, 0x929e, 0x427a, 
+  0x9236, 0x41ce, 0x91cf, 0x4121, 0x9169, 0x4074, 0x9105, 0x3fc6, 
+  0x90a1, 0x3f17, 0x903e, 0x3e68, 0x8fdd, 0x3db8, 0x8f7d, 0x3d08, 
+  0x8f1d, 0x3c57, 0x8ebf, 0x3ba5, 0x8e62, 0x3af3, 0x8e06, 0x3a40, 
+  0x8dab, 0x398d, 0x8d51, 0x38d9, 0x8cf8, 0x3825, 0x8ca1, 0x3770, 
+  0x8c4a, 0x36ba, 0x8bf5, 0x3604, 0x8ba0, 0x354e, 0x8b4d, 0x3497, 
+  0x8afb, 0x33df, 0x8aaa, 0x3327, 0x8a5a, 0x326e, 0x8a0c, 0x31b5, 
+  0x89be, 0x30fc, 0x8972, 0x3042, 0x8927, 0x2f87, 0x88dd, 0x2ecc, 
+  0x8894, 0x2e11, 0x884c, 0x2d55, 0x8805, 0x2c99, 0x87c0, 0x2bdc, 
+  0x877b, 0x2b1f, 0x8738, 0x2a62, 0x86f6, 0x29a4, 0x86b6, 0x28e5, 
+  0x8676, 0x2827, 0x8637, 0x2768, 0x85fa, 0x26a8, 0x85be, 0x25e8, 
+  0x8583, 0x2528, 0x8549, 0x2467, 0x8511, 0x23a7, 0x84d9, 0x22e5, 
+  0x84a3, 0x2224, 0x846e, 0x2162, 0x843a, 0x209f, 0x8407, 0x1fdd, 
+  0x83d6, 0x1f1a, 0x83a6, 0x1e57, 0x8377, 0x1d93, 0x8349, 0x1cd0, 
+  0x831c, 0x1c0c, 0x82f1, 0x1b47, 0x82c6, 0x1a83, 0x829d, 0x19be, 
+  0x8276, 0x18f9, 0x824f, 0x1833, 0x822a, 0x176e, 0x8205, 0x16a8, 
+  0x81e2, 0x15e2, 0x81c1, 0x151c, 0x81a0, 0x1455, 0x8181, 0x138f, 
+  0x8163, 0x12c8, 0x8146, 0x1201, 0x812a, 0x113a, 0x8110, 0x1073, 
+  0x80f6, 0xfab, 0x80de, 0xee4, 0x80c8, 0xe1c, 0x80b2, 0xd54, 
+  0x809e, 0xc8c, 0x808b, 0xbc4, 0x8079, 0xafb, 0x8068, 0xa33, 
+  0x8059, 0x96b, 0x804b, 0x8a2, 0x803e, 0x7d9, 0x8032, 0x711, 
+  0x8027, 0x648, 0x801e, 0x57f, 0x8016, 0x4b6, 0x800f, 0x3ed, 
+  0x800a, 0x324, 0x8006, 0x25b, 0x8002, 0x192, 0x8001, 0xc9, 
+  0x8000, 0x0, 0x8001, 0xff37, 0x8002, 0xfe6e, 0x8006, 0xfda5, 
+  0x800a, 0xfcdc, 0x800f, 0xfc13, 0x8016, 0xfb4a, 0x801e, 0xfa81, 
+  0x8027, 0xf9b8, 0x8032, 0xf8ef, 0x803e, 0xf827, 0x804b, 0xf75e, 
+  0x8059, 0xf695, 0x8068, 0xf5cd, 0x8079, 0xf505, 0x808b, 0xf43c, 
+  0x809e, 0xf374, 0x80b2, 0xf2ac, 0x80c8, 0xf1e4, 0x80de, 0xf11c, 
+  0x80f6, 0xf055, 0x8110, 0xef8d, 0x812a, 0xeec6, 0x8146, 0xedff, 
+  0x8163, 0xed38, 0x8181, 0xec71, 0x81a0, 0xebab, 0x81c1, 0xeae4, 
+  0x81e2, 0xea1e, 0x8205, 0xe958, 0x822a, 0xe892, 0x824f, 0xe7cd, 
+  0x8276, 0xe707, 0x829d, 0xe642, 0x82c6, 0xe57d, 0x82f1, 0xe4b9, 
+  0x831c, 0xe3f4, 0x8349, 0xe330, 0x8377, 0xe26d, 0x83a6, 0xe1a9, 
+  0x83d6, 0xe0e6, 0x8407, 0xe023, 0x843a, 0xdf61, 0x846e, 0xde9e, 
+  0x84a3, 0xdddc, 0x84d9, 0xdd1b, 0x8511, 0xdc59, 0x8549, 0xdb99, 
+  0x8583, 0xdad8, 0x85be, 0xda18, 0x85fa, 0xd958, 0x8637, 0xd898, 
+  0x8676, 0xd7d9, 0x86b6, 0xd71b, 0x86f6, 0xd65c, 0x8738, 0xd59e, 
+  0x877b, 0xd4e1, 0x87c0, 0xd424, 0x8805, 0xd367, 0x884c, 0xd2ab, 
+  0x8894, 0xd1ef, 0x88dd, 0xd134, 0x8927, 0xd079, 0x8972, 0xcfbe, 
+  0x89be, 0xcf04, 0x8a0c, 0xce4b, 0x8a5a, 0xcd92, 0x8aaa, 0xccd9, 
+  0x8afb, 0xcc21, 0x8b4d, 0xcb69, 0x8ba0, 0xcab2, 0x8bf5, 0xc9fc, 
+  0x8c4a, 0xc946, 0x8ca1, 0xc890, 0x8cf8, 0xc7db, 0x8d51, 0xc727, 
+  0x8dab, 0xc673, 0x8e06, 0xc5c0, 0x8e62, 0xc50d, 0x8ebf, 0xc45b, 
+  0x8f1d, 0xc3a9, 0x8f7d, 0xc2f8, 0x8fdd, 0xc248, 0x903e, 0xc198, 
+  0x90a1, 0xc0e9, 0x9105, 0xc03a, 0x9169, 0xbf8c, 0x91cf, 0xbedf, 
+  0x9236, 0xbe32, 0x929e, 0xbd86, 0x9307, 0xbcda, 0x9371, 0xbc2f, 
+  0x93dc, 0xbb85, 0x9448, 0xbadc, 0x94b5, 0xba33, 0x9523, 0xb98b, 
+  0x9592, 0xb8e3, 0x9603, 0xb83c, 0x9674, 0xb796, 0x96e6, 0xb6f1, 
+  0x9759, 0xb64c, 0x97ce, 0xb5a8, 0x9843, 0xb505, 0x98b9, 0xb462, 
+  0x9930, 0xb3c0, 0x99a9, 0xb31f, 0x9a22, 0xb27f, 0x9a9c, 0xb1df, 
+  0x9b17, 0xb140, 0x9b94, 0xb0a2, 0x9c11, 0xb005, 0x9c8f, 0xaf68, 
+  0x9d0e, 0xaecc, 0x9d8e, 0xae31, 0x9e0f, 0xad97, 0x9e91, 0xacfd, 
+  0x9f14, 0xac65, 0x9f98, 0xabcd, 0xa01c, 0xab36, 0xa0a2, 0xaaa0, 
+  0xa129, 0xaa0a, 0xa1b0, 0xa976, 0xa238, 0xa8e2, 0xa2c2, 0xa84f, 
+  0xa34c, 0xa7bd, 0xa3d7, 0xa72c, 0xa463, 0xa69c, 0xa4f0, 0xa60c, 
+  0xa57e, 0xa57e, 0xa60c, 0xa4f0, 0xa69c, 0xa463, 0xa72c, 0xa3d7, 
+  0xa7bd, 0xa34c, 0xa84f, 0xa2c2, 0xa8e2, 0xa238, 0xa976, 0xa1b0, 
+  0xaa0a, 0xa129, 0xaaa0, 0xa0a2, 0xab36, 0xa01c, 0xabcd, 0x9f98, 
+  0xac65, 0x9f14, 0xacfd, 0x9e91, 0xad97, 0x9e0f, 0xae31, 0x9d8e, 
+  0xaecc, 0x9d0e, 0xaf68, 0x9c8f, 0xb005, 0x9c11, 0xb0a2, 0x9b94, 
+  0xb140, 0x9b17, 0xb1df, 0x9a9c, 0xb27f, 0x9a22, 0xb31f, 0x99a9, 
+  0xb3c0, 0x9930, 0xb462, 0x98b9, 0xb505, 0x9843, 0xb5a8, 0x97ce, 
+  0xb64c, 0x9759, 0xb6f1, 0x96e6, 0xb796, 0x9674, 0xb83c, 0x9603, 
+  0xb8e3, 0x9592, 0xb98b, 0x9523, 0xba33, 0x94b5, 0xbadc, 0x9448, 
+  0xbb85, 0x93dc, 0xbc2f, 0x9371, 0xbcda, 0x9307, 0xbd86, 0x929e, 
+  0xbe32, 0x9236, 0xbedf, 0x91cf, 0xbf8c, 0x9169, 0xc03a, 0x9105, 
+  0xc0e9, 0x90a1, 0xc198, 0x903e, 0xc248, 0x8fdd, 0xc2f8, 0x8f7d, 
+  0xc3a9, 0x8f1d, 0xc45b, 0x8ebf, 0xc50d, 0x8e62, 0xc5c0, 0x8e06, 
+  0xc673, 0x8dab, 0xc727, 0x8d51, 0xc7db, 0x8cf8, 0xc890, 0x8ca1, 
+  0xc946, 0x8c4a, 0xc9fc, 0x8bf5, 0xcab2, 0x8ba0, 0xcb69, 0x8b4d, 
+  0xcc21, 0x8afb, 0xccd9, 0x8aaa, 0xcd92, 0x8a5a, 0xce4b, 0x8a0c, 
+  0xcf04, 0x89be, 0xcfbe, 0x8972, 0xd079, 0x8927, 0xd134, 0x88dd, 
+  0xd1ef, 0x8894, 0xd2ab, 0x884c, 0xd367, 0x8805, 0xd424, 0x87c0, 
+  0xd4e1, 0x877b, 0xd59e, 0x8738, 0xd65c, 0x86f6, 0xd71b, 0x86b6, 
+  0xd7d9, 0x8676, 0xd898, 0x8637, 0xd958, 0x85fa, 0xda18, 0x85be, 
+  0xdad8, 0x8583, 0xdb99, 0x8549, 0xdc59, 0x8511, 0xdd1b, 0x84d9, 
+  0xdddc, 0x84a3, 0xde9e, 0x846e, 0xdf61, 0x843a, 0xe023, 0x8407, 
+  0xe0e6, 0x83d6, 0xe1a9, 0x83a6, 0xe26d, 0x8377, 0xe330, 0x8349, 
+  0xe3f4, 0x831c, 0xe4b9, 0x82f1, 0xe57d, 0x82c6, 0xe642, 0x829d, 
+  0xe707, 0x8276, 0xe7cd, 0x824f, 0xe892, 0x822a, 0xe958, 0x8205, 
+  0xea1e, 0x81e2, 0xeae4, 0x81c1, 0xebab, 0x81a0, 0xec71, 0x8181, 
+  0xed38, 0x8163, 0xedff, 0x8146, 0xeec6, 0x812a, 0xef8d, 0x8110, 
+  0xf055, 0x80f6, 0xf11c, 0x80de, 0xf1e4, 0x80c8, 0xf2ac, 0x80b2, 
+  0xf374, 0x809e, 0xf43c, 0x808b, 0xf505, 0x8079, 0xf5cd, 0x8068, 
+  0xf695, 0x8059, 0xf75e, 0x804b, 0xf827, 0x803e, 0xf8ef, 0x8032, 
+  0xf9b8, 0x8027, 0xfa81, 0x801e, 0xfb4a, 0x8016, 0xfc13, 0x800f, 
+  0xfcdc, 0x800a, 0xfda5, 0x8006, 0xfe6e, 0x8002, 0xff37, 0x8001, 
+  0x0, 0x8000, 0xc9, 0x8001, 0x192, 0x8002, 0x25b, 0x8006, 
+  0x324, 0x800a, 0x3ed, 0x800f, 0x4b6, 0x8016, 0x57f, 0x801e, 
+  0x648, 0x8027, 0x711, 0x8032, 0x7d9, 0x803e, 0x8a2, 0x804b, 
+  0x96b, 0x8059, 0xa33, 0x8068, 0xafb, 0x8079, 0xbc4, 0x808b, 
+  0xc8c, 0x809e, 0xd54, 0x80b2, 0xe1c, 0x80c8, 0xee4, 0x80de, 
+  0xfab, 0x80f6, 0x1073, 0x8110, 0x113a, 0x812a, 0x1201, 0x8146, 
+  0x12c8, 0x8163, 0x138f, 0x8181, 0x1455, 0x81a0, 0x151c, 0x81c1, 
+  0x15e2, 0x81e2, 0x16a8, 0x8205, 0x176e, 0x822a, 0x1833, 0x824f, 
+  0x18f9, 0x8276, 0x19be, 0x829d, 0x1a83, 0x82c6, 0x1b47, 0x82f1, 
+  0x1c0c, 0x831c, 0x1cd0, 0x8349, 0x1d93, 0x8377, 0x1e57, 0x83a6, 
+  0x1f1a, 0x83d6, 0x1fdd, 0x8407, 0x209f, 0x843a, 0x2162, 0x846e, 
+  0x2224, 0x84a3, 0x22e5, 0x84d9, 0x23a7, 0x8511, 0x2467, 0x8549, 
+  0x2528, 0x8583, 0x25e8, 0x85be, 0x26a8, 0x85fa, 0x2768, 0x8637, 
+  0x2827, 0x8676, 0x28e5, 0x86b6, 0x29a4, 0x86f6, 0x2a62, 0x8738, 
+  0x2b1f, 0x877b, 0x2bdc, 0x87c0, 0x2c99, 0x8805, 0x2d55, 0x884c, 
+  0x2e11, 0x8894, 0x2ecc, 0x88dd, 0x2f87, 0x8927, 0x3042, 0x8972, 
+  0x30fc, 0x89be, 0x31b5, 0x8a0c, 0x326e, 0x8a5a, 0x3327, 0x8aaa, 
+  0x33df, 0x8afb, 0x3497, 0x8b4d, 0x354e, 0x8ba0, 0x3604, 0x8bf5, 
+  0x36ba, 0x8c4a, 0x3770, 0x8ca1, 0x3825, 0x8cf8, 0x38d9, 0x8d51, 
+  0x398d, 0x8dab, 0x3a40, 0x8e06, 0x3af3, 0x8e62, 0x3ba5, 0x8ebf, 
+  0x3c57, 0x8f1d, 0x3d08, 0x8f7d, 0x3db8, 0x8fdd, 0x3e68, 0x903e, 
+  0x3f17, 0x90a1, 0x3fc6, 0x9105, 0x4074, 0x9169, 0x4121, 0x91cf, 
+  0x41ce, 0x9236, 0x427a, 0x929e, 0x4326, 0x9307, 0x43d1, 0x9371, 
+  0x447b, 0x93dc, 0x4524, 0x9448, 0x45cd, 0x94b5, 0x4675, 0x9523, 
+  0x471d, 0x9592, 0x47c4, 0x9603, 0x486a, 0x9674, 0x490f, 0x96e6, 
+  0x49b4, 0x9759, 0x4a58, 0x97ce, 0x4afb, 0x9843, 0x4b9e, 0x98b9, 
+  0x4c40, 0x9930, 0x4ce1, 0x99a9, 0x4d81, 0x9a22, 0x4e21, 0x9a9c, 
+  0x4ec0, 0x9b17, 0x4f5e, 0x9b94, 0x4ffb, 0x9c11, 0x5098, 0x9c8f, 
+  0x5134, 0x9d0e, 0x51cf, 0x9d8e, 0x5269, 0x9e0f, 0x5303, 0x9e91, 
+  0x539b, 0x9f14, 0x5433, 0x9f98, 0x54ca, 0xa01c, 0x5560, 0xa0a2, 
+  0x55f6, 0xa129, 0x568a, 0xa1b0, 0x571e, 0xa238, 0x57b1, 0xa2c2, 
+  0x5843, 0xa34c, 0x58d4, 0xa3d7, 0x5964, 0xa463, 0x59f4, 0xa4f0, 
+  0x5a82, 0xa57e, 0x5b10, 0xa60c, 0x5b9d, 0xa69c, 0x5c29, 0xa72c, 
+  0x5cb4, 0xa7bd, 0x5d3e, 0xa84f, 0x5dc8, 0xa8e2, 0x5e50, 0xa976, 
+  0x5ed7, 0xaa0a, 0x5f5e, 0xaaa0, 0x5fe4, 0xab36, 0x6068, 0xabcd, 
+  0x60ec, 0xac65, 0x616f, 0xacfd, 0x61f1, 0xad97, 0x6272, 0xae31, 
+  0x62f2, 0xaecc, 0x6371, 0xaf68, 0x63ef, 0xb005, 0x646c, 0xb0a2, 
+  0x64e9, 0xb140, 0x6564, 0xb1df, 0x65de, 0xb27f, 0x6657, 0xb31f, 
+  0x66d0, 0xb3c0, 0x6747, 0xb462, 0x67bd, 0xb505, 0x6832, 0xb5a8, 
+  0x68a7, 0xb64c, 0x691a, 0xb6f1, 0x698c, 0xb796, 0x69fd, 0xb83c, 
+  0x6a6e, 0xb8e3, 0x6add, 0xb98b, 0x6b4b, 0xba33, 0x6bb8, 0xbadc, 
+  0x6c24, 0xbb85, 0x6c8f, 0xbc2f, 0x6cf9, 0xbcda, 0x6d62, 0xbd86, 
+  0x6dca, 0xbe32, 0x6e31, 0xbedf, 0x6e97, 0xbf8c, 0x6efb, 0xc03a, 
+  0x6f5f, 0xc0e9, 0x6fc2, 0xc198, 0x7023, 0xc248, 0x7083, 0xc2f8, 
+  0x70e3, 0xc3a9, 0x7141, 0xc45b, 0x719e, 0xc50d, 0x71fa, 0xc5c0, 
+  0x7255, 0xc673, 0x72af, 0xc727, 0x7308, 0xc7db, 0x735f, 0xc890, 
+  0x73b6, 0xc946, 0x740b, 0xc9fc, 0x7460, 0xcab2, 0x74b3, 0xcb69, 
+  0x7505, 0xcc21, 0x7556, 0xccd9, 0x75a6, 0xcd92, 0x75f4, 0xce4b, 
+  0x7642, 0xcf04, 0x768e, 0xcfbe, 0x76d9, 0xd079, 0x7723, 0xd134, 
+  0x776c, 0xd1ef, 0x77b4, 0xd2ab, 0x77fb, 0xd367, 0x7840, 0xd424, 
+  0x7885, 0xd4e1, 0x78c8, 0xd59e, 0x790a, 0xd65c, 0x794a, 0xd71b, 
+  0x798a, 0xd7d9, 0x79c9, 0xd898, 0x7a06, 0xd958, 0x7a42, 0xda18, 
+  0x7a7d, 0xdad8, 0x7ab7, 0xdb99, 0x7aef, 0xdc59, 0x7b27, 0xdd1b, 
+  0x7b5d, 0xdddc, 0x7b92, 0xde9e, 0x7bc6, 0xdf61, 0x7bf9, 0xe023, 
+  0x7c2a, 0xe0e6, 0x7c5a, 0xe1a9, 0x7c89, 0xe26d, 0x7cb7, 0xe330, 
+  0x7ce4, 0xe3f4, 0x7d0f, 0xe4b9, 0x7d3a, 0xe57d, 0x7d63, 0xe642, 
+  0x7d8a, 0xe707, 0x7db1, 0xe7cd, 0x7dd6, 0xe892, 0x7dfb, 0xe958, 
+  0x7e1e, 0xea1e, 0x7e3f, 0xeae4, 0x7e60, 0xebab, 0x7e7f, 0xec71, 
+  0x7e9d, 0xed38, 0x7eba, 0xedff, 0x7ed6, 0xeec6, 0x7ef0, 0xef8d, 
+  0x7f0a, 0xf055, 0x7f22, 0xf11c, 0x7f38, 0xf1e4, 0x7f4e, 0xf2ac, 
+  0x7f62, 0xf374, 0x7f75, 0xf43c, 0x7f87, 0xf505, 0x7f98, 0xf5cd, 
+  0x7fa7, 0xf695, 0x7fb5, 0xf75e, 0x7fc2, 0xf827, 0x7fce, 0xf8ef, 
+  0x7fd9, 0xf9b8, 0x7fe2, 0xfa81, 0x7fea, 0xfb4a, 0x7ff1, 0xfc13, 
+  0x7ff6, 0xfcdc, 0x7ffa, 0xfda5, 0x7ffe, 0xfe6e, 0x7fff, 0xff37 
+}; 
+ 
+ 
+/**  
+* @brief Initialization function for the Q15 CFFT/CIFFT. 
+* @param[in,out] *S             points to an instance of the Q15 CFFT/CIFFT structure. 
+* @param[in]     fftLen         length of the FFT. 
+* @param[in]     ifftFlag       flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. 
+* @param[in]     bitReverseFlag flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. 
+* @return        The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>fftLen</code> is not a supported value. 
+*  
+* \par Description: 
+* \par  
+* The parameter <code>ifftFlag</code> controls whether a forward or inverse transform is computed.  
+* Set ifftFlag (=1) to compute the CIFFT; otherwise the CFFT is computed. 
+* \par  
+* The parameter <code>bitReverseFlag</code> controls whether output is in normal order or bit reversed order.  
+* Set bitReverseFlag (=1) for output in normal order; otherwise the output is in bit reversed order.  
+* \par  
+* The parameter <code>fftLen</code> specifies the length of the CFFT/CIFFT process. Supported FFT lengths are 16, 64, 256 and 1024.  
+* \par  
+* This function also initializes the twiddle factor table pointer and the bit reversal table pointer.  
+*/ 
+ 
+arm_status arm_cfft_radix4_init_q15( 
+  arm_cfft_radix4_instance_q15 * S, 
+  uint16_t fftLen, 
+  uint8_t ifftFlag, 
+  uint8_t bitReverseFlag) 
+{ 
+  /*  Value written to both the twiddle coefficient modifier and the 
+   *  bit reversal factor; chosen below from the FFT length. */ 
+  uint16_t modifier; 
+ 
+  /*  Store the fields that do not depend on the FFT length first. 
+   *  They are written even when fftLen is not a supported value. */ 
+ 
+  /*  FFT length */ 
+  S->fftLen = fftLen; 
+  /*  Twiddle coefficient table */ 
+  S->pTwiddle = (q15_t *) twiddleCoefQ15; 
+  /*  CFFT (0) or CIFFT (1) selection flag */ 
+  S->ifftFlag = ifftFlag; 
+  /*  Bit reversal enable flag */ 
+  S->bitReverseFlag = bitReverseFlag; 
+ 
+  /*  Pick the modifier that corresponds to the FFT length */ 
+  switch (S->fftLen) 
+  { 
+  case 1024u: 
+    /*  1024 point FFT */ 
+    modifier = 1u; 
+    break; 
+ 
+  case 256u: 
+    /*  256 point FFT */ 
+    modifier = 4u; 
+    break; 
+ 
+  case 64u: 
+    /*  64 point FFT */ 
+    modifier = 16u; 
+    break; 
+ 
+  case 16u: 
+    /*  16 point FFT */ 
+    modifier = 64u; 
+    break; 
+ 
+  default: 
+    /*  Unsupported fftLen: report an argument error and leave the 
+     *  length dependent fields unmodified */ 
+    return (ARM_MATH_ARGUMENT_ERROR); 
+  } 
+ 
+  S->twidCoefModifier = modifier; 
+  S->bitRevFactor = modifier; 
+  /*  Bit reversal table entry point: index (modifier - 1), i.e. 
+   *  0, 3, 15 and 63 for lengths 1024, 256, 64 and 16 */ 
+  S->pBitRevTable = (uint16_t *) &armBitRevTable[modifier - 1u]; 
+ 
+  return (ARM_MATH_SUCCESS); 
+} 
+ 
+/**  
+ * @} end of CFFT_CIFFT group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/TransformFunctions/arm_cfft_radix4_init_q31.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,667 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:	    arm_cfft_radix4_init_q31.c  
+*  
+* Description:	Radix-4 Decimation in Frequency Q31 FFT & IFFT initialization function  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.5  2010/04/26   
+* 	 incorporated review comments and updated with latest CMSIS layer  
+*  
+* Version 0.0.3  2010/03/10   
+*    Initial version  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+#include "arm_common_tables.h" 
+ 
+/**  
+ * @ingroup groupTransforms  
+ */ 
+ 
+/**  
+ * @addtogroup CFFT_CIFFT  
+ * @{  
+ */ 
+ 
+/*  
+* @brief  Twiddle factors Table  
+*/ 
+ 
+/**  
+* \par 
+* Example code for Q31 twiddle factor generation:  
+* \par  
+* <pre>for(i = 0; i< N; i++)  
+* {  
+*    twiddleCoefQ31[2*i]= cos(i * 2*PI/(float)N);  
+*    twiddleCoefQ31[2*i+1]= sin(i * 2*PI/(float)N);  
+* } </pre>  
+* \par  
+* where N = 1024	and PI = 3.14159265358979  
+* \par  
+* Cos and Sin values are stored in interleaved fashion  
+* \par  
+* Convert Floating point to Q31(Fixed point 1.31):  
+*	round(twiddleCoefQ31(i) * pow(2, 31))  
+*  
+*/ 
+ 
+static const q31_t twiddleCoefQ31[2048] = { 
+  0x7fffffff, 0x0, 0x7fff6216, 0xc90f88, 0x7ffd885a, 0x1921d20, 0x7ffa72d1, 
+  0x25b26d7, 
+  0x7ff62182, 0x3242abf, 0x7ff09478, 0x3ed26e6, 0x7fe9cbc0, 0x4b6195d, 
+  0x7fe1c76b, 0x57f0035, 
+  0x7fd8878e, 0x647d97c, 0x7fce0c3e, 0x710a345, 0x7fc25596, 0x7d95b9e, 
+  0x7fb563b3, 0x8a2009a, 
+  0x7fa736b4, 0x96a9049, 0x7f97cebd, 0xa3308bd, 0x7f872bf3, 0xafb6805, 
+  0x7f754e80, 0xbc3ac35, 
+  0x7f62368f, 0xc8bd35e, 0x7f4de451, 0xd53db92, 0x7f3857f6, 0xe1bc2e4, 
+  0x7f2191b4, 0xee38766, 
+  0x7f0991c4, 0xfab272b, 0x7ef05860, 0x1072a048, 0x7ed5e5c6, 0x1139f0cf, 
+  0x7eba3a39, 0x120116d5, 
+  0x7e9d55fc, 0x12c8106f, 0x7e7f3957, 0x138edbb1, 0x7e5fe493, 0x145576b1, 
+  0x7e3f57ff, 0x151bdf86, 
+  0x7e1d93ea, 0x15e21445, 0x7dfa98a8, 0x16a81305, 0x7dd6668f, 0x176dd9de, 
+  0x7db0fdf8, 0x183366e9, 
+  0x7d8a5f40, 0x18f8b83c, 0x7d628ac6, 0x19bdcbf3, 0x7d3980ec, 0x1a82a026, 
+  0x7d0f4218, 0x1b4732ef, 
+  0x7ce3ceb2, 0x1c0b826a, 0x7cb72724, 0x1ccf8cb3, 0x7c894bde, 0x1d934fe5, 
+  0x7c5a3d50, 0x1e56ca1e, 
+  0x7c29fbee, 0x1f19f97b, 0x7bf88830, 0x1fdcdc1b, 0x7bc5e290, 0x209f701c, 
+  0x7b920b89, 0x2161b3a0, 
+  0x7b5d039e, 0x2223a4c5, 0x7b26cb4f, 0x22e541af, 0x7aef6323, 0x23a6887f, 
+  0x7ab6cba4, 0x24677758, 
+  0x7a7d055b, 0x25280c5e, 0x7a4210d8, 0x25e845b6, 0x7a05eead, 0x26a82186, 
+  0x79c89f6e, 0x27679df4, 
+  0x798a23b1, 0x2826b928, 0x794a7c12, 0x28e5714b, 0x7909a92d, 0x29a3c485, 
+  0x78c7aba2, 0x2a61b101, 
+  0x78848414, 0x2b1f34eb, 0x78403329, 0x2bdc4e6f, 0x77fab989, 0x2c98fbba, 
+  0x77b417df, 0x2d553afc, 
+  0x776c4edb, 0x2e110a62, 0x77235f2d, 0x2ecc681e, 0x76d94989, 0x2f875262, 
+  0x768e0ea6, 0x3041c761, 
+  0x7641af3d, 0x30fbc54d, 0x75f42c0b, 0x31b54a5e, 0x75a585cf, 0x326e54c7, 
+  0x7555bd4c, 0x3326e2c3, 
+  0x7504d345, 0x33def287, 0x74b2c884, 0x34968250, 0x745f9dd1, 0x354d9057, 
+  0x740b53fb, 0x36041ad9, 
+  0x73b5ebd1, 0x36ba2014, 0x735f6626, 0x376f9e46, 0x7307c3d0, 0x382493b0, 
+  0x72af05a7, 0x38d8fe93, 
+  0x72552c85, 0x398cdd32, 0x71fa3949, 0x3a402dd2, 0x719e2cd2, 0x3af2eeb7, 
+  0x71410805, 0x3ba51e29, 
+  0x70e2cbc6, 0x3c56ba70, 0x708378ff, 0x3d07c1d6, 0x7023109a, 0x3db832a6, 
+  0x6fc19385, 0x3e680b2c, 
+  0x6f5f02b2, 0x3f1749b8, 0x6efb5f12, 0x3fc5ec98, 0x6e96a99d, 0x4073f21d, 
+  0x6e30e34a, 0x4121589b, 
+  0x6dca0d14, 0x41ce1e65, 0x6d6227fa, 0x427a41d0, 0x6cf934fc, 0x4325c135, 
+  0x6c8f351c, 0x43d09aed, 
+  0x6c242960, 0x447acd50, 0x6bb812d1, 0x452456bd, 0x6b4af279, 0x45cd358f, 
+  0x6adcc964, 0x46756828, 
+  0x6a6d98a4, 0x471cece7, 0x69fd614a, 0x47c3c22f, 0x698c246c, 0x4869e665, 
+  0x6919e320, 0x490f57ee, 
+  0x68a69e81, 0x49b41533, 0x683257ab, 0x4a581c9e, 0x67bd0fbd, 0x4afb6c98, 
+  0x6746c7d8, 0x4b9e0390, 
+  0x66cf8120, 0x4c3fdff4, 0x66573cbb, 0x4ce10034, 0x65ddfbd3, 0x4d8162c4, 
+  0x6563bf92, 0x4e210617, 
+  0x64e88926, 0x4ebfe8a5, 0x646c59bf, 0x4f5e08e3, 0x63ef3290, 0x4ffb654d, 
+  0x637114cc, 0x5097fc5e, 
+  0x62f201ac, 0x5133cc94, 0x6271fa69, 0x51ced46e, 0x61f1003f, 0x5269126e, 
+  0x616f146c, 0x53028518, 
+  0x60ec3830, 0x539b2af0, 0x60686ccf, 0x5433027d, 0x5fe3b38d, 0x54ca0a4b, 
+  0x5f5e0db3, 0x556040e2, 
+  0x5ed77c8a, 0x55f5a4d2, 0x5e50015d, 0x568a34a9, 0x5dc79d7c, 0x571deefa, 
+  0x5d3e5237, 0x57b0d256, 
+  0x5cb420e0, 0x5842dd54, 0x5c290acc, 0x58d40e8c, 0x5b9d1154, 0x59646498, 
+  0x5b1035cf, 0x59f3de12, 
+  0x5a82799a, 0x5a82799a, 0x59f3de12, 0x5b1035cf, 0x59646498, 0x5b9d1154, 
+  0x58d40e8c, 0x5c290acc, 
+  0x5842dd54, 0x5cb420e0, 0x57b0d256, 0x5d3e5237, 0x571deefa, 0x5dc79d7c, 
+  0x568a34a9, 0x5e50015d, 
+  0x55f5a4d2, 0x5ed77c8a, 0x556040e2, 0x5f5e0db3, 0x54ca0a4b, 0x5fe3b38d, 
+  0x5433027d, 0x60686ccf, 
+  0x539b2af0, 0x60ec3830, 0x53028518, 0x616f146c, 0x5269126e, 0x61f1003f, 
+  0x51ced46e, 0x6271fa69, 
+  0x5133cc94, 0x62f201ac, 0x5097fc5e, 0x637114cc, 0x4ffb654d, 0x63ef3290, 
+  0x4f5e08e3, 0x646c59bf, 
+  0x4ebfe8a5, 0x64e88926, 0x4e210617, 0x6563bf92, 0x4d8162c4, 0x65ddfbd3, 
+  0x4ce10034, 0x66573cbb, 
+  0x4c3fdff4, 0x66cf8120, 0x4b9e0390, 0x6746c7d8, 0x4afb6c98, 0x67bd0fbd, 
+  0x4a581c9e, 0x683257ab, 
+  0x49b41533, 0x68a69e81, 0x490f57ee, 0x6919e320, 0x4869e665, 0x698c246c, 
+  0x47c3c22f, 0x69fd614a, 
+  0x471cece7, 0x6a6d98a4, 0x46756828, 0x6adcc964, 0x45cd358f, 0x6b4af279, 
+  0x452456bd, 0x6bb812d1, 
+  0x447acd50, 0x6c242960, 0x43d09aed, 0x6c8f351c, 0x4325c135, 0x6cf934fc, 
+  0x427a41d0, 0x6d6227fa, 
+  0x41ce1e65, 0x6dca0d14, 0x4121589b, 0x6e30e34a, 0x4073f21d, 0x6e96a99d, 
+  0x3fc5ec98, 0x6efb5f12, 
+  0x3f1749b8, 0x6f5f02b2, 0x3e680b2c, 0x6fc19385, 0x3db832a6, 0x7023109a, 
+  0x3d07c1d6, 0x708378ff, 
+  0x3c56ba70, 0x70e2cbc6, 0x3ba51e29, 0x71410805, 0x3af2eeb7, 0x719e2cd2, 
+  0x3a402dd2, 0x71fa3949, 
+  0x398cdd32, 0x72552c85, 0x38d8fe93, 0x72af05a7, 0x382493b0, 0x7307c3d0, 
+  0x376f9e46, 0x735f6626, 
+  0x36ba2014, 0x73b5ebd1, 0x36041ad9, 0x740b53fb, 0x354d9057, 0x745f9dd1, 
+  0x34968250, 0x74b2c884, 
+  0x33def287, 0x7504d345, 0x3326e2c3, 0x7555bd4c, 0x326e54c7, 0x75a585cf, 
+  0x31b54a5e, 0x75f42c0b, 
+  0x30fbc54d, 0x7641af3d, 0x3041c761, 0x768e0ea6, 0x2f875262, 0x76d94989, 
+  0x2ecc681e, 0x77235f2d, 
+  0x2e110a62, 0x776c4edb, 0x2d553afc, 0x77b417df, 0x2c98fbba, 0x77fab989, 
+  0x2bdc4e6f, 0x78403329, 
+  0x2b1f34eb, 0x78848414, 0x2a61b101, 0x78c7aba2, 0x29a3c485, 0x7909a92d, 
+  0x28e5714b, 0x794a7c12, 
+  0x2826b928, 0x798a23b1, 0x27679df4, 0x79c89f6e, 0x26a82186, 0x7a05eead, 
+  0x25e845b6, 0x7a4210d8, 
+  0x25280c5e, 0x7a7d055b, 0x24677758, 0x7ab6cba4, 0x23a6887f, 0x7aef6323, 
+  0x22e541af, 0x7b26cb4f, 
+  0x2223a4c5, 0x7b5d039e, 0x2161b3a0, 0x7b920b89, 0x209f701c, 0x7bc5e290, 
+  0x1fdcdc1b, 0x7bf88830, 
+  0x1f19f97b, 0x7c29fbee, 0x1e56ca1e, 0x7c5a3d50, 0x1d934fe5, 0x7c894bde, 
+  0x1ccf8cb3, 0x7cb72724, 
+  0x1c0b826a, 0x7ce3ceb2, 0x1b4732ef, 0x7d0f4218, 0x1a82a026, 0x7d3980ec, 
+  0x19bdcbf3, 0x7d628ac6, 
+  0x18f8b83c, 0x7d8a5f40, 0x183366e9, 0x7db0fdf8, 0x176dd9de, 0x7dd6668f, 
+  0x16a81305, 0x7dfa98a8, 
+  0x15e21445, 0x7e1d93ea, 0x151bdf86, 0x7e3f57ff, 0x145576b1, 0x7e5fe493, 
+  0x138edbb1, 0x7e7f3957, 
+  0x12c8106f, 0x7e9d55fc, 0x120116d5, 0x7eba3a39, 0x1139f0cf, 0x7ed5e5c6, 
+  0x1072a048, 0x7ef05860, 
+  0xfab272b, 0x7f0991c4, 0xee38766, 0x7f2191b4, 0xe1bc2e4, 0x7f3857f6, 
+  0xd53db92, 0x7f4de451, 
+  0xc8bd35e, 0x7f62368f, 0xbc3ac35, 0x7f754e80, 0xafb6805, 0x7f872bf3, 
+  0xa3308bd, 0x7f97cebd, 
+  0x96a9049, 0x7fa736b4, 0x8a2009a, 0x7fb563b3, 0x7d95b9e, 0x7fc25596, 
+  0x710a345, 0x7fce0c3e, 
+  0x647d97c, 0x7fd8878e, 0x57f0035, 0x7fe1c76b, 0x4b6195d, 0x7fe9cbc0, 
+  0x3ed26e6, 0x7ff09478, 
+  0x3242abf, 0x7ff62182, 0x25b26d7, 0x7ffa72d1, 0x1921d20, 0x7ffd885a, 
+  0xc90f88, 0x7fff6216, 
+  0x0, 0x7fffffff, 0xff36f078, 0x7fff6216, 0xfe6de2e0, 0x7ffd885a, 0xfda4d929, 
+  0x7ffa72d1, 
+  0xfcdbd541, 0x7ff62182, 0xfc12d91a, 0x7ff09478, 0xfb49e6a3, 0x7fe9cbc0, 
+  0xfa80ffcb, 0x7fe1c76b, 
+  0xf9b82684, 0x7fd8878e, 0xf8ef5cbb, 0x7fce0c3e, 0xf826a462, 0x7fc25596, 
+  0xf75dff66, 0x7fb563b3, 
+  0xf6956fb7, 0x7fa736b4, 0xf5ccf743, 0x7f97cebd, 0xf50497fb, 0x7f872bf3, 
+  0xf43c53cb, 0x7f754e80, 
+  0xf3742ca2, 0x7f62368f, 0xf2ac246e, 0x7f4de451, 0xf1e43d1c, 0x7f3857f6, 
+  0xf11c789a, 0x7f2191b4, 
+  0xf054d8d5, 0x7f0991c4, 0xef8d5fb8, 0x7ef05860, 0xeec60f31, 0x7ed5e5c6, 
+  0xedfee92b, 0x7eba3a39, 
+  0xed37ef91, 0x7e9d55fc, 0xec71244f, 0x7e7f3957, 0xebaa894f, 0x7e5fe493, 
+  0xeae4207a, 0x7e3f57ff, 
+  0xea1debbb, 0x7e1d93ea, 0xe957ecfb, 0x7dfa98a8, 0xe8922622, 0x7dd6668f, 
+  0xe7cc9917, 0x7db0fdf8, 
+  0xe70747c4, 0x7d8a5f40, 0xe642340d, 0x7d628ac6, 0xe57d5fda, 0x7d3980ec, 
+  0xe4b8cd11, 0x7d0f4218, 
+  0xe3f47d96, 0x7ce3ceb2, 0xe330734d, 0x7cb72724, 0xe26cb01b, 0x7c894bde, 
+  0xe1a935e2, 0x7c5a3d50, 
+  0xe0e60685, 0x7c29fbee, 0xe02323e5, 0x7bf88830, 0xdf608fe4, 0x7bc5e290, 
+  0xde9e4c60, 0x7b920b89, 
+  0xdddc5b3b, 0x7b5d039e, 0xdd1abe51, 0x7b26cb4f, 0xdc597781, 0x7aef6323, 
+  0xdb9888a8, 0x7ab6cba4, 
+  0xdad7f3a2, 0x7a7d055b, 0xda17ba4a, 0x7a4210d8, 0xd957de7a, 0x7a05eead, 
+  0xd898620c, 0x79c89f6e, 
+  0xd7d946d8, 0x798a23b1, 0xd71a8eb5, 0x794a7c12, 0xd65c3b7b, 0x7909a92d, 
+  0xd59e4eff, 0x78c7aba2, 
+  0xd4e0cb15, 0x78848414, 0xd423b191, 0x78403329, 0xd3670446, 0x77fab989, 
+  0xd2aac504, 0x77b417df, 
+  0xd1eef59e, 0x776c4edb, 0xd13397e2, 0x77235f2d, 0xd078ad9e, 0x76d94989, 
+  0xcfbe389f, 0x768e0ea6, 
+  0xcf043ab3, 0x7641af3d, 0xce4ab5a2, 0x75f42c0b, 0xcd91ab39, 0x75a585cf, 
+  0xccd91d3d, 0x7555bd4c, 
+  0xcc210d79, 0x7504d345, 0xcb697db0, 0x74b2c884, 0xcab26fa9, 0x745f9dd1, 
+  0xc9fbe527, 0x740b53fb, 
+  0xc945dfec, 0x73b5ebd1, 0xc89061ba, 0x735f6626, 0xc7db6c50, 0x7307c3d0, 
+  0xc727016d, 0x72af05a7, 
+  0xc67322ce, 0x72552c85, 0xc5bfd22e, 0x71fa3949, 0xc50d1149, 0x719e2cd2, 
+  0xc45ae1d7, 0x71410805, 
+  0xc3a94590, 0x70e2cbc6, 0xc2f83e2a, 0x708378ff, 0xc247cd5a, 0x7023109a, 
+  0xc197f4d4, 0x6fc19385, 
+  0xc0e8b648, 0x6f5f02b2, 0xc03a1368, 0x6efb5f12, 0xbf8c0de3, 0x6e96a99d, 
+  0xbedea765, 0x6e30e34a, 
+  0xbe31e19b, 0x6dca0d14, 0xbd85be30, 0x6d6227fa, 0xbcda3ecb, 0x6cf934fc, 
+  0xbc2f6513, 0x6c8f351c, 
+  0xbb8532b0, 0x6c242960, 0xbadba943, 0x6bb812d1, 0xba32ca71, 0x6b4af279, 
+  0xb98a97d8, 0x6adcc964, 
+  0xb8e31319, 0x6a6d98a4, 0xb83c3dd1, 0x69fd614a, 0xb796199b, 0x698c246c, 
+  0xb6f0a812, 0x6919e320, 
+  0xb64beacd, 0x68a69e81, 0xb5a7e362, 0x683257ab, 0xb5049368, 0x67bd0fbd, 
+  0xb461fc70, 0x6746c7d8, 
+  0xb3c0200c, 0x66cf8120, 0xb31effcc, 0x66573cbb, 0xb27e9d3c, 0x65ddfbd3, 
+  0xb1def9e9, 0x6563bf92, 
+  0xb140175b, 0x64e88926, 0xb0a1f71d, 0x646c59bf, 0xb0049ab3, 0x63ef3290, 
+  0xaf6803a2, 0x637114cc, 
+  0xaecc336c, 0x62f201ac, 0xae312b92, 0x6271fa69, 0xad96ed92, 0x61f1003f, 
+  0xacfd7ae8, 0x616f146c, 
+  0xac64d510, 0x60ec3830, 0xabccfd83, 0x60686ccf, 0xab35f5b5, 0x5fe3b38d, 
+  0xaa9fbf1e, 0x5f5e0db3, 
+  0xaa0a5b2e, 0x5ed77c8a, 0xa975cb57, 0x5e50015d, 0xa8e21106, 0x5dc79d7c, 
+  0xa84f2daa, 0x5d3e5237, 
+  0xa7bd22ac, 0x5cb420e0, 0xa72bf174, 0x5c290acc, 0xa69b9b68, 0x5b9d1154, 
+  0xa60c21ee, 0x5b1035cf, 
+  0xa57d8666, 0x5a82799a, 0xa4efca31, 0x59f3de12, 0xa462eeac, 0x59646498, 
+  0xa3d6f534, 0x58d40e8c, 
+  0xa34bdf20, 0x5842dd54, 0xa2c1adc9, 0x57b0d256, 0xa2386284, 0x571deefa, 
+  0xa1affea3, 0x568a34a9, 
+  0xa1288376, 0x55f5a4d2, 0xa0a1f24d, 0x556040e2, 0xa01c4c73, 0x54ca0a4b, 
+  0x9f979331, 0x5433027d, 
+  0x9f13c7d0, 0x539b2af0, 0x9e90eb94, 0x53028518, 0x9e0effc1, 0x5269126e, 
+  0x9d8e0597, 0x51ced46e, 
+  0x9d0dfe54, 0x5133cc94, 0x9c8eeb34, 0x5097fc5e, 0x9c10cd70, 0x4ffb654d, 
+  0x9b93a641, 0x4f5e08e3, 
+  0x9b1776da, 0x4ebfe8a5, 0x9a9c406e, 0x4e210617, 0x9a22042d, 0x4d8162c4, 
+  0x99a8c345, 0x4ce10034, 
+  0x99307ee0, 0x4c3fdff4, 0x98b93828, 0x4b9e0390, 0x9842f043, 0x4afb6c98, 
+  0x97cda855, 0x4a581c9e, 
+  0x9759617f, 0x49b41533, 0x96e61ce0, 0x490f57ee, 0x9673db94, 0x4869e665, 
+  0x96029eb6, 0x47c3c22f, 
+  0x9592675c, 0x471cece7, 0x9523369c, 0x46756828, 0x94b50d87, 0x45cd358f, 
+  0x9447ed2f, 0x452456bd, 
+  0x93dbd6a0, 0x447acd50, 0x9370cae4, 0x43d09aed, 0x9306cb04, 0x4325c135, 
+  0x929dd806, 0x427a41d0, 
+  0x9235f2ec, 0x41ce1e65, 0x91cf1cb6, 0x4121589b, 0x91695663, 0x4073f21d, 
+  0x9104a0ee, 0x3fc5ec98, 
+  0x90a0fd4e, 0x3f1749b8, 0x903e6c7b, 0x3e680b2c, 0x8fdcef66, 0x3db832a6, 
+  0x8f7c8701, 0x3d07c1d6, 
+  0x8f1d343a, 0x3c56ba70, 0x8ebef7fb, 0x3ba51e29, 0x8e61d32e, 0x3af2eeb7, 
+  0x8e05c6b7, 0x3a402dd2, 
+  0x8daad37b, 0x398cdd32, 0x8d50fa59, 0x38d8fe93, 0x8cf83c30, 0x382493b0, 
+  0x8ca099da, 0x376f9e46, 
+  0x8c4a142f, 0x36ba2014, 0x8bf4ac05, 0x36041ad9, 0x8ba0622f, 0x354d9057, 
+  0x8b4d377c, 0x34968250, 
+  0x8afb2cbb, 0x33def287, 0x8aaa42b4, 0x3326e2c3, 0x8a5a7a31, 0x326e54c7, 
+  0x8a0bd3f5, 0x31b54a5e, 
+  0x89be50c3, 0x30fbc54d, 0x8971f15a, 0x3041c761, 0x8926b677, 0x2f875262, 
+  0x88dca0d3, 0x2ecc681e, 
+  0x8893b125, 0x2e110a62, 0x884be821, 0x2d553afc, 0x88054677, 0x2c98fbba, 
+  0x87bfccd7, 0x2bdc4e6f, 
+  0x877b7bec, 0x2b1f34eb, 0x8738545e, 0x2a61b101, 0x86f656d3, 0x29a3c485, 
+  0x86b583ee, 0x28e5714b, 
+  0x8675dc4f, 0x2826b928, 0x86376092, 0x27679df4, 0x85fa1153, 0x26a82186, 
+  0x85bdef28, 0x25e845b6, 
+  0x8582faa5, 0x25280c5e, 0x8549345c, 0x24677758, 0x85109cdd, 0x23a6887f, 
+  0x84d934b1, 0x22e541af, 
+  0x84a2fc62, 0x2223a4c5, 0x846df477, 0x2161b3a0, 0x843a1d70, 0x209f701c, 
+  0x840777d0, 0x1fdcdc1b, 
+  0x83d60412, 0x1f19f97b, 0x83a5c2b0, 0x1e56ca1e, 0x8376b422, 0x1d934fe5, 
+  0x8348d8dc, 0x1ccf8cb3, 
+  0x831c314e, 0x1c0b826a, 0x82f0bde8, 0x1b4732ef, 0x82c67f14, 0x1a82a026, 
+  0x829d753a, 0x19bdcbf3, 
+  0x8275a0c0, 0x18f8b83c, 0x824f0208, 0x183366e9, 0x82299971, 0x176dd9de, 
+  0x82056758, 0x16a81305, 
+  0x81e26c16, 0x15e21445, 0x81c0a801, 0x151bdf86, 0x81a01b6d, 0x145576b1, 
+  0x8180c6a9, 0x138edbb1, 
+  0x8162aa04, 0x12c8106f, 0x8145c5c7, 0x120116d5, 0x812a1a3a, 0x1139f0cf, 
+  0x810fa7a0, 0x1072a048, 
+  0x80f66e3c, 0xfab272b, 0x80de6e4c, 0xee38766, 0x80c7a80a, 0xe1bc2e4, 
+  0x80b21baf, 0xd53db92, 
+  0x809dc971, 0xc8bd35e, 0x808ab180, 0xbc3ac35, 0x8078d40d, 0xafb6805, 
+  0x80683143, 0xa3308bd, 
+  0x8058c94c, 0x96a9049, 0x804a9c4d, 0x8a2009a, 0x803daa6a, 0x7d95b9e, 
+  0x8031f3c2, 0x710a345, 
+  0x80277872, 0x647d97c, 0x801e3895, 0x57f0035, 0x80163440, 0x4b6195d, 
+  0x800f6b88, 0x3ed26e6, 
+  0x8009de7e, 0x3242abf, 0x80058d2f, 0x25b26d7, 0x800277a6, 0x1921d20, 
+  0x80009dea, 0xc90f88, 
+  0x80000000, 0x0, 0x80009dea, 0xff36f078, 0x800277a6, 0xfe6de2e0, 0x80058d2f, 
+  0xfda4d929, 
+  0x8009de7e, 0xfcdbd541, 0x800f6b88, 0xfc12d91a, 0x80163440, 0xfb49e6a3, 
+  0x801e3895, 0xfa80ffcb, 
+  0x80277872, 0xf9b82684, 0x8031f3c2, 0xf8ef5cbb, 0x803daa6a, 0xf826a462, 
+  0x804a9c4d, 0xf75dff66, 
+  0x8058c94c, 0xf6956fb7, 0x80683143, 0xf5ccf743, 0x8078d40d, 0xf50497fb, 
+  0x808ab180, 0xf43c53cb, 
+  0x809dc971, 0xf3742ca2, 0x80b21baf, 0xf2ac246e, 0x80c7a80a, 0xf1e43d1c, 
+  0x80de6e4c, 0xf11c789a, 
+  0x80f66e3c, 0xf054d8d5, 0x810fa7a0, 0xef8d5fb8, 0x812a1a3a, 0xeec60f31, 
+  0x8145c5c7, 0xedfee92b, 
+  0x8162aa04, 0xed37ef91, 0x8180c6a9, 0xec71244f, 0x81a01b6d, 0xebaa894f, 
+  0x81c0a801, 0xeae4207a, 
+  0x81e26c16, 0xea1debbb, 0x82056758, 0xe957ecfb, 0x82299971, 0xe8922622, 
+  0x824f0208, 0xe7cc9917, 
+  0x8275a0c0, 0xe70747c4, 0x829d753a, 0xe642340d, 0x82c67f14, 0xe57d5fda, 
+  0x82f0bde8, 0xe4b8cd11, 
+  0x831c314e, 0xe3f47d96, 0x8348d8dc, 0xe330734d, 0x8376b422, 0xe26cb01b, 
+  0x83a5c2b0, 0xe1a935e2, 
+  0x83d60412, 0xe0e60685, 0x840777d0, 0xe02323e5, 0x843a1d70, 0xdf608fe4, 
+  0x846df477, 0xde9e4c60, 
+  0x84a2fc62, 0xdddc5b3b, 0x84d934b1, 0xdd1abe51, 0x85109cdd, 0xdc597781, 
+  0x8549345c, 0xdb9888a8, 
+  0x8582faa5, 0xdad7f3a2, 0x85bdef28, 0xda17ba4a, 0x85fa1153, 0xd957de7a, 
+  0x86376092, 0xd898620c, 
+  0x8675dc4f, 0xd7d946d8, 0x86b583ee, 0xd71a8eb5, 0x86f656d3, 0xd65c3b7b, 
+  0x8738545e, 0xd59e4eff, 
+  0x877b7bec, 0xd4e0cb15, 0x87bfccd7, 0xd423b191, 0x88054677, 0xd3670446, 
+  0x884be821, 0xd2aac504, 
+  0x8893b125, 0xd1eef59e, 0x88dca0d3, 0xd13397e2, 0x8926b677, 0xd078ad9e, 
+  0x8971f15a, 0xcfbe389f, 
+  0x89be50c3, 0xcf043ab3, 0x8a0bd3f5, 0xce4ab5a2, 0x8a5a7a31, 0xcd91ab39, 
+  0x8aaa42b4, 0xccd91d3d, 
+  0x8afb2cbb, 0xcc210d79, 0x8b4d377c, 0xcb697db0, 0x8ba0622f, 0xcab26fa9, 
+  0x8bf4ac05, 0xc9fbe527, 
+  0x8c4a142f, 0xc945dfec, 0x8ca099da, 0xc89061ba, 0x8cf83c30, 0xc7db6c50, 
+  0x8d50fa59, 0xc727016d, 
+  0x8daad37b, 0xc67322ce, 0x8e05c6b7, 0xc5bfd22e, 0x8e61d32e, 0xc50d1149, 
+  0x8ebef7fb, 0xc45ae1d7, 
+  0x8f1d343a, 0xc3a94590, 0x8f7c8701, 0xc2f83e2a, 0x8fdcef66, 0xc247cd5a, 
+  0x903e6c7b, 0xc197f4d4, 
+  0x90a0fd4e, 0xc0e8b648, 0x9104a0ee, 0xc03a1368, 0x91695663, 0xbf8c0de3, 
+  0x91cf1cb6, 0xbedea765, 
+  0x9235f2ec, 0xbe31e19b, 0x929dd806, 0xbd85be30, 0x9306cb04, 0xbcda3ecb, 
+  0x9370cae4, 0xbc2f6513, 
+  0x93dbd6a0, 0xbb8532b0, 0x9447ed2f, 0xbadba943, 0x94b50d87, 0xba32ca71, 
+  0x9523369c, 0xb98a97d8, 
+  0x9592675c, 0xb8e31319, 0x96029eb6, 0xb83c3dd1, 0x9673db94, 0xb796199b, 
+  0x96e61ce0, 0xb6f0a812, 
+  0x9759617f, 0xb64beacd, 0x97cda855, 0xb5a7e362, 0x9842f043, 0xb5049368, 
+  0x98b93828, 0xb461fc70, 
+  0x99307ee0, 0xb3c0200c, 0x99a8c345, 0xb31effcc, 0x9a22042d, 0xb27e9d3c, 
+  0x9a9c406e, 0xb1def9e9, 
+  0x9b1776da, 0xb140175b, 0x9b93a641, 0xb0a1f71d, 0x9c10cd70, 0xb0049ab3, 
+  0x9c8eeb34, 0xaf6803a2, 
+  0x9d0dfe54, 0xaecc336c, 0x9d8e0597, 0xae312b92, 0x9e0effc1, 0xad96ed92, 
+  0x9e90eb94, 0xacfd7ae8, 
+  0x9f13c7d0, 0xac64d510, 0x9f979331, 0xabccfd83, 0xa01c4c73, 0xab35f5b5, 
+  0xa0a1f24d, 0xaa9fbf1e, 
+  0xa1288376, 0xaa0a5b2e, 0xa1affea3, 0xa975cb57, 0xa2386284, 0xa8e21106, 
+  0xa2c1adc9, 0xa84f2daa, 
+  0xa34bdf20, 0xa7bd22ac, 0xa3d6f534, 0xa72bf174, 0xa462eeac, 0xa69b9b68, 
+  0xa4efca31, 0xa60c21ee, 
+  0xa57d8666, 0xa57d8666, 0xa60c21ee, 0xa4efca31, 0xa69b9b68, 0xa462eeac, 
+  0xa72bf174, 0xa3d6f534, 
+  0xa7bd22ac, 0xa34bdf20, 0xa84f2daa, 0xa2c1adc9, 0xa8e21106, 0xa2386284, 
+  0xa975cb57, 0xa1affea3, 
+  0xaa0a5b2e, 0xa1288376, 0xaa9fbf1e, 0xa0a1f24d, 0xab35f5b5, 0xa01c4c73, 
+  0xabccfd83, 0x9f979331, 
+  0xac64d510, 0x9f13c7d0, 0xacfd7ae8, 0x9e90eb94, 0xad96ed92, 0x9e0effc1, 
+  0xae312b92, 0x9d8e0597, 
+  0xaecc336c, 0x9d0dfe54, 0xaf6803a2, 0x9c8eeb34, 0xb0049ab3, 0x9c10cd70, 
+  0xb0a1f71d, 0x9b93a641, 
+  0xb140175b, 0x9b1776da, 0xb1def9e9, 0x9a9c406e, 0xb27e9d3c, 0x9a22042d, 
+  0xb31effcc, 0x99a8c345, 
+  0xb3c0200c, 0x99307ee0, 0xb461fc70, 0x98b93828, 0xb5049368, 0x9842f043, 
+  0xb5a7e362, 0x97cda855, 
+  0xb64beacd, 0x9759617f, 0xb6f0a812, 0x96e61ce0, 0xb796199b, 0x9673db94, 
+  0xb83c3dd1, 0x96029eb6, 
+  0xb8e31319, 0x9592675c, 0xb98a97d8, 0x9523369c, 0xba32ca71, 0x94b50d87, 
+  0xbadba943, 0x9447ed2f, 
+  0xbb8532b0, 0x93dbd6a0, 0xbc2f6513, 0x9370cae4, 0xbcda3ecb, 0x9306cb04, 
+  0xbd85be30, 0x929dd806, 
+  0xbe31e19b, 0x9235f2ec, 0xbedea765, 0x91cf1cb6, 0xbf8c0de3, 0x91695663, 
+  0xc03a1368, 0x9104a0ee, 
+  0xc0e8b648, 0x90a0fd4e, 0xc197f4d4, 0x903e6c7b, 0xc247cd5a, 0x8fdcef66, 
+  0xc2f83e2a, 0x8f7c8701, 
+  0xc3a94590, 0x8f1d343a, 0xc45ae1d7, 0x8ebef7fb, 0xc50d1149, 0x8e61d32e, 
+  0xc5bfd22e, 0x8e05c6b7, 
+  0xc67322ce, 0x8daad37b, 0xc727016d, 0x8d50fa59, 0xc7db6c50, 0x8cf83c30, 
+  0xc89061ba, 0x8ca099da, 
+  0xc945dfec, 0x8c4a142f, 0xc9fbe527, 0x8bf4ac05, 0xcab26fa9, 0x8ba0622f, 
+  0xcb697db0, 0x8b4d377c, 
+  0xcc210d79, 0x8afb2cbb, 0xccd91d3d, 0x8aaa42b4, 0xcd91ab39, 0x8a5a7a31, 
+  0xce4ab5a2, 0x8a0bd3f5, 
+  0xcf043ab3, 0x89be50c3, 0xcfbe389f, 0x8971f15a, 0xd078ad9e, 0x8926b677, 
+  0xd13397e2, 0x88dca0d3, 
+  0xd1eef59e, 0x8893b125, 0xd2aac504, 0x884be821, 0xd3670446, 0x88054677, 
+  0xd423b191, 0x87bfccd7, 
+  0xd4e0cb15, 0x877b7bec, 0xd59e4eff, 0x8738545e, 0xd65c3b7b, 0x86f656d3, 
+  0xd71a8eb5, 0x86b583ee, 
+  0xd7d946d8, 0x8675dc4f, 0xd898620c, 0x86376092, 0xd957de7a, 0x85fa1153, 
+  0xda17ba4a, 0x85bdef28, 
+  0xdad7f3a2, 0x8582faa5, 0xdb9888a8, 0x8549345c, 0xdc597781, 0x85109cdd, 
+  0xdd1abe51, 0x84d934b1, 
+  0xdddc5b3b, 0x84a2fc62, 0xde9e4c60, 0x846df477, 0xdf608fe4, 0x843a1d70, 
+  0xe02323e5, 0x840777d0, 
+  0xe0e60685, 0x83d60412, 0xe1a935e2, 0x83a5c2b0, 0xe26cb01b, 0x8376b422, 
+  0xe330734d, 0x8348d8dc, 
+  0xe3f47d96, 0x831c314e, 0xe4b8cd11, 0x82f0bde8, 0xe57d5fda, 0x82c67f14, 
+  0xe642340d, 0x829d753a, 
+  0xe70747c4, 0x8275a0c0, 0xe7cc9917, 0x824f0208, 0xe8922622, 0x82299971, 
+  0xe957ecfb, 0x82056758, 
+  0xea1debbb, 0x81e26c16, 0xeae4207a, 0x81c0a801, 0xebaa894f, 0x81a01b6d, 
+  0xec71244f, 0x8180c6a9, 
+  0xed37ef91, 0x8162aa04, 0xedfee92b, 0x8145c5c7, 0xeec60f31, 0x812a1a3a, 
+  0xef8d5fb8, 0x810fa7a0, 
+  0xf054d8d5, 0x80f66e3c, 0xf11c789a, 0x80de6e4c, 0xf1e43d1c, 0x80c7a80a, 
+  0xf2ac246e, 0x80b21baf, 
+  0xf3742ca2, 0x809dc971, 0xf43c53cb, 0x808ab180, 0xf50497fb, 0x8078d40d, 
+  0xf5ccf743, 0x80683143, 
+  0xf6956fb7, 0x8058c94c, 0xf75dff66, 0x804a9c4d, 0xf826a462, 0x803daa6a, 
+  0xf8ef5cbb, 0x8031f3c2, 
+  0xf9b82684, 0x80277872, 0xfa80ffcb, 0x801e3895, 0xfb49e6a3, 0x80163440, 
+  0xfc12d91a, 0x800f6b88, 
+  0xfcdbd541, 0x8009de7e, 0xfda4d929, 0x80058d2f, 0xfe6de2e0, 0x800277a6, 
+  0xff36f078, 0x80009dea, 
+  0x0, 0x80000000, 0xc90f88, 0x80009dea, 0x1921d20, 0x800277a6, 0x25b26d7, 
+  0x80058d2f, 
+  0x3242abf, 0x8009de7e, 0x3ed26e6, 0x800f6b88, 0x4b6195d, 0x80163440, 
+  0x57f0035, 0x801e3895, 
+  0x647d97c, 0x80277872, 0x710a345, 0x8031f3c2, 0x7d95b9e, 0x803daa6a, 
+  0x8a2009a, 0x804a9c4d, 
+  0x96a9049, 0x8058c94c, 0xa3308bd, 0x80683143, 0xafb6805, 0x8078d40d, 
+  0xbc3ac35, 0x808ab180, 
+  0xc8bd35e, 0x809dc971, 0xd53db92, 0x80b21baf, 0xe1bc2e4, 0x80c7a80a, 
+  0xee38766, 0x80de6e4c, 
+  0xfab272b, 0x80f66e3c, 0x1072a048, 0x810fa7a0, 0x1139f0cf, 0x812a1a3a, 
+  0x120116d5, 0x8145c5c7, 
+  0x12c8106f, 0x8162aa04, 0x138edbb1, 0x8180c6a9, 0x145576b1, 0x81a01b6d, 
+  0x151bdf86, 0x81c0a801, 
+  0x15e21445, 0x81e26c16, 0x16a81305, 0x82056758, 0x176dd9de, 0x82299971, 
+  0x183366e9, 0x824f0208, 
+  0x18f8b83c, 0x8275a0c0, 0x19bdcbf3, 0x829d753a, 0x1a82a026, 0x82c67f14, 
+  0x1b4732ef, 0x82f0bde8, 
+  0x1c0b826a, 0x831c314e, 0x1ccf8cb3, 0x8348d8dc, 0x1d934fe5, 0x8376b422, 
+  0x1e56ca1e, 0x83a5c2b0, 
+  0x1f19f97b, 0x83d60412, 0x1fdcdc1b, 0x840777d0, 0x209f701c, 0x843a1d70, 
+  0x2161b3a0, 0x846df477, 
+  0x2223a4c5, 0x84a2fc62, 0x22e541af, 0x84d934b1, 0x23a6887f, 0x85109cdd, 
+  0x24677758, 0x8549345c, 
+  0x25280c5e, 0x8582faa5, 0x25e845b6, 0x85bdef28, 0x26a82186, 0x85fa1153, 
+  0x27679df4, 0x86376092, 
+  0x2826b928, 0x8675dc4f, 0x28e5714b, 0x86b583ee, 0x29a3c485, 0x86f656d3, 
+  0x2a61b101, 0x8738545e, 
+  0x2b1f34eb, 0x877b7bec, 0x2bdc4e6f, 0x87bfccd7, 0x2c98fbba, 0x88054677, 
+  0x2d553afc, 0x884be821, 
+  0x2e110a62, 0x8893b125, 0x2ecc681e, 0x88dca0d3, 0x2f875262, 0x8926b677, 
+  0x3041c761, 0x8971f15a, 
+  0x30fbc54d, 0x89be50c3, 0x31b54a5e, 0x8a0bd3f5, 0x326e54c7, 0x8a5a7a31, 
+  0x3326e2c3, 0x8aaa42b4, 
+  0x33def287, 0x8afb2cbb, 0x34968250, 0x8b4d377c, 0x354d9057, 0x8ba0622f, 
+  0x36041ad9, 0x8bf4ac05, 
+  0x36ba2014, 0x8c4a142f, 0x376f9e46, 0x8ca099da, 0x382493b0, 0x8cf83c30, 
+  0x38d8fe93, 0x8d50fa59, 
+  0x398cdd32, 0x8daad37b, 0x3a402dd2, 0x8e05c6b7, 0x3af2eeb7, 0x8e61d32e, 
+  0x3ba51e29, 0x8ebef7fb, 
+  0x3c56ba70, 0x8f1d343a, 0x3d07c1d6, 0x8f7c8701, 0x3db832a6, 0x8fdcef66, 
+  0x3e680b2c, 0x903e6c7b, 
+  0x3f1749b8, 0x90a0fd4e, 0x3fc5ec98, 0x9104a0ee, 0x4073f21d, 0x91695663, 
+  0x4121589b, 0x91cf1cb6, 
+  0x41ce1e65, 0x9235f2ec, 0x427a41d0, 0x929dd806, 0x4325c135, 0x9306cb04, 
+  0x43d09aed, 0x9370cae4, 
+  0x447acd50, 0x93dbd6a0, 0x452456bd, 0x9447ed2f, 0x45cd358f, 0x94b50d87, 
+  0x46756828, 0x9523369c, 
+  0x471cece7, 0x9592675c, 0x47c3c22f, 0x96029eb6, 0x4869e665, 0x9673db94, 
+  0x490f57ee, 0x96e61ce0, 
+  0x49b41533, 0x9759617f, 0x4a581c9e, 0x97cda855, 0x4afb6c98, 0x9842f043, 
+  0x4b9e0390, 0x98b93828, 
+  0x4c3fdff4, 0x99307ee0, 0x4ce10034, 0x99a8c345, 0x4d8162c4, 0x9a22042d, 
+  0x4e210617, 0x9a9c406e, 
+  0x4ebfe8a5, 0x9b1776da, 0x4f5e08e3, 0x9b93a641, 0x4ffb654d, 0x9c10cd70, 
+  0x5097fc5e, 0x9c8eeb34, 
+  0x5133cc94, 0x9d0dfe54, 0x51ced46e, 0x9d8e0597, 0x5269126e, 0x9e0effc1, 
+  0x53028518, 0x9e90eb94, 
+  0x539b2af0, 0x9f13c7d0, 0x5433027d, 0x9f979331, 0x54ca0a4b, 0xa01c4c73, 
+  0x556040e2, 0xa0a1f24d, 
+  0x55f5a4d2, 0xa1288376, 0x568a34a9, 0xa1affea3, 0x571deefa, 0xa2386284, 
+  0x57b0d256, 0xa2c1adc9, 
+  0x5842dd54, 0xa34bdf20, 0x58d40e8c, 0xa3d6f534, 0x59646498, 0xa462eeac, 
+  0x59f3de12, 0xa4efca31, 
+  0x5a82799a, 0xa57d8666, 0x5b1035cf, 0xa60c21ee, 0x5b9d1154, 0xa69b9b68, 
+  0x5c290acc, 0xa72bf174, 
+  0x5cb420e0, 0xa7bd22ac, 0x5d3e5237, 0xa84f2daa, 0x5dc79d7c, 0xa8e21106, 
+  0x5e50015d, 0xa975cb57, 
+  0x5ed77c8a, 0xaa0a5b2e, 0x5f5e0db3, 0xaa9fbf1e, 0x5fe3b38d, 0xab35f5b5, 
+  0x60686ccf, 0xabccfd83, 
+  0x60ec3830, 0xac64d510, 0x616f146c, 0xacfd7ae8, 0x61f1003f, 0xad96ed92, 
+  0x6271fa69, 0xae312b92, 
+  0x62f201ac, 0xaecc336c, 0x637114cc, 0xaf6803a2, 0x63ef3290, 0xb0049ab3, 
+  0x646c59bf, 0xb0a1f71d, 
+  0x64e88926, 0xb140175b, 0x6563bf92, 0xb1def9e9, 0x65ddfbd3, 0xb27e9d3c, 
+  0x66573cbb, 0xb31effcc, 
+  0x66cf8120, 0xb3c0200c, 0x6746c7d8, 0xb461fc70, 0x67bd0fbd, 0xb5049368, 
+  0x683257ab, 0xb5a7e362, 
+  0x68a69e81, 0xb64beacd, 0x6919e320, 0xb6f0a812, 0x698c246c, 0xb796199b, 
+  0x69fd614a, 0xb83c3dd1, 
+  0x6a6d98a4, 0xb8e31319, 0x6adcc964, 0xb98a97d8, 0x6b4af279, 0xba32ca71, 
+  0x6bb812d1, 0xbadba943, 
+  0x6c242960, 0xbb8532b0, 0x6c8f351c, 0xbc2f6513, 0x6cf934fc, 0xbcda3ecb, 
+  0x6d6227fa, 0xbd85be30, 
+  0x6dca0d14, 0xbe31e19b, 0x6e30e34a, 0xbedea765, 0x6e96a99d, 0xbf8c0de3, 
+  0x6efb5f12, 0xc03a1368, 
+  0x6f5f02b2, 0xc0e8b648, 0x6fc19385, 0xc197f4d4, 0x7023109a, 0xc247cd5a, 
+  0x708378ff, 0xc2f83e2a, 
+  0x70e2cbc6, 0xc3a94590, 0x71410805, 0xc45ae1d7, 0x719e2cd2, 0xc50d1149, 
+  0x71fa3949, 0xc5bfd22e, 
+  0x72552c85, 0xc67322ce, 0x72af05a7, 0xc727016d, 0x7307c3d0, 0xc7db6c50, 
+  0x735f6626, 0xc89061ba, 
+  0x73b5ebd1, 0xc945dfec, 0x740b53fb, 0xc9fbe527, 0x745f9dd1, 0xcab26fa9, 
+  0x74b2c884, 0xcb697db0, 
+  0x7504d345, 0xcc210d79, 0x7555bd4c, 0xccd91d3d, 0x75a585cf, 0xcd91ab39, 
+  0x75f42c0b, 0xce4ab5a2, 
+  0x7641af3d, 0xcf043ab3, 0x768e0ea6, 0xcfbe389f, 0x76d94989, 0xd078ad9e, 
+  0x77235f2d, 0xd13397e2, 
+  0x776c4edb, 0xd1eef59e, 0x77b417df, 0xd2aac504, 0x77fab989, 0xd3670446, 
+  0x78403329, 0xd423b191, 
+  0x78848414, 0xd4e0cb15, 0x78c7aba2, 0xd59e4eff, 0x7909a92d, 0xd65c3b7b, 
+  0x794a7c12, 0xd71a8eb5, 
+  0x798a23b1, 0xd7d946d8, 0x79c89f6e, 0xd898620c, 0x7a05eead, 0xd957de7a, 
+  0x7a4210d8, 0xda17ba4a, 
+  0x7a7d055b, 0xdad7f3a2, 0x7ab6cba4, 0xdb9888a8, 0x7aef6323, 0xdc597781, 
+  0x7b26cb4f, 0xdd1abe51, 
+  0x7b5d039e, 0xdddc5b3b, 0x7b920b89, 0xde9e4c60, 0x7bc5e290, 0xdf608fe4, 
+  0x7bf88830, 0xe02323e5, 
+  0x7c29fbee, 0xe0e60685, 0x7c5a3d50, 0xe1a935e2, 0x7c894bde, 0xe26cb01b, 
+  0x7cb72724, 0xe330734d, 
+  0x7ce3ceb2, 0xe3f47d96, 0x7d0f4218, 0xe4b8cd11, 0x7d3980ec, 0xe57d5fda, 
+  0x7d628ac6, 0xe642340d, 
+  0x7d8a5f40, 0xe70747c4, 0x7db0fdf8, 0xe7cc9917, 0x7dd6668f, 0xe8922622, 
+  0x7dfa98a8, 0xe957ecfb, 
+  0x7e1d93ea, 0xea1debbb, 0x7e3f57ff, 0xeae4207a, 0x7e5fe493, 0xebaa894f, 
+  0x7e7f3957, 0xec71244f, 
+  0x7e9d55fc, 0xed37ef91, 0x7eba3a39, 0xedfee92b, 0x7ed5e5c6, 0xeec60f31, 
+  0x7ef05860, 0xef8d5fb8, 
+  0x7f0991c4, 0xf054d8d5, 0x7f2191b4, 0xf11c789a, 0x7f3857f6, 0xf1e43d1c, 
+  0x7f4de451, 0xf2ac246e, 
+  0x7f62368f, 0xf3742ca2, 0x7f754e80, 0xf43c53cb, 0x7f872bf3, 0xf50497fb, 
+  0x7f97cebd, 0xf5ccf743, 
+  0x7fa736b4, 0xf6956fb7, 0x7fb563b3, 0xf75dff66, 0x7fc25596, 0xf826a462, 
+  0x7fce0c3e, 0xf8ef5cbb, 
+  0x7fd8878e, 0xf9b82684, 0x7fe1c76b, 0xfa80ffcb, 0x7fe9cbc0, 0xfb49e6a3, 
+  0x7ff09478, 0xfc12d91a, 
+  0x7ff62182, 0xfcdbd541, 0x7ffa72d1, 0xfda4d929, 0x7ffd885a, 0xfe6de2e0, 
+  0x7fff6216, 0xff36f078 
+}; 
+ 
+/**  
+*  
+* @brief  Initialization function for the Q31 CFFT/CIFFT. 
+* @param[in,out] *S             points to an instance of the Q31 CFFT/CIFFT structure. 
+* @param[in]     fftLen         length of the FFT. 
+* @param[in]     ifftFlag       flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. 
+* @param[in]     bitReverseFlag flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. 
+* @return        The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>fftLen</code> is not a supported value. 
+*  
+* \par Description: 
+* \par  
+* The parameter <code>ifftFlag</code> controls whether a forward or inverse transform is computed.  
+* Set ifftFlag to 1 to compute the CIFFT; otherwise the CFFT is computed. 
+* \par  
+* The parameter <code>bitReverseFlag</code> controls whether output is in normal order or bit reversed order.  
+* Set bitReverseFlag to 1 for output in normal order; otherwise the output is in bit reversed order.  
+* \par  
+* The parameter <code>fftLen</code> specifies the length of the CFFT/CIFFT. Supported FFT lengths are 16, 64, 256, 1024.  
+* \par  
+* This function also sets the twiddle factor table pointer and the bit reversal table pointer. When ARM_MATH_ARGUMENT_ERROR is returned, fftLen, pTwiddle, ifftFlag and bitReverseFlag have still been stored, but twidCoefModifier, bitRevFactor and pBitRevTable are not written.  
+*/ 
+ 
+arm_status arm_cfft_radix4_init_q31( 
+  arm_cfft_radix4_instance_q31 * S, 
+  uint16_t fftLen, 
+  uint8_t ifftFlag, 
+  uint8_t bitReverseFlag) 
+{ 
+  /*  Assume success; only the default case of the switch below changes this */ 
+  arm_status status = ARM_MATH_SUCCESS; 
+  /*  Store the requested FFT length (stored before it is validated) */ 
+  S->fftLen = fftLen; 
+  /*  Every supported length uses the same twiddleCoefQ31 table */ 
+  S->pTwiddle = (q31_t *) twiddleCoefQ31; 
+  /*  Store the flag that selects CFFT or CIFFT */ 
+  S->ifftFlag = ifftFlag; 
+  /*  Store the flag that enables or disables bit reversal */ 
+  S->bitReverseFlag = bitReverseFlag; 
+ 
+  /*  Length-dependent fields: both modifiers equal 1024/fftLen and the bit reversal table offset is 1024/fftLen - 1 */ 
+  switch (S->fftLen) 
+  { 
+    /*  Initializations of structure parameters for 1024 point FFT */ 
+  case 1024u: 
+    /*  Twiddle coefficient modifier for the full-length transform */ 
+    S->twidCoefModifier = 1u; 
+    /*  Bit reversal table modifier for the full-length transform */ 
+    S->bitRevFactor = 1u; 
+    /*  Bit reversal table is used from its first entry */ 
+    S->pBitRevTable = (uint16_t*)armBitRevTable; 
+    break; 
+ 
+  case 256u: 
+    /*  256 point FFT: modifiers 4, bit reversal table starts at entry 3 */ 
+    S->twidCoefModifier = 4u; 
+    S->bitRevFactor = 4u; 
+    S->pBitRevTable = (uint16_t*)&armBitRevTable[3]; 
+    break; 
+ 
+  case 64u: 
+    /*  64 point FFT: modifiers 16, bit reversal table starts at entry 15 */ 
+    S->twidCoefModifier = 16u; 
+    S->bitRevFactor = 16u; 
+    S->pBitRevTable = (uint16_t*)&armBitRevTable[15]; 
+    break; 
+ 
+  case 16u: 
+    /*  16 point FFT: modifiers 64, bit reversal table starts at entry 63 */ 
+    S->twidCoefModifier = 64u; 
+    S->bitRevFactor = 64u; 
+    S->pBitRevTable = (uint16_t*)&armBitRevTable[63]; 
+    break; 
+ 
+  default: 
+    /*  Unsupported fftLen: report an argument error; the length-dependent fields above are left unwritten */ 
+    status = ARM_MATH_ARGUMENT_ERROR; 
+    break; 
+  } 
+ 
+  return (status); 
+} 
+ 
+/**  
+ * @} end of CFFT_CIFFT group  
+ */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/TransformFunctions/arm_cfft_radix4_q15.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,934 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:	    arm_cfft_radix4_q15.c  
+*  
+* Description:	This file has the function definitions of the Radix-4 FFT & IFFT functions and  
+*				the in-place bit reversal using a bit reversal table  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.5  2010/04/26   
+* 	 incorporated review comments and updated with latest CMSIS layer  
+*  
+* Version 0.0.3  2010/03/10   
+*    Initial version  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupTransforms  
+ */ 
+ 
+/**  
+ * @addtogroup CFFT_CIFFT  
+ * @{  
+ */ 
+ 
+ 
+/**  
+ * @details Runs the forward or inverse radix-4 butterfly selected by S->ifftFlag, then bit-reverses the buffer when S->bitReverseFlag is 1. 
+ * @brief Processing function for the Q15 CFFT/CIFFT. 
+ * @param[in]      *S    points to an instance of the Q15 CFFT/CIFFT structure. 
+ * @param[in, out] *pSrc points to the complex data buffer. Processing occurs in-place. 
+ * @return none. 
+ *   
+ * \par Input and output formats:  
+ * \par  
+ * Internally the input is downscaled by 2 for every stage to avoid saturation inside the CFFT/CIFFT process. 
+ * Hence the output format is different for different FFT sizes.  
+ * The input and output formats for different FFT sizes and the number of bits to upscale are given in the tables below for CFFT and CIFFT: 
+ * \par 
+ * \image html CFFTQ15.gif "Input and Output Formats for Q15 CFFT"  
+ * \image html CIFFTQ15.gif "Input and Output Formats for Q15 CIFFT"  
+ */ 
+ 
+void arm_cfft_radix4_q15( 
+  const arm_cfft_radix4_instance_q15 * S, 
+  q15_t * pSrc) 
+{ 
+  if(S->ifftFlag == 1u) 
+  { 
+    /*  ifftFlag is exactly 1: complex IFFT radix-4, computed in place on pSrc  */ 
+    arm_radix4_butterfly_inverse_q15(pSrc, S->fftLen, S->pTwiddle, 
+                                     S->twidCoefModifier); 
+  } 
+  else 
+  { 
+    /*  Any other ifftFlag value: complex FFT radix-4, computed in place on pSrc  */ 
+    arm_radix4_butterfly_q15(pSrc, S->fftLen, S->pTwiddle, 
+                             S->twidCoefModifier); 
+  } 
+ 
+  if(S->bitReverseFlag == 1u) 
+  { 
+    /*  Bit reversal of the butterfly output; skipped unless bitReverseFlag is exactly 1 */ 
+    arm_bitreversal_q15(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable); 
+  } 
+ 
+} 
+ 
+/**  
+ * @} end of CFFT_CIFFT group  
+ */ 
+ 
+/*  
+* Radix-4 FFT algorithm used is :  
+*  
+* Input real and imaginary data:  
+* x(n) = xa + j * ya  
+* x(n+N/4 ) = xb + j * yb  
+* x(n+N/2 ) = xc + j * yc  
+* x(n+3N/4) = xd + j * yd  
+*  
+*  
+* Output real and imaginary data:  
+* x(4r) = xa'+ j * ya'  
+* x(4r+1) = xb'+ j * yb'  
+* x(4r+2) = xc'+ j * yc'  
+* x(4r+3) = xd'+ j * yd'  
+*  
+*  
+* Twiddle factors for radix-4 FFT:  
+* Wn = co1 + j * (- si1)  
+* W2n = co2 + j * (- si2)  
+* W3n = co3 + j * (- si3)  
+  
+* The real and imaginary output values for the radix-4 butterfly are  
+* xa' = xa + xb + xc + xd  
+* ya' = ya + yb + yc + yd  
+* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1)  
+* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1)  
+* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2)  
+* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2)  
+* xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3)  
+* yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3)  
+*  
+*/ 
+ 
+/**  
+ * @brief  Core function for the Q15 CFFT butterfly process. 
+ * @param[in, out] *pSrc16          points to the in-place buffer of Q15 data type. 
+ * @param[in]      fftLen           length of the FFT. 
+ * @param[in]      *pCoef16         points to twiddle coefficient buffer. 
+ * @param[in]      twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. 
+ * @return none. 
+ */ 
+ 
+void arm_radix4_butterfly_q15( 
+  q15_t * pSrc16, 
+  uint32_t fftLen, 
+  q15_t * pCoef16, 
+  uint32_t twidCoefModifier) 
+{ 
+  q31_t R, S, T, U; 
+  q31_t C1, C2, C3, out1, out2; 
+  q31_t *pSrc, *pCoeff; 
+  uint32_t n1, n2, ic, i0, i1, i2, i3, j, k; 
+  q15_t in; 
+ 
+  /* Total process is divided into three stages */ 
+ 
+  /* process first stage, middle stages, & last stage */ 
+ 
+  /*  pointer initializations for SIMD calculations */ 
+  pSrc = (q31_t *) pSrc16; 
+  pCoeff = (q31_t *) pCoef16; 
+ 
+  /*  Initializations for the first stage */ 
+  n2 = fftLen; 
+  n1 = n2; 
+ 
+  /* n2 = fftLen/4 */ 
+  n2 >>= 2u; 
+ 
+  /* Index for twiddle coefficient */ 
+  ic = 0u; 
+ 
+  /* Index for input read and output write */ 
+  i0 = 0u; 
+  j = n2; 
+ 
+  /* Input is in 1.15(q15) format */ 
+ 
+  /*  start of first stage process */ 
+  do 
+  { 
+    /*  Butterfly implementation */ 
+ 
+    /*  index calculation for the input as, */ 
+    /*  pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */ 
+    i1 = i0 + n2; 
+    i2 = i1 + n2; 
+    i3 = i2 + n2; 
+ 
+    /*  Reading i0, i0+fftLen/2 inputs */ 
+    /* Read ya (real), xa(imag) input */ 
+    T = pSrc[i0]; 
+    in = ((int16_t) (T & 0xFFFF)) >> 2; 
+    T = ((T >> 2) & 0xFFFF0000) | (in & 0xFFFF); 
+    /* Read yc (real), xc(imag) input */ 
+    S = pSrc[i2]; 
+    in = ((int16_t) (S & 0xFFFF)) >> 2; 
+    S = ((S >> 2) & 0xFFFF0000) | (in & 0xFFFF); 
+    /* R = packed((ya + yc), (xa + xc) ) */ 
+    R = __QADD16(T, S); 
+    /* S = packed((ya - yc), (xa - xc) ) */ 
+    S = __QSUB16(T, S); 
+ 
+    /*  Reading i0+fftLen/4 , i0+3fftLen/4 inputs */ 
+    /* Read yb (real), xb(imag) input */ 
+    T = pSrc[i1]; 
+    in = ((int16_t) (T & 0xFFFF)) >> 2; 
+    T = ((T >> 2) & 0xFFFF0000) | (in & 0xFFFF); 
+    /* Read yd (real), xd(imag) input */ 
+    U = pSrc[i3]; 
+    in = ((int16_t) (U & 0xFFFF)) >> 2; 
+    U = ((U >> 2) & 0xFFFF0000) | (in & 0xFFFF); 
+    /* T = packed((yb + yd), (xb + xd) ) */ 
+    T = __QADD16(T, U); 
+ 
+    /*  writing the butterfly processed i0 sample */ 
+    /* xa' = xa + xb + xc + xd */ 
+    /* ya' = ya + yb + yc + yd */ 
+    pSrc[i0] = __SHADD16(R, T); 
+ 
+    /* R = packed((ya + yc) - (yb + yd), (xa + xc)- (xb + xd)) */ 
+    R = __QSUB16(R, T); 
+ 
+    /* co2 & si2 are read from SIMD Coefficient pointer */ 
+    C2 = pCoeff[2u * ic]; 
+ 
+    /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */ 
+    out1 = __SMUAD(C2, R) >> 16u; 
+    /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ 
+    out2 = __SMUSDX(C2, R); 
+ 
+    /*  Reading i0+fftLen/4 */ 
+    /* T = packed(yb, xb) */ 
+    T = pSrc[i1]; 
+    in = ((int16_t) (T & 0xFFFF)) >> 2; 
+    T = ((T >> 2) & 0xFFFF0000) | (in & 0xFFFF); 
+ 
+    /* writing the butterfly processed i0 + fftLen/4 sample */ 
+    /* writing output(xc', yc') in little endian format */ 
+    pSrc[i1] = (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF); 
+ 
+    /*  Butterfly calculations */ 
+    /* U = packed(yd, xd) */ 
+    U = pSrc[i3]; 
+    in = ((int16_t) (U & 0xFFFF)) >> 2; 
+    U = ((U >> 2) & 0xFFFF0000) | (in & 0xFFFF); 
+    /* T = packed(yb-yd, xb-xd) */ 
+    T = __QSUB16(T, U); 
+ 
+    /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */ 
+    R = __QASX(S, T); 
+    /* S = packed((ya-yc) - (xb- xd),  (xa-xc) + (yb-yd)) */ 
+    S = __QSAX(S, T); 
+ 
+    /* co1 & si1 are read from SIMD Coefficient pointer */ 
+    C1 = pCoeff[ic]; 
+    /*  Butterfly process for the i0+fftLen/2 sample */ 
+    /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */ 
+    out1 = __SMUAD(C1, S) >> 16u; 
+    /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */ 
+    out2 = __SMUSDX(C1, S); 
+    /* writing output(xb', yb') in little endian format */ 
+    pSrc[i2] = ((out2) & 0xFFFF0000) | ((out1) & 0x0000FFFF); 
+ 
+ 
+    /* co3 & si3 are read from SIMD Coefficient pointer */ 
+    C3 = pCoeff[3u * ic]; 
+    /*  Butterfly process for the i0+3fftLen/4 sample */ 
+    /* xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */ 
+    out1 = __SMUAD(C3, R) >> 16u; 
+    /* yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */ 
+    out2 = __SMUSDX(C3, R); 
+    /* writing output(xd', yd') in little endian format */ 
+    pSrc[i3] = ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF); 
+ 
+    /*  Twiddle coefficients index modifier */ 
+    ic = ic + twidCoefModifier; 
+ 
+    /*  Updating input index */ 
+    i0 = i0 + 1u; 
+ 
+  } while(--j); 
+  /* data is in 4.11(q11) format */ 
+ 
+  /* end of first stage process */ 
+ 
+ 
+  /* start of middle stage process */ 
+ 
+  /*  Twiddle coefficients index modifier */ 
+  twidCoefModifier <<= 2u; 
+ 
+  /*  Calculation of Middle stage */ 
+  for (k = fftLen / 4u; k > 4u; k >>= 2u) 
+  { 
+    /*  Initializations for the middle stage */ 
+    n1 = n2; 
+    n2 >>= 2u; 
+    ic = 0u; 
+ 
+    for (j = 0u; j <= (n2 - 1u); j++) 
+    { 
+      /*  index calculation for the coefficients */ 
+      C1 = pCoeff[ic]; 
+      C2 = pCoeff[2u * ic]; 
+      C3 = pCoeff[3u * ic]; 
+ 
+      /*  Twiddle coefficients index modifier */ 
+      ic = ic + twidCoefModifier; 
+ 
+      /*  Butterfly implementation */ 
+      for (i0 = j; i0 < fftLen; i0 += n1) 
+      { 
+        /*  index calculation for the input as, */ 
+        /*  pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */ 
+        i1 = i0 + n2; 
+        i2 = i1 + n2; 
+        i3 = i2 + n2; 
+ 
+        /*  Reading i0, i0+fftLen/2 inputs */ 
+        /* Read ya (real), xa(imag) input */ 
+        T = pSrc[i0]; 
+ 
+        /* Read yc (real), xc(imag) input */ 
+        S = pSrc[i2]; 
+ 
+        /* R = packed( (ya + yc), (xa + xc)) */ 
+        R = __QADD16(T, S); 
+ 
+        /* S = packed((ya - yc), (xa - xc)) */ 
+        S = __QSUB16(T, S); 
+ 
+        /*  Reading i0+fftLen/4 , i0+3fftLen/4 inputs */ 
+        /* Read yb (real), xb(imag) input */ 
+        T = pSrc[i1]; 
+ 
+        /* Read yd (real), xd(imag) input */ 
+        U = pSrc[i3]; 
+ 
+ 
+        /* T = packed( (yb + yd), (xb + xd)) */ 
+        T = __QADD16(T, U); 
+ 
+ 
+        /*  writing the butterfly processed i0 sample */ 
+ 
+        /* xa' = xa + xb + xc + xd */ 
+        /* ya' = ya + yb + yc + yd */ 
+        out1 = __SHADD16(R, T); 
+        in = ((int16_t) (out1 & 0xFFFF)) >> 1; 
+        out1 = ((out1 >> 1) & 0xFFFF0000) | (in & 0xFFFF); 
+        pSrc[i0] = out1; 
+ 
+        /* R = packed( (ya + yc) - (yb + yd), (xa + xc) - (xb + xd)) */ 
+        R = __SHSUB16(R, T); 
+ 
+        /* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */ 
+        out1 = __SMUAD(C2, R) >> 16u; 
+ 
+        /* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ 
+        out2 = __SMUSDX(C2, R); 
+ 
+        /*  Reading i0+3fftLen/4 */ 
+        /* Read yb (real), xb(imag) input */ 
+        T = pSrc[i1]; 
+ 
+        /*  writing the butterfly processed i0 + fftLen/4 sample */ 
+        /* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */ 
+        /* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */ 
+        pSrc[i1] = ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF); 
+ 
+        /*  Butterfly calculations */ 
+ 
+        /* Read yd (real), xd(imag) input */ 
+        U = pSrc[i3]; 
+ 
+        /* T = packed(yb-yd, xb-xd) */ 
+        T = __QSUB16(T, U); 
+ 
+        /* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */ 
+        R = __SHASX(S, T); 
+ 
+        /* S = packed((ya-yc) - (xb- xd),  (xa-xc) + (yb-yd)) */ 
+        S = __SHSAX(S, T); 
+ 
+ 
+        /*  Butterfly process for the i0+fftLen/2 sample */ 
+        out1 = __SMUAD(C1, S) >> 16u; 
+        out2 = __SMUSDX(C1, S); 
+ 
+        /* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */ 
+        /* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */ 
+        pSrc[i2] = ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF); 
+ 
+        /*  Butterfly process for the i0+3fftLen/4 sample */ 
+        out1 = __SMUAD(C3, R) >> 16u; 
+        out2 = __SMUSDX(C3, R); 
+        /* xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */ 
+        /* yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */ 
+        pSrc[i3] = ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF); 
+      } 
+    } 
+    /*  Twiddle coefficients index modifier */ 
+    twidCoefModifier <<= 2u; 
+  } 
+  /* end of middle stage process */ 
+ 
+ 
+  /* data is in 10.6(q6) format for the 1024 point */ 
+  /* data is in 8.8(q8) format for the 256 point */ 
+  /* data is in 6.10(q10) format for the 64 point */ 
+  /* data is in 4.12(q12) format for the 16 point */ 
+ 
+  /*  Initializations for the last stage */ 
+  n1 = n2; 
+  n2 >>= 2u; 
+ 
+  /* start of last stage process */ 
+ 
+  /*  Butterfly implementation */ 
+  for (i0 = 0u; i0 <= (fftLen - n1); i0 += n1) 
+  { 
+    /*  index calculation for the input as, */ 
+    /*  pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */ 
+    i1 = i0 + n2; 
+    i2 = i1 + n2; 
+    i3 = i2 + n2; 
+ 
+    /*  Reading i0, i0+fftLen/2 inputs */ 
+    /* Read ya (real), xa(imag) input */ 
+    T = pSrc[i0]; 
+    /* Read yc (real), xc(imag) input */ 
+    S = pSrc[i2]; 
+ 
+    /* R = packed((ya + yc), (xa + xc)) */ 
+    R = __QADD16(T, S); 
+    /* S = packed((ya - yc), (xa - xc)) */ 
+    S = __QSUB16(T, S); 
+ 
+    /*  Reading i0+fftLen/4 , i0+3fftLen/4 inputs */ 
+    /* Read yb (real), xb(imag) input */ 
+    T = pSrc[i1]; 
+    /* Read yd (real), xd(imag) input */ 
+    U = pSrc[i3]; 
+ 
+    /* T = packed((yb + yd), (xb + xd)) */ 
+    T = __QADD16(T, U); 
+ 
+    /*  writing the butterfly processed i0 sample */ 
+    /* xa' = xa + xb + xc + xd */ 
+    /* ya' = ya + yb + yc + yd */ 
+    pSrc[i0] = __SHADD16(R, T); 
+ 
+    /* R = packed((ya + yc) - (yb + yd), (xa + xc) - (xb + xd)) */ 
+    R = __SHSUB16(R, T); 
+ 
+    /* Read yb (real), xb(imag) input */ 
+    T = pSrc[i1]; 
+ 
+    /*  writing the butterfly processed i0 + fftLen/4 sample */ 
+    /* xc' = (xa-xb+xc-xd) */ 
+    /* yc' = (ya-yb+yc-yd) */ 
+    pSrc[i1] = R; 
+ 
+    /* Read yd (real), xd(imag) input */ 
+    U = pSrc[i3]; 
+    /* T = packed( (yb - yd), (xb - xd))  */ 
+    T = __QSUB16(T, U); 
+ 
+    /*  writing the butterfly processed i0 + fftLen/2 sample */ 
+    /* xb' = (xa+yb-xc-yd) */ 
+    /* yb' = (ya-xb-yc+xd) */ 
+    pSrc[i2] = __SHSAX(S, T); 
+ 
+    /*  writing the butterfly processed i0 + 3fftLen/4 sample */ 
+    /* xd' = (xa-yb-xc+yd) */ 
+    /* yd' = (ya+xb-yc-xd) */ 
+    pSrc[i3] = __SHASX(S, T); 
+ 
+  } 
+ 
+  /* end of last stage process */ 
+ 
+  /* output is in 11.5(q5) format for the 1024 point */ 
+  /* output is in 9.7(q7) format for the 256 point   */ 
+  /* output is in 7.9(q9) format for the 64 point  */ 
+  /* output is in 5.11(q11) format for the 16 point  */ 
+ 
+} 
+ 
+ 
+/**  
+ * @brief  Core function for the Q15 CIFFT butterfly process. 
+ * @param[in, out] *pSrc16          points to the in-place buffer of Q15 data type. 
+ * @param[in]      fftLen           length of the FFT. 
+ * @param[in]      *pCoef16         points to twiddle coefficient buffer. 
+ * @param[in]      twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. 
+ * @return none. 
+ */ 
+ 
+/*  
+* Radix-4 IFFT algorithm used is :  
+*  
+* CIFFT uses same twiddle coefficients as CFFT function  
+*  x[k] = x[n] + (j)^k * x[n + fftLen/4] + (-1)^k * x[n+fftLen/2] + (-j)^k * x[n+3*fftLen/4]  
+*  
+*  
+* IFFT is implemented with following changes in equations from FFT  
+*  
+* Input real and imaginary data:  
+* x(n) = xa + j * ya  
+* x(n+N/4 ) = xb + j * yb  
+* x(n+N/2 ) = xc + j * yc  
+* x(n+3N/4) = xd + j * yd  
+*  
+*  
+* Output real and imaginary data:  
+* x(4r) = xa'+ j * ya'  
+* x(4r+1) = xb'+ j * yb'  
+* x(4r+2) = xc'+ j * yc'  
+* x(4r+3) = xd'+ j * yd'  
+*  
+*  
+* Twiddle factors for radix-4 IFFT:  
+* Wn = co1 + j * (si1)  
+* W2n = co2 + j * (si2)  
+* W3n = co3 + j * (si3)  
+  
+* The real and imaginary output values for the radix-4 butterfly are  
+* xa' = xa + xb + xc + xd  
+* ya' = ya + yb + yc + yd  
+* xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1)  
+* yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1)  
+* xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2)  
+* yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2)  
+* xd' = (xa+yb-xc-yd)* co3 - (ya-xb-yc+xd)* (si3)  
+* yd' = (ya-xb-yc+xd)* co3 + (xa+yb-xc-yd)* (si3)  
+*  
+*/ 
+ 
+void arm_radix4_butterfly_inverse_q15( 
+  q15_t * pSrc16, 
+  uint32_t fftLen, 
+  q15_t * pCoef16, 
+  uint32_t twidCoefModifier) 
+{ 
+  q31_t R, S, T, U; 
+  q31_t C1, C2, C3, out1, out2; 
+  q31_t *pSrc, *pCoeff; 
+  uint32_t n1, n2, ic, i0, i1, i2, i3, j, k; 
+  q15_t in; 
+  /* In-place radix-4 inverse (CIFFT) butterflies over the Q15 buffer pSrc16. */ 
+  /* Total process is divided into three stages: */ 
+ 
+  /* first stage (scales the input), middle stages, & last stage (no twiddle multiplies) */ 
+ 
+  /*  pointer initializations for SIMD calculations: one q31_t word holds two q15 halfwords */ 
+  pSrc = (q31_t *) pSrc16; 
+  pCoeff = (q31_t *) pCoef16; 
+ 
+  /*  Initializations for the first stage */ 
+  n2 = fftLen; 
+  n1 = n2; 
+ 
+  /* n2 = fftLen/4 */ 
+  n2 >>= 2u; 
+ 
+  /* Index for twiddle coefficient */ 
+  ic = 0u; 
+ 
+  /* Index for input read and output write */ 
+  i0 = 0u; 
+  /* Loop counter: the first stage performs fftLen/4 butterflies */ 
+  j = n2; 
+ 
+  /* Input is in 1.15(q15) format */ 
+ 
+  /*  Start of first stage process */ 
+  do 
+  { 
+    /*  Butterfly implementation */ 
+ 
+    /*  index calculation for the input as, */ 
+    /*  pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2], pSrc[i0 + 3fftLen/4] */ 
+    i1 = i0 + n2; 
+    i2 = i1 + n2; 
+    i3 = i2 + n2; 
+ 
+    /*  Reading i0, i0+fftLen/2 inputs; both 16-bit halves are shifted right by 2 on load */ 
+    /* T = packed(ya, xa): ya in the upper halfword, xa in the lower (same packing as the outputs below) */ 
+    T = pSrc[i0]; 
+    in = ((int16_t) (T & 0xFFFF)) >> 2; 
+    T = ((T >> 2) & 0xFFFF0000) | (in & 0xFFFF); 
+    /* S = packed(yc, xc), scaled the same way */ 
+    S = pSrc[i2]; 
+    in = ((int16_t) (S & 0xFFFF)) >> 2; 
+    S = ((S >> 2) & 0xFFFF0000) | (in & 0xFFFF); 
+ 
+    /* R = packed((ya + yc), (xa + xc) ) */ 
+    R = __QADD16(T, S); 
+    /* S = packed((ya - yc), (xa - xc) ) */ 
+    S = __QSUB16(T, S); 
+ 
+    /*  Reading i0+fftLen/4 , i0+3fftLen/4 inputs */ 
+    /* T = packed(yb, xb), scaled the same way */ 
+    T = pSrc[i1]; 
+    in = ((int16_t) (T & 0xFFFF)) >> 2; 
+    T = ((T >> 2) & 0xFFFF0000) | (in & 0xFFFF); 
+    /* U = packed(yd, xd), scaled the same way */ 
+    U = pSrc[i3]; 
+    in = ((int16_t) (U & 0xFFFF)) >> 2; 
+    U = ((U >> 2) & 0xFFFF0000) | (in & 0xFFFF); 
+ 
+    /* T = packed((yb + yd), (xb + xd) ) */ 
+    T = __QADD16(T, U); 
+ 
+    /*  writing the butterfly processed i0 sample */ 
+    /* xa' = xa + xb + xc + xd */ 
+    /* ya' = ya + yb + yc + yd */ 
+    pSrc[i0] = __SHADD16(R, T); 
+ 
+    /* R = packed((ya + yc) - (yb + yd), (xa + xc)- (xb + xd)) */ 
+    R = __QSUB16(R, T); 
+    /* co2 & si2 are read from SIMD Coefficient pointer */ 
+    C2 = pCoeff[2u * ic]; 
+    /* xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2) */ 
+    out1 = __SMUSD(C2, R) >> 16u; 
+    /* yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */ 
+    out2 = __SMUADX(C2, R); 
+ 
+    /*  Reading i0+fftLen/4 */ 
+    /* T = packed(yb, xb): reloaded because T now holds a sum; read before pSrc[i1] is overwritten */ 
+    T = pSrc[i1]; 
+    in = ((int16_t) (T & 0xFFFF)) >> 2; 
+    T = ((T >> 2) & 0xFFFF0000) | (in & 0xFFFF); 
+ 
+    /* writing the butterfly processed i0 + fftLen/4 sample */ 
+    /* writing output(xc', yc'): xc' in the lower halfword, yc' in the upper halfword */ 
+    pSrc[i1] = (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF); 
+ 
+    /*  Butterfly calculations */ 
+    /* U = packed(yd, xd), reloaded and scaled again because U is needed unmodified here */ 
+    U = pSrc[i3]; 
+    in = ((int16_t) (U & 0xFFFF)) >> 2; 
+    U = ((U >> 2) & 0xFFFF0000) | (in & 0xFFFF); 
+ 
+    /* T = packed(yb-yd, xb-xd) */ 
+    T = __QSUB16(T, U); 
+    /* R = packed((ya-yc) - (xb- xd) , (xa-xc) + (yb-yd)) */ 
+    R = __QSAX(S, T); 
+    /* S = packed((ya-yc) + (xb- xd),  (xa-xc) - (yb-yd)) */ 
+    S = __QASX(S, T); 
+ 
+    /* co1 & si1 are read from SIMD Coefficient pointer */ 
+    C1 = pCoeff[ic]; 
+    /*  Butterfly process for the i0+fftLen/2 sample */ 
+    /* xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1) */ 
+    out1 = __SMUSD(C1, S) >> 16u; 
+    /* yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1) */ 
+    out2 = __SMUADX(C1, S); 
+    /* writing output(xb', yb'): xb' in the lower halfword, yb' in the upper halfword */ 
+    pSrc[i2] = ((out2) & 0xFFFF0000) | ((out1) & 0x0000FFFF); 
+ 
+    /* co3 & si3 are read from SIMD Coefficient pointer */ 
+    C3 = pCoeff[3u * ic]; 
+    /*  Butterfly process for the i0+3fftLen/4 sample */ 
+    /* xd' = (xa+yb-xc-yd)* co3 - (ya-xb-yc+xd)* (si3) */ 
+    out1 = __SMUSD(C3, R) >> 16u; 
+    /* yd' = (ya-xb-yc+xd)* co3 + (xa+yb-xc-yd)* (si3) */ 
+    out2 = __SMUADX(C3, R); 
+    /* writing output(xd', yd'): xd' in the lower halfword, yd' in the upper halfword */ 
+    pSrc[i3] = ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF); 
+ 
+    /*  Twiddle coefficients index modifier */ 
+    ic = ic + twidCoefModifier; 
+ 
+    /*  Updating input index */ 
+    i0 = i0 + 1u; 
+ 
+  } while(--j); 
+ 
+  /*  End of first stage process */ 
+ 
+  /* data is in 4.11(q11) format */ 
+ 
+ 
+  /*  Start of Middle stage process */ 
+ 
+  /*  Twiddle coefficients index modifier: multiplied by 4 for each following stage */ 
+  twidCoefModifier <<= 2u; 
+ 
+  /*  Calculation of Middle stage; k is only a stage counter (fftLen/4, fftLen/16, ... while k > 4) */ 
+  for (k = fftLen / 4u; k > 4u; k >>= 2u) 
+  { 
+    /*  Initializations for the middle stage */ 
+    n1 = n2; 
+    n2 >>= 2u; 
+    ic = 0u; 
+ 
+    for (j = 0u; j <= (n2 - 1u); j++) 
+    { 
+      /*  index calculation for the coefficients: C1, C2, C3 are reused for every i0 in the loop below */ 
+      C1 = pCoeff[ic]; 
+      C2 = pCoeff[2u * ic]; 
+      C3 = pCoeff[3u * ic]; 
+ 
+      /*  Twiddle coefficients index modifier */ 
+      ic = ic + twidCoefModifier; 
+ 
+      /*  Butterfly implementation */ 
+      for (i0 = j; i0 < fftLen; i0 += n1) 
+      { 
+        /*  index calculation for the input as, */ 
+        /*  pSrc[i0 + 0], pSrc[i0 + n2], pSrc[i0 + 2*n2], pSrc[i0 + 3*n2] */ 
+        i1 = i0 + n2; 
+        i2 = i1 + n2; 
+        i3 = i2 + n2; 
+ 
+        /*  Reading i0, i0 + 2*n2 inputs */ 
+        /* T = packed(ya, xa) */ 
+        T = pSrc[i0]; 
+ 
+        /* S = packed(yc, xc) */ 
+        S = pSrc[i2]; 
+ 
+ 
+        /* R = packed( (ya + yc), (xa + xc)) */ 
+        R = __QADD16(T, S); 
+        /* S = packed((ya - yc), (xa - xc)) */ 
+        S = __QSUB16(T, S); 
+ 
+        /*  Reading i0 + n2, i0 + 3*n2 inputs */ 
+        /* T = packed(yb, xb) */ 
+        T = pSrc[i1]; 
+ 
+        /* U = packed(yd, xd) */ 
+        U = pSrc[i3]; 
+ 
+ 
+        /* T = packed( (yb + yd), (xb + xd)) */ 
+        T = __QADD16(T, U); 
+ 
+        /*  writing the butterfly processed i0 sample; after __SHADD16 each half is shifted right once more */ 
+        /* xa' = xa + xb + xc + xd */ 
+        /* ya' = ya + yb + yc + yd */ 
+        out1 = __SHADD16(R, T); 
+        in = ((int16_t) (out1 & 0xFFFF)) >> 1; 
+        out1 = ((out1 >> 1) & 0xFFFF0000) | (in & 0xFFFF); 
+        pSrc[i0] = out1; 
+ 
+ 
+ 
+        /* R = packed( (ya + yc) - (yb + yd), (xa + xc) - (xb + xd)) */ 
+        R = __SHSUB16(R, T); 
+ 
+        /* (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2) */ 
+        out1 = __SMUSD(C2, R) >> 16u; 
+        /* (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */ 
+        out2 = __SMUADX(C2, R); 
+ 
+        /*  Reading i0 + n2 */ 
+        /* T = packed(yb, xb): reloaded because T now holds a sum; read before pSrc[i1] is overwritten */ 
+        T = pSrc[i1]; 
+ 
+        /*  writing the butterfly processed i0 + n2 sample */ 
+        /* xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2) */ 
+        /* yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2) */ 
+        pSrc[i1] = ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF); 
+ 
+        /*  Butterfly calculations */ 
+        /* U = packed(yd, xd) */ 
+        U = pSrc[i3]; 
+ 
+        /* T = packed(yb-yd, xb-xd) */ 
+        T = __QSUB16(T, U); 
+ 
+        /* R = packed((ya-yc) - (xb- xd) , (xa-xc) + (yb-yd)) */ 
+        R = __SHSAX(S, T); 
+ 
+        /* S = packed((ya-yc) + (xb- xd),  (xa-xc) - (yb-yd)) */ 
+        S = __SHASX(S, T); 
+ 
+        /*  Butterfly process for the i0 + 2*n2 sample */ 
+        out1 = __SMUSD(C1, S) >> 16u; 
+        out2 = __SMUADX(C1, S); 
+        /* xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1) */ 
+        /* yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1) */ 
+        pSrc[i2] = ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF); 
+ 
+        /*  Butterfly process for the i0 + 3*n2 sample */ 
+        out1 = __SMUSD(C3, R) >> 16u; 
+        out2 = __SMUADX(C3, R); 
+        /* xd' = (xa+yb-xc-yd)* co3 - (ya-xb-yc+xd)* (si3) */ 
+        /* yd' = (ya-xb-yc+xd)* co3 + (xa+yb-xc-yd)* (si3) */ 
+        pSrc[i3] = ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF); 
+ 
+ 
+      } 
+    } 
+    /*  Twiddle coefficients index modifier */ 
+    twidCoefModifier <<= 2u; 
+  } 
+  /*  End of Middle stages process */ 
+ 
+ 
+  /* data is in 10.6(q6) format for the 1024 point */ 
+  /* data is in 8.8(q8) format for the 256 point   */ 
+  /* data is in 6.10(q10) format for the 64 point  */ 
+  /* data is in 4.12(q12) format for the 16 point  */ 
+ 
+  /* start of last stage process: no twiddle multiplications are performed here */ 
+ 
+ 
+  /*  Initializations for the last stage */ 
+  n1 = n2; 
+  n2 >>= 2u; 
+ 
+  /*  Butterfly implementation */ 
+  for (i0 = 0u; i0 <= (fftLen - n1); i0 += n1) 
+  { 
+    /*  index calculation for the input as, */ 
+    /*  pSrc[i0 + 0], pSrc[i0 + n2], pSrc[i0 + 2*n2], pSrc[i0 + 3*n2] */ 
+    i1 = i0 + n2; 
+    i2 = i1 + n2; 
+    i3 = i2 + n2; 
+ 
+    /*  Reading i0, i0 + 2*n2 inputs */ 
+    /* T = packed(ya, xa) */ 
+    T = pSrc[i0]; 
+    /* S = packed(yc, xc) */ 
+    S = pSrc[i2]; 
+ 
+    /* R = packed((ya + yc), (xa + xc)) */ 
+    R = __QADD16(T, S); 
+    /* S = packed((ya - yc), (xa - xc)) */ 
+    S = __QSUB16(T, S); 
+ 
+    /*  Reading i0 + n2, i0 + 3*n2 inputs */ 
+    /* T = packed(yb, xb) */ 
+    T = pSrc[i1]; 
+    /* U = packed(yd, xd) */ 
+    U = pSrc[i3]; 
+ 
+    /* T = packed((yb + yd), (xb + xd)) */ 
+    T = __QADD16(T, U); 
+ 
+    /*  writing the butterfly processed i0 sample */ 
+    /* xa' = xa + xb + xc + xd */ 
+    /* ya' = ya + yb + yc + yd */ 
+    pSrc[i0] = __SHADD16(R, T); 
+ 
+    /* R = packed((ya + yc) - (yb + yd), (xa + xc) - (xb + xd)) */ 
+    R = __SHSUB16(R, T); 
+ 
+    /* T = packed(yb, xb): reloaded because T now holds a sum; read before pSrc[i1] is overwritten */ 
+    T = pSrc[i1]; 
+ 
+    /*  writing the butterfly processed i0 + n2 sample */ 
+    /* xc' = (xa-xb+xc-xd) */ 
+    /* yc' = (ya-yb+yc-yd) */ 
+    pSrc[i1] = R; 
+ 
+    /* U = packed(yd, xd) */ 
+    U = pSrc[i3]; 
+    /* T = packed( (yb - yd), (xb - xd))  */ 
+    T = __QSUB16(T, U); 
+ 
+    /*  writing the butterfly processed i0 + 2*n2 sample */ 
+    /* xb' = (xa-yb-xc+yd) */ 
+    /* yb' = (ya+xb-yc-xd) */ 
+    pSrc[i2] = __SHASX(S, T); 
+ 
+    /*  writing the butterfly processed i0 + 3*n2 sample */ 
+    /* xd' = (xa+yb-xc-yd) */ 
+    /* yd' = (ya-xb-yc+xd) */ 
+    pSrc[i3] = __SHSAX(S, T); 
+  } 
+  /* end of last stage  process */ 
+ 
+  /* output is in 11.5(q5) format for the 1024 point */ 
+  /* output is in 9.7(q7) format for the 256 point   */ 
+  /* output is in 7.9(q9) format for the 64 point  */ 
+  /* output is in 5.11(q11) format for the 16 point  */ 
+} 
+ 
+ 
+/* 
+   * @brief  Reorders a Q15 complex buffer in place using a bit reversal table. 
+   * @param[in, out] *pSrc16      points to the Q15 buffer; it is accessed as 32-bit words (two q15 values each). 
+   * @param[in]      fftLen       length of the FFT. 
+   * @param[in]      bitRevFactor step applied to pBitRevTab after each entry is read, so one table can serve several FFT sizes. 
+   * @param[in]      *pBitRevTab  points to the bit reversal table; each entry read becomes the next swap index. 
+   * @return none. 
+ */ 
+ 
+void arm_bitreversal_q15( 
+  q15_t * pSrc16, 
+  uint32_t fftLen, 
+  uint16_t bitRevFactor, 
+  uint16_t * pBitRevTab) 
+{ 
+  /* Each 32-bit word carries two q15 values, so one swap moves both of them together */ 
+  q31_t *pWords = (q31_t *) pSrc16; 
+ 
+  /* Half the FFT length, used as the offset into the second half of the buffer */ 
+  const uint32_t half = fftLen / 2u; 
+  const uint32_t lastFwd = half - 2u; 
+  uint32_t fwd = 0u; 
+  /* Swap partner of fwd; starts at 0 and is then taken from the table */ 
+  uint32_t rev = 0u; 
+  q31_t held; 
+ 
+  /* fwd walks the even indices 0, 2, ... up to (half - 2) */ 
+  while (fwd <= lastFwd) 
+  { 
+    /* These two exchanges happen only when the partner index is larger than fwd */ 
+    if(rev > fwd) 
+    { 
+      /* pWords[fwd] <-> pWords[rev] */ 
+      held = pWords[fwd]; 
+      pWords[fwd] = pWords[rev]; 
+      pWords[rev] = held; 
+ 
+      /* pWords[fwd + half + 1] <-> pWords[rev + half + 1] */ 
+      held = pWords[fwd + half + 1u]; 
+      pWords[fwd + half + 1u] = pWords[rev + half + 1u]; 
+      pWords[rev + half + 1u] = held; 
+    } 
+ 
+    /* Always performed: pWords[fwd + 1] <-> pWords[rev + half] */ 
+    held = pWords[fwd + 1u]; 
+    pWords[fwd + 1u] = pWords[rev + half]; 
+    pWords[rev + half] = held; 
+ 
+    /* Fetch the partner index for the next pass, then step the table by bitRevFactor */ 
+    rev = *pBitRevTab; 
+    pBitRevTab += bitRevFactor; 
+ 
+    /* Advance to the next even index */ 
+    fwd += 2u; 
+  } 
+}

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/TransformFunctions/arm_cfft_radix4_q31.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,903 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:	    arm_cfft_radix4_q31.c  
+*  
+* Description:	This file has function definition of Radix-4 FFT & IFFT function and  
+*				In-place bit reversal using bit reversal table  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.5  2010/04/26   
+* 	 incorporated review comments and updated with latest CMSIS layer  
+*  
+* Version 0.0.3  2010/03/10   
+*    Initial version  
+* -------------------------------------------------------------------- */ 
+#include "arm_math.h" 
+ 
+ 
+/**  
+ * @ingroup groupTransforms  
+ */ 
+ 
+/**  
+ * @addtogroup CFFT_CIFFT  
+ * @{  
+ */ 
+ 
+/**  
+ * @details  
+ * @brief Processing function for the Q31 CFFT/CIFFT.  
+ * @param[in]      *S    points to an instance of the Q31 CFFT/CIFFT structure. 
+ * @param[in, out] *pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place. 
+ * @return none.  
+ *   
+ * \par Input and output formats:  
+ * \par  
+ * Internally input is downscaled by 2 for every stage to avoid saturations inside CFFT/CIFFT process. 
+ * Hence the output format is different for different FFT sizes.  
+ * The input and output formats for different FFT sizes and number of bits to upscale are mentioned in the tables below for CFFT and CIFFT: 
+ * \par 
+ * \image html CFFTQ31.gif "Input and Output Formats for Q31 CFFT"  
+ * \image html CIFFTQ31.gif "Input and Output Formats for Q31 CIFFT"  
+ *  
+ */ 
+ 
+void arm_cfft_radix4_q31( 
+  const arm_cfft_radix4_instance_q31 * S, 
+  q31_t * pSrc) 
+{ 
+  /* Run the butterflies in place: forward unless the instance requests the inverse */ 
+  if(S->ifftFlag != 1u) 
+  { 
+    /* Forward transform (CFFT), radix-4 */ 
+    arm_radix4_butterfly_q31(pSrc, S->fftLen, S->pTwiddle, 
+                             S->twidCoefModifier); 
+  } 
+  else 
+  { 
+    /* Inverse transform (CIFFT), radix-4 */ 
+    arm_radix4_butterfly_inverse_q31(pSrc, S->fftLen, S->pTwiddle, 
+                                     S->twidCoefModifier); 
+  } 
+ 
+  /* Bit reversal is optional: nothing more to do unless the instance enables it */ 
+  if(S->bitReverseFlag != 1u) 
+  { 
+    return; 
+  } 
+  arm_bitreversal_q31(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable); 
+} 
+ 
+/**  
+ * @} end of CFFT_CIFFT group  
+ */ 
+ 
+/*  
+* Radix-4 FFT algorithm used is :  
+*  
+* Input real and imaginary data:  
+* x(n) = xa + j * ya  
+* x(n+N/4 ) = xb + j * yb  
+* x(n+N/2 ) = xc + j * yc  
+* x(n+3N/4) = xd + j * yd  
+*  
+*  
+* Output real and imaginary data:  
+* x(4r) = xa'+ j * ya'  
+* x(4r+1) = xb'+ j * yb'  
+* x(4r+2) = xc'+ j * yc'  
+* x(4r+3) = xd'+ j * yd'  
+*  
+*  
+* Twiddle factors for radix-4 FFT:  
+* Wn = co1 + j * (- si1)  
+* W2n = co2 + j * (- si2)  
+* W3n = co3 + j * (- si3)  
+*  
+*  Butterfly implementation:  
+* xa' = xa + xb + xc + xd  
+* ya' = ya + yb + yc + yd  
+* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1)  
+* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1)  
+* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2)  
+* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2)  
+* xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3)  
+* yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3)  
+*  
+*/ 
+ 
+/**  
+ * @brief  Core function for the Q31 CFFT butterfly process. 
+ * @param[in, out] *pSrc            points to the in-place buffer of Q31 data type. 
+ * @param[in]      fftLen           length of the FFT. 
+ * @param[in]      *pCoef           points to twiddle coefficient buffer. 
+ * @param[in]      twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. 
+ * @return none. 
+ */ 
+ 
+void arm_radix4_butterfly_q31( 
+  q31_t * pSrc, 
+  uint32_t fftLen, 
+  q31_t * pCoef, 
+  uint32_t twidCoefModifier) 
+{ 
+  uint32_t n1, n2, ia1, ia2, ia3, i0, i1, i2, i3, j, k; 
+  q31_t t1, t2, r1, r2, s1, s2, co1, co2, co3, si1, si2, si3; 
+  /* In-place radix-4 forward (CFFT) butterflies: pSrc[2*i] holds x and pSrc[2*i + 1] holds y of sample i */ 
+ 
+  /* Total process is divided into three stages */ 
+ 
+  /* process first stage (inputs >> 4u), middle stages, & last stage (no twiddle multiplies) */ 
+ 
+ 
+  /* start of first stage process */ 
+ 
+  /*  Initializations for the first stage */ 
+  n2 = fftLen; 
+  n1 = n2; 
+  /* n2 = fftLen/4 */ 
+  n2 >>= 2u; 
+  i0 = 0u; 
+  ia1 = 0u; 
+  /* Loop counter: the first stage performs fftLen/4 butterflies */ 
+  j = n2; 
+ 
+  /*  Calculation of first stage */ 
+  do 
+  { 
+    /*  index calculation for the input as, */ 
+    /*  pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2u], pSrc[i0 + 3fftLen/4] */ 
+    i1 = i0 + n2; 
+    i2 = i1 + n2; 
+    i3 = i2 + n2; 
+ 
+    /* input is in 1.31(q31) format; every load below is shifted right by 4 to provide 4 guard bits */ 
+ 
+    /*  Butterfly implementation */ 
+    /* xa + xc */ 
+    r1 = (pSrc[(2u * i0)] >> 4u) + (pSrc[(2u * i2)] >> 4u); 
+    /* xa - xc */ 
+    r2 = (pSrc[2u * i0] >> 4u) - (pSrc[2u * i2] >> 4u); 
+ 
+    /* ya + yc */ 
+    s1 = (pSrc[(2u * i0) + 1u] >> 4u) + (pSrc[(2u * i2) + 1u] >> 4u); 
+    /* ya - yc */ 
+    s2 = (pSrc[(2u * i0) + 1u] >> 4u) - (pSrc[(2u * i2) + 1u] >> 4u); 
+ 
+    /* xb + xd */ 
+    t1 = (pSrc[2u * i1] >> 4u) + (pSrc[2u * i3] >> 4u); 
+ 
+    /* xa' = xa + xb + xc + xd */ 
+    pSrc[2u * i0] = (r1 + t1); 
+    /* (xa + xc) - (xb + xd) */ 
+    r1 = r1 - t1; 
+    /* yb + yd */ 
+    t2 = (pSrc[(2u * i1) + 1u] >> 4u) + (pSrc[(2u * i3) + 1u] >> 4u); 
+    /* ya' = ya + yb + yc + yd */ 
+    pSrc[(2u * i0) + 1u] = (s1 + t2); 
+ 
+    /* (ya + yc) - (yb + yd) */ 
+    s1 = s1 - t2; 
+ 
+    /* yb - yd */ 
+    t1 = (pSrc[(2u * i1) + 1u] >> 4u) - (pSrc[(2u * i3) + 1u] >> 4u); 
+    /* xb - xd */ 
+    t2 = (pSrc[2u * i1] >> 4u) - (pSrc[2u * i3] >> 4u); 
+ 
+    /*  index calculation for the coefficients */ 
+    ia2 = 2u * ia1; 
+    co2 = pCoef[ia2 * 2u]; 
+    si2 = pCoef[(ia2 * 2u) + 1u]; 
+    /* Each product keeps the upper 32 bits of the 64-bit result (>> 32); the sum is then shifted left by 1 */ 
+    /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */ 
+    pSrc[2u * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32)) + 
+                     ((int32_t) (((q63_t) s1 * si2) >> 32))) << 1u; 
+ 
+    /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */ 
+    pSrc[(2u * i1) + 1u] = (((int32_t) (((q63_t) s1 * co2) >> 32)) - 
+                            ((int32_t) (((q63_t) r1 * si2) >> 32))) << 1u; 
+ 
+    /* (xa - xc) + (yb - yd) */ 
+    r1 = r2 + t1; 
+    /* (xa - xc) - (yb - yd) */ 
+    r2 = r2 - t1; 
+ 
+    /* (ya - yc) - (xb - xd) */ 
+    s1 = s2 - t2; 
+    /* (ya - yc) + (xb - xd) */ 
+    s2 = s2 + t2; 
+ 
+    co1 = pCoef[ia1 * 2u]; 
+    si1 = pCoef[(ia1 * 2u) + 1u]; 
+ 
+    /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */ 
+    pSrc[2u * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) + 
+                     ((int32_t) (((q63_t) s1 * si1) >> 32))) << 1u; 
+ 
+    /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */ 
+    pSrc[(2u * i2) + 1u] = (((int32_t) (((q63_t) s1 * co1) >> 32)) - 
+                            ((int32_t) (((q63_t) r1 * si1) >> 32))) << 1u; 
+ 
+    /*  index calculation for the coefficients */ 
+    ia3 = 3u * ia1; 
+    co3 = pCoef[ia3 * 2u]; 
+    si3 = pCoef[(ia3 * 2u) + 1u]; 
+ 
+    /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */ 
+    pSrc[2u * i3] = (((int32_t) (((q63_t) r2 * co3) >> 32)) + 
+                     ((int32_t) (((q63_t) s2 * si3) >> 32))) << 1u; 
+ 
+    /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */ 
+    pSrc[(2u * i3) + 1u] = (((int32_t) (((q63_t) s2 * co3) >> 32)) - 
+                            ((int32_t) (((q63_t) r2 * si3) >> 32))) << 1u; 
+ 
+    /*  Twiddle coefficients index modifier */ 
+    ia1 = ia1 + twidCoefModifier; 
+ 
+    /*  Updating input index */ 
+    i0 = i0 + 1u; 
+ 
+  } while(--j); 
+ 
+  /* end of first stage process */ 
+ 
+  /* data is in 5.27(q27) format */ 
+ 
+ 
+  /* start of Middle stages process */ 
+ 
+ 
+  /* each middle stage scales the data down by two bits (>> 2u on xa'/ya'; >> 32 followed by >> 1u on the products) */ 
+  /* the twiddle index step is multiplied by 4 for each following stage */ 
+  twidCoefModifier <<= 2u; 
+ 
+  /* k is only a stage counter: fftLen/4, fftLen/16, ... while k > 4 */ 
+  for (k = fftLen / 4u; k > 4u; k >>= 2u) 
+  { 
+    /*  Initializations for this middle stage */ 
+    n1 = n2; 
+    n2 >>= 2u; 
+    ia1 = 0u; 
+ 
+    /*  Calculation of this middle stage */ 
+    for (j = 0u; j <= (n2 - 1u); j++) 
+    { 
+      /*  index calculation for the coefficients */ 
+      ia2 = ia1 + ia1; 
+      ia3 = ia2 + ia1; 
+      co1 = pCoef[ia1 * 2u]; 
+      si1 = pCoef[(ia1 * 2u) + 1u]; 
+      co2 = pCoef[ia2 * 2u]; 
+      si2 = pCoef[(ia2 * 2u) + 1u]; 
+      co3 = pCoef[ia3 * 2u]; 
+      si3 = pCoef[(ia3 * 2u) + 1u]; 
+      /*  Twiddle coefficients index modifier */ 
+      ia1 = ia1 + twidCoefModifier; 
+ 
+      for (i0 = j; i0 < fftLen; i0 += n1) 
+      { 
+        /*  index calculation for the input as, */ 
+        /*  pSrc[i0 + 0], pSrc[i0 + n2], pSrc[i0 + 2*n2], pSrc[i0 + 3*n2] */ 
+        i1 = i0 + n2; 
+        i2 = i1 + n2; 
+        i3 = i2 + n2; 
+ 
+        /*  Butterfly implementation */ 
+        /* xa + xc */ 
+        r1 = pSrc[2u * i0] + pSrc[2u * i2]; 
+        /* xa - xc */ 
+        r2 = pSrc[2u * i0] - pSrc[2u * i2]; 
+ 
+        /* ya + yc */ 
+        s1 = pSrc[(2u * i0) + 1u] + pSrc[(2u * i2) + 1u]; 
+        /* ya - yc */ 
+        s2 = pSrc[(2u * i0) + 1u] - pSrc[(2u * i2) + 1u]; 
+ 
+        /* xb + xd */ 
+        t1 = pSrc[2u * i1] + pSrc[2u * i3]; 
+ 
+        /* xa' = xa + xb + xc + xd */ 
+        pSrc[2u * i0] = (r1 + t1) >> 2u; 
+        /* xa + xc -(xb + xd) */ 
+        r1 = r1 - t1; 
+ 
+        /* yb + yd */ 
+        t2 = pSrc[(2u * i1) + 1u] + pSrc[(2u * i3) + 1u]; 
+        /* ya' = ya + yb + yc + yd */ 
+        pSrc[(2u * i0) + 1u] = (s1 + t2) >> 2u; 
+ 
+        /* (ya + yc) - (yb + yd) */ 
+        s1 = s1 - t2; 
+ 
+        /* (yb - yd) */ 
+        t1 = pSrc[(2u * i1) + 1u] - pSrc[(2u * i3) + 1u]; 
+        /* (xb - xd) */ 
+        t2 = pSrc[2u * i1] - pSrc[2u * i3]; 
+ 
+        /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */ 
+        pSrc[2u * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32)) + 
+                         ((int32_t) (((q63_t) s1 * si2) >> 32))) >> 1u; 
+ 
+        /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */ 
+        pSrc[(2u * i1) + 1u] = (((int32_t) (((q63_t) s1 * co2) >> 32)) - 
+                                ((int32_t) (((q63_t) r1 * si2) >> 32))) >> 1u; 
+ 
+        /* (xa - xc) + (yb - yd) */ 
+        r1 = r2 + t1; 
+        /* (xa - xc) - (yb - yd) */ 
+        r2 = r2 - t1; 
+ 
+        /* (ya - yc) -  (xb - xd) */ 
+        s1 = s2 - t2; 
+        /* (ya - yc) +  (xb - xd) */ 
+        s2 = s2 + t2; 
+ 
+        /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */ 
+        pSrc[2u * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) + 
+                         ((int32_t) (((q63_t) s1 * si1) >> 32))) >> 1u; 
+ 
+        /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */ 
+        pSrc[(2u * i2) + 1u] = (((int32_t) (((q63_t) s1 * co1) >> 32)) - 
+                                ((int32_t) (((q63_t) r1 * si1) >> 32))) >> 1u; 
+ 
+        /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */ 
+        pSrc[2u * i3] = (((int32_t) (((q63_t) r2 * co3) >> 32)) + 
+                         ((int32_t) (((q63_t) s2 * si3) >> 32))) >> 1u; 
+ 
+        /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */ 
+        pSrc[(2u * i3) + 1u] = (((int32_t) (((q63_t) s2 * co3) >> 32)) - 
+                                ((int32_t) (((q63_t) r2 * si3) >> 32))) >> 1u; 
+      } 
+    } 
+    twidCoefModifier <<= 2u; 
+  } 
+ 
+  /* End of Middle stages process */ 
+ 
+  /* data is in 11.21(q21) format for the 1024 point as there are 3 middle stages */ 
+  /* data is in 9.23(q23) format for the 256 point as there are 2 middle stages */ 
+  /* data is in 7.25(q25) format for the 64 point as there are 1 middle stage */ 
+  /* data is in 5.27(q27) format for the 16 point as there are no middle stages */ 
+ 
+ 
+  /* start of Last stage process: sums and differences only, no twiddle multiplications */ 
+ 
+  /*  Initializations of last stage */ 
+  n1 = n2; 
+  n2 >>= 2u; 
+ 
+  /*  Calculations of last stage */ 
+  for (i0 = 0u; i0 <= (fftLen - n1); i0 += n1) 
+  { 
+    /*  index calculation for the input as, */ 
+    /*  pSrc[i0 + 0], pSrc[i0 + n2], pSrc[i0 + 2*n2], pSrc[i0 + 3*n2] */ 
+    i1 = i0 + n2; 
+    i2 = i1 + n2; 
+    i3 = i2 + n2; 
+ 
+    /*  Butterfly implementation */ 
+    /* xa + xc */ 
+    r1 = pSrc[2u * i0] + pSrc[2u * i2]; 
+    /* xa - xc */ 
+    r2 = pSrc[2u * i0] - pSrc[2u * i2]; 
+ 
+    /* ya + yc */ 
+    s1 = pSrc[(2u * i0) + 1u] + pSrc[(2u * i2) + 1u]; 
+    /* ya - yc */ 
+    s2 = pSrc[(2u * i0) + 1u] - pSrc[(2u * i2) + 1u]; 
+ 
+    /* xb + xd */ 
+    t1 = pSrc[2u * i1] + pSrc[2u * i3]; 
+    /* xa' = xa + xb + xc + xd */ 
+    pSrc[2u * i0] = (r1 + t1); 
+    /* (xa + xc) - (xb + xd) */ 
+    r1 = r1 - t1; 
+ 
+    /* yb + yd */ 
+    t2 = pSrc[(2u * i1) + 1u] + pSrc[(2u * i3) + 1u]; 
+    /* ya' = ya + yb + yc + yd */ 
+    pSrc[(2u * i0) + 1u] = (s1 + t2); 
+    /* (ya + yc) - (yb + yd) */ 
+    s1 = s1 - t2; 
+ 
+    /* (yb-yd) */ 
+    t1 = pSrc[(2u * i1) + 1u] - pSrc[(2u * i3) + 1u]; 
+    /* (xb-xd) */ 
+    t2 = pSrc[2u * i1] - pSrc[2u * i3]; 
+ 
+    /* xc' = (xa-xb+xc-xd) */ 
+    pSrc[2u * i1] = r1; 
+    /* yc' = (ya-yb+yc-yd) */ 
+    pSrc[(2u * i1) + 1u] = s1; 
+ 
+    /* (xa+yb-xc-yd) */ 
+    r1 = r2 + t1; 
+    /* (xa-yb-xc+yd) */ 
+    r2 = r2 - t1; 
+ 
+    /* (ya-xb-yc+xd) */ 
+    s1 = s2 - t2; 
+    /* (ya+xb-yc-xd) */ 
+    s2 = s2 + t2; 
+ 
+    /* xb' = (xa+yb-xc-yd) */ 
+    pSrc[2u * i2] = r1; 
+    /* yb' = (ya-xb-yc+xd) */ 
+    pSrc[(2u * i2) + 1u] = s1; 
+ 
+    /* xd' = (xa-yb-xc+yd) */ 
+    pSrc[2u * i3] = r2; 
+    /* yd' = (ya+xb-yc-xd) */ 
+    pSrc[(2u * i3) + 1u] = s2; 
+ 
+ 
+  } 
+ 
+  /* output is in 11.21(q21) format for the 1024 point */ 
+  /* output is in 9.23(q23) format for the 256 point */ 
+  /* output is in 7.25(q25) format for the 64 point */ 
+  /* output is in 5.27(q27) format for the 16 point */ 
+ 
+  /* End of last stage process */ 
+ 
+} 
+ 
+ 
+/**  
+ * @brief  Core function for the Q31 CIFFT butterfly process. 
+ * @param[in, out] *pSrc            points to the in-place buffer of Q31 data type. 
+ * @param[in]      fftLen           length of the FFT. 
+ * @param[in]      *pCoef           points to twiddle coefficient buffer. 
+ * @param[in]      twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. 
+ * @return none. 
+ */ 
+ 
+ 
+/*  
+* Radix-4 IFFT algorithm used is :  
+*  
+* CIFFT uses same twiddle coefficients as CFFT Function  
+*  x[k] = x[n] + (j)^k * x[n + fftLen/4] + (-1)^k * x[n+fftLen/2] + (-j)^k * x[n+3*fftLen/4]  
+*  
+*  
+* IFFT is implemented with following changes in equations from FFT  
+*  
+* Input real and imaginary data:  
+* x(n) = xa + j * ya  
+* x(n+N/4 ) = xb + j * yb  
+* x(n+N/2 ) = xc + j * yc  
+* x(n+3N/4) = xd + j * yd  
+*  
+*  
+* Output real and imaginary data:  
+* x(4r) = xa'+ j * ya'  
+* x(4r+1) = xb'+ j * yb'  
+* x(4r+2) = xc'+ j * yc'  
+* x(4r+3) = xd'+ j * yd'  
+*  
+*  
+* Twiddle factors for radix-4 IFFT:  
+* Wn = co1 + j * (si1)  
+* W2n = co2 + j * (si2)  
+* W3n = co3 + j * (si3)  
+  
+* The real and imaginary output values for the radix-4 butterfly are  
+* xa' = xa + xb + xc + xd  
+* ya' = ya + yb + yc + yd  
+* xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1)  
+* yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1)  
+* xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2)  
+* yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2)  
+* xd' = (xa+yb-xc-yd)* co3 - (ya-xb-yc+xd)* (si3)  
+* yd' = (ya-xb-yc+xd)* co3 + (xa+yb-xc-yd)* (si3)  
+*  
+*/ 
+ 
+void arm_radix4_butterfly_inverse_q31( 
+  q31_t * pSrc, 
+  uint32_t fftLen, 
+  q31_t * pCoef, 
+  uint32_t twidCoefModifier) 
+{ 
+  uint32_t n1, n2, ia1, ia2, ia3, i0, i1, i2, i3, j, k; 
+  q31_t t1, t2, r1, r2, s1, s2, co1, co2, co3, si1, si2, si3; 
+ 
+  /* Radix-4 inverse-FFT butterflies computed in place on pSrc. 
+   * pSrc is indexed as interleaved pairs: pSrc[2*i] is used as the real part and 
+   * pSrc[2*i + 1] as the imaginary part of element i.  pCoef is indexed the same way 
+   * (co at 2*ia, si at 2*ia + 1) and is stepped by twidCoefModifier entries per butterfly. 
+   * This function does not reorder its output. 
+   * NOTE(review): the first-stage do/while runs fftLen/4 times and its counter is 
+   * unsigned, so an fftLen below 4 would make the counter wrap. 
+   * Input is in 1.31(q31) format for all FFT sizes */ 
+  /* Total process is divided into three stages */ 
+  /* process first stage, middle stages, & last stage */ 
+ 
+  /* Start of first stage process */ 
+ 
+  /* Initializations for the first stage */ 
+  n2 = fftLen; 
+  n1 = n2; 
+  /* n2 = fftLen/4 */ 
+  n2 >>= 2u; 
+  i0 = 0u; 
+  ia1 = 0u; 
+ 
+  j = n2; 
+ 
+  do 
+  { 
+ 
+    /* input is in 1.31(q31) format; every sample is shifted right by 4 as it is read, which provides 4 guard bits */ 
+ 
+    /*  index calculation for the input as, */ 
+    /*  pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2u], pSrc[i0 + 3fftLen/4] */ 
+    i1 = i0 + n2; 
+    i2 = i1 + n2; 
+    i3 = i2 + n2; 
+ 
+    /*  Butterfly implementation */ 
+    /* xa + xc */ 
+    r1 = (pSrc[2u * i0] >> 4u) + (pSrc[2u * i2] >> 4u); 
+    /* xa - xc */ 
+    r2 = (pSrc[2u * i0] >> 4u) - (pSrc[2u * i2] >> 4u); 
+ 
+    /* ya + yc */ 
+    s1 = (pSrc[(2u * i0) + 1u] >> 4u) + (pSrc[(2u * i2) + 1u] >> 4u); 
+    /* ya - yc */ 
+    s2 = (pSrc[(2u * i0) + 1u] >> 4u) - (pSrc[(2u * i2) + 1u] >> 4u); 
+ 
+    /* xb + xd */ 
+    t1 = (pSrc[2u * i1] >> 4u) + (pSrc[2u * i3] >> 4u); 
+ 
+    /* xa' = xa + xb + xc + xd (written before xb/xd are re-read below; only index i0 is overwritten here) */ 
+    pSrc[2u * i0] = (r1 + t1); 
+    /* (xa + xc) - (xb + xd) */ 
+    r1 = r1 - t1; 
+    /* yb + yd */ 
+    t2 = (pSrc[(2u * i1) + 1u] >> 4u) + (pSrc[(2u * i3) + 1u] >> 4u); 
+    /* ya' = ya + yb + yc + yd */ 
+    pSrc[(2u * i0) + 1u] = (s1 + t2); 
+ 
+    /* (ya + yc) - (yb + yd) */ 
+    s1 = s1 - t2; 
+ 
+    /* yb - yd */ 
+    t1 = (pSrc[(2u * i1) + 1u] >> 4u) - (pSrc[(2u * i3) + 1u] >> 4u); 
+    /* xb - xd */ 
+    t2 = (pSrc[2u * i1] >> 4u) - (pSrc[2u * i3] >> 4u); 
+ 
+    /*  index calculation for the coefficients: the second twiddle sits at twice the index of the first */ 
+    ia2 = 2u * ia1; 
+    co2 = pCoef[ia2 * 2u]; 
+    si2 = pCoef[(ia2 * 2u) + 1u]; 
+ 
+    /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) 
+     * Each product is widened to 64 bits and only its upper 32 bits are kept, which 
+     * (assuming Q31 twiddles - TODO confirm against the table) halves the result; 
+     * the final << 1 restores the scale. */ 
+    pSrc[2u * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32)) - 
+                     ((int32_t) (((q63_t) s1 * si2) >> 32))) << 1u; 
+ 
+    /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */ 
+    pSrc[2u * i1 + 1u] = (((int32_t) (((q63_t) s1 * co2) >> 32)) + 
+                          ((int32_t) (((q63_t) r1 * si2) >> 32))) << 1u; 
+ 
+    /* (xa - xc) - (yb - yd) */ 
+    r1 = r2 - t1; 
+    /* (xa - xc) + (yb - yd) */ 
+    r2 = r2 + t1; 
+ 
+    /* (ya - yc) + (xb - xd) */ 
+    s1 = s2 + t2; 
+    /* (ya - yc) - (xb - xd) */ 
+    s2 = s2 - t2; 
+ 
+    co1 = pCoef[ia1 * 2u]; 
+    si1 = pCoef[(ia1 * 2u) + 1u]; 
+ 
+    /* xb' = (xa-yb-xc+yd)co1 - (ya+xb-yc-xd)(si1) */ 
+    pSrc[2u * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) - 
+                     ((int32_t) (((q63_t) s1 * si1) >> 32))) << 1u; 
+ 
+    /* yb' = (ya+xb-yc-xd)co1 + (xa-yb-xc+yd)(si1) */ 
+    pSrc[(2u * i2) + 1u] = (((int32_t) (((q63_t) s1 * co1) >> 32)) + 
+                            ((int32_t) (((q63_t) r1 * si1) >> 32))) << 1u; 
+ 
+    /*  index calculation for the coefficients: the third twiddle sits at three times the index of the first */ 
+    ia3 = 3u * ia1; 
+    co3 = pCoef[ia3 * 2u]; 
+    si3 = pCoef[(ia3 * 2u) + 1u]; 
+ 
+    /* xd' = (xa+yb-xc-yd)co3 - (ya-xb-yc+xd)(si3) */ 
+    pSrc[2u * i3] = (((int32_t) (((q63_t) r2 * co3) >> 32)) - 
+                     ((int32_t) (((q63_t) s2 * si3) >> 32))) << 1u; 
+ 
+    /* yd' = (ya-xb-yc+xd)co3 + (xa+yb-xc-yd)(si3) */ 
+    pSrc[(2u * i3) + 1u] = (((int32_t) (((q63_t) s2 * co3) >> 32)) + 
+                            ((int32_t) (((q63_t) r2 * si3) >> 32))) << 1u; 
+ 
+    /*  Twiddle coefficients index modifier */ 
+    ia1 = ia1 + twidCoefModifier; 
+ 
+    /*  Updating input index */ 
+    i0 = i0 + 1u; 
+ 
+  } while(--j); 
+ 
+  /* data is in 5.27(q27) format */ 
+  /* each middle stage below scales its outputs down by a further 2 bits */ 
+ 
+ 
+  /* Start of Middle stages process */ 
+ 
+  twidCoefModifier <<= 2u; 
+ 
+  /*  Calculation of the second stage up to, but excluding, the last stage 
+   *  (no iterations for fftLen = 16, one for 64, two for 256, three for 1024) */ 
+  for (k = fftLen / 4u; k > 4u; k >>= 2u) 
+  { 
+    /*  Initializations for this middle stage: butterfly span and stride shrink by 4 */ 
+    n1 = n2; 
+    n2 >>= 2u; 
+    ia1 = 0u; 
+ 
+    for (j = 0; j <= (n2 - 1u); j++) 
+    { 
+      /*  index calculation for the coefficients; one twiddle set is shared by every butterfly at offset j */ 
+      ia2 = ia1 + ia1; 
+      ia3 = ia2 + ia1; 
+      co1 = pCoef[ia1 * 2u]; 
+      si1 = pCoef[(ia1 * 2u) + 1u]; 
+      co2 = pCoef[ia2 * 2u]; 
+      si2 = pCoef[(ia2 * 2u) + 1u]; 
+      co3 = pCoef[ia3 * 2u]; 
+      si3 = pCoef[(ia3 * 2u) + 1u]; 
+      /*  Twiddle coefficients index modifier */ 
+      ia1 = ia1 + twidCoefModifier; 
+ 
+      for (i0 = j; i0 < fftLen; i0 += n1) 
+      { 
+        /*  index calculation for the input as, */ 
+        /*  pSrc[i0 + 0], pSrc[i0 + n2], pSrc[i0 + 2*n2], pSrc[i0 + 3*n2] */ 
+        i1 = i0 + n2; 
+        i2 = i1 + n2; 
+        i3 = i2 + n2; 
+ 
+        /*  Butterfly implementation */ 
+        /* xa + xc */ 
+        r1 = pSrc[2u * i0] + pSrc[2u * i2]; 
+        /* xa - xc */ 
+        r2 = pSrc[2u * i0] - pSrc[2u * i2]; 
+ 
+        /* ya + yc */ 
+        s1 = pSrc[(2u * i0) + 1u] + pSrc[(2u * i2) + 1u]; 
+        /* ya - yc */ 
+        s2 = pSrc[(2u * i0) + 1u] - pSrc[(2u * i2) + 1u]; 
+ 
+        /* xb + xd */ 
+        t1 = pSrc[2u * i1] + pSrc[2u * i3]; 
+ 
+        /* xa' = (xa + xb + xc + xd) >> 2 */ 
+        pSrc[2u * i0] = (r1 + t1) >> 2u; 
+        /* xa + xc -(xb + xd) */ 
+        r1 = r1 - t1; 
+        /* yb + yd */ 
+        t2 = pSrc[(2u * i1) + 1u] + pSrc[(2u * i3) + 1u]; 
+        /* ya' = (ya + yb + yc + yd) >> 2 */ 
+        pSrc[(2u * i0) + 1u] = (s1 + t2) >> 2u; 
+ 
+        /* (ya + yc) - (yb + yd) */ 
+        s1 = s1 - t2; 
+ 
+        /* (yb - yd) */ 
+        t1 = pSrc[(2u * i1) + 1u] - pSrc[(2u * i3) + 1u]; 
+        /* (xb - xd) */ 
+        t2 = pSrc[2u * i1] - pSrc[2u * i3]; 
+ 
+        /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) 
+         * Here the upper-word product is shifted right by 1 more bit instead of left, 
+         * so the twiddled outputs are scaled by the same 2 bits as xa'/ya'. */ 
+        pSrc[2u * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32u)) - 
+                         ((int32_t) (((q63_t) s1 * si2) >> 32u))) >> 1u; 
+ 
+        /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */ 
+        pSrc[(2u * i1) + 1u] = 
+          (((int32_t) (((q63_t) s1 * co2) >> 32u)) + 
+           ((int32_t) (((q63_t) r1 * si2) >> 32u))) >> 1u; 
+ 
+        /* (xa - xc) - (yb - yd) */ 
+        r1 = r2 - t1; 
+        /* (xa - xc) + (yb - yd) */ 
+        r2 = r2 + t1; 
+ 
+        /* (ya - yc) +  (xb - xd) */ 
+        s1 = s2 + t2; 
+        /* (ya - yc) -  (xb - xd) */ 
+        s2 = s2 - t2; 
+ 
+        /* xb' = (xa-yb-xc+yd)co1 - (ya+xb-yc-xd)(si1) */ 
+        pSrc[2u * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) - 
+                         ((int32_t) (((q63_t) s1 * si1) >> 32))) >> 1u; 
+ 
+        /* yb' = (ya+xb-yc-xd)co1 + (xa-yb-xc+yd)(si1) */ 
+        pSrc[(2u * i2) + 1u] = (((int32_t) (((q63_t) s1 * co1) >> 32)) + 
+                                ((int32_t) (((q63_t) r1 * si1) >> 32))) >> 1u; 
+ 
+        /* xd' = (xa+yb-xc-yd)co3 - (ya-xb-yc+xd)(si3) */ 
+        pSrc[(2u * i3)] = (((int32_t) (((q63_t) r2 * co3) >> 32)) - 
+                           ((int32_t) (((q63_t) s2 * si3) >> 32))) >> 1u; 
+ 
+        /* yd' = (ya-xb-yc+xd)co3 + (xa+yb-xc-yd)(si3) */ 
+        pSrc[(2u * i3) + 1u] = (((int32_t) (((q63_t) s2 * co3) >> 32)) + 
+                                ((int32_t) (((q63_t) r2 * si3) >> 32))) >> 1u; 
+      } 
+    } 
+    twidCoefModifier <<= 2u; 
+  } 
+ 
+  /* End of Middle stages process */ 
+ 
+  /* data is in 11.21(q21) format for the 1024 point as there are 3 middle stages */ 
+  /* data is in 9.23(q23) format for the 256 point as there are 2 middle stages */ 
+  /* data is in 7.25(q25) format for the 64 point as there is 1 middle stage */ 
+  /* data is in 5.27(q27) format for the 16 point as there are no middle stages */ 
+ 
+ 
+  /* Start of last stage process */ 
+ 
+ 
+  /*  Initializations of last stage (for a power-of-4 fftLen this leaves n1 = 4 and n2 = 1) */ 
+  n1 = n2; 
+  n2 >>= 2u; 
+ 
+  /*  Calculations of last stage: additions and subtractions only, no twiddle multiplication and no scaling */ 
+  for (i0 = 0u; i0 <= (fftLen - n1); i0 += n1) 
+  { 
+    /*  index calculation for the input as, */ 
+    /*  pSrc[i0 + 0], pSrc[i0 + n2], pSrc[i0 + 2*n2], pSrc[i0 + 3*n2] */ 
+    i1 = i0 + n2; 
+    i2 = i1 + n2; 
+    i3 = i2 + n2; 
+ 
+    /*  Butterfly implementation */ 
+    /* xa + xc */ 
+    r1 = pSrc[2u * i0] + pSrc[2u * i2]; 
+    /* xa - xc */ 
+    r2 = pSrc[2u * i0] - pSrc[2u * i2]; 
+ 
+    /* ya + yc */ 
+    s1 = pSrc[(2u * i0) + 1u] + pSrc[(2u * i2) + 1u]; 
+    /* ya - yc */ 
+    s2 = pSrc[(2u * i0) + 1u] - pSrc[(2u * i2) + 1u]; 
+ 
+    /* xb + xd */ 
+    t1 = pSrc[2u * i1] + pSrc[2u * i3]; 
+    /* xa' = xa + xb + xc + xd */ 
+    pSrc[2u * i0] = (r1 + t1); 
+    /* (xa + xc) - (xb + xd) */ 
+    r1 = r1 - t1; 
+ 
+    /* yb + yd */ 
+    t2 = pSrc[(2u * i1) + 1u] + pSrc[(2u * i3) + 1u]; 
+    /* ya' = ya + yb + yc + yd */ 
+    pSrc[(2u * i0) + 1u] = (s1 + t2); 
+    /* (ya + yc) - (yb + yd) */ 
+    s1 = s1 - t2; 
+ 
+    /* (yb-yd) */ 
+    t1 = pSrc[(2u * i1) + 1u] - pSrc[(2u * i3) + 1u]; 
+    /* (xb-xd) */ 
+    t2 = pSrc[2u * i1] - pSrc[2u * i3]; 
+ 
+    /* xc' = (xa-xb+xc-xd) */ 
+    pSrc[2u * i1] = r1; 
+    /* yc' = (ya-yb+yc-yd) */ 
+    pSrc[(2u * i1) + 1u] = s1; 
+ 
+    /* (xa - xc) - (yb-yd) */ 
+    r1 = r2 - t1; 
+ 
+    /* (xa - xc) + (yb-yd) */ 
+    r2 = r2 + t1; 
+ 
+    /* (ya - yc) + (xb-xd) */ 
+    s1 = s2 + t2; 
+ 
+    /* (ya - yc) - (xb-xd) */ 
+    s2 = s2 - t2; 
+ 
+    /* xb' = (xa-yb-xc+yd) */ 
+    pSrc[2u * i2] = r1; 
+    /* yb' = (ya+xb-yc-xd) */ 
+    pSrc[(2u * i2) + 1u] = s1; 
+ 
+    /* xd' = (xa+yb-xc-yd) */ 
+    pSrc[2u * i3] = r2; 
+    /* yd' = (ya-xb-yc+xd) */ 
+    pSrc[(2u * i3) + 1u] = s2; 
+ 
+  } 
+ 
+  /* output is in 11.21(q21) format for the 1024 point */ 
+  /* output is in 9.23(q23) format for the 256 point */ 
+  /* output is in 7.25(q25) format for the 64 point */ 
+  /* output is in 5.27(q27) format for the 16 point */ 
+ 
+  /* End of last stage process */ 
+} 
+ 
+ 
+/*  
+ * @brief  In-place bit reversal function. 
+ * @param[in, out] *pSrc        points to the in-place buffer of Q31 data type. 
+ * @param[in]      fftLen       length of the FFT. 
+ * @param[in]      bitRevFactor bit reversal modifier that supports different size FFTs with the same bit reversal table 
+ * @param[in]      *pBitRevTable points to bit reversal table. 
+ * @return none. 
+ */ 
+ 
+void arm_bitreversal_q31( 
+  q31_t * pSrc, 
+  uint32_t fftLen, 
+  uint16_t bitRevFactor, 
+  uint16_t * pBitRevTable) 
+{ 
+  uint32_t fftLenBy2, fftLenBy2p1, i, j; 
+  q31_t in; 
+ 
+  /*  Reorders pSrc in place by swapping two-word elements (word 2*x and word 2*x + 1 
+   *  move together; presumably the real and imaginary parts of a complex sample). 
+   *  The partner index j for each step is read from pBitRevTable, which is advanced 
+   *  by bitRevFactor entries per iteration. 
+   *  Initializations: j starts at 0, so the first iteration pairs element 0 with itself. */ 
+  j = 0u; 
+  fftLenBy2 = fftLen / 2u; 
+  fftLenBy2p1 = (fftLen / 2u) + 1u; 
+ 
+  /* Bit Reversal Implementation: i visits the even indices of the lower half. 
+   * NOTE(review): the bound is unsigned, so fftLen below 4 would wrap (fftLenBy2 - 2u). */ 
+  for (i = 0u; i <= (fftLenBy2 - 2u); i += 2u) 
+  { 
+    if(i < j) 
+    { 
+      /*  first word of element i <-> first word of element j (swap only when i < j so each pair is exchanged once) */ 
+      in = pSrc[2u * i]; 
+      pSrc[2u * i] = pSrc[2u * j]; 
+      pSrc[2u * j] = in; 
+ 
+      /*  second word of element i <-> second word of element j */ 
+      in = pSrc[(2u * i) + 1u]; 
+      pSrc[(2u * i) + 1u] = pSrc[(2u * j) + 1u]; 
+      pSrc[(2u * j) + 1u] = in; 
+ 
+      /*  first word of element (i + fftLenBy2p1) <-> first word of element (j + fftLenBy2p1) */ 
+      in = pSrc[2u * (i + fftLenBy2p1)]; 
+      pSrc[2u * (i + fftLenBy2p1)] = pSrc[2u * (j + fftLenBy2p1)]; 
+      pSrc[2u * (j + fftLenBy2p1)] = in; 
+ 
+      /*  second word of element (i + fftLenBy2p1) <-> second word of element (j + fftLenBy2p1) */ 
+      in = pSrc[(2u * (i + fftLenBy2p1)) + 1u]; 
+      pSrc[(2u * (i + fftLenBy2p1)) + 1u] = 
+        pSrc[(2u * (j + fftLenBy2p1)) + 1u]; 
+      pSrc[(2u * (j + fftLenBy2p1)) + 1u] = in; 
+ 
+    } 
+ 
+    /*  Unconditional swap: first word of element (i + 1) <-> first word of element (j + fftLenBy2) */ 
+    in = pSrc[2u * (i + 1u)]; 
+    pSrc[2u * (i + 1u)] = pSrc[2u * (j + fftLenBy2)]; 
+    pSrc[2u * (j + fftLenBy2)] = in; 
+ 
+    /*  second word of element (i + 1) <-> second word of element (j + fftLenBy2) */ 
+    in = pSrc[(2u * (i + 1u)) + 1u]; 
+    pSrc[(2u * (i + 1u)) + 1u] = pSrc[(2u * (j + fftLenBy2)) + 1u]; 
+    pSrc[(2u * (j + fftLenBy2)) + 1u] = in; 
+ 
+    /*  Reading the partner index used by the next iteration */ 
+    j = *pBitRevTable; 
+ 
+    /*  Stepping through the table by bitRevFactor entries, which lets one table serve several fft lengths */ 
+    pBitRevTable += bitRevFactor; 
+  } 
+}

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/TransformFunctions/arm_dct4_f32.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,335 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:	    arm_dct4_f32.c  
+*  
+* Description:	Processing function of DCT4 & IDCT4 F32.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupTransforms  
+ */ 
+ 
+/**  
+ * @defgroup DCT4_IDCT4 DCT Type IV Functions  
+ * Representation of signals by minimum number of values is important for storage and transmission.  
+ * The possibility of large discontinuity between the beginning and end of a period of a signal  
+ * in DFT can be avoided by extending the signal so that it is even-symmetric.  
+ * Discrete Cosine Transform (DCT) is constructed such that its energy is heavily concentrated in the lower part of the  
+ * spectrum and is very widely used in signal and image coding applications.  
+ * The family of DCTs (DCT type- 1,2,3,4) is the outcome of different combinations of homogeneous boundary conditions.  
+ * DCT has excellent energy-packing capability and hence has many applications, in data compression in particular.  
+ *  
+ * DCT is essentially the Discrete Fourier Transform(DFT) of an even-extended real signal.  
+ * Reordering of the input data makes the computation of DCT just a problem of  
+ * computing the DFT of a real signal with a few additional operations.  
+ * This approach provides regular, simple, and very efficient DCT algorithms for practical hardware and software implementations.  
+ *   
+ * DCT type-II can be implemented using the Fast Fourier Transform (FFT) internally; because the transform is applied to real values, the Real FFT can be used.  
+ * DCT4 is implemented using DCT2 as their implementations are similar except with some added pre-processing and post-processing.  
+ * DCT2 implementation can be described in the following steps:  
+ * - Re-ordering input  
+ * - Calculating Real FFT  
+ * - Multiplication of weights and Real FFT output and getting real part from the product.  
+ *  
+ * This process is explained by the block diagram below:  
+ * \image html DCT4.gif "Discrete Cosine Transform - type-IV"  
+ *  
+ * \par Algorithm:  
+ * The N-point type-IV DCT is defined as a real, linear transformation by the formula:  
+ * \image html DCT4Equation.gif  
+ * where <code>k = 0,1,2,.....N-1</code>  
+ *\par  
+ * Its inverse is defined as follows:  
+ * \image html IDCT4Equation.gif  
+ * where <code>n = 0,1,2,.....N-1</code>  
+ *\par  
+ * The DCT4 matrices become involutory (i.e. they are self-inverse) by multiplying with an overall scale factor of sqrt(2/N).  
+ * The symmetry of the transform matrix indicates that the fast algorithms for the forward  
+ * and inverse transform computation are identical.  
+ * Note that the implementation of Inverse DCT4 and DCT4 is same, hence same process function can be used for both.  
+ *  
+ * \par Lengths supported by the transform:  
+ *  As DCT4 internally uses Real FFT, it supports all the lengths supported by arm_rfft_f32().  
+ * The library provides separate functions for Q15, Q31, and floating-point data types.  
+ * \par Instance Structure  
+ * The instances for Real FFT and FFT, cosine values table and twiddle factor table are stored in an instance data structure.  
+ * A separate instance structure must be defined for each transform.  
+ * There are separate instance structure declarations for each of the 3 supported data types.  
+ *  
+ * \par Initialization Functions  
+ * There is also an associated initialization function for each data type.  
+ * The initialization function performs the following operations:  
+ * - Sets the values of the internal structure fields.  
+ * - Initializes Real FFT as its process function is used internally in DCT4, by calling arm_rfft_init_f32().  
+ * \par  
+ * Use of the initialization function is optional.  
+ * However, if the initialization function is used, then the instance structure cannot be placed into a const data section.  
+ * To place an instance structure into a const data section, the instance structure must be manually initialized.  
+ * Manually initialize the instance structure as follows:  
+ * <pre>  
+ *arm_dct4_instance_f32 S = {N, Nby2, normalize, pTwiddle, pCosFactor, pRfft, pCfft};  
+ *arm_dct4_instance_q31 S = {N, Nby2, normalize, pTwiddle, pCosFactor, pRfft, pCfft}; 
+ *arm_dct4_instance_q15 S = {N, Nby2, normalize, pTwiddle, pCosFactor, pRfft, pCfft}; 
+ * </pre> 
+ * where \c N is the length of the DCT4; \c Nby2 is half of the length of the DCT4; 
+ * \c normalize is normalizing factor used and is equal to <code>sqrt(2/N)</code>;  
+ * \c pTwiddle points to the twiddle factor table; 
+ * \c pCosFactor points to the cosFactor table; 
+ * \c pRfft points to the real FFT instance; 
+ * \c pCfft points to the complex FFT instance; 
+ * The CFFT and RFFT structures also need to be initialized; refer to arm_cfft_radix4_f32() 
+ * and arm_rfft_f32() respectively for details regarding static initialization. 
+ * 
+ * \par Fixed-Point Behavior  
+ * Care must be taken when using the fixed-point versions of the DCT4 transform functions.  
+ * In particular, the overflow and saturation behavior of the accumulator used in each function must be considered.  
+ * Refer to the function specific documentation below for usage guidelines.  
+ */ 
+ 
+ /**  
+ * @addtogroup DCT4_IDCT4  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief Processing function for the floating-point DCT4/IDCT4. 
+ * @param[in]       *S             points to an instance of the floating-point DCT4/IDCT4 structure. 
+ * @param[in]       *pState        points to state buffer. 
+ * @param[in,out]   *pInlineBuffer points to the in-place input and output buffer. 
+ * @return none. 
+ */ 
+ 
+void arm_dct4_f32( 
+  const arm_dct4_instance_f32 * S, 
+  float32_t * pState, 
+  float32_t * pInlineBuffer) 
+{ 
+  uint32_t i;                                    /* Loop counter */ 
+  float32_t *weights = S->pTwiddle;              /* Pointer to the Weights table */ 
+  float32_t *cosFact = S->pCosFactor;            /* Pointer to the cos factors table */ 
+  float32_t *pS1, *pS2, *pbuff;                  /* Temporary pointers for input buffer and pState buffer */ 
+  float32_t in;                                  /* Temporary variable */ 
+ 
+ 
+  /* Computes the DCT4/IDCT4 of pInlineBuffer in place, using pState as scratch space. 
+   * DCT4 computation involves DCT2 (which is calculated using RFFT)  
+   * along with some pre-processing and post-processing.  
+   * Computational procedure is explained as follows:  
+   * (a) Pre-processing involves multiplying input with cos factor,  
+   *     r(n) = 2 * u(n) * cos(pi*(2*n+1)/(4*N))  
+   *              where,  
+   *                 r(n) -- output of preprocessing  
+   *                 u(n) -- input to preprocessing(actual Source buffer)  
+   * (b) Calculation of DCT2 using FFT is divided into three steps:  
+   *                  Step1: Re-ordering of even and odd elements of input.  
+   *                  Step2: Calculating FFT of the re-ordered input.  
+   *                  Step3: Taking the real part of the product of FFT output and weights.  
+   * (c) Post-processing - DCT4 can be obtained from DCT2 output using the following equation:  
+   *                   Y4(k) = Y2(k) - Y4(k-1) and Y4(-1) = Y4(0)  
+   *                        where,  
+   *                           Y4 -- DCT4 output,   Y2 -- DCT2 output  
+   * (d) Multiplying the output with the normalizing factor sqrt(2/N).  
+   * 
+   * NOTE(review): every unrolled loop below is a do/while on an unsigned counter, so a 
+   * counter that starts at 0 would still run the body once and then wrap on decrement. 
+   */ 
+ 
+        /*-------- Pre-processing ------------*/ 
+  /* Multiplying input with cos factor i.e. r(n) = 2 * x(n) * cos(pi*(2*n+1)/(4*N)); the cosine values come from the cosFact table */ 
+  arm_scale_f32(pInlineBuffer, 2.0f, pInlineBuffer, S->N); 
+  arm_mult_f32(pInlineBuffer, cosFact, pInlineBuffer, S->N); 
+ 
+  /* ----------------------------------------------------------------  
+   * Step1: Re-ordering of even and odd elements as,  
+   *             pState[i] =  pInlineBuffer[2*i] and  
+   *             pState[N-i-1] = pInlineBuffer[2*i+1] where i = 0 to N/2 - 1  
+   ---------------------------------------------------------------------*/ 
+ 
+  /* pS1 initialized to pState */ 
+  pS1 = pState; 
+ 
+  /* pS2 initialized to pState+N-1, so that it points to the end of the state buffer */ 
+  pS2 = pState + (S->N - 1u); 
+ 
+  /* pbuff initialized to input buffer */ 
+  pbuff = pInlineBuffer; 
+ 
+  /* Initializing the loop counter to Nby2 >> 2 for loop unrolling by 4 */ 
+  i = (uint32_t) S->Nby2 >> 2u; 
+ 
+  /* Loop unrolled by 4: each iteration moves 4 even-indexed and 4 odd-indexed inputs.  
+   ** There is no remainder loop here, so Nby2 is assumed to be a multiple of 4. */ 
+  do 
+  { 
+    /* Re-ordering of even and odd elements */ 
+    /* pState[i] =  pInlineBuffer[2*i] */ 
+    *pS1++ = *pbuff++; 
+    /* pState[N-i-1] = pInlineBuffer[2*i+1] */ 
+    *pS2-- = *pbuff++; 
+ 
+    *pS1++ = *pbuff++; 
+    *pS2-- = *pbuff++; 
+ 
+    *pS1++ = *pbuff++; 
+    *pS2-- = *pbuff++; 
+ 
+    *pS1++ = *pbuff++; 
+    *pS2-- = *pbuff++; 
+ 
+    /* Decrement the loop counter */ 
+    i--; 
+  } while(i > 0u); 
+ 
+  /* pbuff initialized to input buffer */ 
+  pbuff = pInlineBuffer; 
+ 
+  /* pS1 initialized to pState */ 
+  pS1 = pState; 
+ 
+  /* Initializing the loop counter to N/4 instead of N for loop unrolling */ 
+  i = (uint32_t) S->N >> 2u; 
+ 
+  /* Processing with loop unrolling 4 times as N is always multiple of 4.  
+   * Copies 4 re-ordered values back per iteration */ 
+  do 
+  { 
+    /* Writing the re-ordered output back to inplace input buffer */ 
+    *pbuff++ = *pS1++; 
+    *pbuff++ = *pS1++; 
+    *pbuff++ = *pS1++; 
+    *pbuff++ = *pS1++; 
+ 
+    /* Decrement the loop counter */ 
+    i--; 
+  } while(i > 0u); 
+ 
+ 
+  /* ---------------------------------------------------------  
+   *     Step2: Calculate RFFT for N-point input  
+   * ---------------------------------------------------------- */ 
+  /* pInlineBuffer is real input of length N , pState is the complex output of length 2N */ 
+  arm_rfft_f32(S->pRfft, pInlineBuffer, pState); 
+ 
+        /*----------------------------------------------------------------------  
+	 *  Step3: Multiply the FFT output with the weights (complex-by-complex, in place in pState).  
+	 *----------------------------------------------------------------------*/ 
+  arm_cmplx_mult_cmplx_f32(pState, weights, pState, S->N); 
+ 
+  /* ----------- Post-processing ---------- */ 
+  /* DCT-IV can be obtained from DCT-II by the equation,  
+   *       Y4(k) = Y2(k) - Y4(k-1) and Y4(-1) = Y4(0)  
+   *       Hence, Y4(0) = Y2(0)/2  */ 
+  /* Getting only real part from the output and Converting to DCT-IV */ 
+ 
+  /* Initializing the loop counter to (N - 1) >> 2 for loop unrolling by 4; Y4(0) is handled separately below */ 
+  i = ((uint32_t) S->N - 1u) >> 2u; 
+ 
+  /* pbuff initialized to input buffer. */ 
+  pbuff = pInlineBuffer; 
+ 
+  /* pS1 initialized to pState */ 
+  pS1 = pState; 
+ 
+  /* Calculating Y4(0) from Y2(0) using Y4(0) = Y2(0)/2 */ 
+  in = *pS1++ * (float32_t) 0.5; 
+  /* input buffer acts as inplace, so output values are stored in the input itself. */ 
+  *pbuff++ = in; 
+ 
+  /* pState pointer is incremented twice as the real values are located alternatively in the array */ 
+  pS1++; 
+ 
+  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.  
+   ** a second loop below computes the remaining (N - 1) % 4 samples. */ 
+  do 
+  { 
+    /* Calculating Y4(1) to Y4(N-1) from Y2 using equation Y4(k) = Y2(k) - Y4(k-1); `in` carries Y4(k-1) */ 
+    /* pState pointer (pS1) is incremented twice as the real values are located alternatively in the array */ 
+    in = *pS1++ - in; 
+    *pbuff++ = in; 
+    /* points to the next real value */ 
+    pS1++; 
+ 
+    in = *pS1++ - in; 
+    *pbuff++ = in; 
+    pS1++; 
+ 
+    in = *pS1++ - in; 
+    *pbuff++ = in; 
+    pS1++; 
+ 
+    in = *pS1++ - in; 
+    *pbuff++ = in; 
+    pS1++; 
+ 
+    /* Decrement the loop counter */ 
+    i--; 
+  } while(i > 0u); 
+ 
+  /* Y4(0) was written separately, so N - 1 outputs are produced in total; the (N - 1) % 4  
+   ** left over after the unrolled loop are computed here.  No loop unrolling is used. */ 
+  i = ((uint32_t) S->N - 1u) % 0x4u; 
+ 
+  while(i > 0u) 
+  { 
+    /* Calculating Y4(1) to Y4(N-1) from Y2 using equation Y4(k) = Y2(k) - Y4(k-1) */ 
+    /* pState pointer (pS1) is incremented twice as the real values are located alternatively in the array */ 
+    in = *pS1++ - in; 
+    *pbuff++ = in; 
+    /* points to the next real value */ 
+    pS1++; 
+ 
+    /* Decrement the loop counter */ 
+    i--; 
+  } 
+ 
+ 
+        /*------------ Normalizing the output by multiplying with the normalizing factor ----------*/ 
+ 
+  /* Initializing the loop counter to N/4 instead of N for loop unrolling */ 
+  i = (uint32_t) S->N >> 2u; 
+ 
+  /* pbuff initialized to the pInlineBuffer(now contains the output values) */ 
+  pbuff = pInlineBuffer; 
+ 
+  /* Processing with loop unrolling 4 times as N is always multiple of 4.  Compute 4 outputs at a time */ 
+  do 
+  { 
+    /* Multiplying pInlineBuffer with the normalizing factor S->normalize (documented as sqrt(2/N)) */ 
+    in = *pbuff; 
+    *pbuff++ = in * S->normalize; 
+ 
+    in = *pbuff; 
+    *pbuff++ = in * S->normalize; 
+ 
+    in = *pbuff; 
+    *pbuff++ = in * S->normalize; 
+ 
+    in = *pbuff; 
+    *pbuff++ = in * S->normalize; 
+ 
+    /* Decrement the loop counter */ 
+    i--; 
+  } while(i > 0u); 
+ 
+} 
+ 
+/**  
+   * @} end of DCT4_IDCT4 group  
+   */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/TransformFunctions/arm_dct4_init_f32.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,4205 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:	    arm_dct4_init_f32.c  
+*  
+* Description:	Initialization function of DCT-4 & IDCT4 F32  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+* -------------------------------------------------------------------- */ 
+ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupTransforms  
+ */ 
+ 
+/**  
+ * @addtogroup DCT4_IDCT4  
+ * @{  
+ */ 
+ 
+/*  
+* @brief  Weights Table  
+*/ 
+ 
+/**  
+* \par  
+* Weights tables are generated using the formula : <pre>weights[n] = e^(-j*n*pi/(2*N))</pre>  
+* \par  
+* C command to generate the table  
+* <pre>  
+* for(i = 0; i< N; i++)  
+* {  
+*    weights[2*i]= cos(i*c);  
+*    weights[(2*i)+1]= -sin(i * c);  
+* } </pre>  
+* \par  
+* Where <code>N</code> is the number of complex weights to be calculated and <code>c</code> is <code>pi/(2*N)</code>  
+* \par  
+* In the tables below the real and imaginary values are interleaved (real part first), hence the  
+* array length is <code>2*N</code>.  
+*/ 
+ 
+static const float32_t Weights_128[256] = { /* N = 128 weights table: 128 interleaved (cos(i*c), -sin(i*c)) pairs with c = pi/256, per the generation formula documented above */ 
+  1.000000000000000000f, 0.000000000000000000f, 0.999924701839144500f, 
+  -0.012271538285719925f, 
+  0.999698818696204250f, -0.024541228522912288f, 0.999322384588349540f, 
+  -0.036807222941358832f, 
+  0.998795456205172410f, -0.049067674327418015f, 0.998118112900149180f, 
+  -0.061320736302208578f, 
+  0.997290456678690210f, -0.073564563599667426f, 0.996312612182778000f, 
+  -0.085797312344439894f, 
+  0.995184726672196930f, -0.098017140329560604f, 0.993906970002356060f, 
+  -0.110222207293883060f, 
+  0.992479534598709970f, -0.122410675199216200f, 0.990902635427780010f, 
+  -0.134580708507126170f, 
+  0.989176509964781010f, -0.146730474455361750f, 0.987301418157858430f, 
+  -0.158858143333861450f, 
+  0.985277642388941220f, -0.170961888760301220f, 0.983105487431216290f, 
+  -0.183039887955140950f, 
+  0.980785280403230430f, -0.195090322016128250f, 0.978317370719627650f, 
+  -0.207111376192218560f, 
+  0.975702130038528570f, -0.219101240156869800f, 0.972939952205560180f, 
+  -0.231058108280671110f, 
+  0.970031253194543970f, -0.242980179903263870f, 0.966976471044852070f, 
+  -0.254865659604514570f, 
+  0.963776065795439840f, -0.266712757474898370f, 0.960430519415565790f, 
+  -0.278519689385053060f, 
+  0.956940335732208820f, -0.290284677254462330f, 0.953306040354193860f, 
+  -0.302005949319228080f, 
+  0.949528180593036670f, -0.313681740398891520f, 0.945607325380521280f, 
+  -0.325310292162262930f, 
+  0.941544065183020810f, -0.336889853392220050f, 0.937339011912574960f, 
+  -0.348418680249434560f, 
+  0.932992798834738960f, -0.359895036534988110f, 0.928506080473215590f, 
+  -0.371317193951837540f, 
+  0.923879532511286740f, -0.382683432365089780f, 0.919113851690057770f, 
+  -0.393992040061048100f, 
+  0.914209755703530690f, -0.405241314004989860f, 0.909167983090522380f, 
+  -0.416429560097637150f, 
+  0.903989293123443340f, -0.427555093430282080f, 0.898674465693953820f, 
+  -0.438616238538527660f, 
+  0.893224301195515320f, -0.449611329654606540f, 0.887639620402853930f, 
+  -0.460538710958240010f, 
+  0.881921264348355050f, -0.471396736825997640f, 0.876070094195406600f, 
+  -0.482183772079122720f, 
+  0.870086991108711460f, -0.492898192229784040f, 0.863972856121586810f, 
+  -0.503538383725717580f, 
+  0.857728610000272120f, -0.514102744193221660f, 0.851355193105265200f, 
+  -0.524589682678468950f, 
+  0.844853565249707120f, -0.534997619887097150f, 0.838224705554838080f, 
+  -0.545324988422046460f, 
+  0.831469612302545240f, -0.555570233019602180f, 0.824589302785025290f, 
+  -0.565731810783613120f, 
+  0.817584813151583710f, -0.575808191417845340f, 0.810457198252594770f, 
+  -0.585797857456438860f, 
+  0.803207531480644940f, -0.595699304492433360f, 0.795836904608883570f, 
+  -0.605511041404325550f, 
+  0.788346427626606340f, -0.615231590580626820f, 0.780737228572094490f, 
+  -0.624859488142386340f, 
+  0.773010453362736990f, -0.634393284163645490f, 0.765167265622458960f, 
+  -0.643831542889791390f, 
+  0.757208846506484570f, -0.653172842953776760f, 0.749136394523459370f, 
+  -0.662415777590171780f, 
+  0.740951125354959110f, -0.671558954847018330f, 0.732654271672412820f, 
+  -0.680600997795453020f, 
+  0.724247082951467000f, -0.689540544737066830f, 0.715730825283818590f, 
+  -0.698376249408972920f, 
+  0.707106781186547570f, -0.707106781186547460f, 0.698376249408972920f, 
+  -0.715730825283818590f, 
+  0.689540544737066940f, -0.724247082951466890f, 0.680600997795453130f, 
+  -0.732654271672412820f, 
+  0.671558954847018330f, -0.740951125354959110f, 0.662415777590171780f, 
+  -0.749136394523459260f, 
+  0.653172842953776760f, -0.757208846506484460f, 0.643831542889791500f, 
+  -0.765167265622458960f, 
+  0.634393284163645490f, -0.773010453362736990f, 0.624859488142386450f, 
+  -0.780737228572094380f, 
+  0.615231590580626820f, -0.788346427626606230f, 0.605511041404325550f, 
+  -0.795836904608883460f, 
+  0.595699304492433470f, -0.803207531480644830f, 0.585797857456438860f, 
+  -0.810457198252594770f, 
+  0.575808191417845340f, -0.817584813151583710f, 0.565731810783613230f, 
+  -0.824589302785025290f, 
+  0.555570233019602290f, -0.831469612302545240f, 0.545324988422046460f, 
+  -0.838224705554837970f, 
+  0.534997619887097260f, -0.844853565249707010f, 0.524589682678468840f, 
+  -0.851355193105265200f, 
+  0.514102744193221660f, -0.857728610000272120f, 0.503538383725717580f, 
+  -0.863972856121586700f, 
+  0.492898192229784090f, -0.870086991108711350f, 0.482183772079122830f, 
+  -0.876070094195406600f, 
+  0.471396736825997810f, -0.881921264348354940f, 0.460538710958240010f, 
+  -0.887639620402853930f, 
+  0.449611329654606600f, -0.893224301195515320f, 0.438616238538527710f, 
+  -0.898674465693953820f, 
+  0.427555093430282200f, -0.903989293123443340f, 0.416429560097637320f, 
+  -0.909167983090522270f, 
+  0.405241314004989860f, -0.914209755703530690f, 0.393992040061048100f, 
+  -0.919113851690057770f, 
+  0.382683432365089840f, -0.923879532511286740f, 0.371317193951837600f, 
+  -0.928506080473215480f, 
+  0.359895036534988280f, -0.932992798834738850f, 0.348418680249434510f, 
+  -0.937339011912574960f, 
+  0.336889853392220050f, -0.941544065183020810f, 0.325310292162262980f, 
+  -0.945607325380521280f, 
+  0.313681740398891570f, -0.949528180593036670f, 0.302005949319228200f, 
+  -0.953306040354193750f, 
+  0.290284677254462330f, -0.956940335732208940f, 0.278519689385053060f, 
+  -0.960430519415565790f, 
+  0.266712757474898420f, -0.963776065795439840f, 0.254865659604514630f, 
+  -0.966976471044852070f, 
+  0.242980179903263980f, -0.970031253194543970f, 0.231058108280671280f, 
+  -0.972939952205560070f, 
+  0.219101240156869770f, -0.975702130038528570f, 0.207111376192218560f, 
+  -0.978317370719627650f, 
+  0.195090322016128330f, -0.980785280403230430f, 0.183039887955141060f, 
+  -0.983105487431216290f, 
+  0.170961888760301360f, -0.985277642388941220f, 0.158858143333861390f, 
+  -0.987301418157858430f, 
+  0.146730474455361750f, -0.989176509964781010f, 0.134580708507126220f, 
+  -0.990902635427780010f, 
+  0.122410675199216280f, -0.992479534598709970f, 0.110222207293883180f, 
+  -0.993906970002356060f, 
+  0.098017140329560770f, -0.995184726672196820f, 0.085797312344439880f, 
+  -0.996312612182778000f, 
+  0.073564563599667454f, -0.997290456678690210f, 0.061320736302208648f, 
+  -0.998118112900149180f, 
+  0.049067674327418126f, -0.998795456205172410f, 0.036807222941358991f, 
+  -0.999322384588349540f, 
+  0.024541228522912264f, -0.999698818696204250f, 0.012271538285719944f, 
+  -0.999924701839144500f 
+}; 
+ 
+static const float32_t Weights_512[1024] = { 
+  1.000000000000000000f, 0.000000000000000000f, 0.999995293809576190f, 
+  -0.003067956762965976f, 
+  0.999981175282601110f, -0.006135884649154475f, 0.999957644551963900f, 
+  -0.009203754782059819f, 
+  0.999924701839144500f, -0.012271538285719925f, 0.999882347454212560f, 
+  -0.015339206284988100f, 
+  0.999830581795823400f, -0.018406729905804820f, 0.999769405351215280f, 
+  -0.021474080275469508f, 
+  0.999698818696204250f, -0.024541228522912288f, 0.999618822495178640f, 
+  -0.027608145778965740f, 
+  0.999529417501093140f, -0.030674803176636626f, 0.999430604555461730f, 
+  -0.033741171851377580f, 
+  0.999322384588349540f, -0.036807222941358832f, 0.999204758618363890f, 
+  -0.039872927587739811f, 
+  0.999077727752645360f, -0.042938256934940820f, 0.998941293186856870f, 
+  -0.046003182130914623f, 
+  0.998795456205172410f, -0.049067674327418015f, 0.998640218180265270f, 
+  -0.052131704680283324f, 
+  0.998475580573294770f, -0.055195244349689934f, 0.998301544933892890f, 
+  -0.058258264500435752f, 
+  0.998118112900149180f, -0.061320736302208578f, 0.997925286198596000f, 
+  -0.064382630929857465f, 
+  0.997723066644191640f, -0.067443919563664051f, 0.997511456140303450f, 
+  -0.070504573389613856f, 
+  0.997290456678690210f, -0.073564563599667426f, 0.997060070339482960f, 
+  -0.076623861392031492f, 
+  0.996820299291165670f, -0.079682437971430126f, 0.996571145790554840f, 
+  -0.082740264549375692f, 
+  0.996312612182778000f, -0.085797312344439894f, 0.996044700901251970f, 
+  -0.088853552582524600f, 
+  0.995767414467659820f, -0.091908956497132724f, 0.995480755491926940f, 
+  -0.094963495329638992f, 
+  0.995184726672196930f, -0.098017140329560604f, 0.994879330794805620f, 
+  -0.101069862754827820f, 
+  0.994564570734255420f, -0.104121633872054590f, 0.994240449453187900f, 
+  -0.107172424956808840f, 
+  0.993906970002356060f, -0.110222207293883060f, 0.993564135520595300f, 
+  -0.113270952177564350f, 
+  0.993211949234794500f, -0.116318630911904750f, 0.992850414459865100f, 
+  -0.119365214810991350f, 
+  0.992479534598709970f, -0.122410675199216200f, 0.992099313142191800f, 
+  -0.125454983411546230f, 
+  0.991709753669099530f, -0.128498110793793170f, 0.991310859846115440f, 
+  -0.131540028702883120f, 
+  0.990902635427780010f, -0.134580708507126170f, 0.990485084256457090f, 
+  -0.137620121586486040f, 
+  0.990058210262297120f, -0.140658239332849210f, 0.989622017463200890f, 
+  -0.143695033150294470f, 
+  0.989176509964781010f, -0.146730474455361750f, 0.988721691960323780f, 
+  -0.149764534677321510f, 
+  0.988257567730749460f, -0.152797185258443440f, 0.987784141644572180f, 
+  -0.155828397654265230f, 
+  0.987301418157858430f, -0.158858143333861450f, 0.986809401814185530f, 
+  -0.161886393780111830f, 
+  0.986308097244598670f, -0.164913120489969890f, 0.985797509167567480f, 
+  -0.167938294974731170f, 
+  0.985277642388941220f, -0.170961888760301220f, 0.984748501801904210f, 
+  -0.173983873387463820f, 
+  0.984210092386929030f, -0.177004220412148750f, 0.983662419211730250f, 
+  -0.180022901405699510f, 
+  0.983105487431216290f, -0.183039887955140950f, 0.982539302287441240f, 
+  -0.186055151663446630f, 
+  0.981963869109555240f, -0.189068664149806190f, 0.981379193313754560f, 
+  -0.192080397049892440f, 
+  0.980785280403230430f, -0.195090322016128250f, 0.980182135968117430f, 
+  -0.198098410717953560f, 
+  0.979569765685440520f, -0.201104634842091900f, 0.978948175319062200f, 
+  -0.204108966092816870f, 
+  0.978317370719627650f, -0.207111376192218560f, 0.977677357824509930f, 
+  -0.210111836880469610f, 
+  0.977028142657754390f, -0.213110319916091360f, 0.976369731330021140f, 
+  -0.216106797076219520f, 
+  0.975702130038528570f, -0.219101240156869800f, 0.975025345066994120f, 
+  -0.222093620973203510f, 
+  0.974339382785575860f, -0.225083911359792830f, 0.973644249650811980f, 
+  -0.228072083170885730f, 
+  0.972939952205560180f, -0.231058108280671110f, 0.972226497078936270f, 
+  -0.234041958583543430f, 
+  0.971503890986251780f, -0.237023605994367200f, 0.970772140728950350f, 
+  -0.240003022448741500f, 
+  0.970031253194543970f, -0.242980179903263870f, 0.969281235356548530f, 
+  -0.245955050335794590f, 
+  0.968522094274417380f, -0.248927605745720150f, 0.967753837093475510f, 
+  -0.251897818154216970f, 
+  0.966976471044852070f, -0.254865659604514570f, 0.966190003445412500f, 
+  -0.257831102162158990f, 
+  0.965394441697689400f, -0.260794117915275510f, 0.964589793289812760f, 
+  -0.263754678974831350f, 
+  0.963776065795439840f, -0.266712757474898370f, 0.962953266873683880f, 
+  -0.269668325572915090f, 
+  0.962121404269041580f, -0.272621355449948980f, 0.961280485811320640f, 
+  -0.275571819310958140f, 
+  0.960430519415565790f, -0.278519689385053060f, 0.959571513081984520f, 
+  -0.281464937925757940f, 
+  0.958703474895871600f, -0.284407537211271880f, 0.957826413027532910f, 
+  -0.287347459544729510f, 
+  0.956940335732208820f, -0.290284677254462330f, 0.956045251349996410f, 
+  -0.293219162694258630f, 
+  0.955141168305770780f, -0.296150888243623790f, 0.954228095109105670f, 
+  -0.299079826308040480f, 
+  0.953306040354193860f, -0.302005949319228080f, 0.952375012719765880f, 
+  -0.304929229735402370f, 
+  0.951435020969008340f, -0.307849640041534870f, 0.950486073949481700f, 
+  -0.310767152749611470f, 
+  0.949528180593036670f, -0.313681740398891520f, 0.948561349915730270f, 
+  -0.316593375556165850f, 
+  0.947585591017741090f, -0.319502030816015690f, 0.946600913083283530f, 
+  -0.322407678801069850f, 
+  0.945607325380521280f, -0.325310292162262930f, 0.944604837261480260f, 
+  -0.328209843579092500f, 
+  0.943593458161960390f, -0.331106305759876430f, 0.942573197601446870f, 
+  -0.333999651442009380f, 
+  0.941544065183020810f, -0.336889853392220050f, 0.940506070593268300f, 
+  -0.339776884406826850f, 
+  0.939459223602189920f, -0.342660717311994380f, 0.938403534063108060f, 
+  -0.345541324963989090f, 
+  0.937339011912574960f, -0.348418680249434560f, 0.936265667170278260f, 
+  -0.351292756085567090f, 
+  0.935183509938947610f, -0.354163525420490340f, 0.934092550404258980f, 
+  -0.357030961233429980f, 
+  0.932992798834738960f, -0.359895036534988110f, 0.931884265581668150f, 
+  -0.362755724367397230f, 
+  0.930766961078983710f, -0.365612997804773850f, 0.929640895843181330f, 
+  -0.368466829953372320f, 
+  0.928506080473215590f, -0.371317193951837540f, 0.927362525650401110f, 
+  -0.374164062971457930f, 
+  0.926210242138311380f, -0.377007410216418260f, 0.925049240782677580f, 
+  -0.379847208924051160f, 
+  0.923879532511286740f, -0.382683432365089780f, 0.922701128333878630f, 
+  -0.385516053843918850f, 
+  0.921514039342042010f, -0.388345046698826250f, 0.920318276709110590f, 
+  -0.391170384302253870f, 
+  0.919113851690057770f, -0.393992040061048100f, 0.917900775621390500f, 
+  -0.396809987416710310f, 
+  0.916679059921042700f, -0.399624199845646790f, 0.915448716088267830f, 
+  -0.402434650859418430f, 
+  0.914209755703530690f, -0.405241314004989860f, 0.912962190428398210f, 
+  -0.408044162864978690f, 
+  0.911706032005429880f, -0.410843171057903910f, 0.910441292258067250f, 
+  -0.413638312238434500f, 
+  0.909167983090522380f, -0.416429560097637150f, 0.907886116487666260f, 
+  -0.419216888363223910f, 
+  0.906595704514915330f, -0.422000270799799680f, 0.905296759318118820f, 
+  -0.424779681209108810f, 
+  0.903989293123443340f, -0.427555093430282080f, 0.902673318237258830f, 
+  -0.430326481340082610f, 
+  0.901348847046022030f, -0.433093818853151960f, 0.900015892016160280f, 
+  -0.435857079922255470f, 
+  0.898674465693953820f, -0.438616238538527660f, 0.897324580705418320f, 
+  -0.441371268731716670f, 
+  0.895966249756185220f, -0.444122144570429200f, 0.894599485631382700f, 
+  -0.446868840162374160f, 
+  0.893224301195515320f, -0.449611329654606540f, 0.891840709392342720f, 
+  -0.452349587233770890f, 
+  0.890448723244757880f, -0.455083587126343840f, 0.889048355854664570f, 
+  -0.457813303598877170f, 
+  0.887639620402853930f, -0.460538710958240010f, 0.886222530148880640f, 
+  -0.463259783551860150f, 
+  0.884797098430937790f, -0.465976495767966180f, 0.883363338665731580f, 
+  -0.468688822035827900f, 
+  0.881921264348355050f, -0.471396736825997640f, 0.880470889052160750f, 
+  -0.474100214650549970f, 
+  0.879012226428633530f, -0.476799230063322090f, 0.877545290207261350f, 
+  -0.479493757660153010f, 
+  0.876070094195406600f, -0.482183772079122720f, 0.874586652278176110f, 
+  -0.484869248000791060f, 
+  0.873094978418290090f, -0.487550160148436000f, 0.871595086655950980f, 
+  -0.490226483288291160f, 
+  0.870086991108711460f, -0.492898192229784040f, 0.868570705971340900f, 
+  -0.495565261825772540f, 
+  0.867046245515692650f, -0.498227666972781870f, 0.865513624090569090f, 
+  -0.500885382611240710f, 
+  0.863972856121586810f, -0.503538383725717580f, 0.862423956111040610f, 
+  -0.506186645345155230f, 
+  0.860866938637767310f, -0.508830142543106990f, 0.859301818357008470f, 
+  -0.511468850437970300f, 
+  0.857728610000272120f, -0.514102744193221660f, 0.856147328375194470f, 
+  -0.516731799017649870f, 
+  0.854557988365400530f, -0.519355990165589640f, 0.852960604930363630f, 
+  -0.521975292937154390f, 
+  0.851355193105265200f, -0.524589682678468950f, 0.849741768000852550f, 
+  -0.527199134781901280f, 
+  0.848120344803297230f, -0.529803624686294610f, 0.846490938774052130f, 
+  -0.532403127877197900f, 
+  0.844853565249707120f, -0.534997619887097150f, 0.843208239641845440f, 
+  -0.537587076295645390f, 
+  0.841554977436898440f, -0.540171472729892850f, 0.839893794195999520f, 
+  -0.542750784864515890f, 
+  0.838224705554838080f, -0.545324988422046460f, 0.836547727223512010f, 
+  -0.547894059173100190f, 
+  0.834862874986380010f, -0.550457972936604810f, 0.833170164701913190f, 
+  -0.553016705580027470f, 
+  0.831469612302545240f, -0.555570233019602180f, 0.829761233794523050f, 
+  -0.558118531220556100f, 
+  0.828045045257755800f, -0.560661576197336030f, 0.826321062845663530f, 
+  -0.563199344013834090f, 
+  0.824589302785025290f, -0.565731810783613120f, 0.822849781375826430f, 
+  -0.568258952670131490f, 
+  0.821102514991104650f, -0.570780745886967260f, 0.819347520076796900f, 
+  -0.573297166698042200f, 
+  0.817584813151583710f, -0.575808191417845340f, 0.815814410806733780f, 
+  -0.578313796411655590f, 
+  0.814036329705948410f, -0.580813958095764530f, 0.812250586585203880f, 
+  -0.583308652937698290f, 
+  0.810457198252594770f, -0.585797857456438860f, 0.808656181588174980f, 
+  -0.588281548222645220f, 
+  0.806847553543799330f, -0.590759701858874160f, 0.805031331142963660f, 
+  -0.593232295039799800f, 
+  0.803207531480644940f, -0.595699304492433360f, 0.801376171723140240f, 
+  -0.598160706996342270f, 
+  0.799537269107905010f, -0.600616479383868970f, 0.797690840943391160f, 
+  -0.603066598540348160f, 
+  0.795836904608883570f, -0.605511041404325550f, 0.793975477554337170f, 
+  -0.607949784967773630f, 
+  0.792106577300212390f, -0.610382806276309480f, 0.790230221437310030f, 
+  -0.612810082429409710f, 
+  0.788346427626606340f, -0.615231590580626820f, 0.786455213599085770f, 
+  -0.617647307937803870f, 
+  0.784556597155575240f, -0.620057211763289100f, 0.782650596166575730f, 
+  -0.622461279374149970f, 
+  0.780737228572094490f, -0.624859488142386340f, 0.778816512381475980f, 
+  -0.627251815495144080f, 
+  0.776888465673232440f, -0.629638238914926980f, 0.774953106594873930f, 
+  -0.632018735939809060f, 
+  0.773010453362736990f, -0.634393284163645490f, 0.771060524261813820f, 
+  -0.636761861236284200f, 
+  0.769103337645579700f, -0.639124444863775730f, 0.767138911935820400f, 
+  -0.641481012808583160f, 
+  0.765167265622458960f, -0.643831542889791390f, 0.763188417263381270f, 
+  -0.646176012983316280f, 
+  0.761202385484261780f, -0.648514401022112440f, 0.759209188978388070f, 
+  -0.650846684996380880f, 
+  0.757208846506484570f, -0.653172842953776760f, 0.755201376896536550f, 
+  -0.655492852999615350f, 
+  0.753186799043612520f, -0.657806693297078640f, 0.751165131909686480f, 
+  -0.660114342067420480f, 
+  0.749136394523459370f, -0.662415777590171780f, 0.747100605980180130f, 
+  -0.664710978203344790f, 
+  0.745057785441466060f, -0.666999922303637470f, 0.743007952135121720f, 
+  -0.669282588346636010f, 
+  0.740951125354959110f, -0.671558954847018330f, 0.738887324460615110f, 
+  -0.673829000378756040f, 
+  0.736816568877369900f, -0.676092703575315920f, 0.734738878095963500f, 
+  -0.678350043129861470f, 
+  0.732654271672412820f, -0.680600997795453020f, 0.730562769227827590f, 
+  -0.682845546385248080f, 
+  0.728464390448225200f, -0.685083667772700360f, 0.726359155084346010f, 
+  -0.687315340891759050f, 
+  0.724247082951467000f, -0.689540544737066830f, 0.722128193929215350f, 
+  -0.691759258364157750f, 
+  0.720002507961381650f, -0.693971460889654000f, 0.717870045055731710f, 
+  -0.696177131491462990f, 
+  0.715730825283818590f, -0.698376249408972920f, 0.713584868780793640f, 
+  -0.700568793943248340f, 
+  0.711432195745216430f, -0.702754744457225300f, 0.709272826438865690f, 
+  -0.704934080375904880f, 
+  0.707106781186547570f, -0.707106781186547460f, 0.704934080375904990f, 
+  -0.709272826438865580f, 
+  0.702754744457225300f, -0.711432195745216430f, 0.700568793943248450f, 
+  -0.713584868780793520f, 
+  0.698376249408972920f, -0.715730825283818590f, 0.696177131491462990f, 
+  -0.717870045055731710f, 
+  0.693971460889654000f, -0.720002507961381650f, 0.691759258364157750f, 
+  -0.722128193929215350f, 
+  0.689540544737066940f, -0.724247082951466890f, 0.687315340891759160f, 
+  -0.726359155084346010f, 
+  0.685083667772700360f, -0.728464390448225200f, 0.682845546385248080f, 
+  -0.730562769227827590f, 
+  0.680600997795453130f, -0.732654271672412820f, 0.678350043129861580f, 
+  -0.734738878095963390f, 
+  0.676092703575316030f, -0.736816568877369790f, 0.673829000378756150f, 
+  -0.738887324460615110f, 
+  0.671558954847018330f, -0.740951125354959110f, 0.669282588346636010f, 
+  -0.743007952135121720f, 
+  0.666999922303637470f, -0.745057785441465950f, 0.664710978203344900f, 
+  -0.747100605980180130f, 
+  0.662415777590171780f, -0.749136394523459260f, 0.660114342067420480f, 
+  -0.751165131909686370f, 
+  0.657806693297078640f, -0.753186799043612410f, 0.655492852999615460f, 
+  -0.755201376896536550f, 
+  0.653172842953776760f, -0.757208846506484460f, 0.650846684996380990f, 
+  -0.759209188978387960f, 
+  0.648514401022112550f, -0.761202385484261780f, 0.646176012983316390f, 
+  -0.763188417263381270f, 
+  0.643831542889791500f, -0.765167265622458960f, 0.641481012808583160f, 
+  -0.767138911935820400f, 
+  0.639124444863775730f, -0.769103337645579590f, 0.636761861236284200f, 
+  -0.771060524261813710f, 
+  0.634393284163645490f, -0.773010453362736990f, 0.632018735939809060f, 
+  -0.774953106594873820f, 
+  0.629638238914927100f, -0.776888465673232440f, 0.627251815495144190f, 
+  -0.778816512381475870f, 
+  0.624859488142386450f, -0.780737228572094380f, 0.622461279374150080f, 
+  -0.782650596166575730f, 
+  0.620057211763289210f, -0.784556597155575240f, 0.617647307937803980f, 
+  -0.786455213599085770f, 
+  0.615231590580626820f, -0.788346427626606230f, 0.612810082429409710f, 
+  -0.790230221437310030f, 
+  0.610382806276309480f, -0.792106577300212390f, 0.607949784967773740f, 
+  -0.793975477554337170f, 
+  0.605511041404325550f, -0.795836904608883460f, 0.603066598540348280f, 
+  -0.797690840943391040f, 
+  0.600616479383868970f, -0.799537269107905010f, 0.598160706996342380f, 
+  -0.801376171723140130f, 
+  0.595699304492433470f, -0.803207531480644830f, 0.593232295039799800f, 
+  -0.805031331142963660f, 
+  0.590759701858874280f, -0.806847553543799220f, 0.588281548222645330f, 
+  -0.808656181588174980f, 
+  0.585797857456438860f, -0.810457198252594770f, 0.583308652937698290f, 
+  -0.812250586585203880f, 
+  0.580813958095764530f, -0.814036329705948300f, 0.578313796411655590f, 
+  -0.815814410806733780f, 
+  0.575808191417845340f, -0.817584813151583710f, 0.573297166698042320f, 
+  -0.819347520076796900f, 
+  0.570780745886967370f, -0.821102514991104650f, 0.568258952670131490f, 
+  -0.822849781375826320f, 
+  0.565731810783613230f, -0.824589302785025290f, 0.563199344013834090f, 
+  -0.826321062845663420f, 
+  0.560661576197336030f, -0.828045045257755800f, 0.558118531220556100f, 
+  -0.829761233794523050f, 
+  0.555570233019602290f, -0.831469612302545240f, 0.553016705580027580f, 
+  -0.833170164701913190f, 
+  0.550457972936604810f, -0.834862874986380010f, 0.547894059173100190f, 
+  -0.836547727223511890f, 
+  0.545324988422046460f, -0.838224705554837970f, 0.542750784864516000f, 
+  -0.839893794195999410f, 
+  0.540171472729892970f, -0.841554977436898330f, 0.537587076295645510f, 
+  -0.843208239641845440f, 
+  0.534997619887097260f, -0.844853565249707010f, 0.532403127877198010f, 
+  -0.846490938774052020f, 
+  0.529803624686294830f, -0.848120344803297120f, 0.527199134781901390f, 
+  -0.849741768000852440f, 
+  0.524589682678468840f, -0.851355193105265200f, 0.521975292937154390f, 
+  -0.852960604930363630f, 
+  0.519355990165589530f, -0.854557988365400530f, 0.516731799017649980f, 
+  -0.856147328375194470f, 
+  0.514102744193221660f, -0.857728610000272120f, 0.511468850437970520f, 
+  -0.859301818357008360f, 
+  0.508830142543106990f, -0.860866938637767310f, 0.506186645345155450f, 
+  -0.862423956111040500f, 
+  0.503538383725717580f, -0.863972856121586700f, 0.500885382611240940f, 
+  -0.865513624090568980f, 
+  0.498227666972781870f, -0.867046245515692650f, 0.495565261825772490f, 
+  -0.868570705971340900f, 
+  0.492898192229784090f, -0.870086991108711350f, 0.490226483288291100f, 
+  -0.871595086655951090f, 
+  0.487550160148436050f, -0.873094978418290090f, 0.484869248000791120f, 
+  -0.874586652278176110f, 
+  0.482183772079122830f, -0.876070094195406600f, 0.479493757660153010f, 
+  -0.877545290207261240f, 
+  0.476799230063322250f, -0.879012226428633410f, 0.474100214650550020f, 
+  -0.880470889052160750f, 
+  0.471396736825997810f, -0.881921264348354940f, 0.468688822035827960f, 
+  -0.883363338665731580f, 
+  0.465976495767966130f, -0.884797098430937790f, 0.463259783551860260f, 
+  -0.886222530148880640f, 
+  0.460538710958240010f, -0.887639620402853930f, 0.457813303598877290f, 
+  -0.889048355854664570f, 
+  0.455083587126343840f, -0.890448723244757880f, 0.452349587233771000f, 
+  -0.891840709392342720f, 
+  0.449611329654606600f, -0.893224301195515320f, 0.446868840162374330f, 
+  -0.894599485631382580f, 
+  0.444122144570429260f, -0.895966249756185110f, 0.441371268731716620f, 
+  -0.897324580705418320f, 
+  0.438616238538527710f, -0.898674465693953820f, 0.435857079922255470f, 
+  -0.900015892016160280f, 
+  0.433093818853152010f, -0.901348847046022030f, 0.430326481340082610f, 
+  -0.902673318237258830f, 
+  0.427555093430282200f, -0.903989293123443340f, 0.424779681209108810f, 
+  -0.905296759318118820f, 
+  0.422000270799799790f, -0.906595704514915330f, 0.419216888363223960f, 
+  -0.907886116487666150f, 
+  0.416429560097637320f, -0.909167983090522270f, 0.413638312238434560f, 
+  -0.910441292258067140f, 
+  0.410843171057903910f, -0.911706032005429880f, 0.408044162864978740f, 
+  -0.912962190428398100f, 
+  0.405241314004989860f, -0.914209755703530690f, 0.402434650859418540f, 
+  -0.915448716088267830f, 
+  0.399624199845646790f, -0.916679059921042700f, 0.396809987416710420f, 
+  -0.917900775621390390f, 
+  0.393992040061048100f, -0.919113851690057770f, 0.391170384302253980f, 
+  -0.920318276709110480f, 
+  0.388345046698826300f, -0.921514039342041900f, 0.385516053843919020f, 
+  -0.922701128333878520f, 
+  0.382683432365089840f, -0.923879532511286740f, 0.379847208924051110f, 
+  -0.925049240782677580f, 
+  0.377007410216418310f, -0.926210242138311270f, 0.374164062971457990f, 
+  -0.927362525650401110f, 
+  0.371317193951837600f, -0.928506080473215480f, 0.368466829953372320f, 
+  -0.929640895843181330f, 
+  0.365612997804773960f, -0.930766961078983710f, 0.362755724367397230f, 
+  -0.931884265581668150f, 
+  0.359895036534988280f, -0.932992798834738850f, 0.357030961233430030f, 
+  -0.934092550404258870f, 
+  0.354163525420490510f, -0.935183509938947500f, 0.351292756085567150f, 
+  -0.936265667170278260f, 
+  0.348418680249434510f, -0.937339011912574960f, 0.345541324963989150f, 
+  -0.938403534063108060f, 
+  0.342660717311994380f, -0.939459223602189920f, 0.339776884406826960f, 
+  -0.940506070593268300f, 
+  0.336889853392220050f, -0.941544065183020810f, 0.333999651442009490f, 
+  -0.942573197601446870f, 
+  0.331106305759876430f, -0.943593458161960390f, 0.328209843579092660f, 
+  -0.944604837261480260f, 
+  0.325310292162262980f, -0.945607325380521280f, 0.322407678801070020f, 
+  -0.946600913083283530f, 
+  0.319502030816015750f, -0.947585591017741090f, 0.316593375556165850f, 
+  -0.948561349915730270f, 
+  0.313681740398891570f, -0.949528180593036670f, 0.310767152749611470f, 
+  -0.950486073949481700f, 
+  0.307849640041534980f, -0.951435020969008340f, 0.304929229735402430f, 
+  -0.952375012719765880f, 
+  0.302005949319228200f, -0.953306040354193750f, 0.299079826308040480f, 
+  -0.954228095109105670f, 
+  0.296150888243623960f, -0.955141168305770670f, 0.293219162694258680f, 
+  -0.956045251349996410f, 
+  0.290284677254462330f, -0.956940335732208940f, 0.287347459544729570f, 
+  -0.957826413027532910f, 
+  0.284407537211271820f, -0.958703474895871600f, 0.281464937925758050f, 
+  -0.959571513081984520f, 
+  0.278519689385053060f, -0.960430519415565790f, 0.275571819310958250f, 
+  -0.961280485811320640f, 
+  0.272621355449948980f, -0.962121404269041580f, 0.269668325572915200f, 
+  -0.962953266873683880f, 
+  0.266712757474898420f, -0.963776065795439840f, 0.263754678974831510f, 
+  -0.964589793289812650f, 
+  0.260794117915275570f, -0.965394441697689400f, 0.257831102162158930f, 
+  -0.966190003445412620f, 
+  0.254865659604514630f, -0.966976471044852070f, 0.251897818154216910f, 
+  -0.967753837093475510f, 
+  0.248927605745720260f, -0.968522094274417270f, 0.245955050335794590f, 
+  -0.969281235356548530f, 
+  0.242980179903263980f, -0.970031253194543970f, 0.240003022448741500f, 
+  -0.970772140728950350f, 
+  0.237023605994367340f, -0.971503890986251780f, 0.234041958583543460f, 
+  -0.972226497078936270f, 
+  0.231058108280671280f, -0.972939952205560070f, 0.228072083170885790f, 
+  -0.973644249650811870f, 
+  0.225083911359792780f, -0.974339382785575860f, 0.222093620973203590f, 
+  -0.975025345066994120f, 
+  0.219101240156869770f, -0.975702130038528570f, 0.216106797076219600f, 
+  -0.976369731330021140f, 
+  0.213110319916091360f, -0.977028142657754390f, 0.210111836880469720f, 
+  -0.977677357824509930f, 
+  0.207111376192218560f, -0.978317370719627650f, 0.204108966092817010f, 
+  -0.978948175319062200f, 
+  0.201104634842091960f, -0.979569765685440520f, 0.198098410717953730f, 
+  -0.980182135968117320f, 
+  0.195090322016128330f, -0.980785280403230430f, 0.192080397049892380f, 
+  -0.981379193313754560f, 
+  0.189068664149806280f, -0.981963869109555240f, 0.186055151663446630f, 
+  -0.982539302287441240f, 
+  0.183039887955141060f, -0.983105487431216290f, 0.180022901405699510f, 
+  -0.983662419211730250f, 
+  0.177004220412148860f, -0.984210092386929030f, 0.173983873387463850f, 
+  -0.984748501801904210f, 
+  0.170961888760301360f, -0.985277642388941220f, 0.167938294974731230f, 
+  -0.985797509167567370f, 
+  0.164913120489970090f, -0.986308097244598670f, 0.161886393780111910f, 
+  -0.986809401814185420f, 
+  0.158858143333861390f, -0.987301418157858430f, 0.155828397654265320f, 
+  -0.987784141644572180f, 
+  0.152797185258443410f, -0.988257567730749460f, 0.149764534677321620f, 
+  -0.988721691960323780f, 
+  0.146730474455361750f, -0.989176509964781010f, 0.143695033150294580f, 
+  -0.989622017463200780f, 
+  0.140658239332849240f, -0.990058210262297120f, 0.137620121586486180f, 
+  -0.990485084256456980f, 
+  0.134580708507126220f, -0.990902635427780010f, 0.131540028702883280f, 
+  -0.991310859846115440f, 
+  0.128498110793793220f, -0.991709753669099530f, 0.125454983411546210f, 
+  -0.992099313142191800f, 
+  0.122410675199216280f, -0.992479534598709970f, 0.119365214810991350f, 
+  -0.992850414459865100f, 
+  0.116318630911904880f, -0.993211949234794500f, 0.113270952177564360f, 
+  -0.993564135520595300f, 
+  0.110222207293883180f, -0.993906970002356060f, 0.107172424956808870f, 
+  -0.994240449453187900f, 
+  0.104121633872054730f, -0.994564570734255420f, 0.101069862754827880f, 
+  -0.994879330794805620f, 
+  0.098017140329560770f, -0.995184726672196820f, 0.094963495329639061f, 
+  -0.995480755491926940f, 
+  0.091908956497132696f, -0.995767414467659820f, 0.088853552582524684f, 
+  -0.996044700901251970f, 
+  0.085797312344439880f, -0.996312612182778000f, 0.082740264549375803f, 
+  -0.996571145790554840f, 
+  0.079682437971430126f, -0.996820299291165670f, 0.076623861392031617f, 
+  -0.997060070339482960f, 
+  0.073564563599667454f, -0.997290456678690210f, 0.070504573389614009f, 
+  -0.997511456140303450f, 
+  0.067443919563664106f, -0.997723066644191640f, 0.064382630929857410f, 
+  -0.997925286198596000f, 
+  0.061320736302208648f, -0.998118112900149180f, 0.058258264500435732f, 
+  -0.998301544933892890f, 
+  0.055195244349690031f, -0.998475580573294770f, 0.052131704680283317f, 
+  -0.998640218180265270f, 
+  0.049067674327418126f, -0.998795456205172410f, 0.046003182130914644f, 
+  -0.998941293186856870f, 
+  0.042938256934940959f, -0.999077727752645360f, 0.039872927587739845f, 
+  -0.999204758618363890f, 
+  0.036807222941358991f, -0.999322384588349540f, 0.033741171851377642f, 
+  -0.999430604555461730f, 
+  0.030674803176636581f, -0.999529417501093140f, 0.027608145778965820f, 
+  -0.999618822495178640f, 
+  0.024541228522912264f, -0.999698818696204250f, 0.021474080275469605f, 
+  -0.999769405351215280f, 
+  0.018406729905804820f, -0.999830581795823400f, 0.015339206284988220f, 
+  -0.999882347454212560f, 
+  0.012271538285719944f, -0.999924701839144500f, 0.009203754782059960f, 
+  -0.999957644551963900f, 
+  0.006135884649154515f, -0.999981175282601110f, 0.003067956762966138f, 
+  -0.999995293809576190f 
+}; 
+ 
+static const float32_t Weights_2048[4096] = { 
+  1.000000000000000000f, 0.000000000000000000f, 0.999999705862882230f, 
+  -0.000766990318742704f, 
+  0.999998823451701880f, -0.001533980186284766f, 0.999997352766978210f, 
+  -0.002300969151425805f, 
+  0.999995293809576190f, -0.003067956762965976f, 0.999992646580707190f, 
+  -0.003834942569706228f, 
+  0.999989411081928400f, -0.004601926120448571f, 0.999985587315143200f, 
+  -0.005368906963996343f, 
+  0.999981175282601110f, -0.006135884649154475f, 0.999976174986897610f, 
+  -0.006902858724729756f, 
+  0.999970586430974140f, -0.007669828739531097f, 0.999964409618118280f, 
+  -0.008436794242369799f, 
+  0.999957644551963900f, -0.009203754782059819f, 0.999950291236490480f, 
+  -0.009970709907418031f, 
+  0.999942349676023910f, -0.010737659167264491f, 0.999933819875236000f, 
+  -0.011504602110422714f, 
+  0.999924701839144500f, -0.012271538285719925f, 0.999914995573113470f, 
+  -0.013038467241987334f, 
+  0.999904701082852900f, -0.013805388528060391f, 0.999893818374418490f, 
+  -0.014572301692779064f, 
+  0.999882347454212560f, -0.015339206284988100f, 0.999870288328982950f, 
+  -0.016106101853537287f, 
+  0.999857641005823860f, -0.016872987947281710f, 0.999844405492175240f, 
+  -0.017639864115082053f, 
+  0.999830581795823400f, -0.018406729905804820f, 0.999816169924900410f, 
+  -0.019173584868322623f, 
+  0.999801169887884260f, -0.019940428551514441f, 0.999785581693599210f, 
+  -0.020707260504265895f, 
+  0.999769405351215280f, -0.021474080275469508f, 0.999752640870248840f, 
+  -0.022240887414024961f, 
+  0.999735288260561680f, -0.023007681468839369f, 0.999717347532362190f, 
+  -0.023774461988827555f, 
+  0.999698818696204250f, -0.024541228522912288f, 0.999679701762987930f, 
+  -0.025307980620024571f, 
+  0.999659996743959220f, -0.026074717829103901f, 0.999639703650710200f, 
+  -0.026841439699098531f, 
+  0.999618822495178640f, -0.027608145778965740f, 0.999597353289648380f, 
+  -0.028374835617672099f, 
+  0.999575296046749220f, -0.029141508764193722f, 0.999552650779456990f, 
+  -0.029908164767516555f, 
+  0.999529417501093140f, -0.030674803176636626f, 0.999505596225325310f, 
+  -0.031441423540560301f, 
+  0.999481186966166950f, -0.032208025408304586f, 0.999456189737977340f, 
+  -0.032974608328897335f, 
+  0.999430604555461730f, -0.033741171851377580f, 0.999404431433671300f, 
+  -0.034507715524795750f, 
+  0.999377670388002850f, -0.035274238898213947f, 0.999350321434199440f, 
+  -0.036040741520706229f, 
+  0.999322384588349540f, -0.036807222941358832f, 0.999293859866887790f, 
+  -0.037573682709270494f, 
+  0.999264747286594420f, -0.038340120373552694f, 0.999235046864595850f, 
+  -0.039106535483329888f, 
+  0.999204758618363890f, -0.039872927587739811f, 0.999173882565716380f, 
+  -0.040639296235933736f, 
+  0.999142418724816910f, -0.041405640977076739f, 0.999110367114174890f, 
+  -0.042171961360347947f, 
+  0.999077727752645360f, -0.042938256934940820f, 0.999044500659429290f, 
+  -0.043704527250063421f, 
+  0.999010685854073380f, -0.044470771854938668f, 0.998976283356469820f, 
+  -0.045236990298804590f, 
+  0.998941293186856870f, -0.046003182130914623f, 0.998905715365818290f, 
+  -0.046769346900537863f, 
+  0.998869549914283560f, -0.047535484156959303f, 0.998832796853527990f, 
+  -0.048301593449480144f, 
+  0.998795456205172410f, -0.049067674327418015f, 0.998757527991183340f, 
+  -0.049833726340107277f, 
+  0.998719012233872940f, -0.050599749036899282f, 0.998679908955899090f, 
+  -0.051365741967162593f, 
+  0.998640218180265270f, -0.052131704680283324f, 0.998599939930320370f, 
+  -0.052897636725665324f, 
+  0.998559074229759310f, -0.053663537652730520f, 0.998517621102622210f, 
+  -0.054429407010919133f, 
+  0.998475580573294770f, -0.055195244349689934f, 0.998432952666508440f, 
+  -0.055961049218520569f, 
+  0.998389737407340160f, -0.056726821166907748f, 0.998345934821212370f, 
+  -0.057492559744367566f, 
+  0.998301544933892890f, -0.058258264500435752f, 0.998256567771495180f, 
+  -0.059023934984667931f, 
+  0.998211003360478190f, -0.059789570746639868f, 0.998164851727646240f, 
+  -0.060555171335947788f, 
+  0.998118112900149180f, -0.061320736302208578f, 0.998070786905482340f, 
+  -0.062086265195060088f, 
+  0.998022873771486240f, -0.062851757564161406f, 0.997974373526346990f, 
+  -0.063617212959193106f, 
+  0.997925286198596000f, -0.064382630929857465f, 0.997875611817110150f, 
+  -0.065148011025878833f, 
+  0.997825350411111640f, -0.065913352797003805f, 0.997774502010167820f, 
+  -0.066678655793001557f, 
+  0.997723066644191640f, -0.067443919563664051f, 0.997671044343441000f, 
+  -0.068209143658806329f, 
+  0.997618435138519550f, -0.068974327628266746f, 0.997565239060375750f, 
+  -0.069739471021907307f, 
+  0.997511456140303450f, -0.070504573389613856f, 0.997457086409941910f, 
+  -0.071269634281296401f, 
+  0.997402129901275300f, -0.072034653246889332f, 0.997346586646633230f, 
+  -0.072799629836351673f, 
+  0.997290456678690210f, -0.073564563599667426f, 0.997233740030466280f, 
+  -0.074329454086845756f, 
+  0.997176436735326190f, -0.075094300847921305f, 0.997118546826979980f, 
+  -0.075859103432954447f, 
+  0.997060070339482960f, -0.076623861392031492f, 0.997001007307235290f, 
+  -0.077388574275265049f, 
+  0.996941357764982160f, -0.078153241632794232f, 0.996881121747813850f, 
+  -0.078917863014784942f, 
+  0.996820299291165670f, -0.079682437971430126f, 0.996758890430818000f, 
+  -0.080446966052950014f, 
+  0.996696895202896060f, -0.081211446809592441f, 0.996634313643869900f, 
+  -0.081975879791633066f, 
+  0.996571145790554840f, -0.082740264549375692f, 0.996507391680110820f, 
+  -0.083504600633152432f, 
+  0.996443051350042630f, -0.084268887593324071f, 0.996378124838200210f, 
+  -0.085033124980280275f, 
+  0.996312612182778000f, -0.085797312344439894f, 0.996246513422315520f, 
+  -0.086561449236251170f, 
+  0.996179828595696980f, -0.087325535206192059f, 0.996112557742151130f, 
+  -0.088089569804770507f, 
+  0.996044700901251970f, -0.088853552582524600f, 0.995976258112917790f, 
+  -0.089617483090022959f, 
+  0.995907229417411720f, -0.090381360877864983f, 0.995837614855341610f, 
+  -0.091145185496681005f, 
+  0.995767414467659820f, -0.091908956497132724f, 0.995696628295663520f, 
+  -0.092672673429913310f, 
+  0.995625256380994310f, -0.093436335845747787f, 0.995553298765638470f, 
+  -0.094199943295393204f, 
+  0.995480755491926940f, -0.094963495329638992f, 0.995407626602534900f, 
+  -0.095726991499307162f, 
+  0.995333912140482280f, -0.096490431355252593f, 0.995259612149133390f, 
+  -0.097253814448363271f, 
+  0.995184726672196930f, -0.098017140329560604f, 0.995109255753726110f, 
+  -0.098780408549799623f, 
+  0.995033199438118630f, -0.099543618660069319f, 0.994956557770116380f, 
+  -0.100306770211392860f, 
+  0.994879330794805620f, -0.101069862754827820f, 0.994801518557617110f, 
+  -0.101832895841466530f, 
+  0.994723121104325700f, -0.102595869022436280f, 0.994644138481050710f, 
+  -0.103358781848899610f, 
+  0.994564570734255420f, -0.104121633872054590f, 0.994484417910747600f, 
+  -0.104884424643134970f, 
+  0.994403680057679100f, -0.105647153713410620f, 0.994322357222545810f, 
+  -0.106409820634187680f, 
+  0.994240449453187900f, -0.107172424956808840f, 0.994157956797789730f, 
+  -0.107934966232653650f, 
+  0.994074879304879370f, -0.108697444013138720f, 0.993991217023329380f, 
+  -0.109459857849717980f, 
+  0.993906970002356060f, -0.110222207293883060f, 0.993822138291519660f, 
+  -0.110984491897163390f, 
+  0.993736721940724600f, -0.111746711211126590f, 0.993650721000219120f, 
+  -0.112508864787378690f, 
+  0.993564135520595300f, -0.113270952177564350f, 0.993476965552789190f, 
+  -0.114032972933367200f, 
+  0.993389211148080650f, -0.114794926606510080f, 0.993300872358093280f, 
+  -0.115556812748755260f, 
+  0.993211949234794500f, -0.116318630911904750f, 0.993122441830495580f, 
+  -0.117080380647800590f, 
+  0.993032350197851410f, -0.117842061508324980f, 0.992941674389860470f, 
+  -0.118603673045400720f, 
+  0.992850414459865100f, -0.119365214810991350f, 0.992758570461551140f, 
+  -0.120126686357101500f, 
+  0.992666142448948020f, -0.120888087235777080f, 0.992573130476428810f, 
+  -0.121649416999105530f, 
+  0.992479534598709970f, -0.122410675199216200f, 0.992385354870851670f, 
+  -0.123171861388280480f, 
+  0.992290591348257370f, -0.123932975118512160f, 0.992195244086673920f, 
+  -0.124694015942167640f, 
+  0.992099313142191800f, -0.125454983411546230f, 0.992002798571244520f, 
+  -0.126215877078990350f, 
+  0.991905700430609330f, -0.126976696496885870f, 0.991808018777406430f, 
+  -0.127737441217662310f, 
+  0.991709753669099530f, -0.128498110793793170f, 0.991610905163495370f, 
+  -0.129258704777796140f, 
+  0.991511473318743900f, -0.130019222722233350f, 0.991411458193338540f, 
+  -0.130779664179711710f, 
+  0.991310859846115440f, -0.131540028702883120f, 0.991209678336254060f, 
+  -0.132300315844444650f, 
+  0.991107913723276890f, -0.133060525157139060f, 0.991005566067049370f, 
+  -0.133820656193754720f, 
+  0.990902635427780010f, -0.134580708507126170f, 0.990799121866020370f, 
+  -0.135340681650134210f, 
+  0.990695025442664630f, -0.136100575175706200f, 0.990590346218950150f, 
+  -0.136860388636816380f, 
+  0.990485084256457090f, -0.137620121586486040f, 0.990379239617108160f, 
+  -0.138379773577783890f, 
+  0.990272812363169110f, -0.139139344163826200f, 0.990165802557248400f, 
+  -0.139898832897777210f, 
+  0.990058210262297120f, -0.140658239332849210f, 0.989950035541608990f, 
+  -0.141417563022303020f, 
+  0.989841278458820530f, -0.142176803519448030f, 0.989731939077910570f, 
+  -0.142935960377642670f, 
+  0.989622017463200890f, -0.143695033150294470f, 0.989511513679355190f, 
+  -0.144454021390860470f, 
+  0.989400427791380380f, -0.145212924652847460f, 0.989288759864625170f, 
+  -0.145971742489812210f, 
+  0.989176509964781010f, -0.146730474455361750f, 0.989063678157881540f, 
+  -0.147489120103153570f, 
+  0.988950264510302990f, -0.148247678986896030f, 0.988836269088763540f, 
+  -0.149006150660348450f, 
+  0.988721691960323780f, -0.149764534677321510f, 0.988606533192386450f, 
+  -0.150522830591677400f, 
+  0.988490792852696590f, -0.151281037957330220f, 0.988374471009341280f, 
+  -0.152039156328246050f, 
+  0.988257567730749460f, -0.152797185258443440f, 0.988140083085692570f, 
+  -0.153555124301993450f, 
+  0.988022017143283530f, -0.154312973013020100f, 0.987903369972977790f, 
+  -0.155070730945700510f, 
+  0.987784141644572180f, -0.155828397654265230f, 0.987664332228205710f, 
+  -0.156585972692998430f, 
+  0.987543941794359230f, -0.157343455616238250f, 0.987422970413855410f, 
+  -0.158100845978376980f, 
+  0.987301418157858430f, -0.158858143333861450f, 0.987179285097874340f, 
+  -0.159615347237193060f, 
+  0.987056571305750970f, -0.160372457242928280f, 0.986933276853677710f, 
+  -0.161129472905678810f, 
+  0.986809401814185530f, -0.161886393780111830f, 0.986684946260146690f, 
+  -0.162643219420950310f, 
+  0.986559910264775410f, -0.163399949382973230f, 0.986434293901627180f, 
+  -0.164156583221015810f, 
+  0.986308097244598670f, -0.164913120489969890f, 0.986181320367928270f, 
+  -0.165669560744784120f, 
+  0.986053963346195440f, -0.166425903540464100f, 0.985926026254321130f, 
+  -0.167182148432072940f, 
+  0.985797509167567480f, -0.167938294974731170f, 0.985668412161537550f, 
+  -0.168694342723617330f, 
+  0.985538735312176060f, -0.169450291233967960f, 0.985408478695768420f, 
+  -0.170206140061078070f, 
+  0.985277642388941220f, -0.170961888760301220f, 0.985146226468662230f, 
+  -0.171717536887049970f, 
+  0.985014231012239840f, -0.172473083996795950f, 0.984881656097323700f, 
+  -0.173228529645070320f, 
+  0.984748501801904210f, -0.173983873387463820f, 0.984614768204312600f, 
+  -0.174739114779627200f, 
+  0.984480455383220930f, -0.175494253377271430f, 0.984345563417641900f, 
+  -0.176249288736167880f, 
+  0.984210092386929030f, -0.177004220412148750f, 0.984074042370776450f, 
+  -0.177759047961107170f, 
+  0.983937413449218920f, -0.178513770938997510f, 0.983800205702631600f, 
+  -0.179268388901835750f, 
+  0.983662419211730250f, -0.180022901405699510f, 0.983524054057571260f, 
+  -0.180777308006728590f, 
+  0.983385110321551180f, -0.181531608261124970f, 0.983245588085407070f, 
+  -0.182285801725153300f, 
+  0.983105487431216290f, -0.183039887955140950f, 0.982964808441396440f, 
+  -0.183793866507478450f, 
+  0.982823551198705240f, -0.184547736938619620f, 0.982681715786240860f, 
+  -0.185301498805081900f, 
+  0.982539302287441240f, -0.186055151663446630f, 0.982396310786084690f, 
+  -0.186808695070359270f, 
+  0.982252741366289370f, -0.187562128582529600f, 0.982108594112513610f, 
+  -0.188315451756732120f, 
+  0.981963869109555240f, -0.189068664149806190f, 0.981818566442552500f, 
+  -0.189821765318656410f, 
+  0.981672686196983110f, -0.190574754820252740f, 0.981526228458664770f, 
+  -0.191327632211630900f, 
+  0.981379193313754560f, -0.192080397049892440f, 0.981231580848749730f, 
+  -0.192833048892205230f, 
+  0.981083391150486710f, -0.193585587295803610f, 0.980934624306141640f, 
+  -0.194338011817988600f, 
+  0.980785280403230430f, -0.195090322016128250f, 0.980635359529608120f, 
+  -0.195842517447657850f, 
+  0.980484861773469380f, -0.196594597670080220f, 0.980333787223347960f, 
+  -0.197346562240965920f, 
+  0.980182135968117430f, -0.198098410717953560f, 0.980029908096990090f, 
+  -0.198850142658750090f, 
+  0.979877103699517640f, -0.199601757621130970f, 0.979723722865591170f, 
+  -0.200353255162940450f, 
+  0.979569765685440520f, -0.201104634842091900f, 0.979415232249634780f, 
+  -0.201855896216568050f, 
+  0.979260122649082020f, -0.202607038844421130f, 0.979104436975029250f, 
+  -0.203358062283773320f, 
+  0.978948175319062200f, -0.204108966092816870f, 0.978791337773105670f, 
+  -0.204859749829814420f, 
+  0.978633924429423210f, -0.205610413053099240f, 0.978475935380616830f, 
+  -0.206360955321075510f, 
+  0.978317370719627650f, -0.207111376192218560f, 0.978158230539735050f, 
+  -0.207861675225075070f, 
+  0.977998514934557140f, -0.208611851978263490f, 0.977838223998050430f, 
+  -0.209361906010474160f, 
+  0.977677357824509930f, -0.210111836880469610f, 0.977515916508569280f, 
+  -0.210861644147084860f, 
+  0.977353900145199960f, -0.211611327369227550f, 0.977191308829712280f, 
+  -0.212360886105878420f, 
+  0.977028142657754390f, -0.213110319916091360f, 0.976864401725312640f, 
+  -0.213859628358993750f, 
+  0.976700086128711840f, -0.214608810993786760f, 0.976535195964614470f, 
+  -0.215357867379745550f, 
+  0.976369731330021140f, -0.216106797076219520f, 0.976203692322270560f, 
+  -0.216855599642632620f, 
+  0.976037079039039020f, -0.217604274638483640f, 0.975869891578341030f, 
+  -0.218352821623346320f, 
+  0.975702130038528570f, -0.219101240156869800f, 0.975533794518291360f, 
+  -0.219849529798778700f, 
+  0.975364885116656980f, -0.220597690108873510f, 0.975195401932990370f, 
+  -0.221345720647030810f, 
+  0.975025345066994120f, -0.222093620973203510f, 0.974854714618708430f, 
+  -0.222841390647421120f, 
+  0.974683510688510670f, -0.223589029229789990f, 0.974511733377115720f, 
+  -0.224336536280493600f, 
+  0.974339382785575860f, -0.225083911359792830f, 0.974166459015280320f, 
+  -0.225831154028026170f, 
+  0.973992962167955830f, -0.226578263845610000f, 0.973818892345666100f, 
+  -0.227325240373038860f, 
+  0.973644249650811980f, -0.228072083170885730f, 0.973469034186131070f, 
+  -0.228818791799802220f, 
+  0.973293246054698250f, -0.229565365820518870f, 0.973116885359925130f, 
+  -0.230311804793845440f, 
+  0.972939952205560180f, -0.231058108280671110f, 0.972762446695688570f, 
+  -0.231804275841964780f, 
+  0.972584368934732210f, -0.232550307038775240f, 0.972405719027449770f, 
+  -0.233296201432231590f, 
+  0.972226497078936270f, -0.234041958583543430f, 0.972046703194623500f, 
+  -0.234787578054000970f, 
+  0.971866337480279400f, -0.235533059404975490f, 0.971685400042008540f, 
+  -0.236278402197919570f, 
+  0.971503890986251780f, -0.237023605994367200f, 0.971321810419786160f, 
+  -0.237768670355934190f, 
+  0.971139158449725090f, -0.238513594844318420f, 0.970955935183517970f, 
+  -0.239258379021299980f, 
+  0.970772140728950350f, -0.240003022448741500f, 0.970587775194143630f, 
+  -0.240747524688588430f, 
+  0.970402838687555500f, -0.241491885302869330f, 0.970217331317979160f, 
+  -0.242236103853696010f, 
+  0.970031253194543970f, -0.242980179903263870f, 0.969844604426714830f, 
+  -0.243724113013852160f, 
+  0.969657385124292450f, -0.244467902747824150f, 0.969469595397413060f, 
+  -0.245211548667627540f, 
+  0.969281235356548530f, -0.245955050335794590f, 0.969092305112506210f, 
+  -0.246698407314942410f, 
+  0.968902804776428870f, -0.247441619167773270f, 0.968712734459794780f, 
+  -0.248184685457074780f, 
+  0.968522094274417380f, -0.248927605745720150f, 0.968330884332445190f, 
+  -0.249670379596668570f, 
+  0.968139104746362440f, -0.250413006572965220f, 0.967946755628987800f, 
+  -0.251155486237741920f, 
+  0.967753837093475510f, -0.251897818154216970f, 0.967560349253314360f, 
+  -0.252640001885695520f, 
+  0.967366292222328510f, -0.253382036995570160f, 0.967171666114676640f, 
+  -0.254123923047320620f, 
+  0.966976471044852070f, -0.254865659604514570f, 0.966780707127683270f, 
+  -0.255607246230807380f, 
+  0.966584374478333120f, -0.256348682489942910f, 0.966387473212298900f, 
+  -0.257089967945753120f, 
+  0.966190003445412500f, -0.257831102162158990f, 0.965991965293840570f, 
+  -0.258572084703170340f, 
+  0.965793358874083680f, -0.259312915132886230f, 0.965594184302976830f, 
+  -0.260053593015495190f, 
+  0.965394441697689400f, -0.260794117915275510f, 0.965194131175724720f, 
+  -0.261534489396595520f, 
+  0.964993252854920320f, -0.262274707023913590f, 0.964791806853447900f, 
+  -0.263014770361779000f, 
+  0.964589793289812760f, -0.263754678974831350f, 0.964387212282854290f, 
+  -0.264494432427801630f, 
+  0.964184063951745830f, -0.265234030285511790f, 0.963980348415994110f, 
+  -0.265973472112875590f, 
+  0.963776065795439840f, -0.266712757474898370f, 0.963571216210257320f, 
+  -0.267451885936677620f, 
+  0.963365799780954050f, -0.268190857063403180f, 0.963159816628371360f, 
+  -0.268929670420357260f, 
+  0.962953266873683880f, -0.269668325572915090f, 0.962746150638399410f, 
+  -0.270406822086544820f, 
+  0.962538468044359160f, -0.271145159526808010f, 0.962330219213737400f, 
+  -0.271883337459359720f, 
+  0.962121404269041580f, -0.272621355449948980f, 0.961912023333112210f, 
+  -0.273359213064418680f, 
+  0.961702076529122540f, -0.274096909868706380f, 0.961491563980579000f, 
+  -0.274834445428843940f, 
+  0.961280485811320640f, -0.275571819310958140f, 0.961068842145519350f, 
+  -0.276309031081271080f, 
+  0.960856633107679660f, -0.277046080306099900f, 0.960643858822638590f, 
+  -0.277782966551857690f, 
+  0.960430519415565790f, -0.278519689385053060f, 0.960216615011963430f, 
+  -0.279256248372291180f, 
+  0.960002145737665960f, -0.279992643080273220f, 0.959787111718839900f, 
+  -0.280728873075797190f, 
+  0.959571513081984520f, -0.281464937925757940f, 0.959355349953930790f, 
+  -0.282200837197147560f, 
+  0.959138622461841890f, -0.282936570457055390f, 0.958921330733213170f, 
+  -0.283672137272668430f, 
+  0.958703474895871600f, -0.284407537211271880f, 0.958485055077976100f, 
+  -0.285142769840248670f, 
+  0.958266071408017670f, -0.285877834727080620f, 0.958046524014818600f, 
+  -0.286612731439347790f, 
+  0.957826413027532910f, -0.287347459544729510f, 0.957605738575646350f, 
+  -0.288082018611004130f, 
+  0.957384500788975860f, -0.288816408206049480f, 0.957162699797670210f, 
+  -0.289550627897843030f, 
+  0.956940335732208820f, -0.290284677254462330f, 0.956717408723403050f, 
+  -0.291018555844085090f, 
+  0.956493918902395100f, -0.291752263234989260f, 0.956269866400658030f, 
+  -0.292485798995553880f, 
+  0.956045251349996410f, -0.293219162694258630f, 0.955820073882545420f, 
+  -0.293952353899684660f, 
+  0.955594334130771110f, -0.294685372180514330f, 0.955368032227470350f, 
+  -0.295418217105532010f, 
+  0.955141168305770780f, -0.296150888243623790f, 0.954913742499130520f, 
+  -0.296883385163778270f, 
+  0.954685754941338340f, -0.297615707435086200f, 0.954457205766513490f, 
+  -0.298347854626741400f, 
+  0.954228095109105670f, -0.299079826308040480f, 0.953998423103894490f, 
+  -0.299811622048383350f, 
+  0.953768189885990330f, -0.300543241417273450f, 0.953537395590833280f, 
+  -0.301274683984317950f, 
+  0.953306040354193860f, -0.302005949319228080f, 0.953074124312172200f, 
+  -0.302737036991819140f, 
+  0.952841647601198720f, -0.303467946572011320f, 0.952608610358033350f, 
+  -0.304198677629829110f, 
+  0.952375012719765880f, -0.304929229735402370f, 0.952140854823815830f, 
+  -0.305659602458966120f, 
+  0.951906136807932350f, -0.306389795370860920f, 0.951670858810193860f, 
+  -0.307119808041533100f, 
+  0.951435020969008340f, -0.307849640041534870f, 0.951198623423113230f, 
+  -0.308579290941525090f, 
+  0.950961666311575080f, -0.309308760312268730f, 0.950724149773789610f, 
+  -0.310038047724637890f, 
+  0.950486073949481700f, -0.310767152749611470f, 0.950247438978705230f, 
+  -0.311496074958275910f, 
+  0.950008245001843000f, -0.312224813921824880f, 0.949768492159606680f, 
+  -0.312953369211560200f, 
+  0.949528180593036670f, -0.313681740398891520f, 0.949287310443502120f, 
+  -0.314409927055336660f, 
+  0.949045881852700560f, -0.315137928752522440f, 0.948803894962658490f, 
+  -0.315865745062183960f, 
+  0.948561349915730270f, -0.316593375556165850f, 0.948318246854599090f, 
+  -0.317320819806421740f, 
+  0.948074585922276230f, -0.318048077385014950f, 0.947830367262101010f, 
+  -0.318775147864118480f, 
+  0.947585591017741090f, -0.319502030816015690f, 0.947340257333192050f, 
+  -0.320228725813099860f, 
+  0.947094366352777220f, -0.320955232427875210f, 0.946847918221148000f, 
+  -0.321681550232956580f, 
+  0.946600913083283530f, -0.322407678801069850f, 0.946353351084490590f, 
+  -0.323133617705052330f, 
+  0.946105232370403450f, -0.323859366517852850f, 0.945856557086983910f, 
+  -0.324584924812532150f, 
+  0.945607325380521280f, -0.325310292162262930f, 0.945357537397632290f, 
+  -0.326035468140330240f, 
+  0.945107193285260610f, -0.326760452320131730f, 0.944856293190677210f, 
+  -0.327485244275178000f, 
+  0.944604837261480260f, -0.328209843579092500f, 0.944352825645594750f, 
+  -0.328934249805612200f, 
+  0.944100258491272660f, -0.329658462528587490f, 0.943847135947092690f, 
+  -0.330382481321982780f, 
+  0.943593458161960390f, -0.331106305759876430f, 0.943339225285107720f, 
+  -0.331829935416461110f, 
+  0.943084437466093490f, -0.332553369866044220f, 0.942829094854802710f, 
+  -0.333276608683047930f, 
+  0.942573197601446870f, -0.333999651442009380f, 0.942316745856563780f, 
+  -0.334722497717581220f, 
+  0.942059739771017310f, -0.335445147084531600f, 0.941802179495997650f, 
+  -0.336167599117744520f, 
+  0.941544065183020810f, -0.336889853392220050f, 0.941285396983928660f, 
+  -0.337611909483074620f, 
+  0.941026175050889260f, -0.338333766965541130f, 0.940766399536396070f, 
+  -0.339055425414969640f, 
+  0.940506070593268300f, -0.339776884406826850f, 0.940245188374650880f, 
+  -0.340498143516697160f, 
+  0.939983753034014050f, -0.341219202320282360f, 0.939721764725153340f, 
+  -0.341940060393402190f, 
+  0.939459223602189920f, -0.342660717311994380f, 0.939196129819569900f, 
+  -0.343381172652115040f, 
+  0.938932483532064600f, -0.344101425989938810f, 0.938668284894770170f, 
+  -0.344821476901759290f, 
+  0.938403534063108060f, -0.345541324963989090f, 0.938138231192824360f, 
+  -0.346260969753160010f, 
+  0.937872376439989890f, -0.346980410845923680f, 0.937605969960999990f, 
+  -0.347699647819051380f, 
+  0.937339011912574960f, -0.348418680249434560f, 0.937071502451759190f, 
+  -0.349137507714084970f, 
+  0.936803441735921560f, -0.349856129790134920f, 0.936534829922755500f, 
+  -0.350574546054837510f, 
+  0.936265667170278260f, -0.351292756085567090f, 0.935995953636831410f, 
+  -0.352010759459819080f, 
+  0.935725689481080370f, -0.352728555755210730f, 0.935454874862014620f, 
+  -0.353446144549480810f, 
+  0.935183509938947610f, -0.354163525420490340f, 0.934911594871516090f, 
+  -0.354880697946222790f, 
+  0.934639129819680780f, -0.355597661704783850f, 0.934366114943725790f, 
+  -0.356314416274402410f, 
+  0.934092550404258980f, -0.357030961233429980f, 0.933818436362210960f, 
+  -0.357747296160341900f, 
+  0.933543772978836170f, -0.358463420633736540f, 0.933268560415712050f, 
+  -0.359179334232336500f, 
+  0.932992798834738960f, -0.359895036534988110f, 0.932716488398140250f, 
+  -0.360610527120662270f, 
+  0.932439629268462360f, -0.361325805568454280f, 0.932162221608574430f, 
+  -0.362040871457584180f, 
+  0.931884265581668150f, -0.362755724367397230f, 0.931605761351257830f, 
+  -0.363470363877363760f, 
+  0.931326709081180430f, -0.364184789567079890f, 0.931047108935595280f, 
+  -0.364899001016267320f, 
+  0.930766961078983710f, -0.365612997804773850f, 0.930486265676149780f, 
+  -0.366326779512573590f, 
+  0.930205022892219070f, -0.367040345719767180f, 0.929923232892639670f, 
+  -0.367753696006581980f, 
+  0.929640895843181330f, -0.368466829953372320f, 0.929358011909935500f, 
+  -0.369179747140620020f, 
+  0.929074581259315860f, -0.369892447148934100f, 0.928790604058057020f, 
+  -0.370604929559051670f, 
+  0.928506080473215590f, -0.371317193951837540f, 0.928221010672169440f, 
+  -0.372029239908285010f, 
+  0.927935394822617890f, -0.372741067009515760f, 0.927649233092581180f, 
+  -0.373452674836780300f, 
+  0.927362525650401110f, -0.374164062971457930f, 0.927075272664740100f, 
+  -0.374875230995057540f, 
+  0.926787474304581750f, -0.375586178489217220f, 0.926499130739230510f, 
+  -0.376296905035704790f, 
+  0.926210242138311380f, -0.377007410216418260f, 0.925920808671770070f, 
+  -0.377717693613385640f, 
+  0.925630830509872720f, -0.378427754808765560f, 0.925340307823206310f, 
+  -0.379137593384847320f, 
+  0.925049240782677580f, -0.379847208924051160f, 0.924757629559513910f, 
+  -0.380556601008928520f, 
+  0.924465474325262600f, -0.381265769222162380f, 0.924172775251791200f, 
+  -0.381974713146567220f, 
+  0.923879532511286740f, -0.382683432365089780f, 0.923585746276256670f, 
+  -0.383391926460808660f, 
+  0.923291416719527640f, -0.384100195016935040f, 0.922996544014246250f, 
+  -0.384808237616812880f, 
+  0.922701128333878630f, -0.385516053843918850f, 0.922405169852209880f, 
+  -0.386223643281862980f, 
+  0.922108668743345180f, -0.386931005514388580f, 0.921811625181708120f, 
+  -0.387638140125372730f, 
+  0.921514039342042010f, -0.388345046698826250f, 0.921215911399408730f, 
+  -0.389051724818894380f, 
+  0.920917241529189520f, -0.389758174069856410f, 0.920618029907083970f, 
+  -0.390464394036126590f, 
+  0.920318276709110590f, -0.391170384302253870f, 0.920017982111606570f, 
+  -0.391876144452922350f, 
+  0.919717146291227360f, -0.392581674072951470f, 0.919415769424947070f, 
+  -0.393286972747296400f, 
+  0.919113851690057770f, -0.393992040061048100f, 0.918811393264170050f, 
+  -0.394696875599433560f, 
+  0.918508394325212250f, -0.395401478947816350f, 0.918204855051430900f, 
+  -0.396105849691696270f, 
+  0.917900775621390500f, -0.396809987416710310f, 0.917596156213972950f, 
+  -0.397513891708632330f, 
+  0.917290997008377910f, -0.398217562153373560f, 0.916985298184123000f, 
+  -0.398920998336982910f, 
+  0.916679059921042700f, -0.399624199845646790f, 0.916372282399289140f, 
+  -0.400327166265690090f, 
+  0.916064965799331720f, -0.401029897183575620f, 0.915757110301956720f, 
+  -0.401732392185905010f, 
+  0.915448716088267830f, -0.402434650859418430f, 0.915139783339685260f, 
+  -0.403136672790995300f, 
+  0.914830312237946200f, -0.403838457567654070f, 0.914520302965104450f, 
+  -0.404540004776553000f, 
+  0.914209755703530690f, -0.405241314004989860f, 0.913898670635911680f, 
+  -0.405942384840402510f, 
+  0.913587047945250810f, -0.406643216870369030f, 0.913274887814867760f, 
+  -0.407343809682607970f, 
+  0.912962190428398210f, -0.408044162864978690f, 0.912648955969793900f, 
+  -0.408744276005481360f, 
+  0.912335184623322750f, -0.409444148692257590f, 0.912020876573568340f, 
+  -0.410143780513590240f, 
+  0.911706032005429880f, -0.410843171057903910f, 0.911390651104122430f, 
+  -0.411542319913765220f, 
+  0.911074734055176360f, -0.412241226669882890f, 0.910758281044437570f, 
+  -0.412939890915108080f, 
+  0.910441292258067250f, -0.413638312238434500f, 0.910123767882541680f, 
+  -0.414336490228999100f, 
+  0.909805708104652220f, -0.415034424476081630f, 0.909487113111505430f, 
+  -0.415732114569105360f, 
+  0.909167983090522380f, -0.416429560097637150f, 0.908848318229439120f, 
+  -0.417126760651387870f, 
+  0.908528118716306120f, -0.417823715820212270f, 0.908207384739488700f, 
+  -0.418520425194109700f, 
+  0.907886116487666260f, -0.419216888363223910f, 0.907564314149832630f, 
+  -0.419913104917843620f, 
+  0.907241977915295820f, -0.420609074448402510f, 0.906919107973678140f, 
+  -0.421304796545479640f, 
+  0.906595704514915330f, -0.422000270799799680f, 0.906271767729257660f, 
+  -0.422695496802232950f, 
+  0.905947297807268460f, -0.423390474143796050f, 0.905622294939825270f, 
+  -0.424085202415651560f, 
+  0.905296759318118820f, -0.424779681209108810f, 0.904970691133653250f, 
+  -0.425473910115623800f, 
+  0.904644090578246240f, -0.426167888726799620f, 0.904316957844028320f, 
+  -0.426861616634386430f, 
+  0.903989293123443340f, -0.427555093430282080f, 0.903661096609247980f, 
+  -0.428248318706531960f, 
+  0.903332368494511820f, -0.428941292055329490f, 0.903003108972617150f, 
+  -0.429634013069016380f, 
+  0.902673318237258830f, -0.430326481340082610f, 0.902342996482444200f, 
+  -0.431018696461167030f, 
+  0.902012143902493180f, -0.431710658025057260f, 0.901680760692037730f, 
+  -0.432402365624690140f, 
+  0.901348847046022030f, -0.433093818853151960f, 0.901016403159702330f, 
+  -0.433785017303678520f, 
+  0.900683429228646970f, -0.434475960569655650f, 0.900349925448735600f, 
+  -0.435166648244619260f, 
+  0.900015892016160280f, -0.435857079922255470f, 0.899681329127423930f, 
+  -0.436547255196401200f, 
+  0.899346236979341570f, -0.437237173661044090f, 0.899010615769039070f, 
+  -0.437926834910322860f, 
+  0.898674465693953820f, -0.438616238538527660f, 0.898337786951834310f, 
+  -0.439305384140099950f, 
+  0.898000579740739880f, -0.439994271309633260f, 0.897662844259040860f, 
+  -0.440682899641872900f, 
+  0.897324580705418320f, -0.441371268731716670f, 0.896985789278863970f, 
+  -0.442059378174214700f, 
+  0.896646470178680150f, -0.442747227564570020f, 0.896306623604479550f, 
+  -0.443434816498138480f, 
+  0.895966249756185220f, -0.444122144570429200f, 0.895625348834030110f, 
+  -0.444809211377104880f, 
+  0.895283921038557580f, -0.445496016513981740f, 0.894941966570620750f, 
+  -0.446182559577030070f, 
+  0.894599485631382700f, -0.446868840162374160f, 0.894256478422316040f, 
+  -0.447554857866293010f, 
+  0.893912945145203250f, -0.448240612285219890f, 0.893568886002135910f, 
+  -0.448926103015743260f, 
+  0.893224301195515320f, -0.449611329654606540f, 0.892879190928051680f, 
+  -0.450296291798708610f, 
+  0.892533555402764580f, -0.450980989045103860f, 0.892187394822982480f, 
+  -0.451665420991002490f, 
+  0.891840709392342720f, -0.452349587233770890f, 0.891493499314791380f, 
+  -0.453033487370931580f, 
+  0.891145764794583180f, -0.453717121000163870f, 0.890797506036281490f, 
+  -0.454400487719303580f, 
+  0.890448723244757880f, -0.455083587126343840f, 0.890099416625192320f, 
+  -0.455766418819434640f, 
+  0.889749586383072780f, -0.456448982396883920f, 0.889399232724195520f, 
+  -0.457131277457156980f, 
+  0.889048355854664570f, -0.457813303598877170f, 0.888696955980891600f, 
+  -0.458495060420826270f, 
+  0.888345033309596350f, -0.459176547521944090f, 0.887992588047805560f, 
+  -0.459857764501329540f, 
+  0.887639620402853930f, -0.460538710958240010f, 0.887286130582383150f, 
+  -0.461219386492092380f, 
+  0.886932118794342190f, -0.461899790702462730f, 0.886577585246987040f, 
+  -0.462579923189086810f, 
+  0.886222530148880640f, -0.463259783551860150f, 0.885866953708892790f, 
+  -0.463939371390838520f, 
+  0.885510856136199950f, -0.464618686306237820f, 0.885154237640285110f, 
+  -0.465297727898434600f, 
+  0.884797098430937790f, -0.465976495767966180f, 0.884439438718253810f, 
+  -0.466654989515530920f, 
+  0.884081258712634990f, -0.467333208741988420f, 0.883722558624789660f, 
+  -0.468011153048359830f, 
+  0.883363338665731580f, -0.468688822035827900f, 0.883003599046780830f, 
+  -0.469366215305737520f, 
+  0.882643339979562790f, -0.470043332459595620f, 0.882282561676008710f, 
+  -0.470720173099071600f, 
+  0.881921264348355050f, -0.471396736825997640f, 0.881559448209143780f, 
+  -0.472073023242368660f, 
+  0.881197113471222090f, -0.472749031950342790f, 0.880834260347742040f, 
+  -0.473424762552241530f, 
+  0.880470889052160750f, -0.474100214650549970f, 0.880106999798240360f, 
+  -0.474775387847917120f, 
+  0.879742592800047410f, -0.475450281747155870f, 0.879377668271953290f, 
+  -0.476124895951243580f, 
+  0.879012226428633530f, -0.476799230063322090f, 0.878646267485068130f, 
+  -0.477473283686698060f, 
+  0.878279791656541580f, -0.478147056424843010f, 0.877912799158641840f, 
+  -0.478820547881393890f, 
+  0.877545290207261350f, -0.479493757660153010f, 0.877177265018595940f, 
+  -0.480166685365088390f, 
+  0.876808723809145650f, -0.480839330600333960f, 0.876439666795713610f, 
+  -0.481511692970189860f, 
+  0.876070094195406600f, -0.482183772079122720f, 0.875700006225634600f, 
+  -0.482855567531765670f, 
+  0.875329403104110890f, -0.483527078932918740f, 0.874958285048851650f, 
+  -0.484198305887549030f, 
+  0.874586652278176110f, -0.484869248000791060f, 0.874214505010706300f, 
+  -0.485539904877946960f, 
+  0.873841843465366860f, -0.486210276124486420f, 0.873468667861384880f, 
+  -0.486880361346047340f, 
+  0.873094978418290090f, -0.487550160148436000f, 0.872720775355914300f, 
+  -0.488219672137626790f, 
+  0.872346058894391540f, -0.488888896919763170f, 0.871970829254157810f, 
+  -0.489557834101157440f, 
+  0.871595086655950980f, -0.490226483288291160f, 0.871218831320811020f, 
+  -0.490894844087815090f, 
+  0.870842063470078980f, -0.491562916106549900f, 0.870464783325397670f, 
+  -0.492230698951486020f, 
+  0.870086991108711460f, -0.492898192229784040f, 0.869708687042265670f, 
+  -0.493565395548774770f, 
+  0.869329871348606840f, -0.494232308515959670f, 0.868950544250582380f, 
+  -0.494898930739011260f, 
+  0.868570705971340900f, -0.495565261825772540f, 0.868190356734331310f, 
+  -0.496231301384258250f, 
+  0.867809496763303320f, -0.496897049022654470f, 0.867428126282306920f, 
+  -0.497562504349319150f, 
+  0.867046245515692650f, -0.498227666972781870f, 0.866663854688111130f, 
+  -0.498892536501744590f, 
+  0.866280954024512990f, -0.499557112545081840f, 0.865897543750148820f, 
+  -0.500221394711840680f, 
+  0.865513624090569090f, -0.500885382611240710f, 0.865129195271623800f, 
+  -0.501549075852675390f, 
+  0.864744257519462380f, -0.502212474045710790f, 0.864358811060534030f, 
+  -0.502875576800086990f, 
+  0.863972856121586810f, -0.503538383725717580f, 0.863586392929668100f, 
+  -0.504200894432690340f, 
+  0.863199421712124160f, -0.504863108531267590f, 0.862811942696600330f, 
+  -0.505525025631885390f, 
+  0.862423956111040610f, -0.506186645345155230f, 0.862035462183687210f, 
+  -0.506847967281863210f, 
+  0.861646461143081300f, -0.507508991052970870f, 0.861256953218062170f, 
+  -0.508169716269614600f, 
+  0.860866938637767310f, -0.508830142543106990f, 0.860476417631632070f, 
+  -0.509490269484936360f, 
+  0.860085390429390140f, -0.510150096706766810f, 0.859693857261072610f, 
+  -0.510809623820439040f, 
+  0.859301818357008470f, -0.511468850437970300f, 0.858909273947823900f, 
+  -0.512127776171554690f, 
+  0.858516224264442740f, -0.512786400633562960f, 0.858122669538086140f, 
+  -0.513444723436543460f, 
+  0.857728610000272120f, -0.514102744193221660f, 0.857334045882815590f, 
+  -0.514760462516501200f, 
+  0.856938977417828760f, -0.515417878019462930f, 0.856543404837719960f, 
+  -0.516074990315366630f, 
+  0.856147328375194470f, -0.516731799017649870f, 0.855750748263253920f, 
+  -0.517388303739929060f, 
+  0.855353664735196030f, -0.518044504095999340f, 0.854956078024614930f, 
+  -0.518700399699834950f, 
+  0.854557988365400530f, -0.519355990165589640f, 0.854159395991738850f, 
+  -0.520011275107596040f, 
+  0.853760301138111410f, -0.520666254140367160f, 0.853360704039295430f, 
+  -0.521320926878595660f, 
+  0.852960604930363630f, -0.521975292937154390f, 0.852560004046684080f, 
+  -0.522629351931096610f, 
+  0.852158901623919830f, -0.523283103475656430f, 0.851757297898029120f, 
+  -0.523936547186248600f, 
+  0.851355193105265200f, -0.524589682678468950f, 0.850952587482175730f, 
+  -0.525242509568094710f, 
+  0.850549481265603480f, -0.525895027471084630f, 0.850145874692685210f, 
+  -0.526547236003579440f, 
+  0.849741768000852550f, -0.527199134781901280f, 0.849337161427830780f, 
+  -0.527850723422555230f, 
+  0.848932055211639610f, -0.528502001542228480f, 0.848526449590592650f, 
+  -0.529152968757790610f, 
+  0.848120344803297230f, -0.529803624686294610f, 0.847713741088654380f, 
+  -0.530453968944976320f, 
+  0.847306638685858320f, -0.531104001151255000f, 0.846899037834397240f, 
+  -0.531753720922733320f, 
+  0.846490938774052130f, -0.532403127877197900f, 0.846082341744897050f, 
+  -0.533052221632619450f, 
+  0.845673246987299070f, -0.533701001807152960f, 0.845263654741918220f, 
+  -0.534349468019137520f, 
+  0.844853565249707120f, -0.534997619887097150f, 0.844442978751910660f, 
+  -0.535645457029741090f, 
+  0.844031895490066410f, -0.536292979065963180f, 0.843620315706004150f, 
+  -0.536940185614842910f, 
+  0.843208239641845440f, -0.537587076295645390f, 0.842795667540004120f, 
+  -0.538233650727821700f, 
+  0.842382599643185850f, -0.538879908531008420f, 0.841969036194387680f, 
+  -0.539525849325028890f, 
+  0.841554977436898440f, -0.540171472729892850f, 0.841140423614298080f, 
+  -0.540816778365796670f, 
+  0.840725374970458070f, -0.541461765853123440f, 0.840309831749540770f, 
+  -0.542106434812443920f, 
+  0.839893794195999520f, -0.542750784864515890f, 0.839477262554578550f, 
+  -0.543394815630284800f, 
+  0.839060237070312740f, -0.544038526730883820f, 0.838642717988527300f, 
+  -0.544681917787634530f, 
+  0.838224705554838080f, -0.545324988422046460f, 0.837806200015150940f, 
+  -0.545967738255817570f, 
+  0.837387201615661940f, -0.546610166910834860f, 0.836967710602857020f, 
+  -0.547252274009174090f, 
+  0.836547727223512010f, -0.547894059173100190f, 0.836127251724692270f, 
+  -0.548535522025067390f, 
+  0.835706284353752600f, -0.549176662187719660f, 0.835284825358337370f, 
+  -0.549817479283890910f, 
+  0.834862874986380010f, -0.550457972936604810f, 0.834440433486103190f, 
+  -0.551098142769075430f, 
+  0.834017501106018130f, -0.551737988404707340f, 0.833594078094925140f, 
+  -0.552377509467096070f, 
+  0.833170164701913190f, -0.553016705580027470f, 0.832745761176359460f, 
+  -0.553655576367479310f, 
+  0.832320867767929680f, -0.554294121453620000f, 0.831895484726577590f, 
+  -0.554932340462810370f, 
+  0.831469612302545240f, -0.555570233019602180f, 0.831043250746362320f, 
+  -0.556207798748739930f, 
+  0.830616400308846310f, -0.556845037275160100f, 0.830189061241102370f, 
+  -0.557481948223991550f, 
+  0.829761233794523050f, -0.558118531220556100f, 0.829332918220788250f, 
+  -0.558754785890368310f, 
+  0.828904114771864870f, -0.559390711859136140f, 0.828474823700007130f, 
+  -0.560026308752760380f, 
+  0.828045045257755800f, -0.560661576197336030f, 0.827614779697938400f, 
+  -0.561296513819151470f, 
+  0.827184027273669130f, -0.561931121244689470f, 0.826752788238348520f, 
+  -0.562565398100626560f, 
+  0.826321062845663530f, -0.563199344013834090f, 0.825888851349586780f, 
+  -0.563832958611378170f, 
+  0.825456154004377550f, -0.564466241520519500f, 0.825022971064580220f, 
+  -0.565099192368713980f, 
+  0.824589302785025290f, -0.565731810783613120f, 0.824155149420828570f, 
+  -0.566364096393063840f, 
+  0.823720511227391430f, -0.566996048825108680f, 0.823285388460400110f, 
+  -0.567627667707986230f, 
+  0.822849781375826430f, -0.568258952670131490f, 0.822413690229926390f, 
+  -0.568889903340175860f, 
+  0.821977115279241550f, -0.569520519346947140f, 0.821540056780597610f, 
+  -0.570150800319470300f, 
+  0.821102514991104650f, -0.570780745886967260f, 0.820664490168157460f, 
+  -0.571410355678857230f, 
+  0.820225982569434690f, -0.572039629324757050f, 0.819786992452898990f, 
+  -0.572668566454481160f, 
+  0.819347520076796900f, -0.573297166698042200f, 0.818907565699658950f, 
+  -0.573925429685650750f, 
+  0.818467129580298660f, -0.574553355047715760f, 0.818026211977813440f, 
+  -0.575180942414845080f, 
+  0.817584813151583710f, -0.575808191417845340f, 0.817142933361272970f, 
+  -0.576435101687721830f, 
+  0.816700572866827850f, -0.577061672855679440f, 0.816257731928477390f, 
+  -0.577687904553122800f, 
+  0.815814410806733780f, -0.578313796411655590f, 0.815370609762391290f, 
+  -0.578939348063081780f, 
+  0.814926329056526620f, -0.579564559139405630f, 0.814481568950498610f, 
+  -0.580189429272831680f, 
+  0.814036329705948410f, -0.580813958095764530f, 0.813590611584798510f, 
+  -0.581438145240810170f, 
+  0.813144414849253590f, -0.582061990340775440f, 0.812697739761799490f, 
+  -0.582685493028668460f, 
+  0.812250586585203880f, -0.583308652937698290f, 0.811802955582515470f, 
+  -0.583931469701276180f, 
+  0.811354847017063730f, -0.584553942953015330f, 0.810906261152459670f, 
+  -0.585176072326730410f, 
+  0.810457198252594770f, -0.585797857456438860f, 0.810007658581641140f, 
+  -0.586419297976360500f, 
+  0.809557642404051260f, -0.587040393520917970f, 0.809107149984558240f, 
+  -0.587661143724736660f, 
+  0.808656181588174980f, -0.588281548222645220f, 0.808204737480194720f, 
+  -0.588901606649675720f, 
+  0.807752817926190360f, -0.589521318641063940f, 0.807300423192014450f, 
+  -0.590140683832248820f, 
+  0.806847553543799330f, -0.590759701858874160f, 0.806394209247956240f, 
+  -0.591378372356787580f, 
+  0.805940390571176280f, -0.591996694962040990f, 0.805486097780429230f, 
+  -0.592614669310891130f, 
+  0.805031331142963660f, -0.593232295039799800f, 0.804576090926307110f, 
+  -0.593849571785433630f, 
+  0.804120377398265810f, -0.594466499184664430f, 0.803664190826924090f, 
+  -0.595083076874569960f, 
+  0.803207531480644940f, -0.595699304492433360f, 0.802750399628069160f, 
+  -0.596315181675743710f, 
+  0.802292795538115720f, -0.596930708062196500f, 0.801834719479981310f, 
+  -0.597545883289693160f, 
+  0.801376171723140240f, -0.598160706996342270f, 0.800917152537344300f, 
+  -0.598775178820458720f, 
+  0.800457662192622820f, -0.599389298400564540f, 0.799997700959281910f, 
+  -0.600003065375388940f, 
+  0.799537269107905010f, -0.600616479383868970f, 0.799076366909352350f, 
+  -0.601229540065148500f, 
+  0.798614994634760820f, -0.601842247058580030f, 0.798153152555543750f, 
+  -0.602454600003723750f, 
+  0.797690840943391160f, -0.603066598540348160f, 0.797228060070268810f, 
+  -0.603678242308430370f, 
+  0.796764810208418830f, -0.604289530948155960f, 0.796301091630359110f, 
+  -0.604900464099919820f, 
+  0.795836904608883570f, -0.605511041404325550f, 0.795372249417061310f, 
+  -0.606121262502186120f, 
+  0.794907126328237010f, -0.606731127034524480f, 0.794441535616030590f, 
+  -0.607340634642572930f, 
+  0.793975477554337170f, -0.607949784967773630f, 0.793508952417326660f, 
+  -0.608558577651779450f, 
+  0.793041960479443640f, -0.609167012336453210f, 0.792574502015407690f, 
+  -0.609775088663868430f, 
+  0.792106577300212390f, -0.610382806276309480f, 0.791638186609125880f, 
+  -0.610990164816271660f, 
+  0.791169330217690200f, -0.611597163926461910f, 0.790700008401721610f, 
+  -0.612203803249797950f, 
+  0.790230221437310030f, -0.612810082429409710f, 0.789759969600819070f, 
+  -0.613416001108638590f, 
+  0.789289253168885650f, -0.614021558931038380f, 0.788818072418420280f, 
+  -0.614626755540375050f, 
+  0.788346427626606340f, -0.615231590580626820f, 0.787874319070900220f, 
+  -0.615836063695985090f, 
+  0.787401747029031430f, -0.616440174530853650f, 0.786928711779001810f, 
+  -0.617043922729849760f, 
+  0.786455213599085770f, -0.617647307937803870f, 0.785981252767830150f, 
+  -0.618250329799760250f, 
+  0.785506829564053930f, -0.618852987960976320f, 0.785031944266848080f, 
+  -0.619455282066924020f, 
+  0.784556597155575240f, -0.620057211763289100f, 0.784080788509869950f, 
+  -0.620658776695972140f, 
+  0.783604518609638200f, -0.621259976511087550f, 0.783127787735057310f, 
+  -0.621860810854965360f, 
+  0.782650596166575730f, -0.622461279374149970f, 0.782172944184913010f, 
+  -0.623061381715401260f, 
+  0.781694832071059390f, -0.623661117525694530f, 0.781216260106276090f, 
+  -0.624260486452220650f, 
+  0.780737228572094490f, -0.624859488142386340f, 0.780257737750316590f, 
+  -0.625458122243814360f, 
+  0.779777787923014550f, -0.626056388404343520f, 0.779297379372530300f, 
+  -0.626654286272029350f, 
+  0.778816512381475980f, -0.627251815495144080f, 0.778335187232733210f, 
+  -0.627848975722176460f, 
+  0.777853404209453150f, -0.628445766601832710f, 0.777371163595056310f, 
+  -0.629042187783036000f, 
+  0.776888465673232440f, -0.629638238914926980f, 0.776405310727940390f, 
+  -0.630233919646864370f, 
+  0.775921699043407690f, -0.630829229628424470f, 0.775437630904130540f, 
+  -0.631424168509401860f, 
+  0.774953106594873930f, -0.632018735939809060f, 0.774468126400670860f, 
+  -0.632612931569877410f, 
+  0.773982690606822900f, -0.633206755050057190f, 0.773496799498899050f, 
+  -0.633800206031017280f, 
+  0.773010453362736990f, -0.634393284163645490f, 0.772523652484441330f, 
+  -0.634985989099049460f, 
+  0.772036397150384520f, -0.635578320488556110f, 0.771548687647206300f, 
+  -0.636170277983712170f, 
+  0.771060524261813820f, -0.636761861236284200f, 0.770571907281380810f, 
+  -0.637353069898259130f, 
+  0.770082836993347900f, -0.637943903621844060f, 0.769593313685422940f, 
+  -0.638534362059466790f, 
+  0.769103337645579700f, -0.639124444863775730f, 0.768612909162058380f, 
+  -0.639714151687640450f, 
+  0.768122028523365420f, -0.640303482184151670f, 0.767630696018273380f, 
+  -0.640892436006621380f, 
+  0.767138911935820400f, -0.641481012808583160f, 0.766646676565310380f, 
+  -0.642069212243792540f, 
+  0.766153990196312920f, -0.642657033966226860f, 0.765660853118662500f, 
+  -0.643244477630085850f, 
+  0.765167265622458960f, -0.643831542889791390f, 0.764673227998067140f, 
+  -0.644418229399988380f, 
+  0.764178740536116670f, -0.645004536815543930f, 0.763683803527501870f, 
+  -0.645590464791548690f, 
+  0.763188417263381270f, -0.646176012983316280f, 0.762692582035177980f, 
+  -0.646761181046383920f, 
+  0.762196298134578900f, -0.647345968636512060f, 0.761699565853535380f, 
+  -0.647930375409685340f, 
+  0.761202385484261780f, -0.648514401022112440f, 0.760704757319236920f, 
+  -0.649098045130225950f, 
+  0.760206681651202420f, -0.649681307390683190f, 0.759708158773163440f, 
+  -0.650264187460365850f, 
+  0.759209188978388070f, -0.650846684996380880f, 0.758709772560407390f, 
+  -0.651428799656059820f, 
+  0.758209909813015280f, -0.652010531096959500f, 0.757709601030268080f, 
+  -0.652591878976862440f, 
+  0.757208846506484570f, -0.653172842953776760f, 0.756707646536245670f, 
+  -0.653753422685936060f, 
+  0.756206001414394540f, -0.654333617831800440f, 0.755703911436035880f, 
+  -0.654913428050056030f, 
+  0.755201376896536550f, -0.655492852999615350f, 0.754698398091524500f, 
+  -0.656071892339617600f, 
+  0.754194975316889170f, -0.656650545729428940f, 0.753691108868781210f, 
+  -0.657228812828642540f, 
+  0.753186799043612520f, -0.657806693297078640f, 0.752682046138055340f, 
+  -0.658384186794785050f, 
+  0.752176850449042810f, -0.658961292982037320f, 0.751671212273768430f, 
+  -0.659538011519338660f, 
+  0.751165131909686480f, -0.660114342067420480f, 0.750658609654510700f, 
+  -0.660690284287242300f, 
+  0.750151645806215070f, -0.661265837839992270f, 0.749644240663033480f, 
+  -0.661841002387086870f, 
+  0.749136394523459370f, -0.662415777590171780f, 0.748628107686245440f, 
+  -0.662990163111121470f, 
+  0.748119380450403600f, -0.663564158612039770f, 0.747610213115205150f, 
+  -0.664137763755260010f, 
+  0.747100605980180130f, -0.664710978203344790f, 0.746590559345117310f, 
+  -0.665283801619087180f, 
+  0.746080073510063780f, -0.665856233665509720f, 0.745569148775325430f, 
+  -0.666428274005865240f, 
+  0.745057785441466060f, -0.666999922303637470f, 0.744545983809307370f, 
+  -0.667571178222540310f, 
+  0.744033744179929290f, -0.668142041426518450f, 0.743521066854669120f, 
+  -0.668712511579747980f, 
+  0.743007952135121720f, -0.669282588346636010f, 0.742494400323139180f, 
+  -0.669852271391821020f, 
+  0.741980411720831070f, -0.670421560380173090f, 0.741465986630563290f, 
+  -0.670990454976794220f, 
+  0.740951125354959110f, -0.671558954847018330f, 0.740435828196898020f, 
+  -0.672127059656411730f, 
+  0.739920095459516200f, -0.672694769070772860f, 0.739403927446205760f, 
+  -0.673262082756132970f, 
+  0.738887324460615110f, -0.673829000378756040f, 0.738370286806648620f, 
+  -0.674395521605139050f, 
+  0.737852814788465980f, -0.674961646102011930f, 0.737334908710482910f, 
+  -0.675527373536338520f, 
+  0.736816568877369900f, -0.676092703575315920f, 0.736297795594053170f, 
+  -0.676657635886374950f, 
+  0.735778589165713590f, -0.677222170137180330f, 0.735258949897786840f, 
+  -0.677786305995631500f, 
+  0.734738878095963500f, -0.678350043129861470f, 0.734218374066188280f, 
+  -0.678913381208238410f, 
+  0.733697438114660370f, -0.679476319899364970f, 0.733176070547832740f, 
+  -0.680038858872078930f, 
+  0.732654271672412820f, -0.680600997795453020f, 0.732132041795361290f, 
+  -0.681162736338795430f, 
+  0.731609381223892630f, -0.681724074171649710f, 0.731086290265474340f, 
+  -0.682285010963795570f, 
+  0.730562769227827590f, -0.682845546385248080f, 0.730038818418926260f, 
+  -0.683405680106258680f, 
+  0.729514438146997010f, -0.683965411797315400f, 0.728989628720519420f, 
+  -0.684524741129142300f, 
+  0.728464390448225200f, -0.685083667772700360f, 0.727938723639098620f, 
+  -0.685642191399187470f, 
+  0.727412628602375770f, -0.686200311680038590f, 0.726886105647544970f, 
+  -0.686758028286925890f, 
+  0.726359155084346010f, -0.687315340891759050f, 0.725831777222770370f, 
+  -0.687872249166685550f, 
+  0.725303972373060770f, -0.688428752784090440f, 0.724775740845711280f, 
+  -0.688984851416597040f, 
+  0.724247082951467000f, -0.689540544737066830f, 0.723717999001323500f, 
+  -0.690095832418599950f, 
+  0.723188489306527460f, -0.690650714134534600f, 0.722658554178575610f, 
+  -0.691205189558448450f, 
+  0.722128193929215350f, -0.691759258364157750f, 0.721597408870443770f, 
+  -0.692312920225718220f, 
+  0.721066199314508110f, -0.692866174817424630f, 0.720534565573905270f, 
+  -0.693419021813811760f, 
+  0.720002507961381650f, -0.693971460889654000f, 0.719470026789932990f, 
+  -0.694523491719965520f, 
+  0.718937122372804490f, -0.695075113980000880f, 0.718403795023489830f, 
+  -0.695626327345254870f, 
+  0.717870045055731710f, -0.696177131491462990f, 0.717335872783521730f, 
+  -0.696727526094601200f, 
+  0.716801278521099540f, -0.697277510830886520f, 0.716266262582953120f, 
+  -0.697827085376777290f, 
+  0.715730825283818590f, -0.698376249408972920f, 0.715194966938680120f, 
+  -0.698925002604414150f, 
+  0.714658687862769090f, -0.699473344640283770f, 0.714121988371564820f, 
+  -0.700021275194006250f, 
+  0.713584868780793640f, -0.700568793943248340f, 0.713047329406429340f, 
+  -0.701115900565918660f, 
+  0.712509370564692320f, -0.701662594740168450f, 0.711970992572050100f, 
+  -0.702208876144391870f, 
+  0.711432195745216430f, -0.702754744457225300f, 0.710892980401151680f, 
+  -0.703300199357548730f, 
+  0.710353346857062420f, -0.703845240524484940f, 0.709813295430400840f, 
+  -0.704389867637400410f, 
+  0.709272826438865690f, -0.704934080375904880f, 0.708731940200400650f, 
+  -0.705477878419852100f, 
+  0.708190637033195400f, -0.706021261449339740f, 0.707648917255684350f, 
+  -0.706564229144709510f, 
+  0.707106781186547570f, -0.707106781186547460f, 0.706564229144709620f, 
+  -0.707648917255684350f, 
+  0.706021261449339740f, -0.708190637033195290f, 0.705477878419852210f, 
+  -0.708731940200400650f, 
+  0.704934080375904990f, -0.709272826438865580f, 0.704389867637400410f, 
+  -0.709813295430400840f, 
+  0.703845240524484940f, -0.710353346857062310f, 0.703300199357548730f, 
+  -0.710892980401151680f, 
+  0.702754744457225300f, -0.711432195745216430f, 0.702208876144391870f, 
+  -0.711970992572049990f, 
+  0.701662594740168570f, -0.712509370564692320f, 0.701115900565918660f, 
+  -0.713047329406429230f, 
+  0.700568793943248450f, -0.713584868780793520f, 0.700021275194006360f, 
+  -0.714121988371564710f, 
+  0.699473344640283770f, -0.714658687862768980f, 0.698925002604414150f, 
+  -0.715194966938680010f, 
+  0.698376249408972920f, -0.715730825283818590f, 0.697827085376777290f, 
+  -0.716266262582953120f, 
+  0.697277510830886630f, -0.716801278521099540f, 0.696727526094601200f, 
+  -0.717335872783521730f, 
+  0.696177131491462990f, -0.717870045055731710f, 0.695626327345254870f, 
+  -0.718403795023489720f, 
+  0.695075113980000880f, -0.718937122372804380f, 0.694523491719965520f, 
+  -0.719470026789932990f, 
+  0.693971460889654000f, -0.720002507961381650f, 0.693419021813811880f, 
+  -0.720534565573905270f, 
+  0.692866174817424740f, -0.721066199314508110f, 0.692312920225718220f, 
+  -0.721597408870443660f, 
+  0.691759258364157750f, -0.722128193929215350f, 0.691205189558448450f, 
+  -0.722658554178575610f, 
+  0.690650714134534720f, -0.723188489306527350f, 0.690095832418599950f, 
+  -0.723717999001323390f, 
+  0.689540544737066940f, -0.724247082951466890f, 0.688984851416597150f, 
+  -0.724775740845711280f, 
+  0.688428752784090550f, -0.725303972373060660f, 0.687872249166685550f, 
+  -0.725831777222770370f, 
+  0.687315340891759160f, -0.726359155084346010f, 0.686758028286925890f, 
+  -0.726886105647544970f, 
+  0.686200311680038700f, -0.727412628602375770f, 0.685642191399187470f, 
+  -0.727938723639098620f, 
+  0.685083667772700360f, -0.728464390448225200f, 0.684524741129142300f, 
+  -0.728989628720519310f, 
+  0.683965411797315510f, -0.729514438146996900f, 0.683405680106258790f, 
+  -0.730038818418926150f, 
+  0.682845546385248080f, -0.730562769227827590f, 0.682285010963795570f, 
+  -0.731086290265474230f, 
+  0.681724074171649820f, -0.731609381223892520f, 0.681162736338795430f, 
+  -0.732132041795361290f, 
+  0.680600997795453130f, -0.732654271672412820f, 0.680038858872079040f, 
+  -0.733176070547832740f, 
+  0.679476319899365080f, -0.733697438114660260f, 0.678913381208238410f, 
+  -0.734218374066188170f, 
+  0.678350043129861580f, -0.734738878095963390f, 0.677786305995631500f, 
+  -0.735258949897786730f, 
+  0.677222170137180450f, -0.735778589165713480f, 0.676657635886374950f, 
+  -0.736297795594053060f, 
+  0.676092703575316030f, -0.736816568877369790f, 0.675527373536338630f, 
+  -0.737334908710482790f, 
+  0.674961646102012040f, -0.737852814788465980f, 0.674395521605139050f, 
+  -0.738370286806648510f, 
+  0.673829000378756150f, -0.738887324460615110f, 0.673262082756132970f, 
+  -0.739403927446205760f, 
+  0.672694769070772970f, -0.739920095459516090f, 0.672127059656411840f, 
+  -0.740435828196898020f, 
+  0.671558954847018330f, -0.740951125354959110f, 0.670990454976794220f, 
+  -0.741465986630563290f, 
+  0.670421560380173090f, -0.741980411720830960f, 0.669852271391821130f, 
+  -0.742494400323139180f, 
+  0.669282588346636010f, -0.743007952135121720f, 0.668712511579748090f, 
+  -0.743521066854669120f, 
+  0.668142041426518560f, -0.744033744179929180f, 0.667571178222540310f, 
+  -0.744545983809307250f, 
+  0.666999922303637470f, -0.745057785441465950f, 0.666428274005865350f, 
+  -0.745569148775325430f, 
+  0.665856233665509720f, -0.746080073510063780f, 0.665283801619087180f, 
+  -0.746590559345117310f, 
+  0.664710978203344900f, -0.747100605980180130f, 0.664137763755260010f, 
+  -0.747610213115205150f, 
+  0.663564158612039880f, -0.748119380450403490f, 0.662990163111121470f, 
+  -0.748628107686245330f, 
+  0.662415777590171780f, -0.749136394523459260f, 0.661841002387086870f, 
+  -0.749644240663033480f, 
+  0.661265837839992270f, -0.750151645806214960f, 0.660690284287242300f, 
+  -0.750658609654510590f, 
+  0.660114342067420480f, -0.751165131909686370f, 0.659538011519338770f, 
+  -0.751671212273768430f, 
+  0.658961292982037320f, -0.752176850449042700f, 0.658384186794785050f, 
+  -0.752682046138055230f, 
+  0.657806693297078640f, -0.753186799043612410f, 0.657228812828642650f, 
+  -0.753691108868781210f, 
+  0.656650545729429050f, -0.754194975316889170f, 0.656071892339617710f, 
+  -0.754698398091524390f, 
+  0.655492852999615460f, -0.755201376896536550f, 0.654913428050056150f, 
+  -0.755703911436035880f, 
+  0.654333617831800550f, -0.756206001414394540f, 0.653753422685936170f, 
+  -0.756707646536245670f, 
+  0.653172842953776760f, -0.757208846506484460f, 0.652591878976862550f, 
+  -0.757709601030268080f, 
+  0.652010531096959500f, -0.758209909813015280f, 0.651428799656059820f, 
+  -0.758709772560407390f, 
+  0.650846684996380990f, -0.759209188978387960f, 0.650264187460365960f, 
+  -0.759708158773163440f, 
+  0.649681307390683190f, -0.760206681651202420f, 0.649098045130226060f, 
+  -0.760704757319236920f, 
+  0.648514401022112550f, -0.761202385484261780f, 0.647930375409685460f, 
+  -0.761699565853535270f, 
+  0.647345968636512060f, -0.762196298134578900f, 0.646761181046383920f, 
+  -0.762692582035177870f, 
+  0.646176012983316390f, -0.763188417263381270f, 0.645590464791548800f, 
+  -0.763683803527501870f, 
+  0.645004536815544040f, -0.764178740536116670f, 0.644418229399988380f, 
+  -0.764673227998067140f, 
+  0.643831542889791500f, -0.765167265622458960f, 0.643244477630085850f, 
+  -0.765660853118662390f, 
+  0.642657033966226860f, -0.766153990196312810f, 0.642069212243792540f, 
+  -0.766646676565310380f, 
+  0.641481012808583160f, -0.767138911935820400f, 0.640892436006621380f, 
+  -0.767630696018273270f, 
+  0.640303482184151670f, -0.768122028523365310f, 0.639714151687640450f, 
+  -0.768612909162058270f, 
+  0.639124444863775730f, -0.769103337645579590f, 0.638534362059466790f, 
+  -0.769593313685422940f, 
+  0.637943903621844170f, -0.770082836993347900f, 0.637353069898259130f, 
+  -0.770571907281380700f, 
+  0.636761861236284200f, -0.771060524261813710f, 0.636170277983712170f, 
+  -0.771548687647206300f, 
+  0.635578320488556230f, -0.772036397150384410f, 0.634985989099049460f, 
+  -0.772523652484441330f, 
+  0.634393284163645490f, -0.773010453362736990f, 0.633800206031017280f, 
+  -0.773496799498899050f, 
+  0.633206755050057190f, -0.773982690606822790f, 0.632612931569877520f, 
+  -0.774468126400670860f, 
+  0.632018735939809060f, -0.774953106594873820f, 0.631424168509401860f, 
+  -0.775437630904130430f, 
+  0.630829229628424470f, -0.775921699043407580f, 0.630233919646864480f, 
+  -0.776405310727940390f, 
+  0.629638238914927100f, -0.776888465673232440f, 0.629042187783036000f, 
+  -0.777371163595056200f, 
+  0.628445766601832710f, -0.777853404209453040f, 0.627848975722176570f, 
+  -0.778335187232733090f, 
+  0.627251815495144190f, -0.778816512381475870f, 0.626654286272029460f, 
+  -0.779297379372530300f, 
+  0.626056388404343520f, -0.779777787923014440f, 0.625458122243814360f, 
+  -0.780257737750316590f, 
+  0.624859488142386450f, -0.780737228572094380f, 0.624260486452220650f, 
+  -0.781216260106276090f, 
+  0.623661117525694640f, -0.781694832071059390f, 0.623061381715401370f, 
+  -0.782172944184912900f, 
+  0.622461279374150080f, -0.782650596166575730f, 0.621860810854965360f, 
+  -0.783127787735057310f, 
+  0.621259976511087660f, -0.783604518609638200f, 0.620658776695972140f, 
+  -0.784080788509869950f, 
+  0.620057211763289210f, -0.784556597155575240f, 0.619455282066924020f, 
+  -0.785031944266848080f, 
+  0.618852987960976320f, -0.785506829564053930f, 0.618250329799760250f, 
+  -0.785981252767830150f, 
+  0.617647307937803980f, -0.786455213599085770f, 0.617043922729849760f, 
+  -0.786928711779001700f, 
+  0.616440174530853650f, -0.787401747029031320f, 0.615836063695985090f, 
+  -0.787874319070900110f, 
+  0.615231590580626820f, -0.788346427626606230f, 0.614626755540375050f, 
+  -0.788818072418420170f, 
+  0.614021558931038490f, -0.789289253168885650f, 0.613416001108638590f, 
+  -0.789759969600819070f, 
+  0.612810082429409710f, -0.790230221437310030f, 0.612203803249798060f, 
+  -0.790700008401721610f, 
+  0.611597163926462020f, -0.791169330217690090f, 0.610990164816271770f, 
+  -0.791638186609125770f, 
+  0.610382806276309480f, -0.792106577300212390f, 0.609775088663868430f, 
+  -0.792574502015407580f, 
+  0.609167012336453210f, -0.793041960479443640f, 0.608558577651779450f, 
+  -0.793508952417326660f, 
+  0.607949784967773740f, -0.793975477554337170f, 0.607340634642572930f, 
+  -0.794441535616030590f, 
+  0.606731127034524480f, -0.794907126328237010f, 0.606121262502186230f, 
+  -0.795372249417061190f, 
+  0.605511041404325550f, -0.795836904608883460f, 0.604900464099919930f, 
+  -0.796301091630359110f, 
+  0.604289530948156070f, -0.796764810208418720f, 0.603678242308430370f, 
+  -0.797228060070268700f, 
+  0.603066598540348280f, -0.797690840943391040f, 0.602454600003723860f, 
+  -0.798153152555543750f, 
+  0.601842247058580030f, -0.798614994634760820f, 0.601229540065148620f, 
+  -0.799076366909352350f, 
+  0.600616479383868970f, -0.799537269107905010f, 0.600003065375389060f, 
+  -0.799997700959281910f, 
+  0.599389298400564540f, -0.800457662192622710f, 0.598775178820458720f, 
+  -0.800917152537344300f, 
+  0.598160706996342380f, -0.801376171723140130f, 0.597545883289693270f, 
+  -0.801834719479981310f, 
+  0.596930708062196500f, -0.802292795538115720f, 0.596315181675743820f, 
+  -0.802750399628069160f, 
+  0.595699304492433470f, -0.803207531480644830f, 0.595083076874569960f, 
+  -0.803664190826924090f, 
+  0.594466499184664540f, -0.804120377398265700f, 0.593849571785433630f, 
+  -0.804576090926307000f, 
+  0.593232295039799800f, -0.805031331142963660f, 0.592614669310891130f, 
+  -0.805486097780429120f, 
+  0.591996694962040990f, -0.805940390571176280f, 0.591378372356787580f, 
+  -0.806394209247956240f, 
+  0.590759701858874280f, -0.806847553543799220f, 0.590140683832248940f, 
+  -0.807300423192014450f, 
+  0.589521318641063940f, -0.807752817926190360f, 0.588901606649675840f, 
+  -0.808204737480194720f, 
+  0.588281548222645330f, -0.808656181588174980f, 0.587661143724736770f, 
+  -0.809107149984558130f, 
+  0.587040393520918080f, -0.809557642404051260f, 0.586419297976360500f, 
+  -0.810007658581641140f, 
+  0.585797857456438860f, -0.810457198252594770f, 0.585176072326730410f, 
+  -0.810906261152459670f, 
+  0.584553942953015330f, -0.811354847017063730f, 0.583931469701276300f, 
+  -0.811802955582515360f, 
+  0.583308652937698290f, -0.812250586585203880f, 0.582685493028668460f, 
+  -0.812697739761799490f, 
+  0.582061990340775550f, -0.813144414849253590f, 0.581438145240810280f, 
+  -0.813590611584798510f, 
+  0.580813958095764530f, -0.814036329705948300f, 0.580189429272831680f, 
+  -0.814481568950498610f, 
+  0.579564559139405740f, -0.814926329056526620f, 0.578939348063081890f, 
+  -0.815370609762391290f, 
+  0.578313796411655590f, -0.815814410806733780f, 0.577687904553122800f, 
+  -0.816257731928477390f, 
+  0.577061672855679550f, -0.816700572866827850f, 0.576435101687721830f, 
+  -0.817142933361272970f, 
+  0.575808191417845340f, -0.817584813151583710f, 0.575180942414845190f, 
+  -0.818026211977813440f, 
+  0.574553355047715760f, -0.818467129580298660f, 0.573925429685650750f, 
+  -0.818907565699658950f, 
+  0.573297166698042320f, -0.819347520076796900f, 0.572668566454481160f, 
+  -0.819786992452898990f, 
+  0.572039629324757050f, -0.820225982569434690f, 0.571410355678857340f, 
+  -0.820664490168157460f, 
+  0.570780745886967370f, -0.821102514991104650f, 0.570150800319470300f, 
+  -0.821540056780597610f, 
+  0.569520519346947250f, -0.821977115279241550f, 0.568889903340175970f, 
+  -0.822413690229926390f, 
+  0.568258952670131490f, -0.822849781375826320f, 0.567627667707986230f, 
+  -0.823285388460400110f, 
+  0.566996048825108680f, -0.823720511227391320f, 0.566364096393063950f, 
+  -0.824155149420828570f, 
+  0.565731810783613230f, -0.824589302785025290f, 0.565099192368714090f, 
+  -0.825022971064580220f, 
+  0.564466241520519500f, -0.825456154004377440f, 0.563832958611378170f, 
+  -0.825888851349586780f, 
+  0.563199344013834090f, -0.826321062845663420f, 0.562565398100626560f, 
+  -0.826752788238348520f, 
+  0.561931121244689470f, -0.827184027273669020f, 0.561296513819151470f, 
+  -0.827614779697938400f, 
+  0.560661576197336030f, -0.828045045257755800f, 0.560026308752760380f, 
+  -0.828474823700007130f, 
+  0.559390711859136140f, -0.828904114771864870f, 0.558754785890368310f, 
+  -0.829332918220788250f, 
+  0.558118531220556100f, -0.829761233794523050f, 0.557481948223991660f, 
+  -0.830189061241102370f, 
+  0.556845037275160100f, -0.830616400308846200f, 0.556207798748739930f, 
+  -0.831043250746362320f, 
+  0.555570233019602290f, -0.831469612302545240f, 0.554932340462810370f, 
+  -0.831895484726577590f, 
+  0.554294121453620110f, -0.832320867767929680f, 0.553655576367479310f, 
+  -0.832745761176359460f, 
+  0.553016705580027580f, -0.833170164701913190f, 0.552377509467096070f, 
+  -0.833594078094925140f, 
+  0.551737988404707450f, -0.834017501106018130f, 0.551098142769075430f, 
+  -0.834440433486103190f, 
+  0.550457972936604810f, -0.834862874986380010f, 0.549817479283891020f, 
+  -0.835284825358337370f, 
+  0.549176662187719770f, -0.835706284353752600f, 0.548535522025067390f, 
+  -0.836127251724692160f, 
+  0.547894059173100190f, -0.836547727223511890f, 0.547252274009174090f, 
+  -0.836967710602857020f, 
+  0.546610166910834860f, -0.837387201615661940f, 0.545967738255817680f, 
+  -0.837806200015150940f, 
+  0.545324988422046460f, -0.838224705554837970f, 0.544681917787634530f, 
+  -0.838642717988527300f, 
+  0.544038526730883930f, -0.839060237070312630f, 0.543394815630284800f, 
+  -0.839477262554578550f, 
+  0.542750784864516000f, -0.839893794195999410f, 0.542106434812444030f, 
+  -0.840309831749540770f, 
+  0.541461765853123560f, -0.840725374970458070f, 0.540816778365796670f, 
+  -0.841140423614298080f, 
+  0.540171472729892970f, -0.841554977436898330f, 0.539525849325029010f, 
+  -0.841969036194387680f, 
+  0.538879908531008420f, -0.842382599643185960f, 0.538233650727821700f, 
+  -0.842795667540004120f, 
+  0.537587076295645510f, -0.843208239641845440f, 0.536940185614843020f, 
+  -0.843620315706004040f, 
+  0.536292979065963180f, -0.844031895490066410f, 0.535645457029741090f, 
+  -0.844442978751910660f, 
+  0.534997619887097260f, -0.844853565249707010f, 0.534349468019137520f, 
+  -0.845263654741918220f, 
+  0.533701001807152960f, -0.845673246987299070f, 0.533052221632619670f, 
+  -0.846082341744896940f, 
+  0.532403127877198010f, -0.846490938774052020f, 0.531753720922733320f, 
+  -0.846899037834397350f, 
+  0.531104001151255000f, -0.847306638685858320f, 0.530453968944976320f, 
+  -0.847713741088654270f, 
+  0.529803624686294830f, -0.848120344803297120f, 0.529152968757790720f, 
+  -0.848526449590592650f, 
+  0.528502001542228480f, -0.848932055211639610f, 0.527850723422555460f, 
+  -0.849337161427830670f, 
+  0.527199134781901390f, -0.849741768000852440f, 0.526547236003579330f, 
+  -0.850145874692685210f, 
+  0.525895027471084740f, -0.850549481265603370f, 0.525242509568094710f, 
+  -0.850952587482175730f, 
+  0.524589682678468840f, -0.851355193105265200f, 0.523936547186248600f, 
+  -0.851757297898029120f, 
+  0.523283103475656430f, -0.852158901623919830f, 0.522629351931096720f, 
+  -0.852560004046683970f, 
+  0.521975292937154390f, -0.852960604930363630f, 0.521320926878595550f, 
+  -0.853360704039295430f, 
+  0.520666254140367270f, -0.853760301138111300f, 0.520011275107596040f, 
+  -0.854159395991738730f, 
+  0.519355990165589530f, -0.854557988365400530f, 0.518700399699835170f, 
+  -0.854956078024614820f, 
+  0.518044504095999340f, -0.855353664735196030f, 0.517388303739929060f, 
+  -0.855750748263253920f, 
+  0.516731799017649980f, -0.856147328375194470f, 0.516074990315366630f, 
+  -0.856543404837719960f, 
+  0.515417878019463150f, -0.856938977417828650f, 0.514760462516501200f, 
+  -0.857334045882815590f, 
+  0.514102744193221660f, -0.857728610000272120f, 0.513444723436543570f, 
+  -0.858122669538086020f, 
+  0.512786400633563070f, -0.858516224264442740f, 0.512127776171554690f, 
+  -0.858909273947823900f, 
+  0.511468850437970520f, -0.859301818357008360f, 0.510809623820439040f, 
+  -0.859693857261072610f, 
+  0.510150096706766700f, -0.860085390429390140f, 0.509490269484936360f, 
+  -0.860476417631632070f, 
+  0.508830142543106990f, -0.860866938637767310f, 0.508169716269614710f, 
+  -0.861256953218062060f, 
+  0.507508991052970870f, -0.861646461143081300f, 0.506847967281863320f, 
+  -0.862035462183687210f, 
+  0.506186645345155450f, -0.862423956111040500f, 0.505525025631885510f, 
+  -0.862811942696600330f, 
+  0.504863108531267480f, -0.863199421712124160f, 0.504200894432690560f, 
+  -0.863586392929667990f, 
+  0.503538383725717580f, -0.863972856121586700f, 0.502875576800086880f, 
+  -0.864358811060534030f, 
+  0.502212474045710900f, -0.864744257519462380f, 0.501549075852675390f, 
+  -0.865129195271623690f, 
+  0.500885382611240940f, -0.865513624090568980f, 0.500221394711840680f, 
+  -0.865897543750148820f, 
+  0.499557112545081890f, -0.866280954024512990f, 0.498892536501744750f, 
+  -0.866663854688111020f, 
+  0.498227666972781870f, -0.867046245515692650f, 0.497562504349319090f, 
+  -0.867428126282306920f, 
+  0.496897049022654640f, -0.867809496763303210f, 0.496231301384258310f, 
+  -0.868190356734331310f, 
+  0.495565261825772490f, -0.868570705971340900f, 0.494898930739011310f, 
+  -0.868950544250582380f, 
+  0.494232308515959730f, -0.869329871348606730f, 0.493565395548774880f, 
+  -0.869708687042265560f, 
+  0.492898192229784090f, -0.870086991108711350f, 0.492230698951486080f, 
+  -0.870464783325397670f, 
+  0.491562916106550060f, -0.870842063470078860f, 0.490894844087815140f, 
+  -0.871218831320810900f, 
+  0.490226483288291100f, -0.871595086655951090f, 0.489557834101157550f, 
+  -0.871970829254157700f, 
+  0.488888896919763230f, -0.872346058894391540f, 0.488219672137626740f, 
+  -0.872720775355914300f, 
+  0.487550160148436050f, -0.873094978418290090f, 0.486880361346047400f, 
+  -0.873468667861384880f, 
+  0.486210276124486530f, -0.873841843465366750f, 0.485539904877947020f, 
+  -0.874214505010706300f, 
+  0.484869248000791120f, -0.874586652278176110f, 0.484198305887549140f, 
+  -0.874958285048851540f, 
+  0.483527078932918740f, -0.875329403104110780f, 0.482855567531765670f, 
+  -0.875700006225634600f, 
+  0.482183772079122830f, -0.876070094195406600f, 0.481511692970189920f, 
+  -0.876439666795713610f, 
+  0.480839330600333900f, -0.876808723809145760f, 0.480166685365088440f, 
+  -0.877177265018595940f, 
+  0.479493757660153010f, -0.877545290207261240f, 0.478820547881394050f, 
+  -0.877912799158641730f, 
+  0.478147056424843120f, -0.878279791656541460f, 0.477473283686698060f, 
+  -0.878646267485068130f, 
+  0.476799230063322250f, -0.879012226428633410f, 0.476124895951243630f, 
+  -0.879377668271953180f, 
+  0.475450281747155870f, -0.879742592800047410f, 0.474775387847917230f, 
+  -0.880106999798240360f, 
+  0.474100214650550020f, -0.880470889052160750f, 0.473424762552241530f, 
+  -0.880834260347742040f, 
+  0.472749031950342900f, -0.881197113471221980f, 0.472073023242368660f, 
+  -0.881559448209143780f, 
+  0.471396736825997810f, -0.881921264348354940f, 0.470720173099071710f, 
+  -0.882282561676008600f, 
+  0.470043332459595620f, -0.882643339979562790f, 0.469366215305737630f, 
+  -0.883003599046780720f, 
+  0.468688822035827960f, -0.883363338665731580f, 0.468011153048359830f, 
+  -0.883722558624789660f, 
+  0.467333208741988530f, -0.884081258712634990f, 0.466654989515530970f, 
+  -0.884439438718253700f, 
+  0.465976495767966130f, -0.884797098430937790f, 0.465297727898434650f, 
+  -0.885154237640285110f, 
+  0.464618686306237820f, -0.885510856136199950f, 0.463939371390838460f, 
+  -0.885866953708892790f, 
+  0.463259783551860260f, -0.886222530148880640f, 0.462579923189086810f, 
+  -0.886577585246987040f, 
+  0.461899790702462840f, -0.886932118794342080f, 0.461219386492092430f, 
+  -0.887286130582383150f, 
+  0.460538710958240010f, -0.887639620402853930f, 0.459857764501329650f, 
+  -0.887992588047805560f, 
+  0.459176547521944150f, -0.888345033309596240f, 0.458495060420826220f, 
+  -0.888696955980891710f, 
+  0.457813303598877290f, -0.889048355854664570f, 0.457131277457156980f, 
+  -0.889399232724195520f, 
+  0.456448982396883860f, -0.889749586383072890f, 0.455766418819434750f, 
+  -0.890099416625192210f, 
+  0.455083587126343840f, -0.890448723244757880f, 0.454400487719303750f, 
+  -0.890797506036281490f, 
+  0.453717121000163930f, -0.891145764794583180f, 0.453033487370931580f, 
+  -0.891493499314791380f, 
+  0.452349587233771000f, -0.891840709392342720f, 0.451665420991002540f, 
+  -0.892187394822982480f, 
+  0.450980989045103810f, -0.892533555402764690f, 0.450296291798708730f, 
+  -0.892879190928051680f, 
+  0.449611329654606600f, -0.893224301195515320f, 0.448926103015743260f, 
+  -0.893568886002136020f, 
+  0.448240612285220000f, -0.893912945145203250f, 0.447554857866293010f, 
+  -0.894256478422316040f, 
+  0.446868840162374330f, -0.894599485631382580f, 0.446182559577030120f, 
+  -0.894941966570620750f, 
+  0.445496016513981740f, -0.895283921038557580f, 0.444809211377105000f, 
+  -0.895625348834030000f, 
+  0.444122144570429260f, -0.895966249756185110f, 0.443434816498138430f, 
+  -0.896306623604479660f, 
+  0.442747227564570130f, -0.896646470178680150f, 0.442059378174214760f, 
+  -0.896985789278863970f, 
+  0.441371268731716620f, -0.897324580705418320f, 0.440682899641873020f, 
+  -0.897662844259040750f, 
+  0.439994271309633260f, -0.898000579740739880f, 0.439305384140100060f, 
+  -0.898337786951834190f, 
+  0.438616238538527710f, -0.898674465693953820f, 0.437926834910322860f, 
+  -0.899010615769039070f, 
+  0.437237173661044200f, -0.899346236979341460f, 0.436547255196401250f, 
+  -0.899681329127423930f, 
+  0.435857079922255470f, -0.900015892016160280f, 0.435166648244619370f, 
+  -0.900349925448735600f, 
+  0.434475960569655710f, -0.900683429228646860f, 0.433785017303678520f, 
+  -0.901016403159702330f, 
+  0.433093818853152010f, -0.901348847046022030f, 0.432402365624690140f, 
+  -0.901680760692037730f, 
+  0.431710658025057370f, -0.902012143902493070f, 0.431018696461167080f, 
+  -0.902342996482444200f, 
+  0.430326481340082610f, -0.902673318237258830f, 0.429634013069016500f, 
+  -0.903003108972617040f, 
+  0.428941292055329550f, -0.903332368494511820f, 0.428248318706531910f, 
+  -0.903661096609247980f, 
+  0.427555093430282200f, -0.903989293123443340f, 0.426861616634386490f, 
+  -0.904316957844028320f, 
+  0.426167888726799620f, -0.904644090578246240f, 0.425473910115623910f, 
+  -0.904970691133653250f, 
+  0.424779681209108810f, -0.905296759318118820f, 0.424085202415651670f, 
+  -0.905622294939825160f, 
+  0.423390474143796100f, -0.905947297807268460f, 0.422695496802232950f, 
+  -0.906271767729257660f, 
+  0.422000270799799790f, -0.906595704514915330f, 0.421304796545479700f, 
+  -0.906919107973678030f, 
+  0.420609074448402510f, -0.907241977915295930f, 0.419913104917843730f, 
+  -0.907564314149832520f, 
+  0.419216888363223960f, -0.907886116487666150f, 0.418520425194109700f, 
+  -0.908207384739488700f, 
+  0.417823715820212380f, -0.908528118716306120f, 0.417126760651387870f, 
+  -0.908848318229439120f, 
+  0.416429560097637320f, -0.909167983090522270f, 0.415732114569105420f, 
+  -0.909487113111505430f, 
+  0.415034424476081630f, -0.909805708104652220f, 0.414336490228999210f, 
+  -0.910123767882541570f, 
+  0.413638312238434560f, -0.910441292258067140f, 0.412939890915108020f, 
+  -0.910758281044437570f, 
+  0.412241226669883000f, -0.911074734055176250f, 0.411542319913765280f, 
+  -0.911390651104122320f, 
+  0.410843171057903910f, -0.911706032005429880f, 0.410143780513590350f, 
+  -0.912020876573568230f, 
+  0.409444148692257590f, -0.912335184623322750f, 0.408744276005481520f, 
+  -0.912648955969793900f, 
+  0.408044162864978740f, -0.912962190428398100f, 0.407343809682607970f, 
+  -0.913274887814867760f, 
+  0.406643216870369140f, -0.913587047945250810f, 0.405942384840402570f, 
+  -0.913898670635911680f, 
+  0.405241314004989860f, -0.914209755703530690f, 0.404540004776553110f, 
+  -0.914520302965104450f, 
+  0.403838457567654130f, -0.914830312237946090f, 0.403136672790995240f, 
+  -0.915139783339685260f, 
+  0.402434650859418540f, -0.915448716088267830f, 0.401732392185905010f, 
+  -0.915757110301956720f, 
+  0.401029897183575790f, -0.916064965799331610f, 0.400327166265690150f, 
+  -0.916372282399289140f, 
+  0.399624199845646790f, -0.916679059921042700f, 0.398920998336983020f, 
+  -0.916985298184122890f, 
+  0.398217562153373620f, -0.917290997008377910f, 0.397513891708632330f, 
+  -0.917596156213972950f, 
+  0.396809987416710420f, -0.917900775621390390f, 0.396105849691696320f, 
+  -0.918204855051430900f, 
+  0.395401478947816300f, -0.918508394325212250f, 0.394696875599433670f, 
+  -0.918811393264169940f, 
+  0.393992040061048100f, -0.919113851690057770f, 0.393286972747296570f, 
+  -0.919415769424946960f, 
+  0.392581674072951530f, -0.919717146291227360f, 0.391876144452922350f, 
+  -0.920017982111606570f, 
+  0.391170384302253980f, -0.920318276709110480f, 0.390464394036126650f, 
+  -0.920618029907083860f, 
+  0.389758174069856410f, -0.920917241529189520f, 0.389051724818894500f, 
+  -0.921215911399408730f, 
+  0.388345046698826300f, -0.921514039342041900f, 0.387638140125372680f, 
+  -0.921811625181708120f, 
+  0.386931005514388690f, -0.922108668743345070f, 0.386223643281862980f, 
+  -0.922405169852209880f, 
+  0.385516053843919020f, -0.922701128333878520f, 0.384808237616812930f, 
+  -0.922996544014246250f, 
+  0.384100195016935040f, -0.923291416719527640f, 0.383391926460808770f, 
+  -0.923585746276256560f, 
+  0.382683432365089840f, -0.923879532511286740f, 0.381974713146567220f, 
+  -0.924172775251791200f, 
+  0.381265769222162490f, -0.924465474325262600f, 0.380556601008928570f, 
+  -0.924757629559513910f, 
+  0.379847208924051110f, -0.925049240782677580f, 0.379137593384847430f, 
+  -0.925340307823206200f, 
+  0.378427754808765620f, -0.925630830509872720f, 0.377717693613385810f, 
+  -0.925920808671769960f, 
+  0.377007410216418310f, -0.926210242138311270f, 0.376296905035704790f, 
+  -0.926499130739230510f, 
+  0.375586178489217330f, -0.926787474304581750f, 0.374875230995057600f, 
+  -0.927075272664740100f, 
+  0.374164062971457990f, -0.927362525650401110f, 0.373452674836780410f, 
+  -0.927649233092581180f, 
+  0.372741067009515810f, -0.927935394822617890f, 0.372029239908284960f, 
+  -0.928221010672169440f, 
+  0.371317193951837600f, -0.928506080473215480f, 0.370604929559051670f, 
+  -0.928790604058057020f, 
+  0.369892447148934270f, -0.929074581259315750f, 0.369179747140620070f, 
+  -0.929358011909935500f, 
+  0.368466829953372320f, -0.929640895843181330f, 0.367753696006582090f, 
+  -0.929923232892639560f, 
+  0.367040345719767240f, -0.930205022892219070f, 0.366326779512573590f, 
+  -0.930486265676149780f, 
+  0.365612997804773960f, -0.930766961078983710f, 0.364899001016267380f, 
+  -0.931047108935595170f, 
+  0.364184789567079840f, -0.931326709081180430f, 0.363470363877363870f, 
+  -0.931605761351257830f, 
+  0.362755724367397230f, -0.931884265581668150f, 0.362040871457584350f, 
+  -0.932162221608574320f, 
+  0.361325805568454340f, -0.932439629268462360f, 0.360610527120662270f, 
+  -0.932716488398140250f, 
+  0.359895036534988280f, -0.932992798834738850f, 0.359179334232336560f, 
+  -0.933268560415712050f, 
+  0.358463420633736540f, -0.933543772978836170f, 0.357747296160342010f, 
+  -0.933818436362210960f, 
+  0.357030961233430030f, -0.934092550404258870f, 0.356314416274402360f, 
+  -0.934366114943725900f, 
+  0.355597661704783960f, -0.934639129819680780f, 0.354880697946222790f, 
+  -0.934911594871516090f, 
+  0.354163525420490510f, -0.935183509938947500f, 0.353446144549480870f, 
+  -0.935454874862014620f, 
+  0.352728555755210730f, -0.935725689481080370f, 0.352010759459819240f, 
+  -0.935995953636831300f, 
+  0.351292756085567150f, -0.936265667170278260f, 0.350574546054837570f, 
+  -0.936534829922755500f, 
+  0.349856129790135030f, -0.936803441735921560f, 0.349137507714085030f, 
+  -0.937071502451759190f, 
+  0.348418680249434510f, -0.937339011912574960f, 0.347699647819051490f, 
+  -0.937605969960999990f, 
+  0.346980410845923680f, -0.937872376439989890f, 0.346260969753160170f, 
+  -0.938138231192824360f, 
+  0.345541324963989150f, -0.938403534063108060f, 0.344821476901759290f, 
+  -0.938668284894770170f, 
+  0.344101425989938980f, -0.938932483532064490f, 0.343381172652115100f, 
+  -0.939196129819569900f, 
+  0.342660717311994380f, -0.939459223602189920f, 0.341940060393402300f, 
+  -0.939721764725153340f, 
+  0.341219202320282410f, -0.939983753034013940f, 0.340498143516697100f, 
+  -0.940245188374650880f, 
+  0.339776884406826960f, -0.940506070593268300f, 0.339055425414969640f, 
+  -0.940766399536396070f, 
+  0.338333766965541290f, -0.941026175050889260f, 0.337611909483074680f, 
+  -0.941285396983928660f, 
+  0.336889853392220050f, -0.941544065183020810f, 0.336167599117744690f, 
+  -0.941802179495997650f, 
+  0.335445147084531660f, -0.942059739771017310f, 0.334722497717581220f, 
+  -0.942316745856563780f, 
+  0.333999651442009490f, -0.942573197601446870f, 0.333276608683047980f, 
+  -0.942829094854802710f, 
+  0.332553369866044220f, -0.943084437466093490f, 0.331829935416461220f, 
+  -0.943339225285107720f, 
+  0.331106305759876430f, -0.943593458161960390f, 0.330382481321982950f, 
+  -0.943847135947092690f, 
+  0.329658462528587550f, -0.944100258491272660f, 0.328934249805612200f, 
+  -0.944352825645594750f, 
+  0.328209843579092660f, -0.944604837261480260f, 0.327485244275178060f, 
+  -0.944856293190677210f, 
+  0.326760452320131790f, -0.945107193285260610f, 0.326035468140330350f, 
+  -0.945357537397632290f, 
+  0.325310292162262980f, -0.945607325380521280f, 0.324584924812532150f, 
+  -0.945856557086983910f, 
+  0.323859366517852960f, -0.946105232370403340f, 0.323133617705052330f, 
+  -0.946353351084490590f, 
+  0.322407678801070020f, -0.946600913083283530f, 0.321681550232956640f, 
+  -0.946847918221148000f, 
+  0.320955232427875210f, -0.947094366352777220f, 0.320228725813100020f, 
+  -0.947340257333191940f, 
+  0.319502030816015750f, -0.947585591017741090f, 0.318775147864118480f, 
+  -0.947830367262101010f, 
+  0.318048077385015060f, -0.948074585922276230f, 0.317320819806421790f, 
+  -0.948318246854599090f, 
+  0.316593375556165850f, -0.948561349915730270f, 0.315865745062184070f, 
+  -0.948803894962658380f, 
+  0.315137928752522440f, -0.949045881852700560f, 0.314409927055336820f, 
+  -0.949287310443502010f, 
+  0.313681740398891570f, -0.949528180593036670f, 0.312953369211560200f, 
+  -0.949768492159606680f, 
+  0.312224813921825050f, -0.950008245001843000f, 0.311496074958275970f, 
+  -0.950247438978705230f, 
+  0.310767152749611470f, -0.950486073949481700f, 0.310038047724638000f, 
+  -0.950724149773789610f, 
+  0.309308760312268780f, -0.950961666311575080f, 0.308579290941525030f, 
+  -0.951198623423113230f, 
+  0.307849640041534980f, -0.951435020969008340f, 0.307119808041533100f, 
+  -0.951670858810193860f, 
+  0.306389795370861080f, -0.951906136807932230f, 0.305659602458966230f, 
+  -0.952140854823815830f, 
+  0.304929229735402430f, -0.952375012719765880f, 0.304198677629829270f, 
+  -0.952608610358033240f, 
+  0.303467946572011370f, -0.952841647601198720f, 0.302737036991819140f, 
+  -0.953074124312172200f, 
+  0.302005949319228200f, -0.953306040354193750f, 0.301274683984318000f, 
+  -0.953537395590833280f, 
+  0.300543241417273400f, -0.953768189885990330f, 0.299811622048383460f, 
+  -0.953998423103894490f, 
+  0.299079826308040480f, -0.954228095109105670f, 0.298347854626741570f, 
+  -0.954457205766513490f, 
+  0.297615707435086310f, -0.954685754941338340f, 0.296883385163778270f, 
+  -0.954913742499130520f, 
+  0.296150888243623960f, -0.955141168305770670f, 0.295418217105532070f, 
+  -0.955368032227470240f, 
+  0.294685372180514330f, -0.955594334130771110f, 0.293952353899684770f, 
+  -0.955820073882545420f, 
+  0.293219162694258680f, -0.956045251349996410f, 0.292485798995553830f, 
+  -0.956269866400658140f, 
+  0.291752263234989370f, -0.956493918902394990f, 0.291018555844085090f, 
+  -0.956717408723403050f, 
+  0.290284677254462330f, -0.956940335732208940f, 0.289550627897843140f, 
+  -0.957162699797670100f, 
+  0.288816408206049480f, -0.957384500788975860f, 0.288082018611004300f, 
+  -0.957605738575646240f, 
+  0.287347459544729570f, -0.957826413027532910f, 0.286612731439347790f, 
+  -0.958046524014818600f, 
+  0.285877834727080730f, -0.958266071408017670f, 0.285142769840248720f, 
+  -0.958485055077976100f, 
+  0.284407537211271820f, -0.958703474895871600f, 0.283672137272668550f, 
+  -0.958921330733213060f, 
+  0.282936570457055390f, -0.959138622461841890f, 0.282200837197147500f, 
+  -0.959355349953930790f, 
+  0.281464937925758050f, -0.959571513081984520f, 0.280728873075797190f, 
+  -0.959787111718839900f, 
+  0.279992643080273380f, -0.960002145737665850f, 0.279256248372291240f, 
+  -0.960216615011963430f, 
+  0.278519689385053060f, -0.960430519415565790f, 0.277782966551857800f, 
+  -0.960643858822638470f, 
+  0.277046080306099950f, -0.960856633107679660f, 0.276309031081271030f, 
+  -0.961068842145519350f, 
+  0.275571819310958250f, -0.961280485811320640f, 0.274834445428843940f, 
+  -0.961491563980579000f, 
+  0.274096909868706330f, -0.961702076529122540f, 0.273359213064418790f, 
+  -0.961912023333112100f, 
+  0.272621355449948980f, -0.962121404269041580f, 0.271883337459359890f, 
+  -0.962330219213737400f, 
+  0.271145159526808070f, -0.962538468044359160f, 0.270406822086544820f, 
+  -0.962746150638399410f, 
+  0.269668325572915200f, -0.962953266873683880f, 0.268929670420357310f, 
+  -0.963159816628371360f, 
+  0.268190857063403180f, -0.963365799780954050f, 0.267451885936677740f, 
+  -0.963571216210257210f, 
+  0.266712757474898420f, -0.963776065795439840f, 0.265973472112875530f, 
+  -0.963980348415994110f, 
+  0.265234030285511900f, -0.964184063951745720f, 0.264494432427801630f, 
+  -0.964387212282854290f, 
+  0.263754678974831510f, -0.964589793289812650f, 0.263014770361779060f, 
+  -0.964791806853447900f, 
+  0.262274707023913590f, -0.964993252854920320f, 0.261534489396595630f, 
+  -0.965194131175724720f, 
+  0.260794117915275570f, -0.965394441697689400f, 0.260053593015495130f, 
+  -0.965594184302976830f, 
+  0.259312915132886350f, -0.965793358874083570f, 0.258572084703170390f, 
+  -0.965991965293840570f, 
+  0.257831102162158930f, -0.966190003445412620f, 0.257089967945753230f, 
+  -0.966387473212298790f, 
+  0.256348682489942910f, -0.966584374478333120f, 0.255607246230807550f, 
+  -0.966780707127683270f, 
+  0.254865659604514630f, -0.966976471044852070f, 0.254123923047320620f, 
+  -0.967171666114676640f, 
+  0.253382036995570270f, -0.967366292222328510f, 0.252640001885695580f, 
+  -0.967560349253314360f, 
+  0.251897818154216910f, -0.967753837093475510f, 0.251155486237742030f, 
+  -0.967946755628987800f, 
+  0.250413006572965280f, -0.968139104746362330f, 0.249670379596668520f, 
+  -0.968330884332445300f, 
+  0.248927605745720260f, -0.968522094274417270f, 0.248184685457074780f, 
+  -0.968712734459794780f, 
+  0.247441619167773440f, -0.968902804776428870f, 0.246698407314942500f, 
+  -0.969092305112506100f, 
+  0.245955050335794590f, -0.969281235356548530f, 0.245211548667627680f, 
+  -0.969469595397412950f, 
+  0.244467902747824210f, -0.969657385124292450f, 0.243724113013852130f, 
+  -0.969844604426714830f, 
+  0.242980179903263980f, -0.970031253194543970f, 0.242236103853696070f, 
+  -0.970217331317979160f, 
+  0.241491885302869300f, -0.970402838687555500f, 0.240747524688588540f, 
+  -0.970587775194143630f, 
+  0.240003022448741500f, -0.970772140728950350f, 0.239258379021300120f, 
+  -0.970955935183517970f, 
+  0.238513594844318500f, -0.971139158449725090f, 0.237768670355934210f, 
+  -0.971321810419786160f, 
+  0.237023605994367340f, -0.971503890986251780f, 0.236278402197919620f, 
+  -0.971685400042008540f, 
+  0.235533059404975460f, -0.971866337480279400f, 0.234787578054001080f, 
+  -0.972046703194623500f, 
+  0.234041958583543460f, -0.972226497078936270f, 0.233296201432231560f, 
+  -0.972405719027449770f, 
+  0.232550307038775330f, -0.972584368934732210f, 0.231804275841964780f, 
+  -0.972762446695688570f, 
+  0.231058108280671280f, -0.972939952205560070f, 0.230311804793845530f, 
+  -0.973116885359925130f, 
+  0.229565365820518870f, -0.973293246054698250f, 0.228818791799802360f, 
+  -0.973469034186130950f, 
+  0.228072083170885790f, -0.973644249650811870f, 0.227325240373038830f, 
+  -0.973818892345666100f, 
+  0.226578263845610110f, -0.973992962167955830f, 0.225831154028026200f, 
+  -0.974166459015280320f, 
+  0.225083911359792780f, -0.974339382785575860f, 0.224336536280493690f, 
+  -0.974511733377115720f, 
+  0.223589029229790020f, -0.974683510688510670f, 0.222841390647421280f, 
+  -0.974854714618708430f, 
+  0.222093620973203590f, -0.975025345066994120f, 0.221345720647030810f, 
+  -0.975195401932990370f, 
+  0.220597690108873650f, -0.975364885116656870f, 0.219849529798778750f, 
+  -0.975533794518291360f, 
+  0.219101240156869770f, -0.975702130038528570f, 0.218352821623346430f, 
+  -0.975869891578341030f, 
+  0.217604274638483670f, -0.976037079039039020f, 0.216855599642632570f, 
+  -0.976203692322270560f, 
+  0.216106797076219600f, -0.976369731330021140f, 0.215357867379745550f, 
+  -0.976535195964614470f, 
+  0.214608810993786920f, -0.976700086128711840f, 0.213859628358993830f, 
+  -0.976864401725312640f, 
+  0.213110319916091360f, -0.977028142657754390f, 0.212360886105878580f, 
+  -0.977191308829712280f, 
+  0.211611327369227610f, -0.977353900145199960f, 0.210861644147084830f, 
+  -0.977515916508569280f, 
+  0.210111836880469720f, -0.977677357824509930f, 0.209361906010474190f, 
+  -0.977838223998050430f, 
+  0.208611851978263460f, -0.977998514934557140f, 0.207861675225075150f, 
+  -0.978158230539735050f, 
+  0.207111376192218560f, -0.978317370719627650f, 0.206360955321075680f, 
+  -0.978475935380616830f, 
+  0.205610413053099320f, -0.978633924429423100f, 0.204859749829814420f, 
+  -0.978791337773105670f, 
+  0.204108966092817010f, -0.978948175319062200f, 0.203358062283773370f, 
+  -0.979104436975029250f, 
+  0.202607038844421110f, -0.979260122649082020f, 0.201855896216568160f, 
+  -0.979415232249634780f, 
+  0.201104634842091960f, -0.979569765685440520f, 0.200353255162940420f, 
+  -0.979723722865591170f, 
+  0.199601757621131050f, -0.979877103699517640f, 0.198850142658750120f, 
+  -0.980029908096989980f, 
+  0.198098410717953730f, -0.980182135968117320f, 0.197346562240966000f, 
+  -0.980333787223347960f, 
+  0.196594597670080220f, -0.980484861773469380f, 0.195842517447657990f, 
+  -0.980635359529608120f, 
+  0.195090322016128330f, -0.980785280403230430f, 0.194338011817988600f, 
+  -0.980934624306141640f, 
+  0.193585587295803750f, -0.981083391150486590f, 0.192833048892205290f, 
+  -0.981231580848749730f, 
+  0.192080397049892380f, -0.981379193313754560f, 0.191327632211630990f, 
+  -0.981526228458664660f, 
+  0.190574754820252800f, -0.981672686196983110f, 0.189821765318656580f, 
+  -0.981818566442552500f, 
+  0.189068664149806280f, -0.981963869109555240f, 0.188315451756732120f, 
+  -0.982108594112513610f, 
+  0.187562128582529740f, -0.982252741366289370f, 0.186808695070359330f, 
+  -0.982396310786084690f, 
+  0.186055151663446630f, -0.982539302287441240f, 0.185301498805082040f, 
+  -0.982681715786240860f, 
+  0.184547736938619640f, -0.982823551198705240f, 0.183793866507478390f, 
+  -0.982964808441396440f, 
+  0.183039887955141060f, -0.983105487431216290f, 0.182285801725153320f, 
+  -0.983245588085407070f, 
+  0.181531608261125130f, -0.983385110321551180f, 0.180777308006728670f, 
+  -0.983524054057571260f, 
+  0.180022901405699510f, -0.983662419211730250f, 0.179268388901835880f, 
+  -0.983800205702631490f, 
+  0.178513770938997590f, -0.983937413449218920f, 0.177759047961107140f, 
+  -0.984074042370776450f, 
+  0.177004220412148860f, -0.984210092386929030f, 0.176249288736167940f, 
+  -0.984345563417641900f, 
+  0.175494253377271400f, -0.984480455383220930f, 0.174739114779627310f, 
+  -0.984614768204312600f, 
+  0.173983873387463850f, -0.984748501801904210f, 0.173228529645070490f, 
+  -0.984881656097323700f, 
+  0.172473083996796030f, -0.985014231012239840f, 0.171717536887049970f, 
+  -0.985146226468662230f, 
+  0.170961888760301360f, -0.985277642388941220f, 0.170206140061078120f, 
+  -0.985408478695768420f, 
+  0.169450291233967930f, -0.985538735312176060f, 0.168694342723617440f, 
+  -0.985668412161537550f, 
+  0.167938294974731230f, -0.985797509167567370f, 0.167182148432072880f, 
+  -0.985926026254321130f, 
+  0.166425903540464220f, -0.986053963346195440f, 0.165669560744784140f, 
+  -0.986181320367928270f, 
+  0.164913120489970090f, -0.986308097244598670f, 0.164156583221015890f, 
+  -0.986434293901627070f, 
+  0.163399949382973230f, -0.986559910264775410f, 0.162643219420950450f, 
+  -0.986684946260146690f, 
+  0.161886393780111910f, -0.986809401814185420f, 0.161129472905678780f, 
+  -0.986933276853677710f, 
+  0.160372457242928400f, -0.987056571305750970f, 0.159615347237193090f, 
+  -0.987179285097874340f, 
+  0.158858143333861390f, -0.987301418157858430f, 0.158100845978377090f, 
+  -0.987422970413855410f, 
+  0.157343455616238280f, -0.987543941794359230f, 0.156585972692998590f, 
+  -0.987664332228205710f, 
+  0.155828397654265320f, -0.987784141644572180f, 0.155070730945700510f, 
+  -0.987903369972977790f, 
+  0.154312973013020240f, -0.988022017143283530f, 0.153555124301993500f, 
+  -0.988140083085692570f, 
+  0.152797185258443410f, -0.988257567730749460f, 0.152039156328246160f, 
+  -0.988374471009341280f, 
+  0.151281037957330250f, -0.988490792852696590f, 0.150522830591677370f, 
+  -0.988606533192386450f, 
+  0.149764534677321620f, -0.988721691960323780f, 0.149006150660348470f, 
+  -0.988836269088763540f, 
+  0.148247678986896200f, -0.988950264510302990f, 0.147489120103153680f, 
+  -0.989063678157881540f, 
+  0.146730474455361750f, -0.989176509964781010f, 0.145971742489812370f, 
+  -0.989288759864625170f, 
+  0.145212924652847520f, -0.989400427791380380f, 0.144454021390860440f, 
+  -0.989511513679355190f, 
+  0.143695033150294580f, -0.989622017463200780f, 0.142935960377642700f, 
+  -0.989731939077910570f, 
+  0.142176803519448000f, -0.989841278458820530f, 0.141417563022303130f, 
+  -0.989950035541608990f, 
+  0.140658239332849240f, -0.990058210262297120f, 0.139898832897777380f, 
+  -0.990165802557248400f, 
+  0.139139344163826280f, -0.990272812363169110f, 0.138379773577783890f, 
+  -0.990379239617108160f, 
+  0.137620121586486180f, -0.990485084256456980f, 0.136860388636816430f, 
+  -0.990590346218950150f, 
+  0.136100575175706200f, -0.990695025442664630f, 0.135340681650134330f, 
+  -0.990799121866020370f, 
+  0.134580708507126220f, -0.990902635427780010f, 0.133820656193754690f, 
+  -0.991005566067049370f, 
+  0.133060525157139180f, -0.991107913723276780f, 0.132300315844444680f, 
+  -0.991209678336254060f, 
+  0.131540028702883280f, -0.991310859846115440f, 0.130779664179711790f, 
+  -0.991411458193338540f, 
+  0.130019222722233350f, -0.991511473318743900f, 0.129258704777796270f, 
+  -0.991610905163495370f, 
+  0.128498110793793220f, -0.991709753669099530f, 0.127737441217662280f, 
+  -0.991808018777406430f, 
+  0.126976696496885980f, -0.991905700430609330f, 0.126215877078990400f, 
+  -0.992002798571244520f, 
+  0.125454983411546210f, -0.992099313142191800f, 0.124694015942167770f, 
+  -0.992195244086673920f, 
+  0.123932975118512200f, -0.992290591348257370f, 0.123171861388280650f, 
+  -0.992385354870851670f, 
+  0.122410675199216280f, -0.992479534598709970f, 0.121649416999105540f, 
+  -0.992573130476428810f, 
+  0.120888087235777220f, -0.992666142448948020f, 0.120126686357101580f, 
+  -0.992758570461551140f, 
+  0.119365214810991350f, -0.992850414459865100f, 0.118603673045400840f, 
+  -0.992941674389860470f, 
+  0.117842061508325020f, -0.993032350197851410f, 0.117080380647800550f, 
+  -0.993122441830495580f, 
+  0.116318630911904880f, -0.993211949234794500f, 0.115556812748755290f, 
+  -0.993300872358093280f, 
+  0.114794926606510250f, -0.993389211148080650f, 0.114032972933367300f, 
+  -0.993476965552789190f, 
+  0.113270952177564360f, -0.993564135520595300f, 0.112508864787378830f, 
+  -0.993650721000219120f, 
+  0.111746711211126660f, -0.993736721940724600f, 0.110984491897163380f, 
+  -0.993822138291519660f, 
+  0.110222207293883180f, -0.993906970002356060f, 0.109459857849718030f, 
+  -0.993991217023329380f, 
+  0.108697444013138670f, -0.994074879304879370f, 0.107934966232653760f, 
+  -0.994157956797789730f, 
+  0.107172424956808870f, -0.994240449453187900f, 0.106409820634187840f, 
+  -0.994322357222545810f, 
+  0.105647153713410700f, -0.994403680057679100f, 0.104884424643134970f, 
+  -0.994484417910747600f, 
+  0.104121633872054730f, -0.994564570734255420f, 0.103358781848899700f, 
+  -0.994644138481050710f, 
+  0.102595869022436280f, -0.994723121104325700f, 0.101832895841466670f, 
+  -0.994801518557617110f, 
+  0.101069862754827880f, -0.994879330794805620f, 0.100306770211392820f, 
+  -0.994956557770116380f, 
+  0.099543618660069444f, -0.995033199438118630f, 0.098780408549799664f, 
+  -0.995109255753726110f, 
+  0.098017140329560770f, -0.995184726672196820f, 0.097253814448363354f, 
+  -0.995259612149133390f, 
+  0.096490431355252607f, -0.995333912140482280f, 0.095726991499307315f, 
+  -0.995407626602534900f, 
+  0.094963495329639061f, -0.995480755491926940f, 0.094199943295393190f, 
+  -0.995553298765638470f, 
+  0.093436335845747912f, -0.995625256380994310f, 0.092672673429913366f, 
+  -0.995696628295663520f, 
+  0.091908956497132696f, -0.995767414467659820f, 0.091145185496681130f, 
+  -0.995837614855341610f, 
+  0.090381360877865011f, -0.995907229417411720f, 0.089617483090022917f, 
+  -0.995976258112917790f, 
+  0.088853552582524684f, -0.996044700901251970f, 0.088089569804770507f, 
+  -0.996112557742151130f, 
+  0.087325535206192226f, -0.996179828595696870f, 0.086561449236251239f, 
+  -0.996246513422315520f, 
+  0.085797312344439880f, -0.996312612182778000f, 0.085033124980280414f, 
+  -0.996378124838200210f, 
+  0.084268887593324127f, -0.996443051350042630f, 0.083504600633152404f, 
+  -0.996507391680110820f, 
+  0.082740264549375803f, -0.996571145790554840f, 0.081975879791633108f, 
+  -0.996634313643869900f, 
+  0.081211446809592386f, -0.996696895202896060f, 0.080446966052950097f, 
+  -0.996758890430818000f, 
+  0.079682437971430126f, -0.996820299291165670f, 0.078917863014785095f, 
+  -0.996881121747813850f, 
+  0.078153241632794315f, -0.996941357764982160f, 0.077388574275265049f, 
+  -0.997001007307235290f, 
+  0.076623861392031617f, -0.997060070339482960f, 0.075859103432954503f, 
+  -0.997118546826979980f, 
+  0.075094300847921291f, -0.997176436735326190f, 0.074329454086845867f, 
+  -0.997233740030466160f, 
+  0.073564563599667454f, -0.997290456678690210f, 0.072799629836351618f, 
+  -0.997346586646633230f, 
+  0.072034653246889416f, -0.997402129901275300f, 0.071269634281296415f, 
+  -0.997457086409941910f, 
+  0.070504573389614009f, -0.997511456140303450f, 0.069739471021907376f, 
+  -0.997565239060375750f, 
+  0.068974327628266732f, -0.997618435138519550f, 0.068209143658806454f, 
+  -0.997671044343441000f, 
+  0.067443919563664106f, -0.997723066644191640f, 0.066678655793001543f, 
+  -0.997774502010167820f, 
+  0.065913352797003930f, -0.997825350411111640f, 0.065148011025878860f, 
+  -0.997875611817110150f, 
+  0.064382630929857410f, -0.997925286198596000f, 0.063617212959193190f, 
+  -0.997974373526346990f, 
+  0.062851757564161420f, -0.998022873771486240f, 0.062086265195060247f, 
+  -0.998070786905482340f, 
+  0.061320736302208648f, -0.998118112900149180f, 0.060555171335947781f, 
+  -0.998164851727646240f, 
+  0.059789570746640007f, -0.998211003360478190f, 0.059023934984667986f, 
+  -0.998256567771495180f, 
+  0.058258264500435732f, -0.998301544933892890f, 0.057492559744367684f, 
+  -0.998345934821212370f, 
+  0.056726821166907783f, -0.998389737407340160f, 0.055961049218520520f, 
+  -0.998432952666508440f, 
+  0.055195244349690031f, -0.998475580573294770f, 0.054429407010919147f, 
+  -0.998517621102622210f, 
+  0.053663537652730679f, -0.998559074229759310f, 0.052897636725665401f, 
+  -0.998599939930320370f, 
+  0.052131704680283317f, -0.998640218180265270f, 0.051365741967162731f, 
+  -0.998679908955899090f, 
+  0.050599749036899337f, -0.998719012233872940f, 0.049833726340107257f, 
+  -0.998757527991183340f, 
+  0.049067674327418126f, -0.998795456205172410f, 0.048301593449480172f, 
+  -0.998832796853527990f, 
+  0.047535484156959261f, -0.998869549914283560f, 0.046769346900537960f, 
+  -0.998905715365818290f, 
+  0.046003182130914644f, -0.998941293186856870f, 0.045236990298804750f, 
+  -0.998976283356469820f, 
+  0.044470771854938744f, -0.999010685854073380f, 0.043704527250063421f, 
+  -0.999044500659429290f, 
+  0.042938256934940959f, -0.999077727752645360f, 0.042171961360348002f, 
+  -0.999110367114174890f, 
+  0.041405640977076712f, -0.999142418724816910f, 0.040639296235933854f, 
+  -0.999173882565716380f, 
+  0.039872927587739845f, -0.999204758618363890f, 0.039106535483329839f, 
+  -0.999235046864595850f, 
+  0.038340120373552791f, -0.999264747286594420f, 0.037573682709270514f, 
+  -0.999293859866887790f, 
+  0.036807222941358991f, -0.999322384588349540f, 0.036040741520706299f, 
+  -0.999350321434199440f, 
+  0.035274238898213947f, -0.999377670388002850f, 0.034507715524795889f, 
+  -0.999404431433671300f, 
+  0.033741171851377642f, -0.999430604555461730f, 0.032974608328897315f, 
+  -0.999456189737977340f, 
+  0.032208025408304704f, -0.999481186966166950f, 0.031441423540560343f, 
+  -0.999505596225325310f, 
+  0.030674803176636581f, -0.999529417501093140f, 0.029908164767516655f, 
+  -0.999552650779456990f, 
+  0.029141508764193740f, -0.999575296046749220f, 0.028374835617672258f, 
+  -0.999597353289648380f, 
+  0.027608145778965820f, -0.999618822495178640f, 0.026841439699098527f, 
+  -0.999639703650710200f, 
+  0.026074717829104040f, -0.999659996743959220f, 0.025307980620024630f, 
+  -0.999679701762987930f, 
+  0.024541228522912264f, -0.999698818696204250f, 0.023774461988827676f, 
+  -0.999717347532362190f, 
+  0.023007681468839410f, -0.999735288260561680f, 0.022240887414024919f, 
+  -0.999752640870248840f, 
+  0.021474080275469605f, -0.999769405351215280f, 0.020707260504265912f, 
+  -0.999785581693599210f, 
+  0.019940428551514598f, -0.999801169887884260f, 0.019173584868322699f, 
+  -0.999816169924900410f, 
+  0.018406729905804820f, -0.999830581795823400f, 0.017639864115082195f, 
+  -0.999844405492175240f, 
+  0.016872987947281773f, -0.999857641005823860f, 0.016106101853537263f, 
+  -0.999870288328982950f, 
+  0.015339206284988220f, -0.999882347454212560f, 0.014572301692779104f, 
+  -0.999893818374418490f, 
+  0.013805388528060349f, -0.999904701082852900f, 0.013038467241987433f, 
+  -0.999914995573113470f, 
+  0.012271538285719944f, -0.999924701839144500f, 0.011504602110422875f, 
+  -0.999933819875236000f, 
+  0.010737659167264572f, -0.999942349676023910f, 0.009970709907418029f, 
+  -0.999950291236490480f, 
+  0.009203754782059960f, -0.999957644551963900f, 0.008436794242369860f, 
+  -0.999964409618118280f, 
+  0.007669828739531077f, -0.999970586430974140f, 0.006902858724729877f, 
+  -0.999976174986897610f, 
+  0.006135884649154515f, -0.999981175282601110f, 0.005368906963996303f, 
+  -0.999985587315143200f, 
+  0.004601926120448672f, -0.999989411081928400f, 0.003834942569706248f, 
+  -0.999992646580707190f, 
+  0.003067956762966138f, -0.999995293809576190f, 0.002300969151425887f, 
+  -0.999997352766978210f, 
+  0.001533980186284766f, -0.999998823451701880f, 0.000766990318742846f, 
+  -0.999999705862882230f 
+}; 
+ 
+/**  
+* \par  
+* cosFactor tables hold the values <pre>cos_factors[n] = cos((2n+1)*pi/(4*N))</pre>  
+* for n = 0 .. N-1.  The entries are NOT pre-multiplied by 2: every stored value lies  
+* in the open interval (0, 1), e.g. cos_factors_128[0] = cos(pi/512) = 0.99998117...  
+* \par  
+* C command to generate the table  
+* \par  
+* <pre> for(i = 0; i< N; i++)  
+* {  
+*    cos_factors[i]= cos((2*i+1)*c/2);  
+* } </pre>  
+* \par  
+* where <code>N</code> is the number of factors to generate and <code>c</code> is <code>pi/(2*N)</code>  
+*/ 
+static const float32_t cos_factors_128[128] = { 
+  0.999981175282601110f, 0.999830581795823400f, 0.999529417501093140f, 
+  0.999077727752645360f, 
+  0.998475580573294770f, 0.997723066644191640f, 0.996820299291165670f, 
+  0.995767414467659820f, 
+  0.994564570734255420f, 0.993211949234794500f, 0.991709753669099530f, 
+  0.990058210262297120f, 
+  0.988257567730749460f, 0.986308097244598670f, 0.984210092386929030f, 
+  0.981963869109555240f, 
+  0.979569765685440520f, 0.977028142657754390f, 0.974339382785575860f, 
+  0.971503890986251780f, 
+  0.968522094274417380f, 0.965394441697689400f, 0.962121404269041580f, 
+  0.958703474895871600f, 
+  0.955141168305770780f, 0.951435020969008340f, 0.947585591017741090f, 
+  0.943593458161960390f, 
+  0.939459223602189920f, 0.935183509938947610f, 0.930766961078983710f, 
+  0.926210242138311380f, 
+  0.921514039342042010f, 0.916679059921042700f, 0.911706032005429880f, 
+  0.906595704514915330f, 
+  0.901348847046022030f, 0.895966249756185220f, 0.890448723244757880f, 
+  0.884797098430937790f, 
+  0.879012226428633530f, 0.873094978418290090f, 0.867046245515692650f, 
+  0.860866938637767310f, 
+  0.854557988365400530f, 0.848120344803297230f, 0.841554977436898440f, 
+  0.834862874986380010f, 
+  0.828045045257755800f, 0.821102514991104650f, 0.814036329705948410f, 
+  0.806847553543799330f, 
+  0.799537269107905010f, 0.792106577300212390f, 0.784556597155575240f, 
+  0.776888465673232440f, 
+  0.769103337645579700f, 0.761202385484261780f, 0.753186799043612520f, 
+  0.745057785441466060f, 
+  0.736816568877369900f, 0.728464390448225200f, 0.720002507961381650f, 
+  0.711432195745216430f, 
+  0.702754744457225300f, 0.693971460889654000f, 0.685083667772700360f, 
+  0.676092703575316030f, 
+  0.666999922303637470f, 0.657806693297078640f, 0.648514401022112550f, 
+  0.639124444863775730f, 
+  0.629638238914927100f, 0.620057211763289210f, 0.610382806276309480f, 
+  0.600616479383868970f, 
+  0.590759701858874280f, 0.580813958095764530f, 0.570780745886967370f, 
+  0.560661576197336030f, 
+  0.550457972936604810f, 0.540171472729892970f, 0.529803624686294830f, 
+  0.519355990165589530f, 
+  0.508830142543106990f, 0.498227666972781870f, 0.487550160148436050f, 
+  0.476799230063322250f, 
+  0.465976495767966130f, 0.455083587126343840f, 0.444122144570429260f, 
+  0.433093818853152010f, 
+  0.422000270799799790f, 0.410843171057903910f, 0.399624199845646790f, 
+  0.388345046698826300f, 
+  0.377007410216418310f, 0.365612997804773960f, 0.354163525420490510f, 
+  0.342660717311994380f, 
+  0.331106305759876430f, 0.319502030816015750f, 0.307849640041534980f, 
+  0.296150888243623960f, 
+  0.284407537211271820f, 0.272621355449948980f, 0.260794117915275570f, 
+  0.248927605745720260f, 
+  0.237023605994367340f, 0.225083911359792780f, 0.213110319916091360f, 
+  0.201104634842091960f, 
+  0.189068664149806280f, 0.177004220412148860f, 0.164913120489970090f, 
+  0.152797185258443410f, 
+  0.140658239332849240f, 0.128498110793793220f, 0.116318630911904880f, 
+  0.104121633872054730f, 
+  0.091908956497132696f, 0.079682437971430126f, 0.067443919563664106f, 
+  0.055195244349690031f, 
+  0.042938256934940959f, 0.030674803176636581f, 0.018406729905804820f, 
+  0.006135884649154515f 
+}; 
+ 
+/**  
+* \par  
+* 512-entry cosine factor table: <pre>cos_factors_512[n] = cos((2n+1)*pi/(4*512))</pre>  
+* for n = 0 .. 511, running from cos(pi/2048) (first entry) down to  
+* cos(1023*pi/2048) (last entry).  Values are stored without any extra scale factor.  
+*/ 
+static const float32_t cos_factors_512[512] = { 
+  0.999998823451701880f, 0.999989411081928400f, 0.999970586430974140f, 
+  0.999942349676023910f, 
+  0.999904701082852900f, 0.999857641005823860f, 0.999801169887884260f, 
+  0.999735288260561680f, 
+  0.999659996743959220f, 0.999575296046749220f, 0.999481186966166950f, 
+  0.999377670388002850f, 
+  0.999264747286594420f, 0.999142418724816910f, 0.999010685854073380f, 
+  0.998869549914283560f, 
+  0.998719012233872940f, 0.998559074229759310f, 0.998389737407340160f, 
+  0.998211003360478190f, 
+  0.998022873771486240f, 0.997825350411111640f, 0.997618435138519550f, 
+  0.997402129901275300f, 
+  0.997176436735326190f, 0.996941357764982160f, 0.996696895202896060f, 
+  0.996443051350042630f, 
+  0.996179828595696980f, 0.995907229417411720f, 0.995625256380994310f, 
+  0.995333912140482280f, 
+  0.995033199438118630f, 0.994723121104325700f, 0.994403680057679100f, 
+  0.994074879304879370f, 
+  0.993736721940724600f, 0.993389211148080650f, 0.993032350197851410f, 
+  0.992666142448948020f, 
+  0.992290591348257370f, 0.991905700430609330f, 0.991511473318743900f, 
+  0.991107913723276890f, 
+  0.990695025442664630f, 0.990272812363169110f, 0.989841278458820530f, 
+  0.989400427791380380f, 
+  0.988950264510302990f, 0.988490792852696590f, 0.988022017143283530f, 
+  0.987543941794359230f, 
+  0.987056571305750970f, 0.986559910264775410f, 0.986053963346195440f, 
+  0.985538735312176060f, 
+  0.985014231012239840f, 0.984480455383220930f, 0.983937413449218920f, 
+  0.983385110321551180f, 
+  0.982823551198705240f, 0.982252741366289370f, 0.981672686196983110f, 
+  0.981083391150486710f, 
+  0.980484861773469380f, 0.979877103699517640f, 0.979260122649082020f, 
+  0.978633924429423210f, 
+  0.977998514934557140f, 0.977353900145199960f, 0.976700086128711840f, 
+  0.976037079039039020f, 
+  0.975364885116656980f, 0.974683510688510670f, 0.973992962167955830f, 
+  0.973293246054698250f, 
+  0.972584368934732210f, 0.971866337480279400f, 0.971139158449725090f, 
+  0.970402838687555500f, 
+  0.969657385124292450f, 0.968902804776428870f, 0.968139104746362440f, 
+  0.967366292222328510f, 
+  0.966584374478333120f, 0.965793358874083680f, 0.964993252854920320f, 
+  0.964184063951745830f, 
+  0.963365799780954050f, 0.962538468044359160f, 0.961702076529122540f, 
+  0.960856633107679660f, 
+  0.960002145737665960f, 0.959138622461841890f, 0.958266071408017670f, 
+  0.957384500788975860f, 
+  0.956493918902395100f, 0.955594334130771110f, 0.954685754941338340f, 
+  0.953768189885990330f, 
+  0.952841647601198720f, 0.951906136807932350f, 0.950961666311575080f, 
+  0.950008245001843000f, 
+  0.949045881852700560f, 0.948074585922276230f, 0.947094366352777220f, 
+  0.946105232370403450f, 
+  0.945107193285260610f, 0.944100258491272660f, 0.943084437466093490f, 
+  0.942059739771017310f, 
+  0.941026175050889260f, 0.939983753034014050f, 0.938932483532064600f, 
+  0.937872376439989890f, 
+  0.936803441735921560f, 0.935725689481080370f, 0.934639129819680780f, 
+  0.933543772978836170f, 
+  0.932439629268462360f, 0.931326709081180430f, 0.930205022892219070f, 
+  0.929074581259315860f, 
+  0.927935394822617890f, 0.926787474304581750f, 0.925630830509872720f, 
+  0.924465474325262600f, 
+  0.923291416719527640f, 0.922108668743345180f, 0.920917241529189520f, 
+  0.919717146291227360f, 
+  0.918508394325212250f, 0.917290997008377910f, 0.916064965799331720f, 
+  0.914830312237946200f, 
+  0.913587047945250810f, 0.912335184623322750f, 0.911074734055176360f, 
+  0.909805708104652220f, 
+  0.908528118716306120f, 0.907241977915295820f, 0.905947297807268460f, 
+  0.904644090578246240f, 
+  0.903332368494511820f, 0.902012143902493180f, 0.900683429228646970f, 
+  0.899346236979341570f, 
+  0.898000579740739880f, 0.896646470178680150f, 0.895283921038557580f, 
+  0.893912945145203250f, 
+  0.892533555402764580f, 0.891145764794583180f, 0.889749586383072780f, 
+  0.888345033309596350f, 
+  0.886932118794342190f, 0.885510856136199950f, 0.884081258712634990f, 
+  0.882643339979562790f, 
+  0.881197113471222090f, 0.879742592800047410f, 0.878279791656541580f, 
+  0.876808723809145650f, 
+  0.875329403104110890f, 0.873841843465366860f, 0.872346058894391540f, 
+  0.870842063470078980f, 
+  0.869329871348606840f, 0.867809496763303320f, 0.866280954024512990f, 
+  0.864744257519462380f, 
+  0.863199421712124160f, 0.861646461143081300f, 0.860085390429390140f, 
+  0.858516224264442740f, 
+  0.856938977417828760f, 0.855353664735196030f, 0.853760301138111410f, 
+  0.852158901623919830f, 
+  0.850549481265603480f, 0.848932055211639610f, 0.847306638685858320f, 
+  0.845673246987299070f, 
+  0.844031895490066410f, 0.842382599643185850f, 0.840725374970458070f, 
+  0.839060237070312740f, 
+  0.837387201615661940f, 0.835706284353752600f, 0.834017501106018130f, 
+  0.832320867767929680f, 
+  0.830616400308846310f, 0.828904114771864870f, 0.827184027273669130f, 
+  0.825456154004377550f, 
+  0.823720511227391430f, 0.821977115279241550f, 0.820225982569434690f, 
+  0.818467129580298660f, 
+  0.816700572866827850f, 0.814926329056526620f, 0.813144414849253590f, 
+  0.811354847017063730f, 
+  0.809557642404051260f, 0.807752817926190360f, 0.805940390571176280f, 
+  0.804120377398265810f, 
+  0.802292795538115720f, 0.800457662192622820f, 0.798614994634760820f, 
+  0.796764810208418830f, 
+  0.794907126328237010f, 0.793041960479443640f, 0.791169330217690200f, 
+  0.789289253168885650f, 
+  0.787401747029031430f, 0.785506829564053930f, 0.783604518609638200f, 
+  0.781694832071059390f, 
+  0.779777787923014550f, 0.777853404209453150f, 0.775921699043407690f, 
+  0.773982690606822900f, 
+  0.772036397150384520f, 0.770082836993347900f, 0.768122028523365420f, 
+  0.766153990196312920f, 
+  0.764178740536116670f, 0.762196298134578900f, 0.760206681651202420f, 
+  0.758209909813015280f, 
+  0.756206001414394540f, 0.754194975316889170f, 0.752176850449042810f, 
+  0.750151645806215070f, 
+  0.748119380450403600f, 0.746080073510063780f, 0.744033744179929290f, 
+  0.741980411720831070f, 
+  0.739920095459516200f, 0.737852814788465980f, 0.735778589165713590f, 
+  0.733697438114660370f, 
+  0.731609381223892630f, 0.729514438146997010f, 0.727412628602375770f, 
+  0.725303972373060770f, 
+  0.723188489306527460f, 0.721066199314508110f, 0.718937122372804490f, 
+  0.716801278521099540f, 
+  0.714658687862769090f, 0.712509370564692320f, 0.710353346857062420f, 
+  0.708190637033195400f, 
+  0.706021261449339740f, 0.703845240524484940f, 0.701662594740168570f, 
+  0.699473344640283770f, 
+  0.697277510830886630f, 0.695075113980000880f, 0.692866174817424740f, 
+  0.690650714134534720f, 
+  0.688428752784090550f, 0.686200311680038700f, 0.683965411797315510f, 
+  0.681724074171649820f, 
+  0.679476319899365080f, 0.677222170137180450f, 0.674961646102012040f, 
+  0.672694769070772970f, 
+  0.670421560380173090f, 0.668142041426518560f, 0.665856233665509720f, 
+  0.663564158612039880f, 
+  0.661265837839992270f, 0.658961292982037320f, 0.656650545729429050f, 
+  0.654333617831800550f, 
+  0.652010531096959500f, 0.649681307390683190f, 0.647345968636512060f, 
+  0.645004536815544040f, 
+  0.642657033966226860f, 0.640303482184151670f, 0.637943903621844170f, 
+  0.635578320488556230f, 
+  0.633206755050057190f, 0.630829229628424470f, 0.628445766601832710f, 
+  0.626056388404343520f, 
+  0.623661117525694640f, 0.621259976511087660f, 0.618852987960976320f, 
+  0.616440174530853650f, 
+  0.614021558931038490f, 0.611597163926462020f, 0.609167012336453210f, 
+  0.606731127034524480f, 
+  0.604289530948156070f, 0.601842247058580030f, 0.599389298400564540f, 
+  0.596930708062196500f, 
+  0.594466499184664540f, 0.591996694962040990f, 0.589521318641063940f, 
+  0.587040393520918080f, 
+  0.584553942953015330f, 0.582061990340775550f, 0.579564559139405740f, 
+  0.577061672855679550f, 
+  0.574553355047715760f, 0.572039629324757050f, 0.569520519346947250f, 
+  0.566996048825108680f, 
+  0.564466241520519500f, 0.561931121244689470f, 0.559390711859136140f, 
+  0.556845037275160100f, 
+  0.554294121453620110f, 0.551737988404707450f, 0.549176662187719770f, 
+  0.546610166910834860f, 
+  0.544038526730883930f, 0.541461765853123560f, 0.538879908531008420f, 
+  0.536292979065963180f, 
+  0.533701001807152960f, 0.531104001151255000f, 0.528502001542228480f, 
+  0.525895027471084740f, 
+  0.523283103475656430f, 0.520666254140367270f, 0.518044504095999340f, 
+  0.515417878019463150f, 
+  0.512786400633563070f, 0.510150096706766700f, 0.507508991052970870f, 
+  0.504863108531267480f, 
+  0.502212474045710900f, 0.499557112545081890f, 0.496897049022654640f, 
+  0.494232308515959730f, 
+  0.491562916106550060f, 0.488888896919763230f, 0.486210276124486530f, 
+  0.483527078932918740f, 
+  0.480839330600333900f, 0.478147056424843120f, 0.475450281747155870f, 
+  0.472749031950342900f, 
+  0.470043332459595620f, 0.467333208741988530f, 0.464618686306237820f, 
+  0.461899790702462840f, 
+  0.459176547521944150f, 0.456448982396883860f, 0.453717121000163930f, 
+  0.450980989045103810f, 
+  0.448240612285220000f, 0.445496016513981740f, 0.442747227564570130f, 
+  0.439994271309633260f, 
+  0.437237173661044200f, 0.434475960569655710f, 0.431710658025057370f, 
+  0.428941292055329550f, 
+  0.426167888726799620f, 0.423390474143796100f, 0.420609074448402510f, 
+  0.417823715820212380f, 
+  0.415034424476081630f, 0.412241226669883000f, 0.409444148692257590f, 
+  0.406643216870369140f, 
+  0.403838457567654130f, 0.401029897183575790f, 0.398217562153373620f, 
+  0.395401478947816300f, 
+  0.392581674072951530f, 0.389758174069856410f, 0.386931005514388690f, 
+  0.384100195016935040f, 
+  0.381265769222162490f, 0.378427754808765620f, 0.375586178489217330f, 
+  0.372741067009515810f, 
+  0.369892447148934270f, 0.367040345719767240f, 0.364184789567079840f, 
+  0.361325805568454340f, 
+  0.358463420633736540f, 0.355597661704783960f, 0.352728555755210730f, 
+  0.349856129790135030f, 
+  0.346980410845923680f, 0.344101425989938980f, 0.341219202320282410f, 
+  0.338333766965541290f, 
+  0.335445147084531660f, 0.332553369866044220f, 0.329658462528587550f, 
+  0.326760452320131790f, 
+  0.323859366517852960f, 0.320955232427875210f, 0.318048077385015060f, 
+  0.315137928752522440f, 
+  0.312224813921825050f, 0.309308760312268780f, 0.306389795370861080f, 
+  0.303467946572011370f, 
+  0.300543241417273400f, 0.297615707435086310f, 0.294685372180514330f, 
+  0.291752263234989370f, 
+  0.288816408206049480f, 0.285877834727080730f, 0.282936570457055390f, 
+  0.279992643080273380f, 
+  0.277046080306099950f, 0.274096909868706330f, 0.271145159526808070f, 
+  0.268190857063403180f, 
+  0.265234030285511900f, 0.262274707023913590f, 0.259312915132886350f, 
+  0.256348682489942910f, 
+  0.253382036995570270f, 0.250413006572965280f, 0.247441619167773440f, 
+  0.244467902747824210f, 
+  0.241491885302869300f, 0.238513594844318500f, 0.235533059404975460f, 
+  0.232550307038775330f, 
+  0.229565365820518870f, 0.226578263845610110f, 0.223589029229790020f, 
+  0.220597690108873650f, 
+  0.217604274638483670f, 0.214608810993786920f, 0.211611327369227610f, 
+  0.208611851978263460f, 
+  0.205610413053099320f, 0.202607038844421110f, 0.199601757621131050f, 
+  0.196594597670080220f, 
+  0.193585587295803750f, 0.190574754820252800f, 0.187562128582529740f, 
+  0.184547736938619640f, 
+  0.181531608261125130f, 0.178513770938997590f, 0.175494253377271400f, 
+  0.172473083996796030f, 
+  0.169450291233967930f, 0.166425903540464220f, 0.163399949382973230f, 
+  0.160372457242928400f, 
+  0.157343455616238280f, 0.154312973013020240f, 0.151281037957330250f, 
+  0.148247678986896200f, 
+  0.145212924652847520f, 0.142176803519448000f, 0.139139344163826280f, 
+  0.136100575175706200f, 
+  0.133060525157139180f, 0.130019222722233350f, 0.126976696496885980f, 
+  0.123932975118512200f, 
+  0.120888087235777220f, 0.117842061508325020f, 0.114794926606510250f, 
+  0.111746711211126660f, 
+  0.108697444013138670f, 0.105647153713410700f, 0.102595869022436280f, 
+  0.099543618660069444f, 
+  0.096490431355252607f, 0.093436335845747912f, 0.090381360877865011f, 
+  0.087325535206192226f, 
+  0.084268887593324127f, 0.081211446809592386f, 0.078153241632794315f, 
+  0.075094300847921291f, 
+  0.072034653246889416f, 0.068974327628266732f, 0.065913352797003930f, 
+  0.062851757564161420f, 
+  0.059789570746640007f, 0.056726821166907783f, 0.053663537652730679f, 
+  0.050599749036899337f, 
+  0.047535484156959261f, 0.044470771854938744f, 0.041405640977076712f, 
+  0.038340120373552791f, 
+  0.035274238898213947f, 0.032208025408304704f, 0.029141508764193740f, 
+  0.026074717829104040f, 
+  0.023007681468839410f, 0.019940428551514598f, 0.016872987947281773f, 
+  0.013805388528060349f, 
+  0.010737659167264572f, 0.007669828739531077f, 0.004601926120448672f, 
+  0.001533980186284766f 
+}; 
+ 
+static const float32_t cos_factors_2048[2048] = { 
+  0.999999926465717890f, 0.999999338191525530f, 0.999998161643486980f, 
+  0.999996396822294350f, 
+  0.999994043728985820f, 0.999991102364945590f, 0.999987572731904080f, 
+  0.999983454831937730f, 
+  0.999978748667468830f, 0.999973454241265940f, 0.999967571556443780f, 
+  0.999961100616462820f, 
+  0.999954041425129780f, 0.999946393986597460f, 0.999938158305364590f, 
+  0.999929334386276070f, 
+  0.999919922234522750f, 0.999909921855641540f, 0.999899333255515390f, 
+  0.999888156440373320f, 
+  0.999876391416790410f, 0.999864038191687680f, 0.999851096772332190f, 
+  0.999837567166337090f, 
+  0.999823449381661570f, 0.999808743426610520f, 0.999793449309835270f, 
+  0.999777567040332940f, 
+  0.999761096627446610f, 0.999744038080865430f, 0.999726391410624470f, 
+  0.999708156627104880f, 
+  0.999689333741033640f, 0.999669922763483760f, 0.999649923705874240f, 
+  0.999629336579970110f, 
+  0.999608161397882110f, 0.999586398172067070f, 0.999564046915327740f, 
+  0.999541107640812940f, 
+  0.999517580362016990f, 0.999493465092780590f, 0.999468761847290050f, 
+  0.999443470640077770f, 
+  0.999417591486021720f, 0.999391124400346050f, 0.999364069398620550f, 
+  0.999336426496761240f, 
+  0.999308195711029470f, 0.999279377058032710f, 0.999249970554724420f, 
+  0.999219976218403530f, 
+  0.999189394066714920f, 0.999158224117649430f, 0.999126466389543390f, 
+  0.999094120901079070f, 
+  0.999061187671284600f, 0.999027666719533690f, 0.998993558065545680f, 
+  0.998958861729386080f, 
+  0.998923577731465780f, 0.998887706092541290f, 0.998851246833715180f, 
+  0.998814199976435390f, 
+  0.998776565542495610f, 0.998738343554035230f, 0.998699534033539280f, 
+  0.998660137003838490f, 
+  0.998620152488108870f, 0.998579580509872500f, 0.998538421092996730f, 
+  0.998496674261694640f, 
+  0.998454340040524800f, 0.998411418454391300f, 0.998367909528543820f, 
+  0.998323813288577560f, 
+  0.998279129760433200f, 0.998233858970396850f, 0.998188000945100300f, 
+  0.998141555711520520f, 
+  0.998094523296980010f, 0.998046903729146840f, 0.997998697036034390f, 
+  0.997949903246001190f, 
+  0.997900522387751620f, 0.997850554490335110f, 0.997799999583146470f, 
+  0.997748857695925690f, 
+  0.997697128858758500f, 0.997644813102075420f, 0.997591910456652630f, 
+  0.997538420953611340f, 
+  0.997484344624417930f, 0.997429681500884180f, 0.997374431615167150f, 
+  0.997318594999768600f, 
+  0.997262171687536170f, 0.997205161711661850f, 0.997147565105683480f, 
+  0.997089381903483400f, 
+  0.997030612139289450f, 0.996971255847674320f, 0.996911313063555740f, 
+  0.996850783822196610f, 
+  0.996789668159204560f, 0.996727966110532490f, 0.996665677712478160f, 
+  0.996602803001684130f, 
+  0.996539342015137940f, 0.996475294790172160f, 0.996410661364464100f, 
+  0.996345441776035900f, 
+  0.996279636063254650f, 0.996213244264832040f, 0.996146266419824620f, 
+  0.996078702567633980f, 
+  0.996010552748005870f, 0.995941817001031350f, 0.995872495367145730f, 
+  0.995802587887129160f, 
+  0.995732094602106430f, 0.995661015553546910f, 0.995589350783264600f, 
+  0.995517100333418110f, 
+  0.995444264246510340f, 0.995370842565388990f, 0.995296835333246090f, 
+  0.995222242593618360f, 
+  0.995147064390386470f, 0.995071300767776170f, 0.994994951770357020f, 
+  0.994918017443043200f, 
+  0.994840497831093180f, 0.994762392980109930f, 0.994683702936040250f, 
+  0.994604427745175660f, 
+  0.994524567454151740f, 0.994444122109948040f, 0.994363091759888570f, 
+  0.994281476451641550f, 
+  0.994199276233218910f, 0.994116491152977070f, 0.994033121259616400f, 
+  0.993949166602181130f, 
+  0.993864627230059750f, 0.993779503192984580f, 0.993693794541031790f, 
+  0.993607501324621610f, 
+  0.993520623594518090f, 0.993433161401829360f, 0.993345114798006910f, 
+  0.993256483834846440f, 
+  0.993167268564487230f, 0.993077469039412300f, 0.992987085312448390f, 
+  0.992896117436765980f, 
+  0.992804565465879140f, 0.992712429453645460f, 0.992619709454266140f, 
+  0.992526405522286100f, 
+  0.992432517712593660f, 0.992338046080420420f, 0.992242990681341700f, 
+  0.992147351571276090f, 
+  0.992051128806485720f, 0.991954322443575950f, 0.991856932539495470f, 
+  0.991758959151536110f, 
+  0.991660402337333210f, 0.991561262154865290f, 0.991461538662453790f, 
+  0.991361231918763460f, 
+  0.991260341982802440f, 0.991158868913921350f, 0.991056812771814340f, 
+  0.990954173616518500f, 
+  0.990850951508413620f, 0.990747146508222710f, 0.990642758677011570f, 
+  0.990537788076188750f, 
+  0.990432234767505970f, 0.990326098813057330f, 0.990219380275280000f, 
+  0.990112079216953770f, 
+  0.990004195701200910f, 0.989895729791486660f, 0.989786681551618640f, 
+  0.989677051045747210f, 
+  0.989566838338365120f, 0.989456043494307710f, 0.989344666578752640f, 
+  0.989232707657220050f, 
+  0.989120166795572690f, 0.989007044060015270f, 0.988893339517095130f, 
+  0.988779053233701520f, 
+  0.988664185277066230f, 0.988548735714763200f, 0.988432704614708340f, 
+  0.988316092045159690f, 
+  0.988198898074717610f, 0.988081122772324070f, 0.987962766207263420f, 
+  0.987843828449161740f, 
+  0.987724309567986960f, 0.987604209634049160f, 0.987483528717999710f, 
+  0.987362266890832400f, 
+  0.987240424223882250f, 0.987118000788826280f, 0.986994996657682980f, 
+  0.986871411902812470f, 
+  0.986747246596916590f, 0.986622500813038480f, 0.986497174624562880f, 
+  0.986371268105216030f, 
+  0.986244781329065460f, 0.986117714370520090f, 0.985990067304330140f, 
+  0.985861840205586980f, 
+  0.985733033149723490f, 0.985603646212513400f, 0.985473679470071810f, 
+  0.985343132998854790f, 
+  0.985212006875659350f, 0.985080301177623800f, 0.984948015982227030f, 
+  0.984815151367289140f, 
+  0.984681707410970940f, 0.984547684191773960f, 0.984413081788540700f, 
+  0.984277900280454370f, 
+  0.984142139747038570f, 0.984005800268157870f, 0.983868881924017220f, 
+  0.983731384795162090f, 
+  0.983593308962478650f, 0.983454654507193270f, 0.983315421510872810f, 
+  0.983175610055424420f, 
+  0.983035220223095640f, 0.982894252096474070f, 0.982752705758487830f, 
+  0.982610581292404750f, 
+  0.982467878781833170f, 0.982324598310721280f, 0.982180739963357090f, 
+  0.982036303824369020f, 
+  0.981891289978725100f, 0.981745698511732990f, 0.981599529509040720f, 
+  0.981452783056635520f, 
+  0.981305459240844670f, 0.981157558148334830f, 0.981009079866112630f, 
+  0.980860024481523870f, 
+  0.980710392082253970f, 0.980560182756327840f, 0.980409396592109910f, 
+  0.980258033678303550f, 
+  0.980106094103951770f, 0.979953577958436740f, 0.979800485331479790f, 
+  0.979646816313141210f, 
+  0.979492570993820810f, 0.979337749464256780f, 0.979182351815526930f, 
+  0.979026378139047580f, 
+  0.978869828526574120f, 0.978712703070200420f, 0.978555001862359550f, 
+  0.978396724995823090f, 
+  0.978237872563701090f, 0.978078444659442380f, 0.977918441376834370f, 
+  0.977757862810002760f, 
+  0.977596709053411890f, 0.977434980201864260f, 0.977272676350500860f, 
+  0.977109797594800880f, 
+  0.976946344030581670f, 0.976782315753998650f, 0.976617712861545640f, 
+  0.976452535450054060f, 
+  0.976286783616693630f, 0.976120457458971910f, 0.975953557074734300f, 
+  0.975786082562163930f, 
+  0.975618034019781750f, 0.975449411546446380f, 0.975280215241354220f, 
+  0.975110445204038890f, 
+  0.974940101534371830f, 0.974769184332561770f, 0.974597693699155050f, 
+  0.974425629735034990f, 
+  0.974252992541422500f, 0.974079782219875680f, 0.973905998872289570f, 
+  0.973731642600896400f, 
+  0.973556713508265560f, 0.973381211697303290f, 0.973205137271252800f, 
+  0.973028490333694210f, 
+  0.972851270988544180f, 0.972673479340056430f, 0.972495115492821190f, 
+  0.972316179551765300f, 
+  0.972136671622152230f, 0.971956591809581720f, 0.971775940219990140f, 
+  0.971594716959650160f, 
+  0.971412922135170940f, 0.971230555853497380f, 0.971047618221911100f, 
+  0.970864109348029470f, 
+  0.970680029339806130f, 0.970495378305530560f, 0.970310156353828110f, 
+  0.970124363593660280f, 
+  0.969938000134323960f, 0.969751066085452140f, 0.969563561557013180f, 
+  0.969375486659311280f, 
+  0.969186841502985950f, 0.968997626199012420f, 0.968807840858700970f, 
+  0.968617485593697540f, 
+  0.968426560515983190f, 0.968235065737874320f, 0.968043001372022260f, 
+  0.967850367531413620f, 
+  0.967657164329369880f, 0.967463391879547550f, 0.967269050295937790f, 
+  0.967074139692867040f, 
+  0.966878660184995910f, 0.966682611887320080f, 0.966485994915169840f, 
+  0.966288809384209690f, 
+  0.966091055410438830f, 0.965892733110190860f, 0.965693842600133690f, 
+  0.965494383997269500f, 
+  0.965294357418934660f, 0.965093762982799590f, 0.964892600806868890f, 
+  0.964690871009481030f, 
+  0.964488573709308410f, 0.964285709025357480f, 0.964082277076968140f, 
+  0.963878277983814200f, 
+  0.963673711865903230f, 0.963468578843575950f, 0.963262879037507070f, 
+  0.963056612568704340f, 
+  0.962849779558509030f, 0.962642380128595710f, 0.962434414400972100f, 
+  0.962225882497979020f, 
+  0.962016784542290560f, 0.961807120656913540f, 0.961596890965187860f, 
+  0.961386095590786250f, 
+  0.961174734657714080f, 0.960962808290309780f, 0.960750316613243950f, 
+  0.960537259751520050f, 
+  0.960323637830473920f, 0.960109450975773940f, 0.959894699313420530f, 
+  0.959679382969746750f, 
+  0.959463502071417510f, 0.959247056745430090f, 0.959030047119113660f, 
+  0.958812473320129310f, 
+  0.958594335476470220f, 0.958375633716461170f, 0.958156368168758820f, 
+  0.957936538962351420f, 
+  0.957716146226558870f, 0.957495190091032570f, 0.957273670685755200f, 
+  0.957051588141040970f, 
+  0.956828942587535370f, 0.956605734156215080f, 0.956381962978387730f, 
+  0.956157629185692140f, 
+  0.955932732910098280f, 0.955707274283906560f, 0.955481253439748770f, 
+  0.955254670510586990f, 
+  0.955027525629714160f, 0.954799818930753720f, 0.954571550547659630f, 
+  0.954342720614716480f, 
+  0.954113329266538800f, 0.953883376638071770f, 0.953652862864590500f, 
+  0.953421788081700310f, 
+  0.953190152425336670f, 0.952957956031764700f, 0.952725199037579570f, 
+  0.952491881579706320f, 
+  0.952258003795399600f, 0.952023565822243570f, 0.951788567798152130f, 
+  0.951553009861368590f, 
+  0.951316892150465550f, 0.951080214804345010f, 0.950842977962238160f, 
+  0.950605181763705340f, 
+  0.950366826348635780f, 0.950127911857248100f, 0.949888438430089300f, 
+  0.949648406208035480f, 
+  0.949407815332291570f, 0.949166665944390700f, 0.948924958186195160f, 
+  0.948682692199895090f, 
+  0.948439868128009620f, 0.948196486113385580f, 0.947952546299198670f, 
+  0.947708048828952100f, 
+  0.947462993846477700f, 0.947217381495934820f, 0.946971211921810880f, 
+  0.946724485268921170f, 
+  0.946477201682408680f, 0.946229361307743820f, 0.945980964290724760f, 
+  0.945732010777477150f, 
+  0.945482500914453740f, 0.945232434848435000f, 0.944981812726528150f, 
+  0.944730634696167800f, 
+  0.944478900905115550f, 0.944226611501459810f, 0.943973766633615980f, 
+  0.943720366450326200f, 
+  0.943466411100659320f, 0.943211900734010620f, 0.942956835500102120f, 
+  0.942701215548981900f, 
+  0.942445041031024890f, 0.942188312096931770f, 0.941931028897729620f, 
+  0.941673191584771360f, 
+  0.941414800309736340f, 0.941155855224629190f, 0.940896356481780830f, 
+  0.940636304233847590f, 
+  0.940375698633811540f, 0.940114539834980280f, 0.939852827990986680f, 
+  0.939590563255789270f, 
+  0.939327745783671400f, 0.939064375729241950f, 0.938800453247434770f, 
+  0.938535978493508560f, 
+  0.938270951623047190f, 0.938005372791958840f, 0.937739242156476970f, 
+  0.937472559873159250f, 
+  0.937205326098887960f, 0.936937540990869900f, 0.936669204706636170f, 
+  0.936400317404042060f, 
+  0.936130879241267030f, 0.935860890376814640f, 0.935590350969512370f, 
+  0.935319261178511610f, 
+  0.935047621163287430f, 0.934775431083638700f, 0.934502691099687870f, 
+  0.934229401371880820f, 
+  0.933955562060986730f, 0.933681173328098410f, 0.933406235334631520f, 
+  0.933130748242325230f, 
+  0.932854712213241120f, 0.932578127409764420f, 0.932300993994602760f, 
+  0.932023312130786490f, 
+  0.931745081981668720f, 0.931466303710925090f, 0.931186977482553750f, 
+  0.930907103460875130f, 
+  0.930626681810531760f, 0.930345712696488470f, 0.930064196284032360f, 
+  0.929782132738772190f, 
+  0.929499522226638560f, 0.929216364913884040f, 0.928932660967082820f, 
+  0.928648410553130520f, 
+  0.928363613839244370f, 0.928078270992963140f, 0.927792382182146320f, 
+  0.927505947574975180f, 
+  0.927218967339951790f, 0.926931441645899130f, 0.926643370661961230f, 
+  0.926354754557602860f, 
+  0.926065593502609310f, 0.925775887667086740f, 0.925485637221461490f, 
+  0.925194842336480530f, 
+  0.924903503183210910f, 0.924611619933039970f, 0.924319192757675160f, 
+  0.924026221829143850f, 
+  0.923732707319793290f, 0.923438649402290370f, 0.923144048249621930f, 
+  0.922848904035094120f, 
+  0.922553216932332830f, 0.922256987115283030f, 0.921960214758209220f, 
+  0.921662900035694730f, 
+  0.921365043122642340f, 0.921066644194273640f, 0.920767703426128790f, 
+  0.920468220994067110f, 
+  0.920168197074266340f, 0.919867631843222950f, 0.919566525477751530f, 
+  0.919264878154985370f, 
+  0.918962690052375630f, 0.918659961347691900f, 0.918356692219021720f, 
+  0.918052882844770380f, 
+  0.917748533403661250f, 0.917443644074735220f, 0.917138215037350710f, 
+  0.916832246471183890f, 
+  0.916525738556228210f, 0.916218691472794220f, 0.915911105401509880f, 
+  0.915602980523320230f, 
+  0.915294317019487050f, 0.914985115071589310f, 0.914675374861522390f, 
+  0.914365096571498560f, 
+  0.914054280384046570f, 0.913742926482011390f, 0.913431035048554720f, 
+  0.913118606267154240f, 
+  0.912805640321603500f, 0.912492137396012650f, 0.912178097674807180f, 
+  0.911863521342728520f, 
+  0.911548408584833990f, 0.911232759586496190f, 0.910916574533403360f, 
+  0.910599853611558930f, 
+  0.910282597007281760f, 0.909964804907205660f, 0.909646477498279540f, 
+  0.909327614967767260f, 
+  0.909008217503247450f, 0.908688285292613360f, 0.908367818524072890f, 
+  0.908046817386148340f, 
+  0.907725282067676440f, 0.907403212757808110f, 0.907080609646008450f, 
+  0.906757472922056550f, 
+  0.906433802776045460f, 0.906109599398381980f, 0.905784862979786550f, 
+  0.905459593711293250f, 
+  0.905133791784249690f, 0.904807457390316540f, 0.904480590721468250f, 
+  0.904153191969991780f, 
+  0.903825261328487510f, 0.903496798989868450f, 0.903167805147360720f, 
+  0.902838279994502830f, 
+  0.902508223725145940f, 0.902177636533453620f, 0.901846518613901750f, 
+  0.901514870161278740f, 
+  0.901182691370684520f, 0.900849982437531450f, 0.900516743557543520f, 
+  0.900182974926756810f, 
+  0.899848676741518580f, 0.899513849198487980f, 0.899178492494635330f, 
+  0.898842606827242370f, 
+  0.898506192393901950f, 0.898169249392518080f, 0.897831778021305650f, 
+  0.897493778478790310f, 
+  0.897155250963808550f, 0.896816195675507300f, 0.896476612813344120f, 
+  0.896136502577086770f, 
+  0.895795865166813530f, 0.895454700782912450f, 0.895113009626081760f, 
+  0.894770791897329550f, 
+  0.894428047797973800f, 0.894084777529641990f, 0.893740981294271040f, 
+  0.893396659294107720f, 
+  0.893051811731707450f, 0.892706438809935390f, 0.892360540731965360f, 
+  0.892014117701280470f, 
+  0.891667169921672280f, 0.891319697597241390f, 0.890971700932396860f, 
+  0.890623180131855930f, 
+  0.890274135400644600f, 0.889924566944096720f, 0.889574474967854580f, 
+  0.889223859677868210f, 
+  0.888872721280395630f, 0.888521059982002260f, 0.888168875989561730f, 
+  0.887816169510254440f, 
+  0.887462940751568840f, 0.887109189921300170f, 0.886754917227550840f, 
+  0.886400122878730600f, 
+  0.886044807083555600f, 0.885688970051048960f, 0.885332611990540590f, 
+  0.884975733111666660f, 
+  0.884618333624369920f, 0.884260413738899190f, 0.883901973665809470f, 
+  0.883543013615961880f, 
+  0.883183533800523390f, 0.882823534430966620f, 0.882463015719070150f, 
+  0.882101977876917580f, 
+  0.881740421116898320f, 0.881378345651706920f, 0.881015751694342870f, 
+  0.880652639458111010f, 
+  0.880289009156621010f, 0.879924861003786860f, 0.879560195213827890f, 
+  0.879195012001267480f, 
+  0.878829311580933360f, 0.878463094167957870f, 0.878096359977777130f, 
+  0.877729109226131570f, 
+  0.877361342129065140f, 0.876993058902925890f, 0.876624259764365310f, 
+  0.876254944930338510f, 
+  0.875885114618103810f, 0.875514769045222850f, 0.875143908429560360f, 
+  0.874772532989284150f, 
+  0.874400642942864790f, 0.874028238509075740f, 0.873655319906992630f, 
+  0.873281887355994210f, 
+  0.872907941075761080f, 0.872533481286276170f, 0.872158508207824480f, 
+  0.871783022060993120f, 
+  0.871407023066670950f, 0.871030511446048260f, 0.870653487420617430f, 
+  0.870275951212171940f, 
+  0.869897903042806340f, 0.869519343134916860f, 0.869140271711200560f, 
+  0.868760688994655310f, 
+  0.868380595208579800f, 0.867999990576573510f, 0.867618875322536230f, 
+  0.867237249670668400f, 
+  0.866855113845470430f, 0.866472468071743050f, 0.866089312574586770f, 
+  0.865705647579402380f, 
+  0.865321473311889800f, 0.864936789998049020f, 0.864551597864179340f, 
+  0.864165897136879300f, 
+  0.863779688043046720f, 0.863392970809878420f, 0.863005745664870320f, 
+  0.862618012835816740f, 
+  0.862229772550811240f, 0.861841025038245330f, 0.861451770526809320f, 
+  0.861062009245491480f, 
+  0.860671741423578380f, 0.860280967290654510f, 0.859889687076602290f, 
+  0.859497901011601730f, 
+  0.859105609326130450f, 0.858712812250963520f, 0.858319510017173440f, 
+  0.857925702856129790f, 
+  0.857531390999499150f, 0.857136574679244980f, 0.856741254127627470f, 
+  0.856345429577203610f, 
+  0.855949101260826910f, 0.855552269411646860f, 0.855154934263109620f, 
+  0.854757096048957220f, 
+  0.854358755003227440f, 0.853959911360254180f, 0.853560565354666840f, 
+  0.853160717221390420f, 
+  0.852760367195645300f, 0.852359515512947090f, 0.851958162409106380f, 
+  0.851556308120228980f, 
+  0.851153952882715340f, 0.850751096933260790f, 0.850347740508854980f, 
+  0.849943883846782210f, 
+  0.849539527184620890f, 0.849134670760243630f, 0.848729314811817130f, 
+  0.848323459577801640f, 
+  0.847917105296951410f, 0.847510252208314330f, 0.847102900551231500f, 
+  0.846695050565337450f, 
+  0.846286702490559710f, 0.845877856567119000f, 0.845468513035528830f, 
+  0.845058672136595470f, 
+  0.844648334111417820f, 0.844237499201387020f, 0.843826167648186740f, 
+  0.843414339693792760f, 
+  0.843002015580472940f, 0.842589195550786710f, 0.842175879847585570f, 
+  0.841762068714012490f, 
+  0.841347762393501950f, 0.840932961129779780f, 0.840517665166862550f, 
+  0.840101874749058400f, 
+  0.839685590120966110f, 0.839268811527475230f, 0.838851539213765760f, 
+  0.838433773425308340f, 
+  0.838015514407863820f, 0.837596762407483040f, 0.837177517670507300f, 
+  0.836757780443567190f, 
+  0.836337550973583530f, 0.835916829507766360f, 0.835495616293615350f, 
+  0.835073911578919410f, 
+  0.834651715611756440f, 0.834229028640493420f, 0.833805850913786340f, 
+  0.833382182680579730f, 
+  0.832958024190106670f, 0.832533375691888680f, 0.832108237435735590f, 
+  0.831682609671745120f, 
+  0.831256492650303210f, 0.830829886622083570f, 0.830402791838047550f, 
+  0.829975208549443950f, 
+  0.829547137007808910f, 0.829118577464965980f, 0.828689530173025820f, 
+  0.828259995384385660f, 
+  0.827829973351729920f, 0.827399464328029470f, 0.826968468566541600f, 
+  0.826536986320809960f, 
+  0.826105017844664610f, 0.825672563392221390f, 0.825239623217882250f, 
+  0.824806197576334330f, 
+  0.824372286722551250f, 0.823937890911791370f, 0.823503010399598500f, 
+  0.823067645441801670f, 
+  0.822631796294514990f, 0.822195463214137170f, 0.821758646457351750f, 
+  0.821321346281126740f, 
+  0.820883562942714580f, 0.820445296699652050f, 0.820006547809759680f, 
+  0.819567316531142230f, 
+  0.819127603122188240f, 0.818687407841569680f, 0.818246730948242070f, 
+  0.817805572701444270f, 
+  0.817363933360698460f, 0.816921813185809480f, 0.816479212436865390f, 
+  0.816036131374236810f, 
+  0.815592570258576790f, 0.815148529350820830f, 0.814704008912187080f, 
+  0.814259009204175270f, 
+  0.813813530488567190f, 0.813367573027426570f, 0.812921137083098770f, 
+  0.812474222918210480f, 
+  0.812026830795669730f, 0.811578960978665890f, 0.811130613730669190f, 
+  0.810681789315430780f, 
+  0.810232487996982330f, 0.809782710039636530f, 0.809332455707985950f, 
+  0.808881725266903610f, 
+  0.808430518981542720f, 0.807978837117336310f, 0.807526679939997160f, 
+  0.807074047715517610f, 
+  0.806620940710169650f, 0.806167359190504420f, 0.805713303423352230f, 
+  0.805258773675822210f, 
+  0.804803770215302920f, 0.804348293309460780f, 0.803892343226241260f, 
+  0.803435920233868120f, 
+  0.802979024600843250f, 0.802521656595946430f, 0.802063816488235440f, 
+  0.801605504547046150f, 
+  0.801146721041991360f, 0.800687466242961610f, 0.800227740420124790f, 
+  0.799767543843925680f, 
+  0.799306876785086160f, 0.798845739514604580f, 0.798384132303756380f, 
+  0.797922055424093000f, 
+  0.797459509147442460f, 0.796996493745908750f, 0.796533009491872000f, 
+  0.796069056657987990f, 
+  0.795604635517188070f, 0.795139746342679590f, 0.794674389407944550f, 
+  0.794208564986740640f, 
+  0.793742273353100210f, 0.793275514781330630f, 0.792808289546014120f, 
+  0.792340597922007170f, 
+  0.791872440184440470f, 0.791403816608719500f, 0.790934727470523290f, 
+  0.790465173045804880f, 
+  0.789995153610791090f, 0.789524669441982190f, 0.789053720816151880f, 
+  0.788582308010347120f, 
+  0.788110431301888070f, 0.787638090968367450f, 0.787165287287651010f, 
+  0.786692020537876790f, 
+  0.786218290997455660f, 0.785744098945070360f, 0.785269444659675850f, 
+  0.784794328420499230f, 
+  0.784318750507038920f, 0.783842711199065230f, 0.783366210776619720f, 
+  0.782889249520015480f, 
+  0.782411827709836530f, 0.781933945626937630f, 0.781455603552444590f, 
+  0.780976801767753750f, 
+  0.780497540554531910f, 0.780017820194715990f, 0.779537640970513260f, 
+  0.779057003164400630f, 
+  0.778575907059125050f, 0.778094352937702790f, 0.777612341083420030f, 
+  0.777129871779831620f, 
+  0.776646945310762060f, 0.776163561960304340f, 0.775679722012820650f, 
+  0.775195425752941420f, 
+  0.774710673465565550f, 0.774225465435860680f, 0.773739801949261840f, 
+  0.773253683291472590f, 
+  0.772767109748463850f, 0.772280081606474320f, 0.771792599152010150f, 
+  0.771304662671844830f, 
+  0.770816272453018540f, 0.770327428782838890f, 0.769838131948879840f, 
+  0.769348382238982280f, 
+  0.768858179941253270f, 0.768367525344066270f, 0.767876418736060610f, 
+  0.767384860406141730f, 
+  0.766892850643480670f, 0.766400389737514230f, 0.765907477977944340f, 
+  0.765414115654738270f, 
+  0.764920303058128410f, 0.764426040478612070f, 0.763931328206951090f, 
+  0.763436166534172010f, 
+  0.762940555751565720f, 0.762444496150687210f, 0.761947988023355390f, 
+  0.761451031661653620f, 
+  0.760953627357928150f, 0.760455775404789260f, 0.759957476095110330f, 
+  0.759458729722028210f, 
+  0.758959536578942440f, 0.758459896959515430f, 0.757959811157672300f, 
+  0.757459279467600720f, 
+  0.756958302183750490f, 0.756456879600833740f, 0.755955012013824420f, 
+  0.755452699717958250f, 
+  0.754949943008732640f, 0.754446742181906440f, 0.753943097533499640f, 
+  0.753439009359793580f, 
+  0.752934477957330150f, 0.752429503622912390f, 0.751924086653603550f, 
+  0.751418227346727470f, 
+  0.750911925999867890f, 0.750405182910869330f, 0.749897998377835330f, 
+  0.749390372699129560f, 
+  0.748882306173375150f, 0.748373799099454560f, 0.747864851776509410f, 
+  0.747355464503940190f, 
+  0.746845637581406540f, 0.746335371308826320f, 0.745824665986376090f, 
+  0.745313521914490520f, 
+  0.744801939393862630f, 0.744289918725443260f, 0.743777460210440890f, 
+  0.743264564150321600f, 
+  0.742751230846809050f, 0.742237460601884000f, 0.741723253717784140f, 
+  0.741208610497004260f, 
+  0.740693531242295760f, 0.740178016256666240f, 0.739662065843380010f, 
+  0.739145680305957510f, 
+  0.738628859948174840f, 0.738111605074064260f, 0.737593915987913570f, 
+  0.737075792994265730f, 
+  0.736557236397919150f, 0.736038246503927350f, 0.735518823617598900f, 
+  0.734998968044496710f, 
+  0.734478680090438370f, 0.733957960061495940f, 0.733436808263995710f, 
+  0.732915225004517780f, 
+  0.732393210589896040f, 0.731870765327218290f, 0.731347889523825570f, 
+  0.730824583487312160f, 
+  0.730300847525525490f, 0.729776681946566090f, 0.729252087058786970f, 
+  0.728727063170793830f, 
+  0.728201610591444610f, 0.727675729629849610f, 0.727149420595371020f, 
+  0.726622683797622850f, 
+  0.726095519546471000f, 0.725567928152032300f, 0.725039909924675370f, 
+  0.724511465175019630f, 
+  0.723982594213935520f, 0.723453297352544380f, 0.722923574902217700f, 
+  0.722393427174577550f, 
+  0.721862854481496340f, 0.721331857135096290f, 0.720800435447749190f, 
+  0.720268589732077190f, 
+  0.719736320300951030f, 0.719203627467491220f, 0.718670511545067230f, 
+  0.718136972847297490f, 
+  0.717603011688049080f, 0.717068628381437480f, 0.716533823241826680f, 
+  0.715998596583828690f, 
+  0.715462948722303760f, 0.714926879972359490f, 0.714390390649351390f, 
+  0.713853481068882470f, 
+  0.713316151546802610f, 0.712778402399208980f, 0.712240233942445510f, 
+  0.711701646493102970f, 
+  0.711162640368018350f, 0.710623215884275020f, 0.710083373359202800f, 
+  0.709543113110376770f, 
+  0.709002435455618250f, 0.708461340712994160f, 0.707919829200816310f, 
+  0.707377901237642100f, 
+  0.706835557142273860f, 0.706292797233758480f, 0.705749621831387790f, 
+  0.705206031254697830f, 
+  0.704662025823468930f, 0.704117605857725430f, 0.703572771677735580f, 
+  0.703027523604011220f, 
+  0.702481861957308000f, 0.701935787058624360f, 0.701389299229202230f, 
+  0.700842398790526230f, 
+  0.700295086064323780f, 0.699747361372564990f, 0.699199225037462120f, 
+  0.698650677381469580f, 
+  0.698101718727283880f, 0.697552349397843270f, 0.697002569716327460f, 
+  0.696452380006157830f, 
+  0.695901780590996830f, 0.695350771794747800f, 0.694799353941554900f, 
+  0.694247527355803310f, 
+  0.693695292362118350f, 0.693142649285365510f, 0.692589598450650380f, 
+  0.692036140183318830f, 
+  0.691482274808955850f, 0.690928002653386280f, 0.690373324042674040f, 
+  0.689818239303122470f, 
+  0.689262748761273470f, 0.688706852743907750f, 0.688150551578044830f, 
+  0.687593845590942170f, 
+  0.687036735110095660f, 0.686479220463238950f, 0.685921301978343670f, 
+  0.685362979983618730f, 
+  0.684804254807510620f, 0.684245126778703080f, 0.683685596226116690f, 
+  0.683125663478908800f, 
+  0.682565328866473250f, 0.682004592718440830f, 0.681443455364677990f, 
+  0.680881917135287340f, 
+  0.680319978360607200f, 0.679757639371212030f, 0.679194900497911200f, 
+  0.678631762071749470f, 
+  0.678068224424006600f, 0.677504287886197430f, 0.676939952790071240f, 
+  0.676375219467611700f, 
+  0.675810088251037060f, 0.675244559472799270f, 0.674678633465584540f, 
+  0.674112310562312360f, 
+  0.673545591096136100f, 0.672978475400442090f, 0.672410963808849900f, 
+  0.671843056655211930f, 
+  0.671274754273613490f, 0.670706056998372160f, 0.670136965164037760f, 
+  0.669567479105392490f, 
+  0.668997599157450270f, 0.668427325655456820f, 0.667856658934889440f, 
+  0.667285599331456480f, 
+  0.666714147181097670f, 0.666142302819983540f, 0.665570066584515560f, 
+  0.664997438811325340f, 
+  0.664424419837275180f, 0.663851009999457340f, 0.663277209635194100f, 
+  0.662703019082037440f, 
+  0.662128438677768720f, 0.661553468760399000f, 0.660978109668168060f, 
+  0.660402361739545030f, 
+  0.659826225313227430f, 0.659249700728141490f, 0.658672788323441890f, 
+  0.658095488438511290f, 
+  0.657517801412960120f, 0.656939727586627110f, 0.656361267299578000f, 
+  0.655782420892106030f, 
+  0.655203188704731930f, 0.654623571078202680f, 0.654043568353492640f, 
+  0.653463180871802330f, 
+  0.652882408974558960f, 0.652301253003415460f, 0.651719713300251020f, 
+  0.651137790207170330f, 
+  0.650555484066503990f, 0.649972795220807530f, 0.649389724012861770f, 
+  0.648806270785672550f, 
+  0.648222435882470420f, 0.647638219646710420f, 0.647053622422071650f, 
+  0.646468644552457890f, 
+  0.645883286381996440f, 0.645297548255038380f, 0.644711430516158420f, 
+  0.644124933510154540f, 
+  0.643538057582047850f, 0.642950803077082080f, 0.642363170340724320f, 
+  0.641775159718663500f, 
+  0.641186771556811250f, 0.640598006201301030f, 0.640008863998488440f, 
+  0.639419345294950700f, 
+  0.638829450437486400f, 0.638239179773115390f, 0.637648533649078810f, 
+  0.637057512412838590f, 
+  0.636466116412077180f, 0.635874345994697720f, 0.635282201508823530f, 
+  0.634689683302797850f, 
+  0.634096791725183740f, 0.633503527124764320f, 0.632909889850541860f, 
+  0.632315880251737680f, 
+  0.631721498677792370f, 0.631126745478365340f, 0.630531621003334600f, 
+  0.629936125602796550f, 
+  0.629340259627065750f, 0.628744023426674790f, 0.628147417352374120f, 
+  0.627550441755131530f, 
+  0.626953096986132770f, 0.626355383396779990f, 0.625757301338692900f, 
+  0.625158851163707730f, 
+  0.624560033223877320f, 0.623960847871470770f, 0.623361295458973340f, 
+  0.622761376339086460f, 
+  0.622161090864726930f, 0.621560439389027270f, 0.620959422265335180f, 
+  0.620358039847213830f, 
+  0.619756292488440660f, 0.619154180543008410f, 0.618551704365123860f, 
+  0.617948864309208260f, 
+  0.617345660729896940f, 0.616742093982038830f, 0.616138164420696910f, 
+  0.615533872401147430f, 
+  0.614929218278879590f, 0.614324202409595950f, 0.613718825149211830f, 
+  0.613113086853854910f, 
+  0.612506987879865570f, 0.611900528583796070f, 0.611293709322411010f, 
+  0.610686530452686280f, 
+  0.610078992331809620f, 0.609471095317180240f, 0.608862839766408200f, 
+  0.608254226037314490f, 
+  0.607645254487930830f, 0.607035925476499760f, 0.606426239361473550f, 
+  0.605816196501515080f, 
+  0.605205797255496500f, 0.604595041982500360f, 0.603983931041818020f, 
+  0.603372464792950370f, 
+  0.602760643595607220f, 0.602148467809707320f, 0.601535937795377730f, 
+  0.600923053912954090f, 
+  0.600309816522980430f, 0.599696225986208310f, 0.599082282663597310f, 
+  0.598467986916314310f, 
+  0.597853339105733910f, 0.597238339593437530f, 0.596622988741213330f, 
+  0.596007286911056530f, 
+  0.595391234465168730f, 0.594774831765957580f, 0.594158079176036800f, 
+  0.593540977058226390f, 
+  0.592923525775551410f, 0.592305725691242400f, 0.591687577168735550f, 
+  0.591069080571671510f, 
+  0.590450236263895920f, 0.589831044609458900f, 0.589211505972615070f, 
+  0.588591620717822890f, 
+  0.587971389209745120f, 0.587350811813247660f, 0.586729888893400500f, 
+  0.586108620815476430f, 
+  0.585487007944951450f, 0.584865050647504490f, 0.584242749289016980f, 
+  0.583620104235572760f, 
+  0.582997115853457700f, 0.582373784509160220f, 0.581750110569369760f, 
+  0.581126094400977620f, 
+  0.580501736371076600f, 0.579877036846960350f, 0.579251996196123550f, 
+  0.578626614786261430f, 
+  0.578000892985269910f, 0.577374831161244880f, 0.576748429682482520f, 
+  0.576121688917478390f, 
+  0.575494609234928230f, 0.574867191003726740f, 0.574239434592967890f, 
+  0.573611340371944610f, 
+  0.572982908710148680f, 0.572354139977270030f, 0.571725034543197120f, 
+  0.571095592778016690f, 
+  0.570465815052012990f, 0.569835701735668110f, 0.569205253199661200f, 
+  0.568574469814869250f, 
+  0.567943351952365670f, 0.567311899983420800f, 0.566680114279501710f, 
+  0.566047995212271560f, 
+  0.565415543153589770f, 0.564782758475511400f, 0.564149641550287680f, 
+  0.563516192750364910f, 
+  0.562882412448384550f, 0.562248301017183150f, 0.561613858829792420f, 
+  0.560979086259438260f, 
+  0.560343983679540860f, 0.559708551463714790f, 0.559072789985768480f, 
+  0.558436699619704100f, 
+  0.557800280739717100f, 0.557163533720196340f, 0.556526458935723720f, 
+  0.555889056761073920f, 
+  0.555251327571214090f, 0.554613271741304040f, 0.553974889646695610f, 
+  0.553336181662932410f, 
+  0.552697148165749770f, 0.552057789531074980f, 0.551418106135026060f, 
+  0.550778098353912230f, 
+  0.550137766564233630f, 0.549497111142680960f, 0.548856132466135290f, 
+  0.548214830911667780f, 
+  0.547573206856539870f, 0.546931260678202190f, 0.546288992754295210f, 
+  0.545646403462648590f, 
+  0.545003493181281160f, 0.544360262288400400f, 0.543716711162402390f, 
+  0.543072840181871850f, 
+  0.542428649725581360f, 0.541784140172491660f, 0.541139311901750910f, 
+  0.540494165292695230f, 
+  0.539848700724847700f, 0.539202918577918240f, 0.538556819231804210f, 
+  0.537910403066588990f, 
+  0.537263670462542530f, 0.536616621800121150f, 0.535969257459966710f, 
+  0.535321577822907010f, 
+  0.534673583269955510f, 0.534025274182310380f, 0.533376650941355560f, 
+  0.532727713928658810f, 
+  0.532078463525973540f, 0.531428900115236910f, 0.530779024078570250f, 
+  0.530128835798278850f, 
+  0.529478335656852090f, 0.528827524036961980f, 0.528176401321464370f, 
+  0.527524967893398200f, 
+  0.526873224135984700f, 0.526221170432628170f, 0.525568807166914680f, 
+  0.524916134722612890f, 
+  0.524263153483673470f, 0.523609863834228030f, 0.522956266158590140f, 
+  0.522302360841254700f, 
+  0.521648148266897090f, 0.520993628820373810f, 0.520338802886721960f, 
+  0.519683670851158520f, 
+  0.519028233099080970f, 0.518372490016066220f, 0.517716441987871150f, 
+  0.517060089400432130f, 
+  0.516403432639863990f, 0.515746472092461380f, 0.515089208144697270f, 
+  0.514431641183222930f, 
+  0.513773771594868030f, 0.513115599766640560f, 0.512457126085725800f, 
+  0.511798350939487000f, 
+  0.511139274715464390f, 0.510479897801375700f, 0.509820220585115560f, 
+  0.509160243454754750f, 
+  0.508499966798540810f, 0.507839391004897940f, 0.507178516462425290f, 
+  0.506517343559898530f, 
+  0.505855872686268860f, 0.505194104230662240f, 0.504532038582380380f, 
+  0.503869676130898950f, 
+  0.503207017265869030f, 0.502544062377115800f, 0.501880811854638400f, 
+  0.501217266088609950f, 
+  0.500553425469377640f, 0.499889290387461380f, 0.499224861233555030f, 
+  0.498560138398525200f, 
+  0.497895122273410930f, 0.497229813249424340f, 0.496564211717949340f, 
+  0.495898318070542240f, 
+  0.495232132698931350f, 0.494565655995016010f, 0.493898888350867430f, 
+  0.493231830158728070f, 
+  0.492564481811010650f, 0.491896843700299240f, 0.491228916219348330f, 
+  0.490560699761082080f, 
+  0.489892194718595300f, 0.489223401485152030f, 0.488554320454186230f, 
+  0.487884952019301210f, 
+  0.487215296574268820f, 0.486545354513030270f, 0.485875126229695420f, 
+  0.485204612118541880f, 
+  0.484533812574016120f, 0.483862727990732320f, 0.483191358763471910f, 
+  0.482519705287184520f, 
+  0.481847767956986080f, 0.481175547168160360f, 0.480503043316157670f, 
+  0.479830256796594250f, 
+  0.479157188005253310f, 0.478483837338084080f, 0.477810205191201040f, 
+  0.477136291960884750f, 
+  0.476462098043581310f, 0.475787623835901120f, 0.475112869734620470f, 
+  0.474437836136679340f, 
+  0.473762523439182850f, 0.473086932039400220f, 0.472411062334764100f, 
+  0.471734914722871430f, 
+  0.471058489601482610f, 0.470381787368520710f, 0.469704808422072460f, 
+  0.469027553160387240f, 
+  0.468350021981876530f, 0.467672215285114710f, 0.466994133468838110f, 
+  0.466315776931944480f, 
+  0.465637146073493770f, 0.464958241292706740f, 0.464279062988965760f, 
+  0.463599611561814120f, 
+  0.462919887410955130f, 0.462239890936253280f, 0.461559622537733190f, 
+  0.460879082615578690f, 
+  0.460198271570134270f, 0.459517189801903590f, 0.458835837711549120f, 
+  0.458154215699893230f, 
+  0.457472324167916110f, 0.456790163516757220f, 0.456107734147714220f, 
+  0.455425036462242420f, 
+  0.454742070861955450f, 0.454058837748624540f, 0.453375337524177750f, 
+  0.452691570590700860f, 
+  0.452007537350436530f, 0.451323238205783520f, 0.450638673559297760f, 
+  0.449953843813690580f, 
+  0.449268749371829920f, 0.448583390636739300f, 0.447897768011597360f, 
+  0.447211881899738260f, 
+  0.446525732704651400f, 0.445839320829980350f, 0.445152646679523590f, 
+  0.444465710657234110f, 
+  0.443778513167218280f, 0.443091054613736990f, 0.442403335401204130f, 
+  0.441715355934187310f, 
+  0.441027116617407340f, 0.440338617855737300f, 0.439649860054203420f, 
+  0.438960843617984430f, 
+  0.438271568952410480f, 0.437582036462964340f, 0.436892246555280470f, 
+  0.436202199635143950f, 
+  0.435511896108492170f, 0.434821336381412350f, 0.434130520860143310f, 
+  0.433439449951074200f, 
+  0.432748124060743760f, 0.432056543595841450f, 0.431364708963206440f, 
+  0.430672620569826860f, 
+  0.429980278822840570f, 0.429287684129534720f, 0.428594836897344400f, 
+  0.427901737533854240f, 
+  0.427208386446796370f, 0.426514784044051520f, 0.425820930733648350f, 
+  0.425126826923762410f, 
+  0.424432473022717420f, 0.423737869438983950f, 0.423043016581179100f, 
+  0.422347914858067000f, 
+  0.421652564678558380f, 0.420956966451709440f, 0.420261120586723050f, 
+  0.419565027492946940f, 
+  0.418868687579875110f, 0.418172101257146430f, 0.417475268934544340f, 
+  0.416778191021997590f, 
+  0.416080867929579320f, 0.415383300067506290f, 0.414685487846140010f, 
+  0.413987431675985510f, 
+  0.413289131967690960f, 0.412590589132048380f, 0.411891803579992220f, 
+  0.411192775722600160f, 
+  0.410493505971092520f, 0.409793994736831200f, 0.409094242431320920f, 
+  0.408394249466208110f, 
+  0.407694016253280170f, 0.406993543204466460f, 0.406292830731837470f, 
+  0.405591879247603870f, 
+  0.404890689164117750f, 0.404189260893870750f, 0.403487594849495310f, 
+  0.402785691443763640f, 
+  0.402083551089587040f, 0.401381174200016790f, 0.400678561188243350f, 
+  0.399975712467595390f, 
+  0.399272628451540930f, 0.398569309553686360f, 0.397865756187775750f, 
+  0.397161968767691720f, 
+  0.396457947707453960f, 0.395753693421220080f, 0.395049206323284880f, 
+  0.394344486828079650f, 
+  0.393639535350172880f, 0.392934352304269600f, 0.392228938105210370f, 
+  0.391523293167972350f, 
+  0.390817417907668610f, 0.390111312739546910f, 0.389404978078991100f, 
+  0.388698414341519250f, 
+  0.387991621942784910f, 0.387284601298575890f, 0.386577352824813980f, 
+  0.385869876937555310f, 
+  0.385162174052989970f, 0.384454244587440870f, 0.383746088957365010f, 
+  0.383037707579352130f, 
+  0.382329100870124510f, 0.381620269246537520f, 0.380911213125578130f, 
+  0.380201932924366050f, 
+  0.379492429060152740f, 0.378782701950320600f, 0.378072752012383990f, 
+  0.377362579663988450f, 
+  0.376652185322909620f, 0.375941569407054420f, 0.375230732334460030f, 
+  0.374519674523293210f, 
+  0.373808396391851370f, 0.373096898358560690f, 0.372385180841977360f, 
+  0.371673244260786630f, 
+  0.370961089033802040f, 0.370248715579966360f, 0.369536124318350760f, 
+  0.368823315668153960f, 
+  0.368110290048703050f, 0.367397047879452820f, 0.366683589579984930f, 
+  0.365969915570008910f, 
+  0.365256026269360380f, 0.364541922098002180f, 0.363827603476023610f, 
+  0.363113070823639530f, 
+  0.362398324561191310f, 0.361683365109145950f, 0.360968192888095290f, 
+  0.360252808318756830f, 
+  0.359537211821973180f, 0.358821403818710860f, 0.358105384730061760f, 
+  0.357389154977241000f, 
+  0.356672714981588260f, 0.355956065164567010f, 0.355239205947763370f, 
+  0.354522137752887430f, 
+  0.353804861001772160f, 0.353087376116372530f, 0.352369683518766630f, 
+  0.351651783631154680f, 
+  0.350933676875858360f, 0.350215363675321740f, 0.349496844452109600f, 
+  0.348778119628908420f, 
+  0.348059189628525780f, 0.347340054873889190f, 0.346620715788047320f, 
+  0.345901172794169100f, 
+  0.345181426315542610f, 0.344461476775576480f, 0.343741324597798600f, 
+  0.343020970205855540f, 
+  0.342300414023513690f, 0.341579656474657210f, 0.340858697983289440f, 
+  0.340137538973531880f, 
+  0.339416179869623410f, 0.338694621095921190f, 0.337972863076899830f, 
+  0.337250906237150650f, 
+  0.336528751001382350f, 0.335806397794420560f, 0.335083847041206580f, 
+  0.334361099166798900f, 
+  0.333638154596370920f, 0.332915013755212650f, 0.332191677068729320f, 
+  0.331468144962440920f, 
+  0.330744417861982890f, 0.330020496193105530f, 0.329296380381672800f, 
+  0.328572070853663690f, 
+  0.327847568035170960f, 0.327122872352400510f, 0.326397984231672660f, 
+  0.325672904099419900f, 
+  0.324947632382188430f, 0.324222169506637130f, 0.323496515899536760f, 
+  0.322770671987770710f, 
+  0.322044638198334620f, 0.321318414958334910f, 0.320592002694990330f, 
+  0.319865401835630610f, 
+  0.319138612807695900f, 0.318411636038737960f, 0.317684471956418020f, 
+  0.316957120988508150f, 
+  0.316229583562890490f, 0.315501860107556040f, 0.314773951050606070f, 
+  0.314045856820250820f, 
+  0.313317577844809070f, 0.312589114552708660f, 0.311860467372486130f, 
+  0.311131636732785270f, 
+  0.310402623062358880f, 0.309673426790066490f, 0.308944048344875710f, 
+  0.308214488155861220f, 
+  0.307484746652204160f, 0.306754824263192780f, 0.306024721418221900f, 
+  0.305294438546791720f, 
+  0.304563976078509050f, 0.303833334443086470f, 0.303102514070341060f, 
+  0.302371515390196130f, 
+  0.301640338832678880f, 0.300908984827921890f, 0.300177453806162120f, 
+  0.299445746197739950f, 
+  0.298713862433100390f, 0.297981802942791920f, 0.297249568157465890f, 
+  0.296517158507877410f, 
+  0.295784574424884370f, 0.295051816339446720f, 0.294318884682627570f, 
+  0.293585779885591310f, 
+  0.292852502379604810f, 0.292119052596036540f, 0.291385430966355720f, 
+  0.290651637922133220f, 
+  0.289917673895040860f, 0.289183539316850310f, 0.288449234619434170f, 
+  0.287714760234765280f, 
+  0.286980116594915570f, 0.286245304132057120f, 0.285510323278461380f, 
+  0.284775174466498300f, 
+  0.284039858128637360f, 0.283304374697445790f, 0.282568724605589740f, 
+  0.281832908285833460f, 
+  0.281096926171038320f, 0.280360778694163810f, 0.279624466288266700f, 
+  0.278887989386500280f, 
+  0.278151348422115090f, 0.277414543828458200f, 0.276677576038972420f, 
+  0.275940445487197320f, 
+  0.275203152606767370f, 0.274465697831413220f, 0.273728081594960650f, 
+  0.272990304331329980f, 
+  0.272252366474536660f, 0.271514268458690810f, 0.270776010717996010f, 
+  0.270037593686750510f, 
+  0.269299017799346230f, 0.268560283490267890f, 0.267821391194094320f, 
+  0.267082341345496350f, 
+  0.266343134379238180f, 0.265603770730176440f, 0.264864250833259320f, 
+  0.264124575123527490f, 
+  0.263384744036113390f, 0.262644758006240100f, 0.261904617469222560f, 
+  0.261164322860466590f, 
+  0.260423874615468010f, 0.259683273169813930f, 0.258942518959180580f, 
+  0.258201612419334870f, 
+  0.257460553986133210f, 0.256719344095520720f, 0.255977983183532380f, 
+  0.255236471686291820f, 
+  0.254494810040010790f, 0.253752998680989940f, 0.253011038045617980f, 
+  0.252268928570370810f, 
+  0.251526670691812780f, 0.250784264846594550f, 0.250041711471454650f, 
+  0.249299011003218300f, 
+  0.248556163878796620f, 0.247813170535187620f, 0.247070031409475370f, 
+  0.246326746938829060f, 
+  0.245583317560504000f, 0.244839743711840750f, 0.244096025830264210f, 
+  0.243352164353284880f, 
+  0.242608159718496890f, 0.241864012363579210f, 0.241119722726294730f, 
+  0.240375291244489500f, 
+  0.239630718356093560f, 0.238886004499120170f, 0.238141150111664870f, 
+  0.237396155631906550f, 
+  0.236651021498106460f, 0.235905748148607370f, 0.235160336021834860f, 
+  0.234414785556295250f, 
+  0.233669097190576820f, 0.232923271363349120f, 0.232177308513361770f, 
+  0.231431209079445730f, 
+  0.230684973500512310f, 0.229938602215552260f, 0.229192095663636740f, 
+  0.228445454283916550f, 
+  0.227698678515621170f, 0.226951768798059980f, 0.226204725570620270f, 
+  0.225457549272768540f, 
+  0.224710240344049570f, 0.223962799224085520f, 0.223215226352576960f, 
+  0.222467522169301990f, 
+  0.221719687114115240f, 0.220971721626949060f, 0.220223626147812460f, 
+  0.219475401116790340f, 
+  0.218727046974044600f, 0.217978564159812290f, 0.217229953114406790f, 
+  0.216481214278216900f, 
+  0.215732348091705940f, 0.214983354995412820f, 0.214234235429951100f, 
+  0.213484989836008080f, 
+  0.212735618654345870f, 0.211986122325800410f, 0.211236501291280710f, 
+  0.210486755991769890f, 
+  0.209736886868323370f, 0.208986894362070070f, 0.208236778914211470f, 
+  0.207486540966020700f, 
+  0.206736180958843660f, 0.205985699334098050f, 0.205235096533272380f, 
+  0.204484372997927180f, 
+  0.203733529169694010f, 0.202982565490274460f, 0.202231482401441620f, 
+  0.201480280345037820f, 
+  0.200728959762976140f, 0.199977521097239290f, 0.199225964789878890f, 
+  0.198474291283016360f, 
+  0.197722501018842030f, 0.196970594439614370f, 0.196218571987660850f, 
+  0.195466434105377090f, 
+  0.194714181235225990f, 0.193961813819739010f, 0.193209332301514080f, 
+  0.192456737123216840f, 
+  0.191704028727579940f, 0.190951207557401860f, 0.190198274055548120f, 
+  0.189445228664950340f, 
+  0.188692071828605260f, 0.187938803989575850f, 0.187185425590990440f, 
+  0.186431937076041640f, 
+  0.185678338887987790f, 0.184924631470150870f, 0.184170815265917720f, 
+  0.183416890718739230f, 
+  0.182662858272129360f, 0.181908718369666160f, 0.181154471454990920f, 
+  0.180400117971807270f, 
+  0.179645658363882100f, 0.178891093075044830f, 0.178136422549186320f, 
+  0.177381647230260200f, 
+  0.176626767562280960f, 0.175871783989325040f, 0.175116696955530060f, 
+  0.174361506905093830f, 
+  0.173606214282275410f, 0.172850819531394200f, 0.172095323096829040f, 
+  0.171339725423019260f, 
+  0.170584026954463700f, 0.169828228135719880f, 0.169072329411405180f, 
+  0.168316331226194910f, 
+  0.167560234024823590f, 0.166804038252083870f, 0.166047744352825850f, 
+  0.165291352771957970f, 
+  0.164534863954446110f, 0.163778278345312690f, 0.163021596389637810f, 
+  0.162264818532558110f, 
+  0.161507945219266150f, 0.160750976895011390f, 0.159993914005098350f, 
+  0.159236756994887850f, 
+  0.158479506309796100f, 0.157722162395293690f, 0.156964725696906750f, 
+  0.156207196660216040f, 
+  0.155449575730855880f, 0.154691863354515400f, 0.153934059976937460f, 
+  0.153176166043917870f, 
+  0.152418182001306500f, 0.151660108295005400f, 0.150901945370970040f, 
+  0.150143693675208330f, 
+  0.149385353653779810f, 0.148626925752796540f, 0.147868410418422360f, 
+  0.147109808096871850f, 
+  0.146351119234411440f, 0.145592344277358450f, 0.144833483672080240f, 
+  0.144074537864995330f, 
+  0.143315507302571590f, 0.142556392431327340f, 0.141797193697830530f, 
+  0.141037911548697770f, 
+  0.140278546430595420f, 0.139519098790238600f, 0.138759569074390380f, 
+  0.137999957729862760f, 
+  0.137240265203515700f, 0.136480491942256310f, 0.135720638393040080f, 
+  0.134960705002868830f, 
+  0.134200692218792020f, 0.133440600487905820f, 0.132680430257352130f, 
+  0.131920181974319760f, 
+  0.131159856086043410f, 0.130399453039802740f, 0.129638973282923540f, 
+  0.128878417262776660f, 
+  0.128117785426777150f, 0.127357078222385570f, 0.126596296097105960f, 
+  0.125835439498487020f, 
+  0.125074508874121300f, 0.124313504671644300f, 0.123552427338735370f, 
+  0.122791277323116900f, 
+  0.122030055072553410f, 0.121268761034852550f, 0.120507395657864240f, 
+  0.119745959389479630f, 
+  0.118984452677632520f, 0.118222875970297250f, 0.117461229715489990f, 
+  0.116699514361267840f, 
+  0.115937730355727850f, 0.115175878147008180f, 0.114413958183287050f, 
+  0.113651970912781920f, 
+  0.112889916783750470f, 0.112127796244489750f, 0.111365609743335190f, 
+  0.110603357728661910f, 
+  0.109841040648882680f, 0.109078658952449240f, 0.108316213087851300f, 
+  0.107553703503615710f, 
+  0.106791130648307380f, 0.106028494970528530f, 0.105265796918917650f, 
+  0.104503036942150550f, 
+  0.103740215488939480f, 0.102977333008032250f, 0.102214389948213370f, 
+  0.101451386758302160f, 
+  0.100688323887153970f, 0.099925201783659226f, 0.099162020896742573f, 
+  0.098398781675363881f, 
+  0.097635484568517339f, 0.096872130025230527f, 0.096108718494565468f, 
+  0.095345250425617742f, 
+  0.094581726267515473f, 0.093818146469420494f, 0.093054511480527333f, 
+  0.092290821750062355f, 
+  0.091527077727284981f, 0.090763279861485704f, 0.089999428601987341f, 
+  0.089235524398144139f, 
+  0.088471567699340822f, 0.087707558954993645f, 0.086943498614549489f, 
+  0.086179387127484922f, 
+  0.085415224943307277f, 0.084651012511553700f, 0.083886750281790226f, 
+  0.083122438703613077f, 
+  0.082358078226646619f, 0.081593669300544638f, 0.080829212374989468f, 
+  0.080064707899690932f, 
+  0.079300156324387569f, 0.078535558098845590f, 0.077770913672857989f, 
+  0.077006223496245585f, 
+  0.076241488018856149f, 0.075476707690563416f, 0.074711882961268378f, 
+  0.073947014280897269f, 
+  0.073182102099402888f, 0.072417146866763538f, 0.071652149032982254f, 
+  0.070887109048087787f, 
+  0.070122027362133646f, 0.069356904425197236f, 0.068591740687380900f, 
+  0.067826536598810966f, 
+  0.067061292609636836f, 0.066296009170032283f, 0.065530686730193397f, 
+  0.064765325740339871f, 
+  0.063999926650714078f, 0.063234489911580136f, 0.062469015973224969f, 
+  0.061703505285957416f, 
+  0.060937958300107238f, 0.060172375466026218f, 0.059406757234087247f, 
+  0.058641104054683348f, 
+  0.057875416378229017f, 0.057109694655158132f, 0.056343939335925283f, 
+  0.055578150871004817f, 
+  0.054812329710889909f, 0.054046476306093640f, 0.053280591107148056f, 
+  0.052514674564603257f, 
+  0.051748727129028414f, 0.050982749251010900f, 0.050216741381155325f, 
+  0.049450703970084824f, 
+  0.048684637468439020f, 0.047918542326875327f, 0.047152418996068000f, 
+  0.046386267926707213f, 
+  0.045620089569500123f, 0.044853884375169933f, 0.044087652794454979f, 
+  0.043321395278109784f, 
+  0.042555112276904117f, 0.041788804241622082f, 0.041022471623063397f, 
+  0.040256114872041358f, 
+  0.039489734439384118f, 0.038723330775933762f, 0.037956904332545366f, 
+  0.037190455560088091f, 
+  0.036423984909444228f, 0.035657492831508264f, 0.034890979777187955f, 
+  0.034124446197403423f, 
+  0.033357892543086159f, 0.032591319265180385f, 0.031824726814640963f, 
+  0.031058115642434700f, 
+  0.030291486199539423f, 0.029524838936943035f, 0.028758174305644590f, 
+  0.027991492756653365f, 
+  0.027224794740987910f, 0.026458080709677145f, 0.025691351113759395f, 
+  0.024924606404281485f, 
+  0.024157847032300020f, 0.023391073448879338f, 0.022624286105092803f, 
+  0.021857485452021874f, 
+  0.021090671940755180f, 0.020323846022389572f, 0.019557008148029204f, 
+  0.018790158768784596f, 
+  0.018023298335773701f, 0.017256427300120978f, 0.016489546112956454f, 
+  0.015722655225417017f, 
+  0.014955755088644378f, 0.014188846153786343f, 0.013421928871995907f, 
+  0.012655003694430301f, 
+  0.011888071072252072f, 0.011121131456628141f, 0.010354185298728884f, 
+  0.009587233049729183f, 
+  0.008820275160807512f, 0.008053312083144991f, 0.007286344267926684f, 
+  0.006519372166339549f, 
+  0.005752396229573737f, 0.004985416908821652f, 0.004218434655277024f, 
+  0.003451449920135975f, 
+  0.002684463154596083f, 0.001917474809855460f, 0.001150485337113809f, 
+  0.000383495187571497f 
+}; 
+ 
+/**
+ * @brief  Initialization function for the floating-point DCT4/IDCT4.
+ * @param[in,out] *S         points to an instance of floating-point DCT4/IDCT4 structure.
+ * @param[in]     *S_RFFT    points to an instance of floating-point RFFT/RIFFT structure.
+ * @param[in]     *S_CFFT    points to an instance of floating-point CFFT/CIFFT structure.
+ * @param[in]     N          length of the DCT4; supported values are 128, 512 and 2048.
+ * @param[in]     Nby2       half of the length of the DCT4.
+ * @param[in]     normalize  normalizing factor.
+ * @return        arm_status function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>N</code> is not a supported transform length.
+ * \par Normalizing factor:
+ * The normalizing factor is <code>sqrt(2/N)</code>, which depends on the size of transform <code>N</code>.
+ * Floating-point normalizing factors are mentioned in the table below for different DCT sizes:
+ * \image html dct4NormalizingF32Table.gif
+ * \par Unsupported lengths:
+ * When <code>N</code> is not supported, the scalar fields and the RFFT/CFFT instance pointers of
+ * <code>S</code> are still stored, the weight and cos factor table pointers are set to null, and
+ * the RFFT/RIFFT instance is not initialized.
+ */
+
+arm_status arm_dct4_init_f32(
+  arm_dct4_instance_f32 * S,
+  arm_rfft_instance_f32 * S_RFFT,
+  arm_cfft_radix4_instance_f32 * S_CFFT,
+  uint16_t N,
+  uint16_t Nby2,
+  float32_t normalize)
+{
+  /*  Initialize the default arm status */
+  arm_status status = ARM_MATH_SUCCESS;
+
+  /* Initialize the DCT4 length */
+  S->N = N;
+
+  /* Initialize the half of DCT4 length */
+  S->Nby2 = Nby2;
+
+  /* Initialize the DCT4 Normalizing factor */
+  S->normalize = normalize;
+
+  /* Initialize Real FFT Instance */
+  S->pRfft = S_RFFT;
+
+  /* Initialize Complex FFT Instance */
+  S->pCfft = S_CFFT;
+
+  /* Select the weight and cos factor tables that match the DCT4 length */
+  switch (N)
+  {
+  case 2048u:
+    S->pTwiddle = (float32_t *) Weights_2048;
+    S->pCosFactor = (float32_t *) cos_factors_2048;
+    break;
+  case 512u:
+    S->pTwiddle = (float32_t *) Weights_512;
+    S->pCosFactor = (float32_t *) cos_factors_512;
+    break;
+  case 128u:
+    S->pTwiddle = (float32_t *) Weights_128;
+    S->pCosFactor = (float32_t *) cos_factors_128;
+    break;
+  default:
+    /* Unsupported length: null the table pointers so they are never left
+     * indeterminate for a caller that ignores the returned status. */
+    S->pTwiddle = (float32_t *) 0;
+    S->pCosFactor = (float32_t *) 0;
+    status = ARM_MATH_ARGUMENT_ERROR;
+    break;
+  }
+
+  /* Initialize the RFFT/RIFFT, but never with a length rejected above */
+  if (status == ARM_MATH_SUCCESS)
+  {
+    arm_rfft_init_f32(S->pRfft, S->pCfft, S->N, 0u, 1u);
+  }
+
+  /* return the status of DCT4 Init function */
+  return (status);
+}
+ 
+/**  
+   * @} end of DCT4_IDCT4 group  
+   */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/TransformFunctions/arm_dct4_init_q15.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,1187 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:	    arm_dct4_init_q15.c  
+*  
+* Description:	Initialization function of DCT-4 & IDCT4 Q15  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+* -------------------------------------------------------------------- */ 
+ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupTransforms  
+ */ 
+ 
+/**  
+ * @addtogroup DCT4_IDCT4  
+ * @{  
+ */ 
+ 
+/*  
+* @brief  Weights Table  
+*/ 
+ 
+/**  
+* \par  
+* Weights tables are generated using the formula : <pre>weights[n] = e^(-j*n*pi/(2*N))</pre>  
+* \par  
+* C command to generate the table  
+* <pre>  
+* for(i = 0; i< N; i++)  
+* {  
+*   weights[2*i]= cos(i*c);  
+*   weights[(2*i)+1]= -sin(i * c);  
+* } </pre>  
+* \par  
+* where <code>N</code> is the Number of weights to be calculated and <code>c</code> is <code>pi/(2*N)</code>  
+* \par  
+* Converted the output to q15 format by multiplying with 2^15 and saturated if required.  
+* \par  
+* In the tables below the real and imaginary values are placed alternately, hence the  
+* array length is <code>2*N</code>.  
+*/ 
+ 
+static const q15_t WeightsQ15_128[256] = {     /* Weights for N = 128 (array length 2*N): {cos(i*c), -sin(i*c)} pairs, c = pi/(2*N) */
+  0x7fff, 0x0, 0x7ffd, 0xfe6e, 0x7ff6, 0xfcdc, 0x7fe9, 0xfb4a,     /* i = 0..3; cos(0) = 1.0 is stored saturated as 0x7fff */
+  0x7fd8, 0xf9b9, 0x7fc2, 0xf827, 0x7fa7, 0xf696, 0x7f87, 0xf505, 
+  0x7f62, 0xf375, 0x7f38, 0xf1e5, 0x7f09, 0xf055, 0x7ed5, 0xeec7, 
+  0x7e9d, 0xed38, 0x7e5f, 0xebab, 0x7e1d, 0xea1e, 0x7dd6, 0xe893, 
+  0x7d8a, 0xe708, 0x7d39, 0xe57e, 0x7ce3, 0xe3f5, 0x7c89, 0xe26d, 
+  0x7c29, 0xe0e7, 0x7bc5, 0xdf61, 0x7b5d, 0xdddd, 0x7aef, 0xdc5a, 
+  0x7a7d, 0xdad8, 0x7a05, 0xd958, 0x798a, 0xd7da, 0x7909, 0xd65d, 
+  0x7884, 0xd4e1, 0x77fa, 0xd368, 0x776c, 0xd1ef, 0x76d9, 0xd079, 
+  0x7641, 0xcf05, 0x75a5, 0xcd92, 0x7504, 0xcc22, 0x745f, 0xcab3, 
+  0x73b5, 0xc946, 0x7307, 0xc7dc, 0x7255, 0xc674, 0x719e, 0xc50e, 
+  0x70e2, 0xc3aa, 0x7023, 0xc248, 0x6f5f, 0xc0e9, 0x6e96, 0xbf8d, 
+  0x6dca, 0xbe32, 0x6cf9, 0xbcdb, 0x6c24, 0xbb86, 0x6b4a, 0xba33, 
+  0x6a6d, 0xb8e4, 0x698c, 0xb797, 0x68a6, 0xb64c, 0x67bd, 0xb505, 
+  0x66cf, 0xb3c1, 0x65dd, 0xb27f, 0x64e8, 0xb141, 0x63ef, 0xb005, 
+  0x62f2, 0xaecd, 0x61f1, 0xad97, 0x60ec, 0xac65, 0x5fe3, 0xab36, 
+  0x5ed7, 0xaa0b, 0x5dc7, 0xa8e3, 0x5cb4, 0xa7be, 0x5b9d, 0xa69c, 
+  0x5a82, 0xa57e, 0x5964, 0xa463, 0x5842, 0xa34c, 0x571d, 0xa239, 
+  0x55f5, 0xa129, 0x54ca, 0xa01d, 0x539b, 0x9f14, 0x5269, 0x9e0f, 
+  0x5133, 0x9d0e, 0x4ffb, 0x9c11, 0x4ebf, 0x9b18, 0x4d81, 0x9a23, 
+  0x4c3f, 0x9931, 0x4afb, 0x9843, 0x49b4, 0x975a, 0x4869, 0x9674, 
+  0x471c, 0x9593, 0x45cd, 0x94b6, 0x447a, 0x93dc, 0x4325, 0x9307, 
+  0x41ce, 0x9236, 0x4073, 0x916a, 0x3f17, 0x90a1, 0x3db8, 0x8fdd, 
+  0x3c56, 0x8f1e, 0x3af2, 0x8e62, 0x398c, 0x8dab, 0x3824, 0x8cf9, 
+  0x36ba, 0x8c4b, 0x354d, 0x8ba1, 0x33de, 0x8afc, 0x326e, 0x8a5b, 
+  0x30fb, 0x89bf, 0x2f87, 0x8927, 0x2e11, 0x8894, 0x2c98, 0x8806, 
+  0x2b1f, 0x877c, 0x29a3, 0x86f7, 0x2826, 0x8676, 0x26a8, 0x85fb, 
+  0x2528, 0x8583, 0x23a6, 0x8511, 0x2223, 0x84a3, 0x209f, 0x843b, 
+  0x1f19, 0x83d7, 0x1d93, 0x8377, 0x1c0b, 0x831d, 0x1a82, 0x82c7, 
+  0x18f8, 0x8276, 0x176d, 0x822a, 0x15e2, 0x81e3, 0x1455, 0x81a1, 
+  0x12c8, 0x8163, 0x1139, 0x812b, 0xfab, 0x80f7, 0xe1b, 0x80c8, 
+  0xc8b, 0x809e, 0xafb, 0x8079, 0x96a, 0x8059, 0x7d9, 0x803e, 
+  0x647, 0x8028, 0x4b6, 0x8017, 0x324, 0x800a, 0x192, 0x8003, 
+};     /* values of 0x8000 and above hold the negative -sin terms; presumably q15_t is a signed 16-bit type - TODO confirm */
+ 
+static const q15_t WeightsQ15_512[1024] = {     /* Weights for N = 512 (array length 2*N): {cos(i*c), -sin(i*c)} pairs, c = pi/(2*N) */
+  0x7fff, 0x0, 0x7fff, 0xff9c, 0x7fff, 0xff37, 0x7ffe, 0xfed3,     /* i = 0..3; cos(0) = 1.0 is stored saturated as 0x7fff */
+  0x7ffd, 0xfe6e, 0x7ffc, 0xfe0a, 0x7ffa, 0xfda5, 0x7ff8, 0xfd41, 
+  0x7ff6, 0xfcdc, 0x7ff3, 0xfc78, 0x7ff0, 0xfc13, 0x7fed, 0xfbaf, 
+  0x7fe9, 0xfb4a, 0x7fe5, 0xfae6, 0x7fe1, 0xfa81, 0x7fdd, 0xfa1d, 
+  0x7fd8, 0xf9b9, 0x7fd3, 0xf954, 0x7fce, 0xf8f0, 0x7fc8, 0xf88b, 
+  0x7fc2, 0xf827, 0x7fbc, 0xf7c3, 0x7fb5, 0xf75e, 0x7fae, 0xf6fa, 
+  0x7fa7, 0xf696, 0x7f9f, 0xf632, 0x7f97, 0xf5cd, 0x7f8f, 0xf569, 
+  0x7f87, 0xf505, 0x7f7e, 0xf4a1, 0x7f75, 0xf43d, 0x7f6b, 0xf3d9, 
+  0x7f62, 0xf375, 0x7f58, 0xf311, 0x7f4d, 0xf2ad, 0x7f43, 0xf249, 
+  0x7f38, 0xf1e5, 0x7f2d, 0xf181, 0x7f21, 0xf11d, 0x7f15, 0xf0b9, 
+  0x7f09, 0xf055, 0x7efd, 0xeff2, 0x7ef0, 0xef8e, 0x7ee3, 0xef2a, 
+  0x7ed5, 0xeec7, 0x7ec8, 0xee63, 0x7eba, 0xedff, 0x7eab, 0xed9c, 
+  0x7e9d, 0xed38, 0x7e8e, 0xecd5, 0x7e7f, 0xec72, 0x7e6f, 0xec0e, 
+  0x7e5f, 0xebab, 0x7e4f, 0xeb48, 0x7e3f, 0xeae5, 0x7e2e, 0xea81, 
+  0x7e1d, 0xea1e, 0x7e0c, 0xe9bb, 0x7dfa, 0xe958, 0x7de8, 0xe8f6, 
+  0x7dd6, 0xe893, 0x7dc3, 0xe830, 0x7db0, 0xe7cd, 0x7d9d, 0xe76a, 
+  0x7d8a, 0xe708, 0x7d76, 0xe6a5, 0x7d62, 0xe643, 0x7d4e, 0xe5e0, 
+  0x7d39, 0xe57e, 0x7d24, 0xe51c, 0x7d0f, 0xe4b9, 0x7cf9, 0xe457, 
+  0x7ce3, 0xe3f5, 0x7ccd, 0xe393, 0x7cb7, 0xe331, 0x7ca0, 0xe2cf, 
+  0x7c89, 0xe26d, 0x7c71, 0xe20b, 0x7c5a, 0xe1aa, 0x7c42, 0xe148, 
+  0x7c29, 0xe0e7, 0x7c11, 0xe085, 0x7bf8, 0xe024, 0x7bdf, 0xdfc2, 
+  0x7bc5, 0xdf61, 0x7bac, 0xdf00, 0x7b92, 0xde9f, 0x7b77, 0xde3e, 
+  0x7b5d, 0xdddd, 0x7b42, 0xdd7c, 0x7b26, 0xdd1b, 0x7b0b, 0xdcbb, 
+  0x7aef, 0xdc5a, 0x7ad3, 0xdbf9, 0x7ab6, 0xdb99, 0x7a9a, 0xdb39, 
+  0x7a7d, 0xdad8, 0x7a5f, 0xda78, 0x7a42, 0xda18, 0x7a24, 0xd9b8, 
+  0x7a05, 0xd958, 0x79e7, 0xd8f9, 0x79c8, 0xd899, 0x79a9, 0xd839, 
+  0x798a, 0xd7da, 0x796a, 0xd77a, 0x794a, 0xd71b, 0x792a, 0xd6bc, 
+  0x7909, 0xd65d, 0x78e8, 0xd5fe, 0x78c7, 0xd59f, 0x78a6, 0xd540, 
+  0x7884, 0xd4e1, 0x7862, 0xd483, 0x7840, 0xd424, 0x781d, 0xd3c6, 
+  0x77fa, 0xd368, 0x77d7, 0xd309, 0x77b4, 0xd2ab, 0x7790, 0xd24d, 
+  0x776c, 0xd1ef, 0x7747, 0xd192, 0x7723, 0xd134, 0x76fe, 0xd0d7, 
+  0x76d9, 0xd079, 0x76b3, 0xd01c, 0x768e, 0xcfbf, 0x7668, 0xcf62, 
+  0x7641, 0xcf05, 0x761b, 0xcea8, 0x75f4, 0xce4b, 0x75cc, 0xcdef, 
+  0x75a5, 0xcd92, 0x757d, 0xcd36, 0x7555, 0xccda, 0x752d, 0xcc7e, 
+  0x7504, 0xcc22, 0x74db, 0xcbc6, 0x74b2, 0xcb6a, 0x7489, 0xcb0e, 
+  0x745f, 0xcab3, 0x7435, 0xca58, 0x740b, 0xc9fc, 0x73e0, 0xc9a1, 
+  0x73b5, 0xc946, 0x738a, 0xc8ec, 0x735f, 0xc891, 0x7333, 0xc836, 
+  0x7307, 0xc7dc, 0x72db, 0xc782, 0x72af, 0xc728, 0x7282, 0xc6ce, 
+  0x7255, 0xc674, 0x7227, 0xc61a, 0x71fa, 0xc5c0, 0x71cc, 0xc567, 
+  0x719e, 0xc50e, 0x716f, 0xc4b4, 0x7141, 0xc45b, 0x7112, 0xc403, 
+  0x70e2, 0xc3aa, 0x70b3, 0xc351, 0x7083, 0xc2f9, 0x7053, 0xc2a0, 
+  0x7023, 0xc248, 0x6ff2, 0xc1f0, 0x6fc1, 0xc198, 0x6f90, 0xc141, 
+  0x6f5f, 0xc0e9, 0x6f2d, 0xc092, 0x6efb, 0xc03b, 0x6ec9, 0xbfe3, 
+  0x6e96, 0xbf8d, 0x6e63, 0xbf36, 0x6e30, 0xbedf, 0x6dfd, 0xbe89, 
+  0x6dca, 0xbe32, 0x6d96, 0xbddc, 0x6d62, 0xbd86, 0x6d2d, 0xbd30, 
+  0x6cf9, 0xbcdb, 0x6cc4, 0xbc85, 0x6c8f, 0xbc30, 0x6c59, 0xbbdb, 
+  0x6c24, 0xbb86, 0x6bee, 0xbb31, 0x6bb8, 0xbadc, 0x6b81, 0xba88, 
+  0x6b4a, 0xba33, 0x6b13, 0xb9df, 0x6adc, 0xb98b, 0x6aa5, 0xb937, 
+  0x6a6d, 0xb8e4, 0x6a35, 0xb890, 0x69fd, 0xb83d, 0x69c4, 0xb7ea, 
+  0x698c, 0xb797, 0x6953, 0xb744, 0x6919, 0xb6f1, 0x68e0, 0xb69f, 
+  0x68a6, 0xb64c, 0x686c, 0xb5fa, 0x6832, 0xb5a8, 0x67f7, 0xb557, 
+  0x67bd, 0xb505, 0x6782, 0xb4b4, 0x6746, 0xb462, 0x670b, 0xb411, 
+  0x66cf, 0xb3c1, 0x6693, 0xb370, 0x6657, 0xb31f, 0x661a, 0xb2cf, 
+  0x65dd, 0xb27f, 0x65a0, 0xb22f, 0x6563, 0xb1df, 0x6526, 0xb190, 
+  0x64e8, 0xb141, 0x64aa, 0xb0f1, 0x646c, 0xb0a2, 0x642d, 0xb054, 
+  0x63ef, 0xb005, 0x63b0, 0xafb7, 0x6371, 0xaf69, 0x6331, 0xaf1b, 
+  0x62f2, 0xaecd, 0x62b2, 0xae7f, 0x6271, 0xae32, 0x6231, 0xade4, 
+  0x61f1, 0xad97, 0x61b0, 0xad4b, 0x616f, 0xacfe, 0x612d, 0xacb2, 
+  0x60ec, 0xac65, 0x60aa, 0xac19, 0x6068, 0xabcd, 0x6026, 0xab82, 
+  0x5fe3, 0xab36, 0x5fa0, 0xaaeb, 0x5f5e, 0xaaa0, 0x5f1a, 0xaa55, 
+  0x5ed7, 0xaa0b, 0x5e93, 0xa9c0, 0x5e50, 0xa976, 0x5e0b, 0xa92c, 
+  0x5dc7, 0xa8e3, 0x5d83, 0xa899, 0x5d3e, 0xa850, 0x5cf9, 0xa807, 
+  0x5cb4, 0xa7be, 0x5c6e, 0xa775, 0x5c29, 0xa72c, 0x5be3, 0xa6e4, 
+  0x5b9d, 0xa69c, 0x5b56, 0xa654, 0x5b10, 0xa60d, 0x5ac9, 0xa5c5, 
+  0x5a82, 0xa57e, 0x5a3b, 0xa537, 0x59f3, 0xa4f0, 0x59ac, 0xa4aa, 
+  0x5964, 0xa463, 0x591c, 0xa41d, 0x58d4, 0xa3d7, 0x588b, 0xa392, 
+  0x5842, 0xa34c, 0x57f9, 0xa307, 0x57b0, 0xa2c2, 0x5767, 0xa27d, 
+  0x571d, 0xa239, 0x56d4, 0xa1f5, 0x568a, 0xa1b0, 0x5640, 0xa16d, 
+  0x55f5, 0xa129, 0x55ab, 0xa0e6, 0x5560, 0xa0a2, 0x5515, 0xa060, 
+  0x54ca, 0xa01d, 0x547e, 0x9fda, 0x5433, 0x9f98, 0x53e7, 0x9f56, 
+  0x539b, 0x9f14, 0x534e, 0x9ed3, 0x5302, 0x9e91, 0x52b5, 0x9e50, 
+  0x5269, 0x9e0f, 0x521c, 0x9dcf, 0x51ce, 0x9d8f, 0x5181, 0x9d4e, 
+  0x5133, 0x9d0e, 0x50e5, 0x9ccf, 0x5097, 0x9c8f, 0x5049, 0x9c50, 
+  0x4ffb, 0x9c11, 0x4fac, 0x9bd3, 0x4f5e, 0x9b94, 0x4f0f, 0x9b56, 
+  0x4ebf, 0x9b18, 0x4e70, 0x9ada, 0x4e21, 0x9a9d, 0x4dd1, 0x9a60, 
+  0x4d81, 0x9a23, 0x4d31, 0x99e6, 0x4ce1, 0x99a9, 0x4c90, 0x996d, 
+  0x4c3f, 0x9931, 0x4bef, 0x98f5, 0x4b9e, 0x98ba, 0x4b4c, 0x987e, 
+  0x4afb, 0x9843, 0x4aa9, 0x9809, 0x4a58, 0x97ce, 0x4a06, 0x9794, 
+  0x49b4, 0x975a, 0x4961, 0x9720, 0x490f, 0x96e7, 0x48bc, 0x96ad, 
+  0x4869, 0x9674, 0x4816, 0x963c, 0x47c3, 0x9603, 0x4770, 0x95cb, 
+  0x471c, 0x9593, 0x46c9, 0x955b, 0x4675, 0x9524, 0x4621, 0x94ed, 
+  0x45cd, 0x94b6, 0x4578, 0x947f, 0x4524, 0x9448, 0x44cf, 0x9412, 
+  0x447a, 0x93dc, 0x4425, 0x93a7, 0x43d0, 0x9371, 0x437b, 0x933c, 
+  0x4325, 0x9307, 0x42d0, 0x92d3, 0x427a, 0x929e, 0x4224, 0x926a, 
+  0x41ce, 0x9236, 0x4177, 0x9203, 0x4121, 0x91d0, 0x40ca, 0x919d, 
+  0x4073, 0x916a, 0x401d, 0x9137, 0x3fc5, 0x9105, 0x3f6e, 0x90d3, 
+  0x3f17, 0x90a1, 0x3ebf, 0x9070, 0x3e68, 0x903f, 0x3e10, 0x900e, 
+  0x3db8, 0x8fdd, 0x3d60, 0x8fad, 0x3d07, 0x8f7d, 0x3caf, 0x8f4d, 
+  0x3c56, 0x8f1e, 0x3bfd, 0x8eee, 0x3ba5, 0x8ebf, 0x3b4c, 0x8e91, 
+  0x3af2, 0x8e62, 0x3a99, 0x8e34, 0x3a40, 0x8e06, 0x39e6, 0x8dd9, 
+  0x398c, 0x8dab, 0x3932, 0x8d7e, 0x38d8, 0x8d51, 0x387e, 0x8d25, 
+  0x3824, 0x8cf9, 0x37ca, 0x8ccd, 0x376f, 0x8ca1, 0x3714, 0x8c76, 
+  0x36ba, 0x8c4b, 0x365f, 0x8c20, 0x3604, 0x8bf5, 0x35a8, 0x8bcb, 
+  0x354d, 0x8ba1, 0x34f2, 0x8b77, 0x3496, 0x8b4e, 0x343a, 0x8b25, 
+  0x33de, 0x8afc, 0x3382, 0x8ad3, 0x3326, 0x8aab, 0x32ca, 0x8a83, 
+  0x326e, 0x8a5b, 0x3211, 0x8a34, 0x31b5, 0x8a0c, 0x3158, 0x89e5, 
+  0x30fb, 0x89bf, 0x309e, 0x8998, 0x3041, 0x8972, 0x2fe4, 0x894d, 
+  0x2f87, 0x8927, 0x2f29, 0x8902, 0x2ecc, 0x88dd, 0x2e6e, 0x88b9, 
+  0x2e11, 0x8894, 0x2db3, 0x8870, 0x2d55, 0x884c, 0x2cf7, 0x8829, 
+  0x2c98, 0x8806, 0x2c3a, 0x87e3, 0x2bdc, 0x87c0, 0x2b7d, 0x879e, 
+  0x2b1f, 0x877c, 0x2ac0, 0x875a, 0x2a61, 0x8739, 0x2a02, 0x8718, 
+  0x29a3, 0x86f7, 0x2944, 0x86d6, 0x28e5, 0x86b6, 0x2886, 0x8696, 
+  0x2826, 0x8676, 0x27c7, 0x8657, 0x2767, 0x8638, 0x2707, 0x8619, 
+  0x26a8, 0x85fb, 0x2648, 0x85dc, 0x25e8, 0x85be, 0x2588, 0x85a1, 
+  0x2528, 0x8583, 0x24c7, 0x8566, 0x2467, 0x854a, 0x2407, 0x852d, 
+  0x23a6, 0x8511, 0x2345, 0x84f5, 0x22e5, 0x84da, 0x2284, 0x84be, 
+  0x2223, 0x84a3, 0x21c2, 0x8489, 0x2161, 0x846e, 0x2100, 0x8454, 
+  0x209f, 0x843b, 0x203e, 0x8421, 0x1fdc, 0x8408, 0x1f7b, 0x83ef, 
+  0x1f19, 0x83d7, 0x1eb8, 0x83be, 0x1e56, 0x83a6, 0x1df5, 0x838f, 
+  0x1d93, 0x8377, 0x1d31, 0x8360, 0x1ccf, 0x8349, 0x1c6d, 0x8333, 
+  0x1c0b, 0x831d, 0x1ba9, 0x8307, 0x1b47, 0x82f1, 0x1ae4, 0x82dc, 
+  0x1a82, 0x82c7, 0x1a20, 0x82b2, 0x19bd, 0x829e, 0x195b, 0x828a, 
+  0x18f8, 0x8276, 0x1896, 0x8263, 0x1833, 0x8250, 0x17d0, 0x823d, 
+  0x176d, 0x822a, 0x170a, 0x8218, 0x16a8, 0x8206, 0x1645, 0x81f4, 
+  0x15e2, 0x81e3, 0x157f, 0x81d2, 0x151b, 0x81c1, 0x14b8, 0x81b1, 
+  0x1455, 0x81a1, 0x13f2, 0x8191, 0x138e, 0x8181, 0x132b, 0x8172, 
+  0x12c8, 0x8163, 0x1264, 0x8155, 0x1201, 0x8146, 0x119d, 0x8138, 
+  0x1139, 0x812b, 0x10d6, 0x811d, 0x1072, 0x8110, 0x100e, 0x8103, 
+  0xfab, 0x80f7, 0xf47, 0x80eb, 0xee3, 0x80df, 0xe7f, 0x80d3, 
+  0xe1b, 0x80c8, 0xdb7, 0x80bd, 0xd53, 0x80b3, 0xcef, 0x80a8, 
+  0xc8b, 0x809e, 0xc27, 0x8095, 0xbc3, 0x808b, 0xb5f, 0x8082, 
+  0xafb, 0x8079, 0xa97, 0x8071, 0xa33, 0x8069, 0x9ce, 0x8061, 
+  0x96a, 0x8059, 0x906, 0x8052, 0x8a2, 0x804b, 0x83d, 0x8044, 
+  0x7d9, 0x803e, 0x775, 0x8038, 0x710, 0x8032, 0x6ac, 0x802d, 
+  0x647, 0x8028, 0x5e3, 0x8023, 0x57f, 0x801f, 0x51a, 0x801b, 
+  0x4b6, 0x8017, 0x451, 0x8013, 0x3ed, 0x8010, 0x388, 0x800d, 
+  0x324, 0x800a, 0x2bf, 0x8008, 0x25b, 0x8006, 0x1f6, 0x8004, 
+  0x192, 0x8003, 0x12d, 0x8002, 0xc9, 0x8001, 0x64, 0x8001, 
+};     /* values of 0x8000 and above hold the negative -sin terms; presumably q15_t is a signed 16-bit type - TODO confirm */
+ 
+static const q15_t WeightsQ15_2048[4096] = { 
+  0x7fff, 0x0, 0x7fff, 0xffe7, 0x7fff, 0xffce, 0x7fff, 0xffb5, 
+  0x7fff, 0xff9c, 0x7fff, 0xff83, 0x7fff, 0xff6a, 0x7fff, 0xff51, 
+  0x7fff, 0xff37, 0x7fff, 0xff1e, 0x7fff, 0xff05, 0x7ffe, 0xfeec, 
+  0x7ffe, 0xfed3, 0x7ffe, 0xfeba, 0x7ffe, 0xfea1, 0x7ffd, 0xfe88, 
+  0x7ffd, 0xfe6e, 0x7ffd, 0xfe55, 0x7ffc, 0xfe3c, 0x7ffc, 0xfe23, 
+  0x7ffc, 0xfe0a, 0x7ffb, 0xfdf1, 0x7ffb, 0xfdd8, 0x7ffa, 0xfdbe, 
+  0x7ffa, 0xfda5, 0x7ff9, 0xfd8c, 0x7ff9, 0xfd73, 0x7ff8, 0xfd5a, 
+  0x7ff8, 0xfd41, 0x7ff7, 0xfd28, 0x7ff7, 0xfd0f, 0x7ff6, 0xfcf5, 
+  0x7ff6, 0xfcdc, 0x7ff5, 0xfcc3, 0x7ff4, 0xfcaa, 0x7ff4, 0xfc91, 
+  0x7ff3, 0xfc78, 0x7ff2, 0xfc5f, 0x7ff2, 0xfc46, 0x7ff1, 0xfc2c, 
+  0x7ff0, 0xfc13, 0x7fef, 0xfbfa, 0x7fee, 0xfbe1, 0x7fee, 0xfbc8, 
+  0x7fed, 0xfbaf, 0x7fec, 0xfb96, 0x7feb, 0xfb7d, 0x7fea, 0xfb64, 
+  0x7fe9, 0xfb4a, 0x7fe8, 0xfb31, 0x7fe7, 0xfb18, 0x7fe6, 0xfaff, 
+  0x7fe5, 0xfae6, 0x7fe4, 0xfacd, 0x7fe3, 0xfab4, 0x7fe2, 0xfa9b, 
+  0x7fe1, 0xfa81, 0x7fe0, 0xfa68, 0x7fdf, 0xfa4f, 0x7fde, 0xfa36, 
+  0x7fdd, 0xfa1d, 0x7fdc, 0xfa04, 0x7fda, 0xf9eb, 0x7fd9, 0xf9d2, 
+  0x7fd8, 0xf9b9, 0x7fd7, 0xf9a0, 0x7fd6, 0xf986, 0x7fd4, 0xf96d, 
+  0x7fd3, 0xf954, 0x7fd2, 0xf93b, 0x7fd0, 0xf922, 0x7fcf, 0xf909, 
+  0x7fce, 0xf8f0, 0x7fcc, 0xf8d7, 0x7fcb, 0xf8be, 0x7fc9, 0xf8a5, 
+  0x7fc8, 0xf88b, 0x7fc6, 0xf872, 0x7fc5, 0xf859, 0x7fc3, 0xf840, 
+  0x7fc2, 0xf827, 0x7fc0, 0xf80e, 0x7fbf, 0xf7f5, 0x7fbd, 0xf7dc, 
+  0x7fbc, 0xf7c3, 0x7fba, 0xf7aa, 0x7fb8, 0xf791, 0x7fb7, 0xf778, 
+  0x7fb5, 0xf75e, 0x7fb3, 0xf745, 0x7fb1, 0xf72c, 0x7fb0, 0xf713, 
+  0x7fae, 0xf6fa, 0x7fac, 0xf6e1, 0x7faa, 0xf6c8, 0x7fa9, 0xf6af, 
+  0x7fa7, 0xf696, 0x7fa5, 0xf67d, 0x7fa3, 0xf664, 0x7fa1, 0xf64b, 
+  0x7f9f, 0xf632, 0x7f9d, 0xf619, 0x7f9b, 0xf600, 0x7f99, 0xf5e7, 
+  0x7f97, 0xf5cd, 0x7f95, 0xf5b4, 0x7f93, 0xf59b, 0x7f91, 0xf582, 
+  0x7f8f, 0xf569, 0x7f8d, 0xf550, 0x7f8b, 0xf537, 0x7f89, 0xf51e, 
+  0x7f87, 0xf505, 0x7f85, 0xf4ec, 0x7f82, 0xf4d3, 0x7f80, 0xf4ba, 
+  0x7f7e, 0xf4a1, 0x7f7c, 0xf488, 0x7f79, 0xf46f, 0x7f77, 0xf456, 
+  0x7f75, 0xf43d, 0x7f72, 0xf424, 0x7f70, 0xf40b, 0x7f6e, 0xf3f2, 
+  0x7f6b, 0xf3d9, 0x7f69, 0xf3c0, 0x7f67, 0xf3a7, 0x7f64, 0xf38e, 
+  0x7f62, 0xf375, 0x7f5f, 0xf35c, 0x7f5d, 0xf343, 0x7f5a, 0xf32a, 
+  0x7f58, 0xf311, 0x7f55, 0xf2f8, 0x7f53, 0xf2df, 0x7f50, 0xf2c6, 
+  0x7f4d, 0xf2ad, 0x7f4b, 0xf294, 0x7f48, 0xf27b, 0x7f45, 0xf262, 
+  0x7f43, 0xf249, 0x7f40, 0xf230, 0x7f3d, 0xf217, 0x7f3b, 0xf1fe, 
+  0x7f38, 0xf1e5, 0x7f35, 0xf1cc, 0x7f32, 0xf1b3, 0x7f2f, 0xf19a, 
+  0x7f2d, 0xf181, 0x7f2a, 0xf168, 0x7f27, 0xf14f, 0x7f24, 0xf136, 
+  0x7f21, 0xf11d, 0x7f1e, 0xf104, 0x7f1b, 0xf0eb, 0x7f18, 0xf0d2, 
+  0x7f15, 0xf0b9, 0x7f12, 0xf0a0, 0x7f0f, 0xf087, 0x7f0c, 0xf06e, 
+  0x7f09, 0xf055, 0x7f06, 0xf03c, 0x7f03, 0xf023, 0x7f00, 0xf00b, 
+  0x7efd, 0xeff2, 0x7ef9, 0xefd9, 0x7ef6, 0xefc0, 0x7ef3, 0xefa7, 
+  0x7ef0, 0xef8e, 0x7eed, 0xef75, 0x7ee9, 0xef5c, 0x7ee6, 0xef43, 
+  0x7ee3, 0xef2a, 0x7edf, 0xef11, 0x7edc, 0xeef8, 0x7ed9, 0xeedf, 
+  0x7ed5, 0xeec7, 0x7ed2, 0xeeae, 0x7ecf, 0xee95, 0x7ecb, 0xee7c, 
+  0x7ec8, 0xee63, 0x7ec4, 0xee4a, 0x7ec1, 0xee31, 0x7ebd, 0xee18, 
+  0x7eba, 0xedff, 0x7eb6, 0xede7, 0x7eb3, 0xedce, 0x7eaf, 0xedb5, 
+  0x7eab, 0xed9c, 0x7ea8, 0xed83, 0x7ea4, 0xed6a, 0x7ea1, 0xed51, 
+  0x7e9d, 0xed38, 0x7e99, 0xed20, 0x7e95, 0xed07, 0x7e92, 0xecee, 
+  0x7e8e, 0xecd5, 0x7e8a, 0xecbc, 0x7e86, 0xeca3, 0x7e83, 0xec8a, 
+  0x7e7f, 0xec72, 0x7e7b, 0xec59, 0x7e77, 0xec40, 0x7e73, 0xec27, 
+  0x7e6f, 0xec0e, 0x7e6b, 0xebf5, 0x7e67, 0xebdd, 0x7e63, 0xebc4, 
+  0x7e5f, 0xebab, 0x7e5b, 0xeb92, 0x7e57, 0xeb79, 0x7e53, 0xeb61, 
+  0x7e4f, 0xeb48, 0x7e4b, 0xeb2f, 0x7e47, 0xeb16, 0x7e43, 0xeafd, 
+  0x7e3f, 0xeae5, 0x7e3b, 0xeacc, 0x7e37, 0xeab3, 0x7e32, 0xea9a, 
+  0x7e2e, 0xea81, 0x7e2a, 0xea69, 0x7e26, 0xea50, 0x7e21, 0xea37, 
+  0x7e1d, 0xea1e, 0x7e19, 0xea06, 0x7e14, 0xe9ed, 0x7e10, 0xe9d4, 
+  0x7e0c, 0xe9bb, 0x7e07, 0xe9a3, 0x7e03, 0xe98a, 0x7dff, 0xe971, 
+  0x7dfa, 0xe958, 0x7df6, 0xe940, 0x7df1, 0xe927, 0x7ded, 0xe90e, 
+  0x7de8, 0xe8f6, 0x7de4, 0xe8dd, 0x7ddf, 0xe8c4, 0x7dda, 0xe8ab, 
+  0x7dd6, 0xe893, 0x7dd1, 0xe87a, 0x7dcd, 0xe861, 0x7dc8, 0xe849, 
+  0x7dc3, 0xe830, 0x7dbf, 0xe817, 0x7dba, 0xe7fe, 0x7db5, 0xe7e6, 
+  0x7db0, 0xe7cd, 0x7dac, 0xe7b4, 0x7da7, 0xe79c, 0x7da2, 0xe783, 
+  0x7d9d, 0xe76a, 0x7d98, 0xe752, 0x7d94, 0xe739, 0x7d8f, 0xe720, 
+  0x7d8a, 0xe708, 0x7d85, 0xe6ef, 0x7d80, 0xe6d6, 0x7d7b, 0xe6be, 
+  0x7d76, 0xe6a5, 0x7d71, 0xe68d, 0x7d6c, 0xe674, 0x7d67, 0xe65b, 
+  0x7d62, 0xe643, 0x7d5d, 0xe62a, 0x7d58, 0xe611, 0x7d53, 0xe5f9, 
+  0x7d4e, 0xe5e0, 0x7d49, 0xe5c8, 0x7d43, 0xe5af, 0x7d3e, 0xe596, 
+  0x7d39, 0xe57e, 0x7d34, 0xe565, 0x7d2f, 0xe54d, 0x7d29, 0xe534, 
+  0x7d24, 0xe51c, 0x7d1f, 0xe503, 0x7d19, 0xe4ea, 0x7d14, 0xe4d2, 
+  0x7d0f, 0xe4b9, 0x7d09, 0xe4a1, 0x7d04, 0xe488, 0x7cff, 0xe470, 
+  0x7cf9, 0xe457, 0x7cf4, 0xe43f, 0x7cee, 0xe426, 0x7ce9, 0xe40e, 
+  0x7ce3, 0xe3f5, 0x7cde, 0xe3dc, 0x7cd8, 0xe3c4, 0x7cd3, 0xe3ab, 
+  0x7ccd, 0xe393, 0x7cc8, 0xe37a, 0x7cc2, 0xe362, 0x7cbc, 0xe349, 
+  0x7cb7, 0xe331, 0x7cb1, 0xe318, 0x7cab, 0xe300, 0x7ca6, 0xe2e8, 
+  0x7ca0, 0xe2cf, 0x7c9a, 0xe2b7, 0x7c94, 0xe29e, 0x7c8f, 0xe286, 
+  0x7c89, 0xe26d, 0x7c83, 0xe255, 0x7c7d, 0xe23c, 0x7c77, 0xe224, 
+  0x7c71, 0xe20b, 0x7c6c, 0xe1f3, 0x7c66, 0xe1db, 0x7c60, 0xe1c2, 
+  0x7c5a, 0xe1aa, 0x7c54, 0xe191, 0x7c4e, 0xe179, 0x7c48, 0xe160, 
+  0x7c42, 0xe148, 0x7c3c, 0xe130, 0x7c36, 0xe117, 0x7c30, 0xe0ff, 
+  0x7c29, 0xe0e7, 0x7c23, 0xe0ce, 0x7c1d, 0xe0b6, 0x7c17, 0xe09d, 
+  0x7c11, 0xe085, 0x7c0b, 0xe06d, 0x7c05, 0xe054, 0x7bfe, 0xe03c, 
+  0x7bf8, 0xe024, 0x7bf2, 0xe00b, 0x7beb, 0xdff3, 0x7be5, 0xdfdb, 
+  0x7bdf, 0xdfc2, 0x7bd9, 0xdfaa, 0x7bd2, 0xdf92, 0x7bcc, 0xdf79, 
+  0x7bc5, 0xdf61, 0x7bbf, 0xdf49, 0x7bb9, 0xdf30, 0x7bb2, 0xdf18, 
+  0x7bac, 0xdf00, 0x7ba5, 0xdee8, 0x7b9f, 0xdecf, 0x7b98, 0xdeb7, 
+  0x7b92, 0xde9f, 0x7b8b, 0xde87, 0x7b84, 0xde6e, 0x7b7e, 0xde56, 
+  0x7b77, 0xde3e, 0x7b71, 0xde26, 0x7b6a, 0xde0d, 0x7b63, 0xddf5, 
+  0x7b5d, 0xdddd, 0x7b56, 0xddc5, 0x7b4f, 0xddac, 0x7b48, 0xdd94, 
+  0x7b42, 0xdd7c, 0x7b3b, 0xdd64, 0x7b34, 0xdd4c, 0x7b2d, 0xdd33, 
+  0x7b26, 0xdd1b, 0x7b1f, 0xdd03, 0x7b19, 0xdceb, 0x7b12, 0xdcd3, 
+  0x7b0b, 0xdcbb, 0x7b04, 0xdca2, 0x7afd, 0xdc8a, 0x7af6, 0xdc72, 
+  0x7aef, 0xdc5a, 0x7ae8, 0xdc42, 0x7ae1, 0xdc2a, 0x7ada, 0xdc12, 
+  0x7ad3, 0xdbf9, 0x7acc, 0xdbe1, 0x7ac5, 0xdbc9, 0x7abd, 0xdbb1, 
+  0x7ab6, 0xdb99, 0x7aaf, 0xdb81, 0x7aa8, 0xdb69, 0x7aa1, 0xdb51, 
+  0x7a9a, 0xdb39, 0x7a92, 0xdb21, 0x7a8b, 0xdb09, 0x7a84, 0xdaf1, 
+  0x7a7d, 0xdad8, 0x7a75, 0xdac0, 0x7a6e, 0xdaa8, 0x7a67, 0xda90, 
+  0x7a5f, 0xda78, 0x7a58, 0xda60, 0x7a50, 0xda48, 0x7a49, 0xda30, 
+  0x7a42, 0xda18, 0x7a3a, 0xda00, 0x7a33, 0xd9e8, 0x7a2b, 0xd9d0, 
+  0x7a24, 0xd9b8, 0x7a1c, 0xd9a0, 0x7a15, 0xd988, 0x7a0d, 0xd970, 
+  0x7a05, 0xd958, 0x79fe, 0xd940, 0x79f6, 0xd928, 0x79ef, 0xd911, 
+  0x79e7, 0xd8f9, 0x79df, 0xd8e1, 0x79d8, 0xd8c9, 0x79d0, 0xd8b1, 
+  0x79c8, 0xd899, 0x79c0, 0xd881, 0x79b9, 0xd869, 0x79b1, 0xd851, 
+  0x79a9, 0xd839, 0x79a1, 0xd821, 0x7999, 0xd80a, 0x7992, 0xd7f2, 
+  0x798a, 0xd7da, 0x7982, 0xd7c2, 0x797a, 0xd7aa, 0x7972, 0xd792, 
+  0x796a, 0xd77a, 0x7962, 0xd763, 0x795a, 0xd74b, 0x7952, 0xd733, 
+  0x794a, 0xd71b, 0x7942, 0xd703, 0x793a, 0xd6eb, 0x7932, 0xd6d4, 
+  0x792a, 0xd6bc, 0x7922, 0xd6a4, 0x7919, 0xd68c, 0x7911, 0xd675, 
+  0x7909, 0xd65d, 0x7901, 0xd645, 0x78f9, 0xd62d, 0x78f1, 0xd615, 
+  0x78e8, 0xd5fe, 0x78e0, 0xd5e6, 0x78d8, 0xd5ce, 0x78cf, 0xd5b7, 
+  0x78c7, 0xd59f, 0x78bf, 0xd587, 0x78b6, 0xd56f, 0x78ae, 0xd558, 
+  0x78a6, 0xd540, 0x789d, 0xd528, 0x7895, 0xd511, 0x788c, 0xd4f9, 
+  0x7884, 0xd4e1, 0x787c, 0xd4ca, 0x7873, 0xd4b2, 0x786b, 0xd49a, 
+  0x7862, 0xd483, 0x7859, 0xd46b, 0x7851, 0xd453, 0x7848, 0xd43c, 
+  0x7840, 0xd424, 0x7837, 0xd40d, 0x782e, 0xd3f5, 0x7826, 0xd3dd, 
+  0x781d, 0xd3c6, 0x7814, 0xd3ae, 0x780c, 0xd397, 0x7803, 0xd37f, 
+  0x77fa, 0xd368, 0x77f1, 0xd350, 0x77e9, 0xd338, 0x77e0, 0xd321, 
+  0x77d7, 0xd309, 0x77ce, 0xd2f2, 0x77c5, 0xd2da, 0x77bc, 0xd2c3, 
+  0x77b4, 0xd2ab, 0x77ab, 0xd294, 0x77a2, 0xd27c, 0x7799, 0xd265, 
+  0x7790, 0xd24d, 0x7787, 0xd236, 0x777e, 0xd21e, 0x7775, 0xd207, 
+  0x776c, 0xd1ef, 0x7763, 0xd1d8, 0x775a, 0xd1c1, 0x7751, 0xd1a9, 
+  0x7747, 0xd192, 0x773e, 0xd17a, 0x7735, 0xd163, 0x772c, 0xd14b, 
+  0x7723, 0xd134, 0x771a, 0xd11d, 0x7710, 0xd105, 0x7707, 0xd0ee, 
+  0x76fe, 0xd0d7, 0x76f5, 0xd0bf, 0x76eb, 0xd0a8, 0x76e2, 0xd091, 
+  0x76d9, 0xd079, 0x76cf, 0xd062, 0x76c6, 0xd04b, 0x76bd, 0xd033, 
+  0x76b3, 0xd01c, 0x76aa, 0xd005, 0x76a0, 0xcfed, 0x7697, 0xcfd6, 
+  0x768e, 0xcfbf, 0x7684, 0xcfa7, 0x767b, 0xcf90, 0x7671, 0xcf79, 
+  0x7668, 0xcf62, 0x765e, 0xcf4a, 0x7654, 0xcf33, 0x764b, 0xcf1c, 
+  0x7641, 0xcf05, 0x7638, 0xceee, 0x762e, 0xced6, 0x7624, 0xcebf, 
+  0x761b, 0xcea8, 0x7611, 0xce91, 0x7607, 0xce7a, 0x75fd, 0xce62, 
+  0x75f4, 0xce4b, 0x75ea, 0xce34, 0x75e0, 0xce1d, 0x75d6, 0xce06, 
+  0x75cc, 0xcdef, 0x75c3, 0xcdd8, 0x75b9, 0xcdc0, 0x75af, 0xcda9, 
+  0x75a5, 0xcd92, 0x759b, 0xcd7b, 0x7591, 0xcd64, 0x7587, 0xcd4d, 
+  0x757d, 0xcd36, 0x7573, 0xcd1f, 0x7569, 0xcd08, 0x755f, 0xccf1, 
+  0x7555, 0xccda, 0x754b, 0xccc3, 0x7541, 0xccac, 0x7537, 0xcc95, 
+  0x752d, 0xcc7e, 0x7523, 0xcc67, 0x7519, 0xcc50, 0x750f, 0xcc39, 
+  0x7504, 0xcc22, 0x74fa, 0xcc0b, 0x74f0, 0xcbf4, 0x74e6, 0xcbdd, 
+  0x74db, 0xcbc6, 0x74d1, 0xcbaf, 0x74c7, 0xcb98, 0x74bd, 0xcb81, 
+  0x74b2, 0xcb6a, 0x74a8, 0xcb53, 0x749e, 0xcb3c, 0x7493, 0xcb25, 
+  0x7489, 0xcb0e, 0x747e, 0xcaf8, 0x7474, 0xcae1, 0x746a, 0xcaca, 
+  0x745f, 0xcab3, 0x7455, 0xca9c, 0x744a, 0xca85, 0x7440, 0xca6e, 
+  0x7435, 0xca58, 0x742b, 0xca41, 0x7420, 0xca2a, 0x7415, 0xca13, 
+  0x740b, 0xc9fc, 0x7400, 0xc9e6, 0x73f6, 0xc9cf, 0x73eb, 0xc9b8, 
+  0x73e0, 0xc9a1, 0x73d6, 0xc98b, 0x73cb, 0xc974, 0x73c0, 0xc95d, 
+  0x73b5, 0xc946, 0x73ab, 0xc930, 0x73a0, 0xc919, 0x7395, 0xc902, 
+  0x738a, 0xc8ec, 0x737f, 0xc8d5, 0x7375, 0xc8be, 0x736a, 0xc8a8, 
+  0x735f, 0xc891, 0x7354, 0xc87a, 0x7349, 0xc864, 0x733e, 0xc84d, 
+  0x7333, 0xc836, 0x7328, 0xc820, 0x731d, 0xc809, 0x7312, 0xc7f3, 
+  0x7307, 0xc7dc, 0x72fc, 0xc7c5, 0x72f1, 0xc7af, 0x72e6, 0xc798, 
+  0x72db, 0xc782, 0x72d0, 0xc76b, 0x72c5, 0xc755, 0x72ba, 0xc73e, 
+  0x72af, 0xc728, 0x72a3, 0xc711, 0x7298, 0xc6fa, 0x728d, 0xc6e4, 
+  0x7282, 0xc6ce, 0x7276, 0xc6b7, 0x726b, 0xc6a1, 0x7260, 0xc68a, 
+  0x7255, 0xc674, 0x7249, 0xc65d, 0x723e, 0xc647, 0x7233, 0xc630, 
+  0x7227, 0xc61a, 0x721c, 0xc603, 0x7211, 0xc5ed, 0x7205, 0xc5d7, 
+  0x71fa, 0xc5c0, 0x71ee, 0xc5aa, 0x71e3, 0xc594, 0x71d7, 0xc57d, 
+  0x71cc, 0xc567, 0x71c0, 0xc551, 0x71b5, 0xc53a, 0x71a9, 0xc524, 
+  0x719e, 0xc50e, 0x7192, 0xc4f7, 0x7186, 0xc4e1, 0x717b, 0xc4cb, 
+  0x716f, 0xc4b4, 0x7164, 0xc49e, 0x7158, 0xc488, 0x714c, 0xc472, 
+  0x7141, 0xc45b, 0x7135, 0xc445, 0x7129, 0xc42f, 0x711d, 0xc419, 
+  0x7112, 0xc403, 0x7106, 0xc3ec, 0x70fa, 0xc3d6, 0x70ee, 0xc3c0, 
+  0x70e2, 0xc3aa, 0x70d6, 0xc394, 0x70cb, 0xc37d, 0x70bf, 0xc367, 
+  0x70b3, 0xc351, 0x70a7, 0xc33b, 0x709b, 0xc325, 0x708f, 0xc30f, 
+  0x7083, 0xc2f9, 0x7077, 0xc2e3, 0x706b, 0xc2cd, 0x705f, 0xc2b7, 
+  0x7053, 0xc2a0, 0x7047, 0xc28a, 0x703b, 0xc274, 0x702f, 0xc25e, 
+  0x7023, 0xc248, 0x7016, 0xc232, 0x700a, 0xc21c, 0x6ffe, 0xc206, 
+  0x6ff2, 0xc1f0, 0x6fe6, 0xc1da, 0x6fda, 0xc1c4, 0x6fcd, 0xc1ae, 
+  0x6fc1, 0xc198, 0x6fb5, 0xc183, 0x6fa9, 0xc16d, 0x6f9c, 0xc157, 
+  0x6f90, 0xc141, 0x6f84, 0xc12b, 0x6f77, 0xc115, 0x6f6b, 0xc0ff, 
+  0x6f5f, 0xc0e9, 0x6f52, 0xc0d3, 0x6f46, 0xc0bd, 0x6f39, 0xc0a8, 
+  0x6f2d, 0xc092, 0x6f20, 0xc07c, 0x6f14, 0xc066, 0x6f07, 0xc050, 
+  0x6efb, 0xc03b, 0x6eee, 0xc025, 0x6ee2, 0xc00f, 0x6ed5, 0xbff9, 
+  0x6ec9, 0xbfe3, 0x6ebc, 0xbfce, 0x6eaf, 0xbfb8, 0x6ea3, 0xbfa2, 
+  0x6e96, 0xbf8d, 0x6e89, 0xbf77, 0x6e7d, 0xbf61, 0x6e70, 0xbf4b, 
+  0x6e63, 0xbf36, 0x6e57, 0xbf20, 0x6e4a, 0xbf0a, 0x6e3d, 0xbef5, 
+  0x6e30, 0xbedf, 0x6e24, 0xbeca, 0x6e17, 0xbeb4, 0x6e0a, 0xbe9e, 
+  0x6dfd, 0xbe89, 0x6df0, 0xbe73, 0x6de3, 0xbe5e, 0x6dd6, 0xbe48, 
+  0x6dca, 0xbe32, 0x6dbd, 0xbe1d, 0x6db0, 0xbe07, 0x6da3, 0xbdf2, 
+  0x6d96, 0xbddc, 0x6d89, 0xbdc7, 0x6d7c, 0xbdb1, 0x6d6f, 0xbd9c, 
+  0x6d62, 0xbd86, 0x6d55, 0xbd71, 0x6d48, 0xbd5b, 0x6d3a, 0xbd46, 
+  0x6d2d, 0xbd30, 0x6d20, 0xbd1b, 0x6d13, 0xbd06, 0x6d06, 0xbcf0, 
+  0x6cf9, 0xbcdb, 0x6cec, 0xbcc5, 0x6cde, 0xbcb0, 0x6cd1, 0xbc9b, 
+  0x6cc4, 0xbc85, 0x6cb7, 0xbc70, 0x6ca9, 0xbc5b, 0x6c9c, 0xbc45, 
+  0x6c8f, 0xbc30, 0x6c81, 0xbc1b, 0x6c74, 0xbc05, 0x6c67, 0xbbf0, 
+  0x6c59, 0xbbdb, 0x6c4c, 0xbbc5, 0x6c3f, 0xbbb0, 0x6c31, 0xbb9b, 
+  0x6c24, 0xbb86, 0x6c16, 0xbb70, 0x6c09, 0xbb5b, 0x6bfb, 0xbb46, 
+  0x6bee, 0xbb31, 0x6be0, 0xbb1c, 0x6bd3, 0xbb06, 0x6bc5, 0xbaf1, 
+  0x6bb8, 0xbadc, 0x6baa, 0xbac7, 0x6b9c, 0xbab2, 0x6b8f, 0xba9d, 
+  0x6b81, 0xba88, 0x6b73, 0xba73, 0x6b66, 0xba5d, 0x6b58, 0xba48, 
+  0x6b4a, 0xba33, 0x6b3d, 0xba1e, 0x6b2f, 0xba09, 0x6b21, 0xb9f4, 
+  0x6b13, 0xb9df, 0x6b06, 0xb9ca, 0x6af8, 0xb9b5, 0x6aea, 0xb9a0, 
+  0x6adc, 0xb98b, 0x6ace, 0xb976, 0x6ac1, 0xb961, 0x6ab3, 0xb94c, 
+  0x6aa5, 0xb937, 0x6a97, 0xb922, 0x6a89, 0xb90d, 0x6a7b, 0xb8f8, 
+  0x6a6d, 0xb8e4, 0x6a5f, 0xb8cf, 0x6a51, 0xb8ba, 0x6a43, 0xb8a5, 
+  0x6a35, 0xb890, 0x6a27, 0xb87b, 0x6a19, 0xb866, 0x6a0b, 0xb852, 
+  0x69fd, 0xb83d, 0x69ef, 0xb828, 0x69e1, 0xb813, 0x69d3, 0xb7fe, 
+  0x69c4, 0xb7ea, 0x69b6, 0xb7d5, 0x69a8, 0xb7c0, 0x699a, 0xb7ab, 
+  0x698c, 0xb797, 0x697d, 0xb782, 0x696f, 0xb76d, 0x6961, 0xb758, 
+  0x6953, 0xb744, 0x6944, 0xb72f, 0x6936, 0xb71a, 0x6928, 0xb706, 
+  0x6919, 0xb6f1, 0x690b, 0xb6dd, 0x68fd, 0xb6c8, 0x68ee, 0xb6b3, 
+  0x68e0, 0xb69f, 0x68d1, 0xb68a, 0x68c3, 0xb676, 0x68b5, 0xb661, 
+  0x68a6, 0xb64c, 0x6898, 0xb638, 0x6889, 0xb623, 0x687b, 0xb60f, 
+  0x686c, 0xb5fa, 0x685e, 0xb5e6, 0x684f, 0xb5d1, 0x6840, 0xb5bd, 
+  0x6832, 0xb5a8, 0x6823, 0xb594, 0x6815, 0xb57f, 0x6806, 0xb56b, 
+  0x67f7, 0xb557, 0x67e9, 0xb542, 0x67da, 0xb52e, 0x67cb, 0xb519, 
+  0x67bd, 0xb505, 0x67ae, 0xb4f1, 0x679f, 0xb4dc, 0x6790, 0xb4c8, 
+  0x6782, 0xb4b4, 0x6773, 0xb49f, 0x6764, 0xb48b, 0x6755, 0xb477, 
+  0x6746, 0xb462, 0x6737, 0xb44e, 0x6729, 0xb43a, 0x671a, 0xb426, 
+  0x670b, 0xb411, 0x66fc, 0xb3fd, 0x66ed, 0xb3e9, 0x66de, 0xb3d5, 
+  0x66cf, 0xb3c1, 0x66c0, 0xb3ac, 0x66b1, 0xb398, 0x66a2, 0xb384, 
+  0x6693, 0xb370, 0x6684, 0xb35c, 0x6675, 0xb348, 0x6666, 0xb334, 
+  0x6657, 0xb31f, 0x6648, 0xb30b, 0x6639, 0xb2f7, 0x6629, 0xb2e3, 
+  0x661a, 0xb2cf, 0x660b, 0xb2bb, 0x65fc, 0xb2a7, 0x65ed, 0xb293, 
+  0x65dd, 0xb27f, 0x65ce, 0xb26b, 0x65bf, 0xb257, 0x65b0, 0xb243, 
+  0x65a0, 0xb22f, 0x6591, 0xb21b, 0x6582, 0xb207, 0x6573, 0xb1f3, 
+  0x6563, 0xb1df, 0x6554, 0xb1cc, 0x6545, 0xb1b8, 0x6535, 0xb1a4, 
+  0x6526, 0xb190, 0x6516, 0xb17c, 0x6507, 0xb168, 0x64f7, 0xb154, 
+  0x64e8, 0xb141, 0x64d9, 0xb12d, 0x64c9, 0xb119, 0x64ba, 0xb105, 
+  0x64aa, 0xb0f1, 0x649b, 0xb0de, 0x648b, 0xb0ca, 0x647b, 0xb0b6, 
+  0x646c, 0xb0a2, 0x645c, 0xb08f, 0x644d, 0xb07b, 0x643d, 0xb067, 
+  0x642d, 0xb054, 0x641e, 0xb040, 0x640e, 0xb02c, 0x63fe, 0xb019, 
+  0x63ef, 0xb005, 0x63df, 0xaff1, 0x63cf, 0xafde, 0x63c0, 0xafca, 
+  0x63b0, 0xafb7, 0x63a0, 0xafa3, 0x6390, 0xaf90, 0x6380, 0xaf7c, 
+  0x6371, 0xaf69, 0x6361, 0xaf55, 0x6351, 0xaf41, 0x6341, 0xaf2e, 
+  0x6331, 0xaf1b, 0x6321, 0xaf07, 0x6311, 0xaef4, 0x6301, 0xaee0, 
+  0x62f2, 0xaecd, 0x62e2, 0xaeb9, 0x62d2, 0xaea6, 0x62c2, 0xae92, 
+  0x62b2, 0xae7f, 0x62a2, 0xae6c, 0x6292, 0xae58, 0x6282, 0xae45, 
+  0x6271, 0xae32, 0x6261, 0xae1e, 0x6251, 0xae0b, 0x6241, 0xadf8, 
+  0x6231, 0xade4, 0x6221, 0xadd1, 0x6211, 0xadbe, 0x6201, 0xadab, 
+  0x61f1, 0xad97, 0x61e0, 0xad84, 0x61d0, 0xad71, 0x61c0, 0xad5e, 
+  0x61b0, 0xad4b, 0x619f, 0xad37, 0x618f, 0xad24, 0x617f, 0xad11, 
+  0x616f, 0xacfe, 0x615e, 0xaceb, 0x614e, 0xacd8, 0x613e, 0xacc5, 
+  0x612d, 0xacb2, 0x611d, 0xac9e, 0x610d, 0xac8b, 0x60fc, 0xac78, 
+  0x60ec, 0xac65, 0x60db, 0xac52, 0x60cb, 0xac3f, 0x60ba, 0xac2c, 
+  0x60aa, 0xac19, 0x6099, 0xac06, 0x6089, 0xabf3, 0x6078, 0xabe0, 
+  0x6068, 0xabcd, 0x6057, 0xabbb, 0x6047, 0xaba8, 0x6036, 0xab95, 
+  0x6026, 0xab82, 0x6015, 0xab6f, 0x6004, 0xab5c, 0x5ff4, 0xab49, 
+  0x5fe3, 0xab36, 0x5fd3, 0xab24, 0x5fc2, 0xab11, 0x5fb1, 0xaafe, 
+  0x5fa0, 0xaaeb, 0x5f90, 0xaad8, 0x5f7f, 0xaac6, 0x5f6e, 0xaab3, 
+  0x5f5e, 0xaaa0, 0x5f4d, 0xaa8e, 0x5f3c, 0xaa7b, 0x5f2b, 0xaa68, 
+  0x5f1a, 0xaa55, 0x5f0a, 0xaa43, 0x5ef9, 0xaa30, 0x5ee8, 0xaa1d, 
+  0x5ed7, 0xaa0b, 0x5ec6, 0xa9f8, 0x5eb5, 0xa9e6, 0x5ea4, 0xa9d3, 
+  0x5e93, 0xa9c0, 0x5e82, 0xa9ae, 0x5e71, 0xa99b, 0x5e60, 0xa989, 
+  0x5e50, 0xa976, 0x5e3f, 0xa964, 0x5e2d, 0xa951, 0x5e1c, 0xa93f, 
+  0x5e0b, 0xa92c, 0x5dfa, 0xa91a, 0x5de9, 0xa907, 0x5dd8, 0xa8f5, 
+  0x5dc7, 0xa8e3, 0x5db6, 0xa8d0, 0x5da5, 0xa8be, 0x5d94, 0xa8ab, 
+  0x5d83, 0xa899, 0x5d71, 0xa887, 0x5d60, 0xa874, 0x5d4f, 0xa862, 
+  0x5d3e, 0xa850, 0x5d2d, 0xa83d, 0x5d1b, 0xa82b, 0x5d0a, 0xa819, 
+  0x5cf9, 0xa807, 0x5ce8, 0xa7f4, 0x5cd6, 0xa7e2, 0x5cc5, 0xa7d0, 
+  0x5cb4, 0xa7be, 0x5ca2, 0xa7ab, 0x5c91, 0xa799, 0x5c80, 0xa787, 
+  0x5c6e, 0xa775, 0x5c5d, 0xa763, 0x5c4b, 0xa751, 0x5c3a, 0xa73f, 
+  0x5c29, 0xa72c, 0x5c17, 0xa71a, 0x5c06, 0xa708, 0x5bf4, 0xa6f6, 
+  0x5be3, 0xa6e4, 0x5bd1, 0xa6d2, 0x5bc0, 0xa6c0, 0x5bae, 0xa6ae, 
+  0x5b9d, 0xa69c, 0x5b8b, 0xa68a, 0x5b79, 0xa678, 0x5b68, 0xa666, 
+  0x5b56, 0xa654, 0x5b45, 0xa642, 0x5b33, 0xa630, 0x5b21, 0xa61f, 
+  0x5b10, 0xa60d, 0x5afe, 0xa5fb, 0x5aec, 0xa5e9, 0x5adb, 0xa5d7, 
+  0x5ac9, 0xa5c5, 0x5ab7, 0xa5b3, 0x5aa5, 0xa5a2, 0x5a94, 0xa590, 
+  0x5a82, 0xa57e, 0x5a70, 0xa56c, 0x5a5e, 0xa55b, 0x5a4d, 0xa549, 
+  0x5a3b, 0xa537, 0x5a29, 0xa525, 0x5a17, 0xa514, 0x5a05, 0xa502, 
+  0x59f3, 0xa4f0, 0x59e1, 0xa4df, 0x59d0, 0xa4cd, 0x59be, 0xa4bb, 
+  0x59ac, 0xa4aa, 0x599a, 0xa498, 0x5988, 0xa487, 0x5976, 0xa475, 
+  0x5964, 0xa463, 0x5952, 0xa452, 0x5940, 0xa440, 0x592e, 0xa42f, 
+  0x591c, 0xa41d, 0x590a, 0xa40c, 0x58f8, 0xa3fa, 0x58e6, 0xa3e9, 
+  0x58d4, 0xa3d7, 0x58c1, 0xa3c6, 0x58af, 0xa3b5, 0x589d, 0xa3a3, 
+  0x588b, 0xa392, 0x5879, 0xa380, 0x5867, 0xa36f, 0x5855, 0xa35e, 
+  0x5842, 0xa34c, 0x5830, 0xa33b, 0x581e, 0xa32a, 0x580c, 0xa318, 
+  0x57f9, 0xa307, 0x57e7, 0xa2f6, 0x57d5, 0xa2e5, 0x57c3, 0xa2d3, 
+  0x57b0, 0xa2c2, 0x579e, 0xa2b1, 0x578c, 0xa2a0, 0x5779, 0xa28f, 
+  0x5767, 0xa27d, 0x5755, 0xa26c, 0x5742, 0xa25b, 0x5730, 0xa24a, 
+  0x571d, 0xa239, 0x570b, 0xa228, 0x56f9, 0xa217, 0x56e6, 0xa206, 
+  0x56d4, 0xa1f5, 0x56c1, 0xa1e4, 0x56af, 0xa1d3, 0x569c, 0xa1c1, 
+  0x568a, 0xa1b0, 0x5677, 0xa1a0, 0x5665, 0xa18f, 0x5652, 0xa17e, 
+  0x5640, 0xa16d, 0x562d, 0xa15c, 0x561a, 0xa14b, 0x5608, 0xa13a, 
+  0x55f5, 0xa129, 0x55e3, 0xa118, 0x55d0, 0xa107, 0x55bd, 0xa0f6, 
+  0x55ab, 0xa0e6, 0x5598, 0xa0d5, 0x5585, 0xa0c4, 0x5572, 0xa0b3, 
+  0x5560, 0xa0a2, 0x554d, 0xa092, 0x553a, 0xa081, 0x5528, 0xa070, 
+  0x5515, 0xa060, 0x5502, 0xa04f, 0x54ef, 0xa03e, 0x54dc, 0xa02d, 
+  0x54ca, 0xa01d, 0x54b7, 0xa00c, 0x54a4, 0x9ffc, 0x5491, 0x9feb, 
+  0x547e, 0x9fda, 0x546b, 0x9fca, 0x5458, 0x9fb9, 0x5445, 0x9fa9, 
+  0x5433, 0x9f98, 0x5420, 0x9f88, 0x540d, 0x9f77, 0x53fa, 0x9f67, 
+  0x53e7, 0x9f56, 0x53d4, 0x9f46, 0x53c1, 0x9f35, 0x53ae, 0x9f25, 
+  0x539b, 0x9f14, 0x5388, 0x9f04, 0x5375, 0x9ef3, 0x5362, 0x9ee3, 
+  0x534e, 0x9ed3, 0x533b, 0x9ec2, 0x5328, 0x9eb2, 0x5315, 0x9ea2, 
+  0x5302, 0x9e91, 0x52ef, 0x9e81, 0x52dc, 0x9e71, 0x52c9, 0x9e61, 
+  0x52b5, 0x9e50, 0x52a2, 0x9e40, 0x528f, 0x9e30, 0x527c, 0x9e20, 
+  0x5269, 0x9e0f, 0x5255, 0x9dff, 0x5242, 0x9def, 0x522f, 0x9ddf, 
+  0x521c, 0x9dcf, 0x5208, 0x9dbf, 0x51f5, 0x9daf, 0x51e2, 0x9d9f, 
+  0x51ce, 0x9d8f, 0x51bb, 0x9d7e, 0x51a8, 0x9d6e, 0x5194, 0x9d5e, 
+  0x5181, 0x9d4e, 0x516e, 0x9d3e, 0x515a, 0x9d2e, 0x5147, 0x9d1e, 
+  0x5133, 0x9d0e, 0x5120, 0x9cff, 0x510c, 0x9cef, 0x50f9, 0x9cdf, 
+  0x50e5, 0x9ccf, 0x50d2, 0x9cbf, 0x50bf, 0x9caf, 0x50ab, 0x9c9f, 
+  0x5097, 0x9c8f, 0x5084, 0x9c80, 0x5070, 0x9c70, 0x505d, 0x9c60, 
+  0x5049, 0x9c50, 0x5036, 0x9c40, 0x5022, 0x9c31, 0x500f, 0x9c21, 
+  0x4ffb, 0x9c11, 0x4fe7, 0x9c02, 0x4fd4, 0x9bf2, 0x4fc0, 0x9be2, 
+  0x4fac, 0x9bd3, 0x4f99, 0x9bc3, 0x4f85, 0x9bb3, 0x4f71, 0x9ba4, 
+  0x4f5e, 0x9b94, 0x4f4a, 0x9b85, 0x4f36, 0x9b75, 0x4f22, 0x9b65, 
+  0x4f0f, 0x9b56, 0x4efb, 0x9b46, 0x4ee7, 0x9b37, 0x4ed3, 0x9b27, 
+  0x4ebf, 0x9b18, 0x4eac, 0x9b09, 0x4e98, 0x9af9, 0x4e84, 0x9aea, 
+  0x4e70, 0x9ada, 0x4e5c, 0x9acb, 0x4e48, 0x9abb, 0x4e34, 0x9aac, 
+  0x4e21, 0x9a9d, 0x4e0d, 0x9a8d, 0x4df9, 0x9a7e, 0x4de5, 0x9a6f, 
+  0x4dd1, 0x9a60, 0x4dbd, 0x9a50, 0x4da9, 0x9a41, 0x4d95, 0x9a32, 
+  0x4d81, 0x9a23, 0x4d6d, 0x9a13, 0x4d59, 0x9a04, 0x4d45, 0x99f5, 
+  0x4d31, 0x99e6, 0x4d1d, 0x99d7, 0x4d09, 0x99c7, 0x4cf5, 0x99b8, 
+  0x4ce1, 0x99a9, 0x4ccc, 0x999a, 0x4cb8, 0x998b, 0x4ca4, 0x997c, 
+  0x4c90, 0x996d, 0x4c7c, 0x995e, 0x4c68, 0x994f, 0x4c54, 0x9940, 
+  0x4c3f, 0x9931, 0x4c2b, 0x9922, 0x4c17, 0x9913, 0x4c03, 0x9904, 
+  0x4bef, 0x98f5, 0x4bda, 0x98e6, 0x4bc6, 0x98d7, 0x4bb2, 0x98c9, 
+  0x4b9e, 0x98ba, 0x4b89, 0x98ab, 0x4b75, 0x989c, 0x4b61, 0x988d, 
+  0x4b4c, 0x987e, 0x4b38, 0x9870, 0x4b24, 0x9861, 0x4b0f, 0x9852, 
+  0x4afb, 0x9843, 0x4ae7, 0x9835, 0x4ad2, 0x9826, 0x4abe, 0x9817, 
+  0x4aa9, 0x9809, 0x4a95, 0x97fa, 0x4a81, 0x97eb, 0x4a6c, 0x97dd, 
+  0x4a58, 0x97ce, 0x4a43, 0x97c0, 0x4a2f, 0x97b1, 0x4a1a, 0x97a2, 
+  0x4a06, 0x9794, 0x49f1, 0x9785, 0x49dd, 0x9777, 0x49c8, 0x9768, 
+  0x49b4, 0x975a, 0x499f, 0x974b, 0x498a, 0x973d, 0x4976, 0x972f, 
+  0x4961, 0x9720, 0x494d, 0x9712, 0x4938, 0x9703, 0x4923, 0x96f5, 
+  0x490f, 0x96e7, 0x48fa, 0x96d8, 0x48e6, 0x96ca, 0x48d1, 0x96bc, 
+  0x48bc, 0x96ad, 0x48a8, 0x969f, 0x4893, 0x9691, 0x487e, 0x9683, 
+  0x4869, 0x9674, 0x4855, 0x9666, 0x4840, 0x9658, 0x482b, 0x964a, 
+  0x4816, 0x963c, 0x4802, 0x962d, 0x47ed, 0x961f, 0x47d8, 0x9611, 
+  0x47c3, 0x9603, 0x47ae, 0x95f5, 0x479a, 0x95e7, 0x4785, 0x95d9, 
+  0x4770, 0x95cb, 0x475b, 0x95bd, 0x4746, 0x95af, 0x4731, 0x95a1, 
+  0x471c, 0x9593, 0x4708, 0x9585, 0x46f3, 0x9577, 0x46de, 0x9569, 
+  0x46c9, 0x955b, 0x46b4, 0x954d, 0x469f, 0x953f, 0x468a, 0x9532, 
+  0x4675, 0x9524, 0x4660, 0x9516, 0x464b, 0x9508, 0x4636, 0x94fa, 
+  0x4621, 0x94ed, 0x460c, 0x94df, 0x45f7, 0x94d1, 0x45e2, 0x94c3, 
+  0x45cd, 0x94b6, 0x45b8, 0x94a8, 0x45a3, 0x949a, 0x458d, 0x948d, 
+  0x4578, 0x947f, 0x4563, 0x9471, 0x454e, 0x9464, 0x4539, 0x9456, 
+  0x4524, 0x9448, 0x450f, 0x943b, 0x44fa, 0x942d, 0x44e4, 0x9420, 
+  0x44cf, 0x9412, 0x44ba, 0x9405, 0x44a5, 0x93f7, 0x4490, 0x93ea, 
+  0x447a, 0x93dc, 0x4465, 0x93cf, 0x4450, 0x93c1, 0x443b, 0x93b4, 
+  0x4425, 0x93a7, 0x4410, 0x9399, 0x43fb, 0x938c, 0x43e5, 0x937f, 
+  0x43d0, 0x9371, 0x43bb, 0x9364, 0x43a5, 0x9357, 0x4390, 0x9349, 
+  0x437b, 0x933c, 0x4365, 0x932f, 0x4350, 0x9322, 0x433b, 0x9314, 
+  0x4325, 0x9307, 0x4310, 0x92fa, 0x42fa, 0x92ed, 0x42e5, 0x92e0, 
+  0x42d0, 0x92d3, 0x42ba, 0x92c6, 0x42a5, 0x92b8, 0x428f, 0x92ab, 
+  0x427a, 0x929e, 0x4264, 0x9291, 0x424f, 0x9284, 0x4239, 0x9277, 
+  0x4224, 0x926a, 0x420e, 0x925d, 0x41f9, 0x9250, 0x41e3, 0x9243, 
+  0x41ce, 0x9236, 0x41b8, 0x922a, 0x41a2, 0x921d, 0x418d, 0x9210, 
+  0x4177, 0x9203, 0x4162, 0x91f6, 0x414c, 0x91e9, 0x4136, 0x91dc, 
+  0x4121, 0x91d0, 0x410b, 0x91c3, 0x40f6, 0x91b6, 0x40e0, 0x91a9, 
+  0x40ca, 0x919d, 0x40b5, 0x9190, 0x409f, 0x9183, 0x4089, 0x9177, 
+  0x4073, 0x916a, 0x405e, 0x915d, 0x4048, 0x9151, 0x4032, 0x9144, 
+  0x401d, 0x9137, 0x4007, 0x912b, 0x3ff1, 0x911e, 0x3fdb, 0x9112, 
+  0x3fc5, 0x9105, 0x3fb0, 0x90f9, 0x3f9a, 0x90ec, 0x3f84, 0x90e0, 
+  0x3f6e, 0x90d3, 0x3f58, 0x90c7, 0x3f43, 0x90ba, 0x3f2d, 0x90ae, 
+  0x3f17, 0x90a1, 0x3f01, 0x9095, 0x3eeb, 0x9089, 0x3ed5, 0x907c, 
+  0x3ebf, 0x9070, 0x3ea9, 0x9064, 0x3e93, 0x9057, 0x3e7d, 0x904b, 
+  0x3e68, 0x903f, 0x3e52, 0x9033, 0x3e3c, 0x9026, 0x3e26, 0x901a, 
+  0x3e10, 0x900e, 0x3dfa, 0x9002, 0x3de4, 0x8ff6, 0x3dce, 0x8fea, 
+  0x3db8, 0x8fdd, 0x3da2, 0x8fd1, 0x3d8c, 0x8fc5, 0x3d76, 0x8fb9, 
+  0x3d60, 0x8fad, 0x3d49, 0x8fa1, 0x3d33, 0x8f95, 0x3d1d, 0x8f89, 
+  0x3d07, 0x8f7d, 0x3cf1, 0x8f71, 0x3cdb, 0x8f65, 0x3cc5, 0x8f59, 
+  0x3caf, 0x8f4d, 0x3c99, 0x8f41, 0x3c83, 0x8f35, 0x3c6c, 0x8f2a, 
+  0x3c56, 0x8f1e, 0x3c40, 0x8f12, 0x3c2a, 0x8f06, 0x3c14, 0x8efa, 
+  0x3bfd, 0x8eee, 0x3be7, 0x8ee3, 0x3bd1, 0x8ed7, 0x3bbb, 0x8ecb, 
+  0x3ba5, 0x8ebf, 0x3b8e, 0x8eb4, 0x3b78, 0x8ea8, 0x3b62, 0x8e9c, 
+  0x3b4c, 0x8e91, 0x3b35, 0x8e85, 0x3b1f, 0x8e7a, 0x3b09, 0x8e6e, 
+  0x3af2, 0x8e62, 0x3adc, 0x8e57, 0x3ac6, 0x8e4b, 0x3aaf, 0x8e40, 
+  0x3a99, 0x8e34, 0x3a83, 0x8e29, 0x3a6c, 0x8e1d, 0x3a56, 0x8e12, 
+  0x3a40, 0x8e06, 0x3a29, 0x8dfb, 0x3a13, 0x8def, 0x39fd, 0x8de4, 
+  0x39e6, 0x8dd9, 0x39d0, 0x8dcd, 0x39b9, 0x8dc2, 0x39a3, 0x8db7, 
+  0x398c, 0x8dab, 0x3976, 0x8da0, 0x395f, 0x8d95, 0x3949, 0x8d8a, 
+  0x3932, 0x8d7e, 0x391c, 0x8d73, 0x3906, 0x8d68, 0x38ef, 0x8d5d, 
+  0x38d8, 0x8d51, 0x38c2, 0x8d46, 0x38ab, 0x8d3b, 0x3895, 0x8d30, 
+  0x387e, 0x8d25, 0x3868, 0x8d1a, 0x3851, 0x8d0f, 0x383b, 0x8d04, 
+  0x3824, 0x8cf9, 0x380d, 0x8cee, 0x37f7, 0x8ce3, 0x37e0, 0x8cd8, 
+  0x37ca, 0x8ccd, 0x37b3, 0x8cc2, 0x379c, 0x8cb7, 0x3786, 0x8cac, 
+  0x376f, 0x8ca1, 0x3758, 0x8c96, 0x3742, 0x8c8b, 0x372b, 0x8c81, 
+  0x3714, 0x8c76, 0x36fe, 0x8c6b, 0x36e7, 0x8c60, 0x36d0, 0x8c55, 
+  0x36ba, 0x8c4b, 0x36a3, 0x8c40, 0x368c, 0x8c35, 0x3675, 0x8c2a, 
+  0x365f, 0x8c20, 0x3648, 0x8c15, 0x3631, 0x8c0a, 0x361a, 0x8c00, 
+  0x3604, 0x8bf5, 0x35ed, 0x8beb, 0x35d6, 0x8be0, 0x35bf, 0x8bd5, 
+  0x35a8, 0x8bcb, 0x3592, 0x8bc0, 0x357b, 0x8bb6, 0x3564, 0x8bab, 
+  0x354d, 0x8ba1, 0x3536, 0x8b96, 0x351f, 0x8b8c, 0x3508, 0x8b82, 
+  0x34f2, 0x8b77, 0x34db, 0x8b6d, 0x34c4, 0x8b62, 0x34ad, 0x8b58, 
+  0x3496, 0x8b4e, 0x347f, 0x8b43, 0x3468, 0x8b39, 0x3451, 0x8b2f, 
+  0x343a, 0x8b25, 0x3423, 0x8b1a, 0x340c, 0x8b10, 0x33f5, 0x8b06, 
+  0x33de, 0x8afc, 0x33c7, 0x8af1, 0x33b0, 0x8ae7, 0x3399, 0x8add, 
+  0x3382, 0x8ad3, 0x336b, 0x8ac9, 0x3354, 0x8abf, 0x333d, 0x8ab5, 
+  0x3326, 0x8aab, 0x330f, 0x8aa1, 0x32f8, 0x8a97, 0x32e1, 0x8a8d, 
+  0x32ca, 0x8a83, 0x32b3, 0x8a79, 0x329c, 0x8a6f, 0x3285, 0x8a65, 
+  0x326e, 0x8a5b, 0x3257, 0x8a51, 0x3240, 0x8a47, 0x3228, 0x8a3d, 
+  0x3211, 0x8a34, 0x31fa, 0x8a2a, 0x31e3, 0x8a20, 0x31cc, 0x8a16, 
+  0x31b5, 0x8a0c, 0x319e, 0x8a03, 0x3186, 0x89f9, 0x316f, 0x89ef, 
+  0x3158, 0x89e5, 0x3141, 0x89dc, 0x312a, 0x89d2, 0x3112, 0x89c8, 
+  0x30fb, 0x89bf, 0x30e4, 0x89b5, 0x30cd, 0x89ac, 0x30b6, 0x89a2, 
+  0x309e, 0x8998, 0x3087, 0x898f, 0x3070, 0x8985, 0x3059, 0x897c, 
+  0x3041, 0x8972, 0x302a, 0x8969, 0x3013, 0x8960, 0x2ffb, 0x8956, 
+  0x2fe4, 0x894d, 0x2fcd, 0x8943, 0x2fb5, 0x893a, 0x2f9e, 0x8931, 
+  0x2f87, 0x8927, 0x2f6f, 0x891e, 0x2f58, 0x8915, 0x2f41, 0x890b, 
+  0x2f29, 0x8902, 0x2f12, 0x88f9, 0x2efb, 0x88f0, 0x2ee3, 0x88e6, 
+  0x2ecc, 0x88dd, 0x2eb5, 0x88d4, 0x2e9d, 0x88cb, 0x2e86, 0x88c2, 
+  0x2e6e, 0x88b9, 0x2e57, 0x88af, 0x2e3f, 0x88a6, 0x2e28, 0x889d, 
+  0x2e11, 0x8894, 0x2df9, 0x888b, 0x2de2, 0x8882, 0x2dca, 0x8879, 
+  0x2db3, 0x8870, 0x2d9b, 0x8867, 0x2d84, 0x885e, 0x2d6c, 0x8855, 
+  0x2d55, 0x884c, 0x2d3d, 0x8844, 0x2d26, 0x883b, 0x2d0e, 0x8832, 
+  0x2cf7, 0x8829, 0x2cdf, 0x8820, 0x2cc8, 0x8817, 0x2cb0, 0x880f, 
+  0x2c98, 0x8806, 0x2c81, 0x87fd, 0x2c69, 0x87f4, 0x2c52, 0x87ec, 
+  0x2c3a, 0x87e3, 0x2c23, 0x87da, 0x2c0b, 0x87d2, 0x2bf3, 0x87c9, 
+  0x2bdc, 0x87c0, 0x2bc4, 0x87b8, 0x2bad, 0x87af, 0x2b95, 0x87a7, 
+  0x2b7d, 0x879e, 0x2b66, 0x8795, 0x2b4e, 0x878d, 0x2b36, 0x8784, 
+  0x2b1f, 0x877c, 0x2b07, 0x8774, 0x2aef, 0x876b, 0x2ad8, 0x8763, 
+  0x2ac0, 0x875a, 0x2aa8, 0x8752, 0x2a91, 0x874a, 0x2a79, 0x8741, 
+  0x2a61, 0x8739, 0x2a49, 0x8731, 0x2a32, 0x8728, 0x2a1a, 0x8720, 
+  0x2a02, 0x8718, 0x29eb, 0x870f, 0x29d3, 0x8707, 0x29bb, 0x86ff, 
+  0x29a3, 0x86f7, 0x298b, 0x86ef, 0x2974, 0x86e7, 0x295c, 0x86de, 
+  0x2944, 0x86d6, 0x292c, 0x86ce, 0x2915, 0x86c6, 0x28fd, 0x86be, 
+  0x28e5, 0x86b6, 0x28cd, 0x86ae, 0x28b5, 0x86a6, 0x289d, 0x869e, 
+  0x2886, 0x8696, 0x286e, 0x868e, 0x2856, 0x8686, 0x283e, 0x867e, 
+  0x2826, 0x8676, 0x280e, 0x866e, 0x27f6, 0x8667, 0x27df, 0x865f, 
+  0x27c7, 0x8657, 0x27af, 0x864f, 0x2797, 0x8647, 0x277f, 0x8640, 
+  0x2767, 0x8638, 0x274f, 0x8630, 0x2737, 0x8628, 0x271f, 0x8621, 
+  0x2707, 0x8619, 0x26ef, 0x8611, 0x26d8, 0x860a, 0x26c0, 0x8602, 
+  0x26a8, 0x85fb, 0x2690, 0x85f3, 0x2678, 0x85eb, 0x2660, 0x85e4, 
+  0x2648, 0x85dc, 0x2630, 0x85d5, 0x2618, 0x85cd, 0x2600, 0x85c6, 
+  0x25e8, 0x85be, 0x25d0, 0x85b7, 0x25b8, 0x85b0, 0x25a0, 0x85a8, 
+  0x2588, 0x85a1, 0x2570, 0x8599, 0x2558, 0x8592, 0x2540, 0x858b, 
+  0x2528, 0x8583, 0x250f, 0x857c, 0x24f7, 0x8575, 0x24df, 0x856e, 
+  0x24c7, 0x8566, 0x24af, 0x855f, 0x2497, 0x8558, 0x247f, 0x8551, 
+  0x2467, 0x854a, 0x244f, 0x8543, 0x2437, 0x853b, 0x241f, 0x8534, 
+  0x2407, 0x852d, 0x23ee, 0x8526, 0x23d6, 0x851f, 0x23be, 0x8518, 
+  0x23a6, 0x8511, 0x238e, 0x850a, 0x2376, 0x8503, 0x235e, 0x84fc, 
+  0x2345, 0x84f5, 0x232d, 0x84ee, 0x2315, 0x84e7, 0x22fd, 0x84e1, 
+  0x22e5, 0x84da, 0x22cd, 0x84d3, 0x22b4, 0x84cc, 0x229c, 0x84c5, 
+  0x2284, 0x84be, 0x226c, 0x84b8, 0x2254, 0x84b1, 0x223b, 0x84aa, 
+  0x2223, 0x84a3, 0x220b, 0x849d, 0x21f3, 0x8496, 0x21da, 0x848f, 
+  0x21c2, 0x8489, 0x21aa, 0x8482, 0x2192, 0x847c, 0x2179, 0x8475, 
+  0x2161, 0x846e, 0x2149, 0x8468, 0x2131, 0x8461, 0x2118, 0x845b, 
+  0x2100, 0x8454, 0x20e8, 0x844e, 0x20d0, 0x8447, 0x20b7, 0x8441, 
+  0x209f, 0x843b, 0x2087, 0x8434, 0x206e, 0x842e, 0x2056, 0x8427, 
+  0x203e, 0x8421, 0x2025, 0x841b, 0x200d, 0x8415, 0x1ff5, 0x840e, 
+  0x1fdc, 0x8408, 0x1fc4, 0x8402, 0x1fac, 0x83fb, 0x1f93, 0x83f5, 
+  0x1f7b, 0x83ef, 0x1f63, 0x83e9, 0x1f4a, 0x83e3, 0x1f32, 0x83dd, 
+  0x1f19, 0x83d7, 0x1f01, 0x83d0, 0x1ee9, 0x83ca, 0x1ed0, 0x83c4, 
+  0x1eb8, 0x83be, 0x1ea0, 0x83b8, 0x1e87, 0x83b2, 0x1e6f, 0x83ac, 
+  0x1e56, 0x83a6, 0x1e3e, 0x83a0, 0x1e25, 0x839a, 0x1e0d, 0x8394, 
+  0x1df5, 0x838f, 0x1ddc, 0x8389, 0x1dc4, 0x8383, 0x1dab, 0x837d, 
+  0x1d93, 0x8377, 0x1d7a, 0x8371, 0x1d62, 0x836c, 0x1d49, 0x8366, 
+  0x1d31, 0x8360, 0x1d18, 0x835a, 0x1d00, 0x8355, 0x1ce8, 0x834f, 
+  0x1ccf, 0x8349, 0x1cb7, 0x8344, 0x1c9e, 0x833e, 0x1c86, 0x8338, 
+  0x1c6d, 0x8333, 0x1c55, 0x832d, 0x1c3c, 0x8328, 0x1c24, 0x8322, 
+  0x1c0b, 0x831d, 0x1bf2, 0x8317, 0x1bda, 0x8312, 0x1bc1, 0x830c, 
+  0x1ba9, 0x8307, 0x1b90, 0x8301, 0x1b78, 0x82fc, 0x1b5f, 0x82f7, 
+  0x1b47, 0x82f1, 0x1b2e, 0x82ec, 0x1b16, 0x82e7, 0x1afd, 0x82e1, 
+  0x1ae4, 0x82dc, 0x1acc, 0x82d7, 0x1ab3, 0x82d1, 0x1a9b, 0x82cc, 
+  0x1a82, 0x82c7, 0x1a6a, 0x82c2, 0x1a51, 0x82bd, 0x1a38, 0x82b7, 
+  0x1a20, 0x82b2, 0x1a07, 0x82ad, 0x19ef, 0x82a8, 0x19d6, 0x82a3, 
+  0x19bd, 0x829e, 0x19a5, 0x8299, 0x198c, 0x8294, 0x1973, 0x828f, 
+  0x195b, 0x828a, 0x1942, 0x8285, 0x192a, 0x8280, 0x1911, 0x827b, 
+  0x18f8, 0x8276, 0x18e0, 0x8271, 0x18c7, 0x826c, 0x18ae, 0x8268, 
+  0x1896, 0x8263, 0x187d, 0x825e, 0x1864, 0x8259, 0x184c, 0x8254, 
+  0x1833, 0x8250, 0x181a, 0x824b, 0x1802, 0x8246, 0x17e9, 0x8241, 
+  0x17d0, 0x823d, 0x17b7, 0x8238, 0x179f, 0x8233, 0x1786, 0x822f, 
+  0x176d, 0x822a, 0x1755, 0x8226, 0x173c, 0x8221, 0x1723, 0x821c, 
+  0x170a, 0x8218, 0x16f2, 0x8213, 0x16d9, 0x820f, 0x16c0, 0x820a, 
+  0x16a8, 0x8206, 0x168f, 0x8201, 0x1676, 0x81fd, 0x165d, 0x81f9, 
+  0x1645, 0x81f4, 0x162c, 0x81f0, 0x1613, 0x81ec, 0x15fa, 0x81e7, 
+  0x15e2, 0x81e3, 0x15c9, 0x81df, 0x15b0, 0x81da, 0x1597, 0x81d6, 
+  0x157f, 0x81d2, 0x1566, 0x81ce, 0x154d, 0x81c9, 0x1534, 0x81c5, 
+  0x151b, 0x81c1, 0x1503, 0x81bd, 0x14ea, 0x81b9, 0x14d1, 0x81b5, 
+  0x14b8, 0x81b1, 0x149f, 0x81ad, 0x1487, 0x81a9, 0x146e, 0x81a5, 
+  0x1455, 0x81a1, 0x143c, 0x819d, 0x1423, 0x8199, 0x140b, 0x8195, 
+  0x13f2, 0x8191, 0x13d9, 0x818d, 0x13c0, 0x8189, 0x13a7, 0x8185, 
+  0x138e, 0x8181, 0x1376, 0x817d, 0x135d, 0x817a, 0x1344, 0x8176, 
+  0x132b, 0x8172, 0x1312, 0x816e, 0x12f9, 0x816b, 0x12e0, 0x8167, 
+  0x12c8, 0x8163, 0x12af, 0x815f, 0x1296, 0x815c, 0x127d, 0x8158, 
+  0x1264, 0x8155, 0x124b, 0x8151, 0x1232, 0x814d, 0x1219, 0x814a, 
+  0x1201, 0x8146, 0x11e8, 0x8143, 0x11cf, 0x813f, 0x11b6, 0x813c, 
+  0x119d, 0x8138, 0x1184, 0x8135, 0x116b, 0x8131, 0x1152, 0x812e, 
+  0x1139, 0x812b, 0x1121, 0x8127, 0x1108, 0x8124, 0x10ef, 0x8121, 
+  0x10d6, 0x811d, 0x10bd, 0x811a, 0x10a4, 0x8117, 0x108b, 0x8113, 
+  0x1072, 0x8110, 0x1059, 0x810d, 0x1040, 0x810a, 0x1027, 0x8107, 
+  0x100e, 0x8103, 0xff5, 0x8100, 0xfdd, 0x80fd, 0xfc4, 0x80fa, 
+  0xfab, 0x80f7, 0xf92, 0x80f4, 0xf79, 0x80f1, 0xf60, 0x80ee, 
+  0xf47, 0x80eb, 0xf2e, 0x80e8, 0xf15, 0x80e5, 0xefc, 0x80e2, 
+  0xee3, 0x80df, 0xeca, 0x80dc, 0xeb1, 0x80d9, 0xe98, 0x80d6, 
+  0xe7f, 0x80d3, 0xe66, 0x80d1, 0xe4d, 0x80ce, 0xe34, 0x80cb, 
+  0xe1b, 0x80c8, 0xe02, 0x80c5, 0xde9, 0x80c3, 0xdd0, 0x80c0, 
+  0xdb7, 0x80bd, 0xd9e, 0x80bb, 0xd85, 0x80b8, 0xd6c, 0x80b5, 
+  0xd53, 0x80b3, 0xd3a, 0x80b0, 0xd21, 0x80ad, 0xd08, 0x80ab, 
+  0xcef, 0x80a8, 0xcd6, 0x80a6, 0xcbd, 0x80a3, 0xca4, 0x80a1, 
+  0xc8b, 0x809e, 0xc72, 0x809c, 0xc59, 0x8099, 0xc40, 0x8097, 
+  0xc27, 0x8095, 0xc0e, 0x8092, 0xbf5, 0x8090, 0xbdc, 0x808e, 
+  0xbc3, 0x808b, 0xbaa, 0x8089, 0xb91, 0x8087, 0xb78, 0x8084, 
+  0xb5f, 0x8082, 0xb46, 0x8080, 0xb2d, 0x807e, 0xb14, 0x807b, 
+  0xafb, 0x8079, 0xae2, 0x8077, 0xac9, 0x8075, 0xab0, 0x8073, 
+  0xa97, 0x8071, 0xa7e, 0x806f, 0xa65, 0x806d, 0xa4c, 0x806b, 
+  0xa33, 0x8069, 0xa19, 0x8067, 0xa00, 0x8065, 0x9e7, 0x8063, 
+  0x9ce, 0x8061, 0x9b5, 0x805f, 0x99c, 0x805d, 0x983, 0x805b, 
+  0x96a, 0x8059, 0x951, 0x8057, 0x938, 0x8056, 0x91f, 0x8054, 
+  0x906, 0x8052, 0x8ed, 0x8050, 0x8d4, 0x804f, 0x8bb, 0x804d, 
+  0x8a2, 0x804b, 0x888, 0x8049, 0x86f, 0x8048, 0x856, 0x8046, 
+  0x83d, 0x8044, 0x824, 0x8043, 0x80b, 0x8041, 0x7f2, 0x8040, 
+  0x7d9, 0x803e, 0x7c0, 0x803d, 0x7a7, 0x803b, 0x78e, 0x803a, 
+  0x775, 0x8038, 0x75b, 0x8037, 0x742, 0x8035, 0x729, 0x8034, 
+  0x710, 0x8032, 0x6f7, 0x8031, 0x6de, 0x8030, 0x6c5, 0x802e, 
+  0x6ac, 0x802d, 0x693, 0x802c, 0x67a, 0x802a, 0x660, 0x8029, 
+  0x647, 0x8028, 0x62e, 0x8027, 0x615, 0x8026, 0x5fc, 0x8024, 
+  0x5e3, 0x8023, 0x5ca, 0x8022, 0x5b1, 0x8021, 0x598, 0x8020, 
+  0x57f, 0x801f, 0x565, 0x801e, 0x54c, 0x801d, 0x533, 0x801c, 
+  0x51a, 0x801b, 0x501, 0x801a, 0x4e8, 0x8019, 0x4cf, 0x8018, 
+  0x4b6, 0x8017, 0x49c, 0x8016, 0x483, 0x8015, 0x46a, 0x8014, 
+  0x451, 0x8013, 0x438, 0x8012, 0x41f, 0x8012, 0x406, 0x8011, 
+  0x3ed, 0x8010, 0x3d4, 0x800f, 0x3ba, 0x800e, 0x3a1, 0x800e, 
+  0x388, 0x800d, 0x36f, 0x800c, 0x356, 0x800c, 0x33d, 0x800b, 
+  0x324, 0x800a, 0x30b, 0x800a, 0x2f1, 0x8009, 0x2d8, 0x8009, 
+  0x2bf, 0x8008, 0x2a6, 0x8008, 0x28d, 0x8007, 0x274, 0x8007, 
+  0x25b, 0x8006, 0x242, 0x8006, 0x228, 0x8005, 0x20f, 0x8005, 
+  0x1f6, 0x8004, 0x1dd, 0x8004, 0x1c4, 0x8004, 0x1ab, 0x8003, 
+  0x192, 0x8003, 0x178, 0x8003, 0x15f, 0x8002, 0x146, 0x8002, 
+  0x12d, 0x8002, 0x114, 0x8002, 0xfb, 0x8001, 0xe2, 0x8001, 
+  0xc9, 0x8001, 0xaf, 0x8001, 0x96, 0x8001, 0x7d, 0x8001, 
+  0x64, 0x8001, 0x4b, 0x8001, 0x32, 0x8001, 0x19, 0x8001, 
+}; 
+ 
+/**  
+* \par  
+* cosFactor tables are generated using the formula : <pre> cos_factors[n] = 2 * cos((2n+1)*pi/(4*N)) </pre>  
+* \par  
+* C command to generate the table  
+* <pre>  
+* for(i = 0; i< N; i++)  
+* {  
+*   cos_factors[i]= 2 * cos((2*i+1)*c/2);  
+* } </pre>  
+* \par  
+* where <code>N</code> is the number of factors to generate and <code>c</code> is <code>pi/(2*N)</code>  
+* \par  
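+* Then converted to q15 (1.15) format by multiplying with 2^15 and saturated if required.  
+* Note that the entries stored below equal <code>cos((2n+1)*pi/(4*N)) * 2^15</code>, i.e. the factor of 2  
+* from the formula above is not folded into the table values.  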
+  
+*/ 
+ 
+/* Cosine factor table for DCT4 length N = 128 (128 entries, 1.15 fixed point). 
+ * Values fall from 0x7fff towards 0, matching cos((2n+1)*pi/(4*128)) scaled by 2^15. 
+ * arm_dct4_init_q15() stores this table's address in S->pCosFactor when N == 128u. */ 
+static const q15_t cos_factorsQ15_128[128] = { 
+  0x7fff, 0x7ffa, 0x7ff0, 0x7fe1, 0x7fce, 0x7fb5, 0x7f97, 0x7f75, 
+  0x7f4d, 0x7f21, 0x7ef0, 0x7eba, 0x7e7f, 0x7e3f, 0x7dfa, 0x7db0, 
+  0x7d62, 0x7d0f, 0x7cb7, 0x7c5a, 0x7bf8, 0x7b92, 0x7b26, 0x7ab6, 
+  0x7a42, 0x79c8, 0x794a, 0x78c7, 0x7840, 0x77b4, 0x7723, 0x768e, 
+  0x75f4, 0x7555, 0x74b2, 0x740b, 0x735f, 0x72af, 0x71fa, 0x7141, 
+  0x7083, 0x6fc1, 0x6efb, 0x6e30, 0x6d62, 0x6c8f, 0x6bb8, 0x6adc, 
+  0x69fd, 0x6919, 0x6832, 0x6746, 0x6657, 0x6563, 0x646c, 0x6371, 
+  0x6271, 0x616f, 0x6068, 0x5f5e, 0x5e50, 0x5d3e, 0x5c29, 0x5b10, 
+  0x59f3, 0x58d4, 0x57b0, 0x568a, 0x5560, 0x5433, 0x5302, 0x51ce, 
+  0x5097, 0x4f5e, 0x4e21, 0x4ce1, 0x4b9e, 0x4a58, 0x490f, 0x47c3, 
+  0x4675, 0x4524, 0x43d0, 0x427a, 0x4121, 0x3fc5, 0x3e68, 0x3d07, 
+  0x3ba5, 0x3a40, 0x38d8, 0x376f, 0x3604, 0x3496, 0x3326, 0x31b5, 
+  0x3041, 0x2ecc, 0x2d55, 0x2bdc, 0x2a61, 0x28e5, 0x2767, 0x25e8, 
+  0x2467, 0x22e5, 0x2161, 0x1fdc, 0x1e56, 0x1ccf, 0x1b47, 0x19bd, 
+  0x1833, 0x16a8, 0x151b, 0x138e, 0x1201, 0x1072, 0xee3, 0xd53, 
+  0xbc3, 0xa33, 0x8a2, 0x710, 0x57f, 0x3ed, 0x25b, 0xc9 
+}; 
+ 
+/* Cosine factor table for DCT4 length N = 512 (512 entries, 1.15 fixed point). 
+ * Values fall from 0x7fff towards 0, matching cos((2n+1)*pi/(4*512)) scaled by 2^15. 
+ * arm_dct4_init_q15() stores this table's address in S->pCosFactor when N == 512u. */ 
+static const q15_t cos_factorsQ15_512[512] = { 
+  0x7fff, 0x7fff, 0x7fff, 0x7ffe, 0x7ffc, 0x7ffb, 0x7ff9, 0x7ff7, 
+  0x7ff4, 0x7ff2, 0x7fee, 0x7feb, 0x7fe7, 0x7fe3, 0x7fdf, 0x7fda, 
+  0x7fd6, 0x7fd0, 0x7fcb, 0x7fc5, 0x7fbf, 0x7fb8, 0x7fb1, 0x7faa, 
+  0x7fa3, 0x7f9b, 0x7f93, 0x7f8b, 0x7f82, 0x7f79, 0x7f70, 0x7f67, 
+  0x7f5d, 0x7f53, 0x7f48, 0x7f3d, 0x7f32, 0x7f27, 0x7f1b, 0x7f0f, 
+  0x7f03, 0x7ef6, 0x7ee9, 0x7edc, 0x7ecf, 0x7ec1, 0x7eb3, 0x7ea4, 
+  0x7e95, 0x7e86, 0x7e77, 0x7e67, 0x7e57, 0x7e47, 0x7e37, 0x7e26, 
+  0x7e14, 0x7e03, 0x7df1, 0x7ddf, 0x7dcd, 0x7dba, 0x7da7, 0x7d94, 
+  0x7d80, 0x7d6c, 0x7d58, 0x7d43, 0x7d2f, 0x7d19, 0x7d04, 0x7cee, 
+  0x7cd8, 0x7cc2, 0x7cab, 0x7c94, 0x7c7d, 0x7c66, 0x7c4e, 0x7c36, 
+  0x7c1d, 0x7c05, 0x7beb, 0x7bd2, 0x7bb9, 0x7b9f, 0x7b84, 0x7b6a, 
+  0x7b4f, 0x7b34, 0x7b19, 0x7afd, 0x7ae1, 0x7ac5, 0x7aa8, 0x7a8b, 
+  0x7a6e, 0x7a50, 0x7a33, 0x7a15, 0x79f6, 0x79d8, 0x79b9, 0x7999, 
+  0x797a, 0x795a, 0x793a, 0x7919, 0x78f9, 0x78d8, 0x78b6, 0x7895, 
+  0x7873, 0x7851, 0x782e, 0x780c, 0x77e9, 0x77c5, 0x77a2, 0x777e, 
+  0x775a, 0x7735, 0x7710, 0x76eb, 0x76c6, 0x76a0, 0x767b, 0x7654, 
+  0x762e, 0x7607, 0x75e0, 0x75b9, 0x7591, 0x7569, 0x7541, 0x7519, 
+  0x74f0, 0x74c7, 0x749e, 0x7474, 0x744a, 0x7420, 0x73f6, 0x73cb, 
+  0x73a0, 0x7375, 0x7349, 0x731d, 0x72f1, 0x72c5, 0x7298, 0x726b, 
+  0x723e, 0x7211, 0x71e3, 0x71b5, 0x7186, 0x7158, 0x7129, 0x70fa, 
+  0x70cb, 0x709b, 0x706b, 0x703b, 0x700a, 0x6fda, 0x6fa9, 0x6f77, 
+  0x6f46, 0x6f14, 0x6ee2, 0x6eaf, 0x6e7d, 0x6e4a, 0x6e17, 0x6de3, 
+  0x6db0, 0x6d7c, 0x6d48, 0x6d13, 0x6cde, 0x6ca9, 0x6c74, 0x6c3f, 
+  0x6c09, 0x6bd3, 0x6b9c, 0x6b66, 0x6b2f, 0x6af8, 0x6ac1, 0x6a89, 
+  0x6a51, 0x6a19, 0x69e1, 0x69a8, 0x696f, 0x6936, 0x68fd, 0x68c3, 
+  0x6889, 0x684f, 0x6815, 0x67da, 0x679f, 0x6764, 0x6729, 0x66ed, 
+  0x66b1, 0x6675, 0x6639, 0x65fc, 0x65bf, 0x6582, 0x6545, 0x6507, 
+  0x64c9, 0x648b, 0x644d, 0x640e, 0x63cf, 0x6390, 0x6351, 0x6311, 
+  0x62d2, 0x6292, 0x6251, 0x6211, 0x61d0, 0x618f, 0x614e, 0x610d, 
+  0x60cb, 0x6089, 0x6047, 0x6004, 0x5fc2, 0x5f7f, 0x5f3c, 0x5ef9, 
+  0x5eb5, 0x5e71, 0x5e2d, 0x5de9, 0x5da5, 0x5d60, 0x5d1b, 0x5cd6, 
+  0x5c91, 0x5c4b, 0x5c06, 0x5bc0, 0x5b79, 0x5b33, 0x5aec, 0x5aa5, 
+  0x5a5e, 0x5a17, 0x59d0, 0x5988, 0x5940, 0x58f8, 0x58af, 0x5867, 
+  0x581e, 0x57d5, 0x578c, 0x5742, 0x56f9, 0x56af, 0x5665, 0x561a, 
+  0x55d0, 0x5585, 0x553a, 0x54ef, 0x54a4, 0x5458, 0x540d, 0x53c1, 
+  0x5375, 0x5328, 0x52dc, 0x528f, 0x5242, 0x51f5, 0x51a8, 0x515a, 
+  0x510c, 0x50bf, 0x5070, 0x5022, 0x4fd4, 0x4f85, 0x4f36, 0x4ee7, 
+  0x4e98, 0x4e48, 0x4df9, 0x4da9, 0x4d59, 0x4d09, 0x4cb8, 0x4c68, 
+  0x4c17, 0x4bc6, 0x4b75, 0x4b24, 0x4ad2, 0x4a81, 0x4a2f, 0x49dd, 
+  0x498a, 0x4938, 0x48e6, 0x4893, 0x4840, 0x47ed, 0x479a, 0x4746, 
+  0x46f3, 0x469f, 0x464b, 0x45f7, 0x45a3, 0x454e, 0x44fa, 0x44a5, 
+  0x4450, 0x43fb, 0x43a5, 0x4350, 0x42fa, 0x42a5, 0x424f, 0x41f9, 
+  0x41a2, 0x414c, 0x40f6, 0x409f, 0x4048, 0x3ff1, 0x3f9a, 0x3f43, 
+  0x3eeb, 0x3e93, 0x3e3c, 0x3de4, 0x3d8c, 0x3d33, 0x3cdb, 0x3c83, 
+  0x3c2a, 0x3bd1, 0x3b78, 0x3b1f, 0x3ac6, 0x3a6c, 0x3a13, 0x39b9, 
+  0x395f, 0x3906, 0x38ab, 0x3851, 0x37f7, 0x379c, 0x3742, 0x36e7, 
+  0x368c, 0x3631, 0x35d6, 0x357b, 0x351f, 0x34c4, 0x3468, 0x340c, 
+  0x33b0, 0x3354, 0x32f8, 0x329c, 0x3240, 0x31e3, 0x3186, 0x312a, 
+  0x30cd, 0x3070, 0x3013, 0x2fb5, 0x2f58, 0x2efb, 0x2e9d, 0x2e3f, 
+  0x2de2, 0x2d84, 0x2d26, 0x2cc8, 0x2c69, 0x2c0b, 0x2bad, 0x2b4e, 
+  0x2aef, 0x2a91, 0x2a32, 0x29d3, 0x2974, 0x2915, 0x28b5, 0x2856, 
+  0x27f6, 0x2797, 0x2737, 0x26d8, 0x2678, 0x2618, 0x25b8, 0x2558, 
+  0x24f7, 0x2497, 0x2437, 0x23d6, 0x2376, 0x2315, 0x22b4, 0x2254, 
+  0x21f3, 0x2192, 0x2131, 0x20d0, 0x206e, 0x200d, 0x1fac, 0x1f4a, 
+  0x1ee9, 0x1e87, 0x1e25, 0x1dc4, 0x1d62, 0x1d00, 0x1c9e, 0x1c3c, 
+  0x1bda, 0x1b78, 0x1b16, 0x1ab3, 0x1a51, 0x19ef, 0x198c, 0x192a, 
+  0x18c7, 0x1864, 0x1802, 0x179f, 0x173c, 0x16d9, 0x1676, 0x1613, 
+  0x15b0, 0x154d, 0x14ea, 0x1487, 0x1423, 0x13c0, 0x135d, 0x12f9, 
+  0x1296, 0x1232, 0x11cf, 0x116b, 0x1108, 0x10a4, 0x1040, 0xfdd, 
+  0xf79, 0xf15, 0xeb1, 0xe4d, 0xde9, 0xd85, 0xd21, 0xcbd, 
+  0xc59, 0xbf5, 0xb91, 0xb2d, 0xac9, 0xa65, 0xa00, 0x99c, 
+  0x938, 0x8d4, 0x86f, 0x80b, 0x7a7, 0x742, 0x6de, 0x67a, 
+  0x615, 0x5b1, 0x54c, 0x4e8, 0x483, 0x41f, 0x3ba, 0x356, 
+  0x2f1, 0x28d, 0x228, 0x1c4, 0x15f, 0xfb, 0x96, 0x32, 
+}; 
+ 
+/* Cosine factor table for DCT4 length N = 2048 (2048 entries, 1.15 fixed point). 
+ * Values fall from 0x7fff towards 0, matching cos((2n+1)*pi/(4*2048)) scaled by 2^15. 
+ * arm_dct4_init_q15() stores this table's address in S->pCosFactor when N == 2048u. */ 
+static const q15_t cos_factorsQ15_2048[2048] = { 
+  0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 
+  0x7fff, 0x7fff, 0x7ffe, 0x7ffe, 0x7ffe, 0x7ffe, 0x7ffd, 0x7ffd, 
+  0x7ffd, 0x7ffd, 0x7ffc, 0x7ffc, 0x7ffb, 0x7ffb, 0x7ffb, 0x7ffa, 
+  0x7ffa, 0x7ff9, 0x7ff9, 0x7ff8, 0x7ff8, 0x7ff7, 0x7ff7, 0x7ff6, 
+  0x7ff5, 0x7ff5, 0x7ff4, 0x7ff3, 0x7ff3, 0x7ff2, 0x7ff1, 0x7ff0, 
+  0x7ff0, 0x7fef, 0x7fee, 0x7fed, 0x7fec, 0x7fec, 0x7feb, 0x7fea, 
+  0x7fe9, 0x7fe8, 0x7fe7, 0x7fe6, 0x7fe5, 0x7fe4, 0x7fe3, 0x7fe2, 
+  0x7fe1, 0x7fe0, 0x7fdf, 0x7fdd, 0x7fdc, 0x7fdb, 0x7fda, 0x7fd9, 
+  0x7fd7, 0x7fd6, 0x7fd5, 0x7fd4, 0x7fd2, 0x7fd1, 0x7fd0, 0x7fce, 
+  0x7fcd, 0x7fcb, 0x7fca, 0x7fc9, 0x7fc7, 0x7fc6, 0x7fc4, 0x7fc3, 
+  0x7fc1, 0x7fc0, 0x7fbe, 0x7fbc, 0x7fbb, 0x7fb9, 0x7fb7, 0x7fb6, 
+  0x7fb4, 0x7fb2, 0x7fb1, 0x7faf, 0x7fad, 0x7fab, 0x7fa9, 0x7fa8, 
+  0x7fa6, 0x7fa4, 0x7fa2, 0x7fa0, 0x7f9e, 0x7f9c, 0x7f9a, 0x7f98, 
+  0x7f96, 0x7f94, 0x7f92, 0x7f90, 0x7f8e, 0x7f8c, 0x7f8a, 0x7f88, 
+  0x7f86, 0x7f83, 0x7f81, 0x7f7f, 0x7f7d, 0x7f7b, 0x7f78, 0x7f76, 
+  0x7f74, 0x7f71, 0x7f6f, 0x7f6d, 0x7f6a, 0x7f68, 0x7f65, 0x7f63, 
+  0x7f60, 0x7f5e, 0x7f5b, 0x7f59, 0x7f56, 0x7f54, 0x7f51, 0x7f4f, 
+  0x7f4c, 0x7f49, 0x7f47, 0x7f44, 0x7f41, 0x7f3f, 0x7f3c, 0x7f39, 
+  0x7f36, 0x7f34, 0x7f31, 0x7f2e, 0x7f2b, 0x7f28, 0x7f25, 0x7f23, 
+  0x7f20, 0x7f1d, 0x7f1a, 0x7f17, 0x7f14, 0x7f11, 0x7f0e, 0x7f0b, 
+  0x7f08, 0x7f04, 0x7f01, 0x7efe, 0x7efb, 0x7ef8, 0x7ef5, 0x7ef1, 
+  0x7eee, 0x7eeb, 0x7ee8, 0x7ee4, 0x7ee1, 0x7ede, 0x7eda, 0x7ed7, 
+  0x7ed4, 0x7ed0, 0x7ecd, 0x7ec9, 0x7ec6, 0x7ec3, 0x7ebf, 0x7ebb, 
+  0x7eb8, 0x7eb4, 0x7eb1, 0x7ead, 0x7eaa, 0x7ea6, 0x7ea2, 0x7e9f, 
+  0x7e9b, 0x7e97, 0x7e94, 0x7e90, 0x7e8c, 0x7e88, 0x7e84, 0x7e81, 
+  0x7e7d, 0x7e79, 0x7e75, 0x7e71, 0x7e6d, 0x7e69, 0x7e65, 0x7e61, 
+  0x7e5d, 0x7e59, 0x7e55, 0x7e51, 0x7e4d, 0x7e49, 0x7e45, 0x7e41, 
+  0x7e3d, 0x7e39, 0x7e34, 0x7e30, 0x7e2c, 0x7e28, 0x7e24, 0x7e1f, 
+  0x7e1b, 0x7e17, 0x7e12, 0x7e0e, 0x7e0a, 0x7e05, 0x7e01, 0x7dfc, 
+  0x7df8, 0x7df3, 0x7def, 0x7dea, 0x7de6, 0x7de1, 0x7ddd, 0x7dd8, 
+  0x7dd4, 0x7dcf, 0x7dca, 0x7dc6, 0x7dc1, 0x7dbc, 0x7db8, 0x7db3, 
+  0x7dae, 0x7da9, 0x7da5, 0x7da0, 0x7d9b, 0x7d96, 0x7d91, 0x7d8c, 
+  0x7d87, 0x7d82, 0x7d7e, 0x7d79, 0x7d74, 0x7d6f, 0x7d6a, 0x7d65, 
+  0x7d60, 0x7d5a, 0x7d55, 0x7d50, 0x7d4b, 0x7d46, 0x7d41, 0x7d3c, 
+  0x7d36, 0x7d31, 0x7d2c, 0x7d27, 0x7d21, 0x7d1c, 0x7d17, 0x7d11, 
+  0x7d0c, 0x7d07, 0x7d01, 0x7cfc, 0x7cf6, 0x7cf1, 0x7cec, 0x7ce6, 
+  0x7ce1, 0x7cdb, 0x7cd5, 0x7cd0, 0x7cca, 0x7cc5, 0x7cbf, 0x7cb9, 
+  0x7cb4, 0x7cae, 0x7ca8, 0x7ca3, 0x7c9d, 0x7c97, 0x7c91, 0x7c8c, 
+  0x7c86, 0x7c80, 0x7c7a, 0x7c74, 0x7c6e, 0x7c69, 0x7c63, 0x7c5d, 
+  0x7c57, 0x7c51, 0x7c4b, 0x7c45, 0x7c3f, 0x7c39, 0x7c33, 0x7c2d, 
+  0x7c26, 0x7c20, 0x7c1a, 0x7c14, 0x7c0e, 0x7c08, 0x7c01, 0x7bfb, 
+  0x7bf5, 0x7bef, 0x7be8, 0x7be2, 0x7bdc, 0x7bd5, 0x7bcf, 0x7bc9, 
+  0x7bc2, 0x7bbc, 0x7bb5, 0x7baf, 0x7ba8, 0x7ba2, 0x7b9b, 0x7b95, 
+  0x7b8e, 0x7b88, 0x7b81, 0x7b7a, 0x7b74, 0x7b6d, 0x7b67, 0x7b60, 
+  0x7b59, 0x7b52, 0x7b4c, 0x7b45, 0x7b3e, 0x7b37, 0x7b31, 0x7b2a, 
+  0x7b23, 0x7b1c, 0x7b15, 0x7b0e, 0x7b07, 0x7b00, 0x7af9, 0x7af2, 
+  0x7aeb, 0x7ae4, 0x7add, 0x7ad6, 0x7acf, 0x7ac8, 0x7ac1, 0x7aba, 
+  0x7ab3, 0x7aac, 0x7aa4, 0x7a9d, 0x7a96, 0x7a8f, 0x7a87, 0x7a80, 
+  0x7a79, 0x7a72, 0x7a6a, 0x7a63, 0x7a5c, 0x7a54, 0x7a4d, 0x7a45, 
+  0x7a3e, 0x7a36, 0x7a2f, 0x7a27, 0x7a20, 0x7a18, 0x7a11, 0x7a09, 
+  0x7a02, 0x79fa, 0x79f2, 0x79eb, 0x79e3, 0x79db, 0x79d4, 0x79cc, 
+  0x79c4, 0x79bc, 0x79b5, 0x79ad, 0x79a5, 0x799d, 0x7995, 0x798e, 
+  0x7986, 0x797e, 0x7976, 0x796e, 0x7966, 0x795e, 0x7956, 0x794e, 
+  0x7946, 0x793e, 0x7936, 0x792e, 0x7926, 0x791e, 0x7915, 0x790d, 
+  0x7905, 0x78fd, 0x78f5, 0x78ec, 0x78e4, 0x78dc, 0x78d4, 0x78cb, 
+  0x78c3, 0x78bb, 0x78b2, 0x78aa, 0x78a2, 0x7899, 0x7891, 0x7888, 
+  0x7880, 0x7877, 0x786f, 0x7866, 0x785e, 0x7855, 0x784d, 0x7844, 
+  0x783b, 0x7833, 0x782a, 0x7821, 0x7819, 0x7810, 0x7807, 0x77ff, 
+  0x77f6, 0x77ed, 0x77e4, 0x77db, 0x77d3, 0x77ca, 0x77c1, 0x77b8, 
+  0x77af, 0x77a6, 0x779d, 0x7794, 0x778b, 0x7782, 0x7779, 0x7770, 
+  0x7767, 0x775e, 0x7755, 0x774c, 0x7743, 0x773a, 0x7731, 0x7727, 
+  0x771e, 0x7715, 0x770c, 0x7703, 0x76f9, 0x76f0, 0x76e7, 0x76dd, 
+  0x76d4, 0x76cb, 0x76c1, 0x76b8, 0x76af, 0x76a5, 0x769c, 0x7692, 
+  0x7689, 0x767f, 0x7676, 0x766c, 0x7663, 0x7659, 0x7650, 0x7646, 
+  0x763c, 0x7633, 0x7629, 0x761f, 0x7616, 0x760c, 0x7602, 0x75f9, 
+  0x75ef, 0x75e5, 0x75db, 0x75d1, 0x75c8, 0x75be, 0x75b4, 0x75aa, 
+  0x75a0, 0x7596, 0x758c, 0x7582, 0x7578, 0x756e, 0x7564, 0x755a, 
+  0x7550, 0x7546, 0x753c, 0x7532, 0x7528, 0x751e, 0x7514, 0x7509, 
+  0x74ff, 0x74f5, 0x74eb, 0x74e1, 0x74d6, 0x74cc, 0x74c2, 0x74b7, 
+  0x74ad, 0x74a3, 0x7498, 0x748e, 0x7484, 0x7479, 0x746f, 0x7464, 
+  0x745a, 0x744f, 0x7445, 0x743a, 0x7430, 0x7425, 0x741b, 0x7410, 
+  0x7406, 0x73fb, 0x73f0, 0x73e6, 0x73db, 0x73d0, 0x73c6, 0x73bb, 
+  0x73b0, 0x73a5, 0x739b, 0x7390, 0x7385, 0x737a, 0x736f, 0x7364, 
+  0x7359, 0x734f, 0x7344, 0x7339, 0x732e, 0x7323, 0x7318, 0x730d, 
+  0x7302, 0x72f7, 0x72ec, 0x72e1, 0x72d5, 0x72ca, 0x72bf, 0x72b4, 
+  0x72a9, 0x729e, 0x7293, 0x7287, 0x727c, 0x7271, 0x7266, 0x725a, 
+  0x724f, 0x7244, 0x7238, 0x722d, 0x7222, 0x7216, 0x720b, 0x71ff, 
+  0x71f4, 0x71e9, 0x71dd, 0x71d2, 0x71c6, 0x71bb, 0x71af, 0x71a3, 
+  0x7198, 0x718c, 0x7181, 0x7175, 0x7169, 0x715e, 0x7152, 0x7146, 
+  0x713b, 0x712f, 0x7123, 0x7117, 0x710c, 0x7100, 0x70f4, 0x70e8, 
+  0x70dc, 0x70d1, 0x70c5, 0x70b9, 0x70ad, 0x70a1, 0x7095, 0x7089, 
+  0x707d, 0x7071, 0x7065, 0x7059, 0x704d, 0x7041, 0x7035, 0x7029, 
+  0x701d, 0x7010, 0x7004, 0x6ff8, 0x6fec, 0x6fe0, 0x6fd3, 0x6fc7, 
+  0x6fbb, 0x6faf, 0x6fa2, 0x6f96, 0x6f8a, 0x6f7d, 0x6f71, 0x6f65, 
+  0x6f58, 0x6f4c, 0x6f3f, 0x6f33, 0x6f27, 0x6f1a, 0x6f0e, 0x6f01, 
+  0x6ef5, 0x6ee8, 0x6edc, 0x6ecf, 0x6ec2, 0x6eb6, 0x6ea9, 0x6e9c, 
+  0x6e90, 0x6e83, 0x6e76, 0x6e6a, 0x6e5d, 0x6e50, 0x6e44, 0x6e37, 
+  0x6e2a, 0x6e1d, 0x6e10, 0x6e04, 0x6df7, 0x6dea, 0x6ddd, 0x6dd0, 
+  0x6dc3, 0x6db6, 0x6da9, 0x6d9c, 0x6d8f, 0x6d82, 0x6d75, 0x6d68, 
+  0x6d5b, 0x6d4e, 0x6d41, 0x6d34, 0x6d27, 0x6d1a, 0x6d0c, 0x6cff, 
+  0x6cf2, 0x6ce5, 0x6cd8, 0x6cca, 0x6cbd, 0x6cb0, 0x6ca3, 0x6c95, 
+  0x6c88, 0x6c7b, 0x6c6d, 0x6c60, 0x6c53, 0x6c45, 0x6c38, 0x6c2a, 
+  0x6c1d, 0x6c0f, 0x6c02, 0x6bf5, 0x6be7, 0x6bd9, 0x6bcc, 0x6bbe, 
+  0x6bb1, 0x6ba3, 0x6b96, 0x6b88, 0x6b7a, 0x6b6d, 0x6b5f, 0x6b51, 
+  0x6b44, 0x6b36, 0x6b28, 0x6b1a, 0x6b0d, 0x6aff, 0x6af1, 0x6ae3, 
+  0x6ad5, 0x6ac8, 0x6aba, 0x6aac, 0x6a9e, 0x6a90, 0x6a82, 0x6a74, 
+  0x6a66, 0x6a58, 0x6a4a, 0x6a3c, 0x6a2e, 0x6a20, 0x6a12, 0x6a04, 
+  0x69f6, 0x69e8, 0x69da, 0x69cb, 0x69bd, 0x69af, 0x69a1, 0x6993, 
+  0x6985, 0x6976, 0x6968, 0x695a, 0x694b, 0x693d, 0x692f, 0x6921, 
+  0x6912, 0x6904, 0x68f5, 0x68e7, 0x68d9, 0x68ca, 0x68bc, 0x68ad, 
+  0x689f, 0x6890, 0x6882, 0x6873, 0x6865, 0x6856, 0x6848, 0x6839, 
+  0x682b, 0x681c, 0x680d, 0x67ff, 0x67f0, 0x67e1, 0x67d3, 0x67c4, 
+  0x67b5, 0x67a6, 0x6798, 0x6789, 0x677a, 0x676b, 0x675d, 0x674e, 
+  0x673f, 0x6730, 0x6721, 0x6712, 0x6703, 0x66f4, 0x66e5, 0x66d6, 
+  0x66c8, 0x66b9, 0x66aa, 0x669b, 0x668b, 0x667c, 0x666d, 0x665e, 
+  0x664f, 0x6640, 0x6631, 0x6622, 0x6613, 0x6603, 0x65f4, 0x65e5, 
+  0x65d6, 0x65c7, 0x65b7, 0x65a8, 0x6599, 0x658a, 0x657a, 0x656b, 
+  0x655c, 0x654c, 0x653d, 0x652d, 0x651e, 0x650f, 0x64ff, 0x64f0, 
+  0x64e0, 0x64d1, 0x64c1, 0x64b2, 0x64a2, 0x6493, 0x6483, 0x6474, 
+  0x6464, 0x6454, 0x6445, 0x6435, 0x6426, 0x6416, 0x6406, 0x63f7, 
+  0x63e7, 0x63d7, 0x63c7, 0x63b8, 0x63a8, 0x6398, 0x6388, 0x6378, 
+  0x6369, 0x6359, 0x6349, 0x6339, 0x6329, 0x6319, 0x6309, 0x62f9, 
+  0x62ea, 0x62da, 0x62ca, 0x62ba, 0x62aa, 0x629a, 0x628a, 0x627a, 
+  0x6269, 0x6259, 0x6249, 0x6239, 0x6229, 0x6219, 0x6209, 0x61f9, 
+  0x61e8, 0x61d8, 0x61c8, 0x61b8, 0x61a8, 0x6197, 0x6187, 0x6177, 
+  0x6166, 0x6156, 0x6146, 0x6135, 0x6125, 0x6115, 0x6104, 0x60f4, 
+  0x60e4, 0x60d3, 0x60c3, 0x60b2, 0x60a2, 0x6091, 0x6081, 0x6070, 
+  0x6060, 0x604f, 0x603f, 0x602e, 0x601d, 0x600d, 0x5ffc, 0x5fec, 
+  0x5fdb, 0x5fca, 0x5fba, 0x5fa9, 0x5f98, 0x5f87, 0x5f77, 0x5f66, 
+  0x5f55, 0x5f44, 0x5f34, 0x5f23, 0x5f12, 0x5f01, 0x5ef0, 0x5edf, 
+  0x5ecf, 0x5ebe, 0x5ead, 0x5e9c, 0x5e8b, 0x5e7a, 0x5e69, 0x5e58, 
+  0x5e47, 0x5e36, 0x5e25, 0x5e14, 0x5e03, 0x5df2, 0x5de1, 0x5dd0, 
+  0x5dbf, 0x5dad, 0x5d9c, 0x5d8b, 0x5d7a, 0x5d69, 0x5d58, 0x5d46, 
+  0x5d35, 0x5d24, 0x5d13, 0x5d01, 0x5cf0, 0x5cdf, 0x5cce, 0x5cbc, 
+  0x5cab, 0x5c9a, 0x5c88, 0x5c77, 0x5c66, 0x5c54, 0x5c43, 0x5c31, 
+  0x5c20, 0x5c0e, 0x5bfd, 0x5beb, 0x5bda, 0x5bc8, 0x5bb7, 0x5ba5, 
+  0x5b94, 0x5b82, 0x5b71, 0x5b5f, 0x5b4d, 0x5b3c, 0x5b2a, 0x5b19, 
+  0x5b07, 0x5af5, 0x5ae4, 0x5ad2, 0x5ac0, 0x5aae, 0x5a9d, 0x5a8b, 
+  0x5a79, 0x5a67, 0x5a56, 0x5a44, 0x5a32, 0x5a20, 0x5a0e, 0x59fc, 
+  0x59ea, 0x59d9, 0x59c7, 0x59b5, 0x59a3, 0x5991, 0x597f, 0x596d, 
+  0x595b, 0x5949, 0x5937, 0x5925, 0x5913, 0x5901, 0x58ef, 0x58dd, 
+  0x58cb, 0x58b8, 0x58a6, 0x5894, 0x5882, 0x5870, 0x585e, 0x584b, 
+  0x5839, 0x5827, 0x5815, 0x5803, 0x57f0, 0x57de, 0x57cc, 0x57b9, 
+  0x57a7, 0x5795, 0x5783, 0x5770, 0x575e, 0x574b, 0x5739, 0x5727, 
+  0x5714, 0x5702, 0x56ef, 0x56dd, 0x56ca, 0x56b8, 0x56a5, 0x5693, 
+  0x5680, 0x566e, 0x565b, 0x5649, 0x5636, 0x5624, 0x5611, 0x55fe, 
+  0x55ec, 0x55d9, 0x55c7, 0x55b4, 0x55a1, 0x558f, 0x557c, 0x5569, 
+  0x5556, 0x5544, 0x5531, 0x551e, 0x550b, 0x54f9, 0x54e6, 0x54d3, 
+  0x54c0, 0x54ad, 0x549a, 0x5488, 0x5475, 0x5462, 0x544f, 0x543c, 
+  0x5429, 0x5416, 0x5403, 0x53f0, 0x53dd, 0x53ca, 0x53b7, 0x53a4, 
+  0x5391, 0x537e, 0x536b, 0x5358, 0x5345, 0x5332, 0x531f, 0x530c, 
+  0x52f8, 0x52e5, 0x52d2, 0x52bf, 0x52ac, 0x5299, 0x5285, 0x5272, 
+  0x525f, 0x524c, 0x5238, 0x5225, 0x5212, 0x51ff, 0x51eb, 0x51d8, 
+  0x51c5, 0x51b1, 0x519e, 0x518b, 0x5177, 0x5164, 0x5150, 0x513d, 
+  0x512a, 0x5116, 0x5103, 0x50ef, 0x50dc, 0x50c8, 0x50b5, 0x50a1, 
+  0x508e, 0x507a, 0x5067, 0x5053, 0x503f, 0x502c, 0x5018, 0x5005, 
+  0x4ff1, 0x4fdd, 0x4fca, 0x4fb6, 0x4fa2, 0x4f8f, 0x4f7b, 0x4f67, 
+  0x4f54, 0x4f40, 0x4f2c, 0x4f18, 0x4f05, 0x4ef1, 0x4edd, 0x4ec9, 
+  0x4eb6, 0x4ea2, 0x4e8e, 0x4e7a, 0x4e66, 0x4e52, 0x4e3e, 0x4e2a, 
+  0x4e17, 0x4e03, 0x4def, 0x4ddb, 0x4dc7, 0x4db3, 0x4d9f, 0x4d8b, 
+  0x4d77, 0x4d63, 0x4d4f, 0x4d3b, 0x4d27, 0x4d13, 0x4cff, 0x4ceb, 
+  0x4cd6, 0x4cc2, 0x4cae, 0x4c9a, 0x4c86, 0x4c72, 0x4c5e, 0x4c49, 
+  0x4c35, 0x4c21, 0x4c0d, 0x4bf9, 0x4be4, 0x4bd0, 0x4bbc, 0x4ba8, 
+  0x4b93, 0x4b7f, 0x4b6b, 0x4b56, 0x4b42, 0x4b2e, 0x4b19, 0x4b05, 
+  0x4af1, 0x4adc, 0x4ac8, 0x4ab4, 0x4a9f, 0x4a8b, 0x4a76, 0x4a62, 
+  0x4a4d, 0x4a39, 0x4a24, 0x4a10, 0x49fb, 0x49e7, 0x49d2, 0x49be, 
+  0x49a9, 0x4995, 0x4980, 0x496c, 0x4957, 0x4942, 0x492e, 0x4919, 
+  0x4905, 0x48f0, 0x48db, 0x48c7, 0x48b2, 0x489d, 0x4888, 0x4874, 
+  0x485f, 0x484a, 0x4836, 0x4821, 0x480c, 0x47f7, 0x47e2, 0x47ce, 
+  0x47b9, 0x47a4, 0x478f, 0x477a, 0x4765, 0x4751, 0x473c, 0x4727, 
+  0x4712, 0x46fd, 0x46e8, 0x46d3, 0x46be, 0x46a9, 0x4694, 0x467f, 
+  0x466a, 0x4655, 0x4640, 0x462b, 0x4616, 0x4601, 0x45ec, 0x45d7, 
+  0x45c2, 0x45ad, 0x4598, 0x4583, 0x456e, 0x4559, 0x4544, 0x452e, 
+  0x4519, 0x4504, 0x44ef, 0x44da, 0x44c5, 0x44af, 0x449a, 0x4485, 
+  0x4470, 0x445a, 0x4445, 0x4430, 0x441b, 0x4405, 0x43f0, 0x43db, 
+  0x43c5, 0x43b0, 0x439b, 0x4385, 0x4370, 0x435b, 0x4345, 0x4330, 
+  0x431b, 0x4305, 0x42f0, 0x42da, 0x42c5, 0x42af, 0x429a, 0x4284, 
+  0x426f, 0x425a, 0x4244, 0x422f, 0x4219, 0x4203, 0x41ee, 0x41d8, 
+  0x41c3, 0x41ad, 0x4198, 0x4182, 0x416d, 0x4157, 0x4141, 0x412c, 
+  0x4116, 0x4100, 0x40eb, 0x40d5, 0x40bf, 0x40aa, 0x4094, 0x407e, 
+  0x4069, 0x4053, 0x403d, 0x4027, 0x4012, 0x3ffc, 0x3fe6, 0x3fd0, 
+  0x3fbb, 0x3fa5, 0x3f8f, 0x3f79, 0x3f63, 0x3f4d, 0x3f38, 0x3f22, 
+  0x3f0c, 0x3ef6, 0x3ee0, 0x3eca, 0x3eb4, 0x3e9e, 0x3e88, 0x3e73, 
+  0x3e5d, 0x3e47, 0x3e31, 0x3e1b, 0x3e05, 0x3def, 0x3dd9, 0x3dc3, 
+  0x3dad, 0x3d97, 0x3d81, 0x3d6b, 0x3d55, 0x3d3e, 0x3d28, 0x3d12, 
+  0x3cfc, 0x3ce6, 0x3cd0, 0x3cba, 0x3ca4, 0x3c8e, 0x3c77, 0x3c61, 
+  0x3c4b, 0x3c35, 0x3c1f, 0x3c09, 0x3bf2, 0x3bdc, 0x3bc6, 0x3bb0, 
+  0x3b99, 0x3b83, 0x3b6d, 0x3b57, 0x3b40, 0x3b2a, 0x3b14, 0x3afe, 
+  0x3ae7, 0x3ad1, 0x3abb, 0x3aa4, 0x3a8e, 0x3a78, 0x3a61, 0x3a4b, 
+  0x3a34, 0x3a1e, 0x3a08, 0x39f1, 0x39db, 0x39c4, 0x39ae, 0x3998, 
+  0x3981, 0x396b, 0x3954, 0x393e, 0x3927, 0x3911, 0x38fa, 0x38e4, 
+  0x38cd, 0x38b7, 0x38a0, 0x388a, 0x3873, 0x385d, 0x3846, 0x382f, 
+  0x3819, 0x3802, 0x37ec, 0x37d5, 0x37be, 0x37a8, 0x3791, 0x377a, 
+  0x3764, 0x374d, 0x3736, 0x3720, 0x3709, 0x36f2, 0x36dc, 0x36c5, 
+  0x36ae, 0x3698, 0x3681, 0x366a, 0x3653, 0x363d, 0x3626, 0x360f, 
+  0x35f8, 0x35e1, 0x35cb, 0x35b4, 0x359d, 0x3586, 0x356f, 0x3558, 
+  0x3542, 0x352b, 0x3514, 0x34fd, 0x34e6, 0x34cf, 0x34b8, 0x34a1, 
+  0x348b, 0x3474, 0x345d, 0x3446, 0x342f, 0x3418, 0x3401, 0x33ea, 
+  0x33d3, 0x33bc, 0x33a5, 0x338e, 0x3377, 0x3360, 0x3349, 0x3332, 
+  0x331b, 0x3304, 0x32ed, 0x32d6, 0x32bf, 0x32a8, 0x3290, 0x3279, 
+  0x3262, 0x324b, 0x3234, 0x321d, 0x3206, 0x31ef, 0x31d8, 0x31c0, 
+  0x31a9, 0x3192, 0x317b, 0x3164, 0x314c, 0x3135, 0x311e, 0x3107, 
+  0x30f0, 0x30d8, 0x30c1, 0x30aa, 0x3093, 0x307b, 0x3064, 0x304d, 
+  0x3036, 0x301e, 0x3007, 0x2ff0, 0x2fd8, 0x2fc1, 0x2faa, 0x2f92, 
+  0x2f7b, 0x2f64, 0x2f4c, 0x2f35, 0x2f1e, 0x2f06, 0x2eef, 0x2ed8, 
+  0x2ec0, 0x2ea9, 0x2e91, 0x2e7a, 0x2e63, 0x2e4b, 0x2e34, 0x2e1c, 
+  0x2e05, 0x2ded, 0x2dd6, 0x2dbe, 0x2da7, 0x2d8f, 0x2d78, 0x2d60, 
+  0x2d49, 0x2d31, 0x2d1a, 0x2d02, 0x2ceb, 0x2cd3, 0x2cbc, 0x2ca4, 
+  0x2c8d, 0x2c75, 0x2c5e, 0x2c46, 0x2c2e, 0x2c17, 0x2bff, 0x2be8, 
+  0x2bd0, 0x2bb8, 0x2ba1, 0x2b89, 0x2b71, 0x2b5a, 0x2b42, 0x2b2b, 
+  0x2b13, 0x2afb, 0x2ae4, 0x2acc, 0x2ab4, 0x2a9c, 0x2a85, 0x2a6d, 
+  0x2a55, 0x2a3e, 0x2a26, 0x2a0e, 0x29f6, 0x29df, 0x29c7, 0x29af, 
+  0x2997, 0x2980, 0x2968, 0x2950, 0x2938, 0x2920, 0x2909, 0x28f1, 
+  0x28d9, 0x28c1, 0x28a9, 0x2892, 0x287a, 0x2862, 0x284a, 0x2832, 
+  0x281a, 0x2802, 0x27eb, 0x27d3, 0x27bb, 0x27a3, 0x278b, 0x2773, 
+  0x275b, 0x2743, 0x272b, 0x2713, 0x26fb, 0x26e4, 0x26cc, 0x26b4, 
+  0x269c, 0x2684, 0x266c, 0x2654, 0x263c, 0x2624, 0x260c, 0x25f4, 
+  0x25dc, 0x25c4, 0x25ac, 0x2594, 0x257c, 0x2564, 0x254c, 0x2534, 
+  0x251c, 0x2503, 0x24eb, 0x24d3, 0x24bb, 0x24a3, 0x248b, 0x2473, 
+  0x245b, 0x2443, 0x242b, 0x2413, 0x23fa, 0x23e2, 0x23ca, 0x23b2, 
+  0x239a, 0x2382, 0x236a, 0x2352, 0x2339, 0x2321, 0x2309, 0x22f1, 
+  0x22d9, 0x22c0, 0x22a8, 0x2290, 0x2278, 0x2260, 0x2247, 0x222f, 
+  0x2217, 0x21ff, 0x21e7, 0x21ce, 0x21b6, 0x219e, 0x2186, 0x216d, 
+  0x2155, 0x213d, 0x2125, 0x210c, 0x20f4, 0x20dc, 0x20c3, 0x20ab, 
+  0x2093, 0x207a, 0x2062, 0x204a, 0x2032, 0x2019, 0x2001, 0x1fe9, 
+  0x1fd0, 0x1fb8, 0x1f9f, 0x1f87, 0x1f6f, 0x1f56, 0x1f3e, 0x1f26, 
+  0x1f0d, 0x1ef5, 0x1edd, 0x1ec4, 0x1eac, 0x1e93, 0x1e7b, 0x1e62, 
+  0x1e4a, 0x1e32, 0x1e19, 0x1e01, 0x1de8, 0x1dd0, 0x1db7, 0x1d9f, 
+  0x1d87, 0x1d6e, 0x1d56, 0x1d3d, 0x1d25, 0x1d0c, 0x1cf4, 0x1cdb, 
+  0x1cc3, 0x1caa, 0x1c92, 0x1c79, 0x1c61, 0x1c48, 0x1c30, 0x1c17, 
+  0x1bff, 0x1be6, 0x1bce, 0x1bb5, 0x1b9d, 0x1b84, 0x1b6c, 0x1b53, 
+  0x1b3a, 0x1b22, 0x1b09, 0x1af1, 0x1ad8, 0x1ac0, 0x1aa7, 0x1a8e, 
+  0x1a76, 0x1a5d, 0x1a45, 0x1a2c, 0x1a13, 0x19fb, 0x19e2, 0x19ca, 
+  0x19b1, 0x1998, 0x1980, 0x1967, 0x194e, 0x1936, 0x191d, 0x1905, 
+  0x18ec, 0x18d3, 0x18bb, 0x18a2, 0x1889, 0x1871, 0x1858, 0x183f, 
+  0x1827, 0x180e, 0x17f5, 0x17dd, 0x17c4, 0x17ab, 0x1792, 0x177a, 
+  0x1761, 0x1748, 0x1730, 0x1717, 0x16fe, 0x16e5, 0x16cd, 0x16b4, 
+  0x169b, 0x1682, 0x166a, 0x1651, 0x1638, 0x161f, 0x1607, 0x15ee, 
+  0x15d5, 0x15bc, 0x15a4, 0x158b, 0x1572, 0x1559, 0x1541, 0x1528, 
+  0x150f, 0x14f6, 0x14dd, 0x14c5, 0x14ac, 0x1493, 0x147a, 0x1461, 
+  0x1449, 0x1430, 0x1417, 0x13fe, 0x13e5, 0x13cc, 0x13b4, 0x139b, 
+  0x1382, 0x1369, 0x1350, 0x1337, 0x131f, 0x1306, 0x12ed, 0x12d4, 
+  0x12bb, 0x12a2, 0x1289, 0x1271, 0x1258, 0x123f, 0x1226, 0x120d, 
+  0x11f4, 0x11db, 0x11c2, 0x11a9, 0x1191, 0x1178, 0x115f, 0x1146, 
+  0x112d, 0x1114, 0x10fb, 0x10e2, 0x10c9, 0x10b0, 0x1098, 0x107f, 
+  0x1066, 0x104d, 0x1034, 0x101b, 0x1002, 0xfe9, 0xfd0, 0xfb7, 
+  0xf9e, 0xf85, 0xf6c, 0xf53, 0xf3a, 0xf21, 0xf08, 0xef0, 
+  0xed7, 0xebe, 0xea5, 0xe8c, 0xe73, 0xe5a, 0xe41, 0xe28, 
+  0xe0f, 0xdf6, 0xddd, 0xdc4, 0xdab, 0xd92, 0xd79, 0xd60, 
+  0xd47, 0xd2e, 0xd15, 0xcfc, 0xce3, 0xcca, 0xcb1, 0xc98, 
+  0xc7f, 0xc66, 0xc4d, 0xc34, 0xc1b, 0xc02, 0xbe9, 0xbd0, 
+  0xbb7, 0xb9e, 0xb85, 0xb6c, 0xb53, 0xb3a, 0xb20, 0xb07, 
+  0xaee, 0xad5, 0xabc, 0xaa3, 0xa8a, 0xa71, 0xa58, 0xa3f, 
+  0xa26, 0xa0d, 0x9f4, 0x9db, 0x9c2, 0x9a9, 0x990, 0x977, 
+  0x95e, 0x944, 0x92b, 0x912, 0x8f9, 0x8e0, 0x8c7, 0x8ae, 
+  0x895, 0x87c, 0x863, 0x84a, 0x831, 0x818, 0x7fe, 0x7e5, 
+  0x7cc, 0x7b3, 0x79a, 0x781, 0x768, 0x74f, 0x736, 0x71d, 
+  0x704, 0x6ea, 0x6d1, 0x6b8, 0x69f, 0x686, 0x66d, 0x654, 
+  0x63b, 0x622, 0x609, 0x5ef, 0x5d6, 0x5bd, 0x5a4, 0x58b, 
+  0x572, 0x559, 0x540, 0x527, 0x50d, 0x4f4, 0x4db, 0x4c2, 
+  0x4a9, 0x490, 0x477, 0x45e, 0x445, 0x42b, 0x412, 0x3f9, 
+  0x3e0, 0x3c7, 0x3ae, 0x395, 0x37c, 0x362, 0x349, 0x330, 
+  0x317, 0x2fe, 0x2e5, 0x2cc, 0x2b3, 0x299, 0x280, 0x267, 
+  0x24e, 0x235, 0x21c, 0x203, 0x1ea, 0x1d0, 0x1b7, 0x19e, 
+  0x185, 0x16c, 0x153, 0x13a, 0x121, 0x107, 0xee, 0xd5, 
+  0xbc, 0xa3, 0x8a, 0x71, 0x57, 0x3e, 0x25, 0xc, 
+ 
+}; 
+ 
+/**  
+ * @brief  Initialization function for the Q15 DCT4/IDCT4. 
+ * @param[in,out] *S         points to an instance of Q15 DCT4/IDCT4 structure. 
+ * @param[in]     *S_RFFT    points to an instance of Q15 RFFT/RIFFT structure. 
+ * @param[in]     *S_CFFT    points to an instance of Q15 CFFT/CIFFT structure. 
+ * @param[in]     N          length of the DCT4. 
+ * @param[in]     Nby2       half of the length of the DCT4. 
+ * @param[in]     normalize  normalizing factor. 
+ * @return  	  arm_status function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>N</code> is not a supported transform length. 
+ * \par Supported lengths:  
+ * <code>N</code> must be 128, 512 or 2048; these are the only lengths for which weight and cosine factor tables exist.  
+ * For any other value the scalar and FFT-instance fields of <code>S</code> are still written, but the table pointers  
+ * of <code>S</code> are left untouched and the RFFT instance is not initialized.  
+ * \par  
+ * When <code>N</code> is supported, the value returned is the status reported by the RFFT initialization.  
+ * \par Normalizing factor:  
+ * The normalizing factor is <code>sqrt(2/N)</code>, which depends on the size of transform <code>N</code>.  
+ * Normalizing factors in 1.15 format are mentioned in the table below for different DCT sizes:  
+ * \image html dct4NormalizingQ15Table.gif  
+ */ 
+ 
+arm_status arm_dct4_init_q15( 
+  arm_dct4_instance_q15 * S, 
+  arm_rfft_instance_q15 * S_RFFT, 
+  arm_cfft_radix4_instance_q15 * S_CFFT, 
+  uint16_t N, 
+  uint16_t Nby2, 
+  q15_t normalize) 
+{ 
+  /* Assume success until the length check or the RFFT init says otherwise */ 
+  arm_status status = ARM_MATH_SUCCESS; 
+ 
+  /* Record the transform geometry and scaling in the instance */ 
+  S->N = N; 
+  S->Nby2 = Nby2; 
+  S->normalize = normalize; 
+ 
+  /* Attach the caller-supplied real and complex FFT instances */ 
+  S->pRfft = S_RFFT; 
+  S->pCfft = S_CFFT; 
+ 
+  /* Select the weight and cosine factor tables matching the length. 
+   * The tables are const; the casts only satisfy the instance field types. */ 
+  switch (N) 
+  { 
+  case 2048u: 
+    S->pTwiddle = (q15_t *) WeightsQ15_2048; 
+    S->pCosFactor = (q15_t *) cos_factorsQ15_2048; 
+    break; 
+  case 512u: 
+    S->pTwiddle = (q15_t *) WeightsQ15_512; 
+    S->pCosFactor = (q15_t *) cos_factorsQ15_512; 
+    break; 
+  case 128u: 
+    S->pTwiddle = (q15_t *) WeightsQ15_128; 
+    S->pCosFactor = (q15_t *) cos_factorsQ15_128; 
+    break; 
+  default: 
+    /* No tables exist for this length */ 
+    status = ARM_MATH_ARGUMENT_ERROR; 
+    break; 
+  } 
+ 
+  /* Initialize the RFFT/RIFFT only for a supported length, and report its 
+   * status instead of discarding it (forward transform, bit reversal on). */ 
+  if (status == ARM_MATH_SUCCESS) 
+  { 
+    status = arm_rfft_init_q15(S->pRfft, S->pCfft, S->N, 0u, 1u); 
+  } 
+ 
+  /* return the status of DCT4 Init function */ 
+  return (status); 
+} 
+ 
+/**  
+   * @} end of DCT4_IDCT4 group  
+   */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/TransformFunctions/arm_dct4_init_q31.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,2195 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:	    arm_dct4_init_q31.c  
+*  
+* Description:	Initialization function of DCT-4 & IDCT4 Q31  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+* -------------------------------------------------------------------- */ 
+ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupTransforms  
+ */ 
+ 
+/**  
+ * @addtogroup DCT4_IDCT4  
+ * @{  
+ */ 
+ 
+/*  
+* @brief  Weights Table  
+*/ 
+ 
+/**  
+* \par  
+* Weights tables are generated using the formula : <pre>weights[n] = e^(-j*n*pi/(2*N))</pre>  
+* \par  
+* C command to generate the table  
+* <pre>  
+* for(i = 0; i< N; i++)  
+* {  
+*   weights[2*i]= cos(i*c);  
+*   weights[(2*i)+1]= -sin(i * c);  
+* } </pre>  
+* \par  
+* where <code>N</code> is the Number of weights to be calculated and <code>c</code> is <code>pi/(2*N)</code>  
+* \par  
+* Convert the output to q31 format by multiplying by 2^31 and saturating if required.  
+* \par  
+* In the tables below the real and imaginary values are placed alternately, hence the  
+* array length is <code>2*N</code>.  
+*/ 
+ 
+static const q31_t WeightsQ31_128[256] = { /* DCT4 length N = 128: 128 complex weights stored as interleaved (real, imaginary) q31 pairs */
+  0x7fffffff, 0x0, 0x7ffd885a, 0xfe6de2e0, 0x7ff62182, 0xfcdbd541, 0x7fe9cbc0, /* entry 0: cos(0) = 1.0 saturates to 0x7fffffff, -sin(0) = 0 */
+  0xfb49e6a3, 
+  0x7fd8878e, 0xf9b82684, 0x7fc25596, 0xf826a462, 0x7fa736b4, 0xf6956fb7, 
+  0x7f872bf3, 0xf50497fb, 
+  0x7f62368f, 0xf3742ca2, 0x7f3857f6, 0xf1e43d1c, 0x7f0991c4, 0xf054d8d5, 
+  0x7ed5e5c6, 0xeec60f31, 
+  0x7e9d55fc, 0xed37ef91, 0x7e5fe493, 0xebaa894f, 0x7e1d93ea, 0xea1debbb, 
+  0x7dd6668f, 0xe8922622, 
+  0x7d8a5f40, 0xe70747c4, 0x7d3980ec, 0xe57d5fda, 0x7ce3ceb2, 0xe3f47d96, 
+  0x7c894bde, 0xe26cb01b, 
+  0x7c29fbee, 0xe0e60685, 0x7bc5e290, 0xdf608fe4, 0x7b5d039e, 0xdddc5b3b, 
+  0x7aef6323, 0xdc597781, 
+  0x7a7d055b, 0xdad7f3a2, 0x7a05eead, 0xd957de7a, 0x798a23b1, 0xd7d946d8, 
+  0x7909a92d, 0xd65c3b7b, 
+  0x78848414, 0xd4e0cb15, 0x77fab989, 0xd3670446, 0x776c4edb, 0xd1eef59e, 
+  0x76d94989, 0xd078ad9e, 
+  0x7641af3d, 0xcf043ab3, 0x75a585cf, 0xcd91ab39, 0x7504d345, 0xcc210d79, 
+  0x745f9dd1, 0xcab26fa9, 
+  0x73b5ebd1, 0xc945dfec, 0x7307c3d0, 0xc7db6c50, 0x72552c85, 0xc67322ce, 
+  0x719e2cd2, 0xc50d1149, 
+  0x70e2cbc6, 0xc3a94590, 0x7023109a, 0xc247cd5a, 0x6f5f02b2, 0xc0e8b648, 
+  0x6e96a99d, 0xbf8c0de3, 
+  0x6dca0d14, 0xbe31e19b, 0x6cf934fc, 0xbcda3ecb, 0x6c242960, 0xbb8532b0, 
+  0x6b4af279, 0xba32ca71, 
+  0x6a6d98a4, 0xb8e31319, 0x698c246c, 0xb796199b, 0x68a69e81, 0xb64beacd, 
+  0x67bd0fbd, 0xb5049368, 
+  0x66cf8120, 0xb3c0200c, 0x65ddfbd3, 0xb27e9d3c, 0x64e88926, 0xb140175b, 
+  0x63ef3290, 0xb0049ab3, 
+  0x62f201ac, 0xaecc336c, 0x61f1003f, 0xad96ed92, 0x60ec3830, 0xac64d510, 
+  0x5fe3b38d, 0xab35f5b5, 
+  0x5ed77c8a, 0xaa0a5b2e, 0x5dc79d7c, 0xa8e21106, 0x5cb420e0, 0xa7bd22ac, 
+  0x5b9d1154, 0xa69b9b68, 
+  0x5a82799a, 0xa57d8666, 0x59646498, 0xa462eeac, 0x5842dd54, 0xa34bdf20, 
+  0x571deefa, 0xa2386284, 
+  0x55f5a4d2, 0xa1288376, 0x54ca0a4b, 0xa01c4c73, 0x539b2af0, 0x9f13c7d0, 
+  0x5269126e, 0x9e0effc1, 
+  0x5133cc94, 0x9d0dfe54, 0x4ffb654d, 0x9c10cd70, 0x4ebfe8a5, 0x9b1776da, 
+  0x4d8162c4, 0x9a22042d, 
+  0x4c3fdff4, 0x99307ee0, 0x4afb6c98, 0x9842f043, 0x49b41533, 0x9759617f, 
+  0x4869e665, 0x9673db94, 
+  0x471cece7, 0x9592675c, 0x45cd358f, 0x94b50d87, 0x447acd50, 0x93dbd6a0, 
+  0x4325c135, 0x9306cb04, 
+  0x41ce1e65, 0x9235f2ec, 0x4073f21d, 0x91695663, 0x3f1749b8, 0x90a0fd4e, 
+  0x3db832a6, 0x8fdcef66, 
+  0x3c56ba70, 0x8f1d343a, 0x3af2eeb7, 0x8e61d32e, 0x398cdd32, 0x8daad37b, 
+  0x382493b0, 0x8cf83c30, 
+  0x36ba2014, 0x8c4a142f, 0x354d9057, 0x8ba0622f, 0x33def287, 0x8afb2cbb, 
+  0x326e54c7, 0x8a5a7a31, 
+  0x30fbc54d, 0x89be50c3, 0x2f875262, 0x8926b677, 0x2e110a62, 0x8893b125, 
+  0x2c98fbba, 0x88054677, 
+  0x2b1f34eb, 0x877b7bec, 0x29a3c485, 0x86f656d3, 0x2826b928, 0x8675dc4f, 
+  0x26a82186, 0x85fa1153, 
+  0x25280c5e, 0x8582faa5, 0x23a6887f, 0x85109cdd, 0x2223a4c5, 0x84a2fc62, 
+  0x209f701c, 0x843a1d70, 
+  0x1f19f97b, 0x83d60412, 0x1d934fe5, 0x8376b422, 0x1c0b826a, 0x831c314e, 
+  0x1a82a026, 0x82c67f14, 
+  0x18f8b83c, 0x8275a0c0, 0x176dd9de, 0x82299971, 0x15e21445, 0x81e26c16, 
+  0x145576b1, 0x81a01b6d, 
+  0x12c8106f, 0x8162aa04, 0x1139f0cf, 0x812a1a3a, 0xfab272b, 0x80f66e3c, 
+  0xe1bc2e4, 0x80c7a80a, 
+  0xc8bd35e, 0x809dc971, 0xafb6805, 0x8078d40d, 0x96a9049, 0x8058c94c, 
+  0x7d95b9e, 0x803daa6a, 
+  0x647d97c, 0x80277872, 0x4b6195d, 0x80163440, 0x3242abf, 0x8009de7e, 
+  0x1921d20, 0x800277a6, 
+}; 
+ 
+static const q31_t WeightsQ31_512[1024] = { /* DCT4 length N = 512: 512 complex weights stored as interleaved (real, imaginary) q31 pairs */
+  0x7fffffff, 0x0, 0x7fffd886, 0xff9b781d, 0x7fff6216, 0xff36f078, 0x7ffe9cb2, /* entry 0: cos(0) = 1.0 saturates to 0x7fffffff, -sin(0) = 0 */
+  0xfed2694f, 
+  0x7ffd885a, 0xfe6de2e0, 0x7ffc250f, 0xfe095d69, 0x7ffa72d1, 0xfda4d929, 
+  0x7ff871a2, 0xfd40565c, 
+  0x7ff62182, 0xfcdbd541, 0x7ff38274, 0xfc775616, 0x7ff09478, 0xfc12d91a, 
+  0x7fed5791, 0xfbae5e89, 
+  0x7fe9cbc0, 0xfb49e6a3, 0x7fe5f108, 0xfae571a4, 0x7fe1c76b, 0xfa80ffcb, 
+  0x7fdd4eec, 0xfa1c9157, 
+  0x7fd8878e, 0xf9b82684, 0x7fd37153, 0xf953bf91, 0x7fce0c3e, 0xf8ef5cbb, 
+  0x7fc85854, 0xf88afe42, 
+  0x7fc25596, 0xf826a462, 0x7fbc040a, 0xf7c24f59, 0x7fb563b3, 0xf75dff66, 
+  0x7fae7495, 0xf6f9b4c6, 
+  0x7fa736b4, 0xf6956fb7, 0x7f9faa15, 0xf6313077, 0x7f97cebd, 0xf5ccf743, 
+  0x7f8fa4b0, 0xf568c45b, 
+  0x7f872bf3, 0xf50497fb, 0x7f7e648c, 0xf4a07261, 0x7f754e80, 0xf43c53cb, 
+  0x7f6be9d4, 0xf3d83c77, 
+  0x7f62368f, 0xf3742ca2, 0x7f5834b7, 0xf310248a, 0x7f4de451, 0xf2ac246e, 
+  0x7f434563, 0xf2482c8a, 
+  0x7f3857f6, 0xf1e43d1c, 0x7f2d1c0e, 0xf1805662, 0x7f2191b4, 0xf11c789a, 
+  0x7f15b8ee, 0xf0b8a401, 
+  0x7f0991c4, 0xf054d8d5, 0x7efd1c3c, 0xeff11753, 0x7ef05860, 0xef8d5fb8, 
+  0x7ee34636, 0xef29b243, 
+  0x7ed5e5c6, 0xeec60f31, 0x7ec8371a, 0xee6276bf, 0x7eba3a39, 0xedfee92b, 
+  0x7eabef2c, 0xed9b66b2, 
+  0x7e9d55fc, 0xed37ef91, 0x7e8e6eb2, 0xecd48407, 0x7e7f3957, 0xec71244f, 
+  0x7e6fb5f4, 0xec0dd0a8, 
+  0x7e5fe493, 0xebaa894f, 0x7e4fc53e, 0xeb474e81, 0x7e3f57ff, 0xeae4207a, 
+  0x7e2e9cdf, 0xea80ff7a, 
+  0x7e1d93ea, 0xea1debbb, 0x7e0c3d29, 0xe9bae57d, 0x7dfa98a8, 0xe957ecfb, 
+  0x7de8a670, 0xe8f50273, 
+  0x7dd6668f, 0xe8922622, 0x7dc3d90d, 0xe82f5844, 0x7db0fdf8, 0xe7cc9917, 
+  0x7d9dd55a, 0xe769e8d8, 
+  0x7d8a5f40, 0xe70747c4, 0x7d769bb5, 0xe6a4b616, 0x7d628ac6, 0xe642340d, 
+  0x7d4e2c7f, 0xe5dfc1e5, 
+  0x7d3980ec, 0xe57d5fda, 0x7d24881b, 0xe51b0e2a, 0x7d0f4218, 0xe4b8cd11, 
+  0x7cf9aef0, 0xe4569ccb, 
+  0x7ce3ceb2, 0xe3f47d96, 0x7ccda169, 0xe3926fad, 0x7cb72724, 0xe330734d, 
+  0x7ca05ff1, 0xe2ce88b3, 
+  0x7c894bde, 0xe26cb01b, 0x7c71eaf9, 0xe20ae9c1, 0x7c5a3d50, 0xe1a935e2, 
+  0x7c4242f2, 0xe14794ba, 
+  0x7c29fbee, 0xe0e60685, 0x7c116853, 0xe0848b7f, 0x7bf88830, 0xe02323e5, 
+  0x7bdf5b94, 0xdfc1cff3, 
+  0x7bc5e290, 0xdf608fe4, 0x7bac1d31, 0xdeff63f4, 0x7b920b89, 0xde9e4c60, 
+  0x7b77ada8, 0xde3d4964, 
+  0x7b5d039e, 0xdddc5b3b, 0x7b420d7a, 0xdd7b8220, 0x7b26cb4f, 0xdd1abe51, 
+  0x7b0b3d2c, 0xdcba1008, 
+  0x7aef6323, 0xdc597781, 0x7ad33d45, 0xdbf8f4f8, 0x7ab6cba4, 0xdb9888a8, 
+  0x7a9a0e50, 0xdb3832cd, 
+  0x7a7d055b, 0xdad7f3a2, 0x7a5fb0d8, 0xda77cb63, 0x7a4210d8, 0xda17ba4a, 
+  0x7a24256f, 0xd9b7c094, 
+  0x7a05eead, 0xd957de7a, 0x79e76ca7, 0xd8f81439, 0x79c89f6e, 0xd898620c, 
+  0x79a98715, 0xd838c82d, 
+  0x798a23b1, 0xd7d946d8, 0x796a7554, 0xd779de47, 0x794a7c12, 0xd71a8eb5, 
+  0x792a37fe, 0xd6bb585e, 
+  0x7909a92d, 0xd65c3b7b, 0x78e8cfb2, 0xd5fd3848, 0x78c7aba2, 0xd59e4eff, 
+  0x78a63d11, 0xd53f7fda, 
+  0x78848414, 0xd4e0cb15, 0x786280bf, 0xd48230e9, 0x78403329, 0xd423b191, 
+  0x781d9b65, 0xd3c54d47, 
+  0x77fab989, 0xd3670446, 0x77d78daa, 0xd308d6c7, 0x77b417df, 0xd2aac504, 
+  0x7790583e, 0xd24ccf39, 
+  0x776c4edb, 0xd1eef59e, 0x7747fbce, 0xd191386e, 0x77235f2d, 0xd13397e2, 
+  0x76fe790e, 0xd0d61434, 
+  0x76d94989, 0xd078ad9e, 0x76b3d0b4, 0xd01b6459, 0x768e0ea6, 0xcfbe389f, 
+  0x76680376, 0xcf612aaa, 
+  0x7641af3d, 0xcf043ab3, 0x761b1211, 0xcea768f2, 0x75f42c0b, 0xce4ab5a2, 
+  0x75ccfd42, 0xcdee20fc, 
+  0x75a585cf, 0xcd91ab39, 0x757dc5ca, 0xcd355491, 0x7555bd4c, 0xccd91d3d, 
+  0x752d6c6c, 0xcc7d0578, 
+  0x7504d345, 0xcc210d79, 0x74dbf1ef, 0xcbc53579, 0x74b2c884, 0xcb697db0, 
+  0x7489571c, 0xcb0de658, 
+  0x745f9dd1, 0xcab26fa9, 0x74359cbd, 0xca5719db, 0x740b53fb, 0xc9fbe527, 
+  0x73e0c3a3, 0xc9a0d1c5, 
+  0x73b5ebd1, 0xc945dfec, 0x738acc9e, 0xc8eb0fd6, 0x735f6626, 0xc89061ba, 
+  0x7333b883, 0xc835d5d0, 
+  0x7307c3d0, 0xc7db6c50, 0x72db8828, 0xc7812572, 0x72af05a7, 0xc727016d, 
+  0x72823c67, 0xc6cd0079, 
+  0x72552c85, 0xc67322ce, 0x7227d61c, 0xc61968a2, 0x71fa3949, 0xc5bfd22e, 
+  0x71cc5626, 0xc5665fa9, 
+  0x719e2cd2, 0xc50d1149, 0x716fbd68, 0xc4b3e746, 0x71410805, 0xc45ae1d7, 
+  0x71120cc5, 0xc4020133, 
+  0x70e2cbc6, 0xc3a94590, 0x70b34525, 0xc350af26, 0x708378ff, 0xc2f83e2a, 
+  0x70536771, 0xc29ff2d4, 
+  0x7023109a, 0xc247cd5a, 0x6ff27497, 0xc1efcdf3, 0x6fc19385, 0xc197f4d4, 
+  0x6f906d84, 0xc1404233, 
+  0x6f5f02b2, 0xc0e8b648, 0x6f2d532c, 0xc0915148, 0x6efb5f12, 0xc03a1368, 
+  0x6ec92683, 0xbfe2fcdf, 
+  0x6e96a99d, 0xbf8c0de3, 0x6e63e87f, 0xbf3546a8, 0x6e30e34a, 0xbedea765, 
+  0x6dfd9a1c, 0xbe88304f, 
+  0x6dca0d14, 0xbe31e19b, 0x6d963c54, 0xbddbbb7f, 0x6d6227fa, 0xbd85be30, 
+  0x6d2dd027, 0xbd2fe9e2, 
+  0x6cf934fc, 0xbcda3ecb, 0x6cc45698, 0xbc84bd1f, 0x6c8f351c, 0xbc2f6513, 
+  0x6c59d0a9, 0xbbda36dd, 
+  0x6c242960, 0xbb8532b0, 0x6bee3f62, 0xbb3058c0, 0x6bb812d1, 0xbadba943, 
+  0x6b81a3cd, 0xba87246d, 
+  0x6b4af279, 0xba32ca71, 0x6b13fef5, 0xb9de9b83, 0x6adcc964, 0xb98a97d8, 
+  0x6aa551e9, 0xb936bfa4, 
+  0x6a6d98a4, 0xb8e31319, 0x6a359db9, 0xb88f926d, 0x69fd614a, 0xb83c3dd1, 
+  0x69c4e37a, 0xb7e9157a, 
+  0x698c246c, 0xb796199b, 0x69532442, 0xb7434a67, 0x6919e320, 0xb6f0a812, 
+  0x68e06129, 0xb69e32cd, 
+  0x68a69e81, 0xb64beacd, 0x686c9b4b, 0xb5f9d043, 0x683257ab, 0xb5a7e362, 
+  0x67f7d3c5, 0xb556245e, 
+  0x67bd0fbd, 0xb5049368, 0x67820bb7, 0xb4b330b3, 0x6746c7d8, 0xb461fc70, 
+  0x670b4444, 0xb410f6d3, 
+  0x66cf8120, 0xb3c0200c, 0x66937e91, 0xb36f784f, 0x66573cbb, 0xb31effcc, 
+  0x661abbc5, 0xb2ceb6b5, 
+  0x65ddfbd3, 0xb27e9d3c, 0x65a0fd0b, 0xb22eb392, 0x6563bf92, 0xb1def9e9, 
+  0x6526438f, 0xb18f7071, 
+  0x64e88926, 0xb140175b, 0x64aa907f, 0xb0f0eeda, 0x646c59bf, 0xb0a1f71d, 
+  0x642de50d, 0xb0533055, 
+  0x63ef3290, 0xb0049ab3, 0x63b0426d, 0xafb63667, 0x637114cc, 0xaf6803a2, 
+  0x6331a9d4, 0xaf1a0293, 
+  0x62f201ac, 0xaecc336c, 0x62b21c7b, 0xae7e965b, 0x6271fa69, 0xae312b92, 
+  0x62319b9d, 0xade3f33e, 
+  0x61f1003f, 0xad96ed92, 0x61b02876, 0xad4a1aba, 0x616f146c, 0xacfd7ae8, 
+  0x612dc447, 0xacb10e4b, 
+  0x60ec3830, 0xac64d510, 0x60aa7050, 0xac18cf69, 0x60686ccf, 0xabccfd83, 
+  0x60262dd6, 0xab815f8d, 
+  0x5fe3b38d, 0xab35f5b5, 0x5fa0fe1f, 0xaaeac02c, 0x5f5e0db3, 0xaa9fbf1e, 
+  0x5f1ae274, 0xaa54f2ba, 
+  0x5ed77c8a, 0xaa0a5b2e, 0x5e93dc1f, 0xa9bff8a8, 0x5e50015d, 0xa975cb57, 
+  0x5e0bec6e, 0xa92bd367, 
+  0x5dc79d7c, 0xa8e21106, 0x5d8314b1, 0xa8988463, 0x5d3e5237, 0xa84f2daa, 
+  0x5cf95638, 0xa8060d08, 
+  0x5cb420e0, 0xa7bd22ac, 0x5c6eb258, 0xa7746ec0, 0x5c290acc, 0xa72bf174, 
+  0x5be32a67, 0xa6e3aaf2, 
+  0x5b9d1154, 0xa69b9b68, 0x5b56bfbd, 0xa653c303, 0x5b1035cf, 0xa60c21ee, 
+  0x5ac973b5, 0xa5c4b855, 
+  0x5a82799a, 0xa57d8666, 0x5a3b47ab, 0xa5368c4b, 0x59f3de12, 0xa4efca31, 
+  0x59ac3cfd, 0xa4a94043, 
+  0x59646498, 0xa462eeac, 0x591c550e, 0xa41cd599, 0x58d40e8c, 0xa3d6f534, 
+  0x588b9140, 0xa3914da8, 
+  0x5842dd54, 0xa34bdf20, 0x57f9f2f8, 0xa306a9c8, 0x57b0d256, 0xa2c1adc9, 
+  0x57677b9d, 0xa27ceb4f, 
+  0x571deefa, 0xa2386284, 0x56d42c99, 0xa1f41392, 0x568a34a9, 0xa1affea3, 
+  0x56400758, 0xa16c23e1, 
+  0x55f5a4d2, 0xa1288376, 0x55ab0d46, 0xa0e51d8c, 0x556040e2, 0xa0a1f24d, 
+  0x55153fd4, 0xa05f01e1, 
+  0x54ca0a4b, 0xa01c4c73, 0x547ea073, 0x9fd9d22a, 0x5433027d, 0x9f979331, 
+  0x53e73097, 0x9f558fb0, 
+  0x539b2af0, 0x9f13c7d0, 0x534ef1b5, 0x9ed23bb9, 0x53028518, 0x9e90eb94, 
+  0x52b5e546, 0x9e4fd78a, 
+  0x5269126e, 0x9e0effc1, 0x521c0cc2, 0x9dce6463, 0x51ced46e, 0x9d8e0597, 
+  0x518169a5, 0x9d4de385, 
+  0x5133cc94, 0x9d0dfe54, 0x50e5fd6d, 0x9cce562c, 0x5097fc5e, 0x9c8eeb34, 
+  0x5049c999, 0x9c4fbd93, 
+  0x4ffb654d, 0x9c10cd70, 0x4faccfab, 0x9bd21af3, 0x4f5e08e3, 0x9b93a641, 
+  0x4f0f1126, 0x9b556f81, 
+  0x4ebfe8a5, 0x9b1776da, 0x4e708f8f, 0x9ad9bc71, 0x4e210617, 0x9a9c406e, 
+  0x4dd14c6e, 0x9a5f02f5, 
+  0x4d8162c4, 0x9a22042d, 0x4d31494b, 0x99e5443b, 0x4ce10034, 0x99a8c345, 
+  0x4c9087b1, 0x996c816f, 
+  0x4c3fdff4, 0x99307ee0, 0x4bef092d, 0x98f4bbbc, 0x4b9e0390, 0x98b93828, 
+  0x4b4ccf4d, 0x987df449, 
+  0x4afb6c98, 0x9842f043, 0x4aa9dba2, 0x98082c3b, 0x4a581c9e, 0x97cda855, 
+  0x4a062fbd, 0x979364b5, 
+  0x49b41533, 0x9759617f, 0x4961cd33, 0x971f9ed7, 0x490f57ee, 0x96e61ce0, 
+  0x48bcb599, 0x96acdbbe, 
+  0x4869e665, 0x9673db94, 0x4816ea86, 0x963b1c86, 0x47c3c22f, 0x96029eb6, 
+  0x47706d93, 0x95ca6247, 
+  0x471cece7, 0x9592675c, 0x46c9405c, 0x955aae17, 0x46756828, 0x9523369c, 
+  0x4621647d, 0x94ec010b, 
+  0x45cd358f, 0x94b50d87, 0x4578db93, 0x947e5c33, 0x452456bd, 0x9447ed2f, 
+  0x44cfa740, 0x9411c09e, 
+  0x447acd50, 0x93dbd6a0, 0x4425c923, 0x93a62f57, 0x43d09aed, 0x9370cae4, 
+  0x437b42e1, 0x933ba968, 
+  0x4325c135, 0x9306cb04, 0x42d0161e, 0x92d22fd9, 0x427a41d0, 0x929dd806, 
+  0x42244481, 0x9269c3ac, 
+  0x41ce1e65, 0x9235f2ec, 0x4177cfb1, 0x920265e4, 0x4121589b, 0x91cf1cb6, 
+  0x40cab958, 0x919c1781, 
+  0x4073f21d, 0x91695663, 0x401d0321, 0x9136d97d, 0x3fc5ec98, 0x9104a0ee, 
+  0x3f6eaeb8, 0x90d2acd4, 
+  0x3f1749b8, 0x90a0fd4e, 0x3ebfbdcd, 0x906f927c, 0x3e680b2c, 0x903e6c7b, 
+  0x3e10320d, 0x900d8b69, 
+  0x3db832a6, 0x8fdcef66, 0x3d600d2c, 0x8fac988f, 0x3d07c1d6, 0x8f7c8701, 
+  0x3caf50da, 0x8f4cbadb, 
+  0x3c56ba70, 0x8f1d343a, 0x3bfdfecd, 0x8eedf33b, 0x3ba51e29, 0x8ebef7fb, 
+  0x3b4c18ba, 0x8e904298, 
+  0x3af2eeb7, 0x8e61d32e, 0x3a99a057, 0x8e33a9da, 0x3a402dd2, 0x8e05c6b7, 
+  0x39e6975e, 0x8dd829e4, 
+  0x398cdd32, 0x8daad37b, 0x3932ff87, 0x8d7dc399, 0x38d8fe93, 0x8d50fa59, 
+  0x387eda8e, 0x8d2477d8, 
+  0x382493b0, 0x8cf83c30, 0x37ca2a30, 0x8ccc477d, 0x376f9e46, 0x8ca099da, 
+  0x3714f02a, 0x8c753362, 
+  0x36ba2014, 0x8c4a142f, 0x365f2e3b, 0x8c1f3c5d, 0x36041ad9, 0x8bf4ac05, 
+  0x35a8e625, 0x8bca6343, 
+  0x354d9057, 0x8ba0622f, 0x34f219a8, 0x8b76a8e4, 0x34968250, 0x8b4d377c, 
+  0x343aca87, 0x8b240e11, 
+  0x33def287, 0x8afb2cbb, 0x3382fa88, 0x8ad29394, 0x3326e2c3, 0x8aaa42b4, 
+  0x32caab6f, 0x8a823a36, 
+  0x326e54c7, 0x8a5a7a31, 0x3211df04, 0x8a3302be, 0x31b54a5e, 0x8a0bd3f5, 
+  0x3158970e, 0x89e4edef, 
+  0x30fbc54d, 0x89be50c3, 0x309ed556, 0x8997fc8a, 0x3041c761, 0x8971f15a, 
+  0x2fe49ba7, 0x894c2f4c, 
+  0x2f875262, 0x8926b677, 0x2f29ebcc, 0x890186f2, 0x2ecc681e, 0x88dca0d3, 
+  0x2e6ec792, 0x88b80432, 
+  0x2e110a62, 0x8893b125, 0x2db330c7, 0x886fa7c2, 0x2d553afc, 0x884be821, 
+  0x2cf72939, 0x88287256, 
+  0x2c98fbba, 0x88054677, 0x2c3ab2b9, 0x87e2649b, 0x2bdc4e6f, 0x87bfccd7, 
+  0x2b7dcf17, 0x879d7f41, 
+  0x2b1f34eb, 0x877b7bec, 0x2ac08026, 0x8759c2ef, 0x2a61b101, 0x8738545e, 
+  0x2a02c7b8, 0x8717304e, 
+  0x29a3c485, 0x86f656d3, 0x2944a7a2, 0x86d5c802, 0x28e5714b, 0x86b583ee, 
+  0x288621b9, 0x86958aac, 
+  0x2826b928, 0x8675dc4f, 0x27c737d3, 0x865678eb, 0x27679df4, 0x86376092, 
+  0x2707ebc7, 0x86189359, 
+  0x26a82186, 0x85fa1153, 0x26483f6c, 0x85dbda91, 0x25e845b6, 0x85bdef28, 
+  0x2588349d, 0x85a04f28, 
+  0x25280c5e, 0x8582faa5, 0x24c7cd33, 0x8565f1b0, 0x24677758, 0x8549345c, 
+  0x24070b08, 0x852cc2bb, 
+  0x23a6887f, 0x85109cdd, 0x2345eff8, 0x84f4c2d4, 0x22e541af, 0x84d934b1, 
+  0x22847de0, 0x84bdf286, 
+  0x2223a4c5, 0x84a2fc62, 0x21c2b69c, 0x84885258, 0x2161b3a0, 0x846df477, 
+  0x21009c0c, 0x8453e2cf, 
+  0x209f701c, 0x843a1d70, 0x203e300d, 0x8420a46c, 0x1fdcdc1b, 0x840777d0, 
+  0x1f7b7481, 0x83ee97ad, 
+  0x1f19f97b, 0x83d60412, 0x1eb86b46, 0x83bdbd0e, 0x1e56ca1e, 0x83a5c2b0, 
+  0x1df5163f, 0x838e1507, 
+  0x1d934fe5, 0x8376b422, 0x1d31774d, 0x835fa00f, 0x1ccf8cb3, 0x8348d8dc, 
+  0x1c6d9053, 0x83325e97, 
+  0x1c0b826a, 0x831c314e, 0x1ba96335, 0x83065110, 0x1b4732ef, 0x82f0bde8, 
+  0x1ae4f1d6, 0x82db77e5, 
+  0x1a82a026, 0x82c67f14, 0x1a203e1b, 0x82b1d381, 0x19bdcbf3, 0x829d753a, 
+  0x195b49ea, 0x8289644b, 
+  0x18f8b83c, 0x8275a0c0, 0x18961728, 0x82622aa6, 0x183366e9, 0x824f0208, 
+  0x17d0a7bc, 0x823c26f3, 
+  0x176dd9de, 0x82299971, 0x170afd8d, 0x82175990, 0x16a81305, 0x82056758, 
+  0x16451a83, 0x81f3c2d7, 
+  0x15e21445, 0x81e26c16, 0x157f0086, 0x81d16321, 0x151bdf86, 0x81c0a801, 
+  0x14b8b17f, 0x81b03ac2, 
+  0x145576b1, 0x81a01b6d, 0x13f22f58, 0x81904a0c, 0x138edbb1, 0x8180c6a9, 
+  0x132b7bf9, 0x8171914e, 
+  0x12c8106f, 0x8162aa04, 0x1264994e, 0x815410d4, 0x120116d5, 0x8145c5c7, 
+  0x119d8941, 0x8137c8e6, 
+  0x1139f0cf, 0x812a1a3a, 0x10d64dbd, 0x811cb9ca, 0x1072a048, 0x810fa7a0, 
+  0x100ee8ad, 0x8102e3c4, 
+  0xfab272b, 0x80f66e3c, 0xf475bff, 0x80ea4712, 0xee38766, 0x80de6e4c, 
+  0xe7fa99e, 0x80d2e3f2, 
+  0xe1bc2e4, 0x80c7a80a, 0xdb7d376, 0x80bcba9d, 0xd53db92, 0x80b21baf, 
+  0xcefdb76, 0x80a7cb49, 
+  0xc8bd35e, 0x809dc971, 0xc27c389, 0x8094162c, 0xbc3ac35, 0x808ab180, 
+  0xb5f8d9f, 0x80819b74, 
+  0xafb6805, 0x8078d40d, 0xa973ba5, 0x80705b50, 0xa3308bd, 0x80683143, 
+  0x9cecf89, 0x806055eb, 
+  0x96a9049, 0x8058c94c, 0x9064b3a, 0x80518b6b, 0x8a2009a, 0x804a9c4d, 
+  0x83db0a7, 0x8043fbf6, 
+  0x7d95b9e, 0x803daa6a, 0x77501be, 0x8037a7ac, 0x710a345, 0x8031f3c2, 
+  0x6ac406f, 0x802c8ead, 
+  0x647d97c, 0x80277872, 0x5e36ea9, 0x8022b114, 0x57f0035, 0x801e3895, 
+  0x51a8e5c, 0x801a0ef8, 
+  0x4b6195d, 0x80163440, 0x451a177, 0x8012a86f, 0x3ed26e6, 0x800f6b88, 
+  0x388a9ea, 0x800c7d8c, 
+  0x3242abf, 0x8009de7e, 0x2bfa9a4, 0x80078e5e, 0x25b26d7, 0x80058d2f, 
+  0x1f6a297, 0x8003daf1, 
+  0x1921d20, 0x800277a6, 0x12d96b1, 0x8001634e, 0xc90f88, 0x80009dea, 
+  0x6487e3, 0x8000277a, 
+}; 
+ 
+static const q31_t WeightsQ31_2048[4096] = { 
+  0x7fffffff, 0x0, 0x7ffffd88, 0xffe6de05, 0x7ffff621, 0xffcdbc0b, 0x7fffe9cb, 
+  0xffb49a12, 
+  0x7fffd886, 0xff9b781d, 0x7fffc251, 0xff82562c, 0x7fffa72c, 0xff69343f, 
+  0x7fff8719, 0xff501258, 
+  0x7fff6216, 0xff36f078, 0x7fff3824, 0xff1dcea0, 0x7fff0943, 0xff04acd0, 
+  0x7ffed572, 0xfeeb8b0a, 
+  0x7ffe9cb2, 0xfed2694f, 0x7ffe5f03, 0xfeb947a0, 0x7ffe1c65, 0xfea025fd, 
+  0x7ffdd4d7, 0xfe870467, 
+  0x7ffd885a, 0xfe6de2e0, 0x7ffd36ee, 0xfe54c169, 0x7ffce093, 0xfe3ba002, 
+  0x7ffc8549, 0xfe227eac, 
+  0x7ffc250f, 0xfe095d69, 0x7ffbbfe6, 0xfdf03c3a, 0x7ffb55ce, 0xfdd71b1e, 
+  0x7ffae6c7, 0xfdbdfa18, 
+  0x7ffa72d1, 0xfda4d929, 0x7ff9f9ec, 0xfd8bb850, 0x7ff97c18, 0xfd729790, 
+  0x7ff8f954, 0xfd5976e9, 
+  0x7ff871a2, 0xfd40565c, 0x7ff7e500, 0xfd2735ea, 0x7ff75370, 0xfd0e1594, 
+  0x7ff6bcf0, 0xfcf4f55c, 
+  0x7ff62182, 0xfcdbd541, 0x7ff58125, 0xfcc2b545, 0x7ff4dbd9, 0xfca9956a, 
+  0x7ff4319d, 0xfc9075af, 
+  0x7ff38274, 0xfc775616, 0x7ff2ce5b, 0xfc5e36a0, 0x7ff21553, 0xfc45174e, 
+  0x7ff1575d, 0xfc2bf821, 
+  0x7ff09478, 0xfc12d91a, 0x7fefcca4, 0xfbf9ba39, 0x7feeffe1, 0xfbe09b80, 
+  0x7fee2e30, 0xfbc77cf0, 
+  0x7fed5791, 0xfbae5e89, 0x7fec7c02, 0xfb95404d, 0x7feb9b85, 0xfb7c223d, 
+  0x7feab61a, 0xfb630459, 
+  0x7fe9cbc0, 0xfb49e6a3, 0x7fe8dc78, 0xfb30c91b, 0x7fe7e841, 0xfb17abc2, 
+  0x7fe6ef1c, 0xfafe8e9b, 
+  0x7fe5f108, 0xfae571a4, 0x7fe4ee06, 0xfacc54e0, 0x7fe3e616, 0xfab3384f, 
+  0x7fe2d938, 0xfa9a1bf3, 
+  0x7fe1c76b, 0xfa80ffcb, 0x7fe0b0b1, 0xfa67e3da, 0x7fdf9508, 0xfa4ec821, 
+  0x7fde7471, 0xfa35ac9f, 
+  0x7fdd4eec, 0xfa1c9157, 0x7fdc247a, 0xfa037648, 0x7fdaf519, 0xf9ea5b75, 
+  0x7fd9c0ca, 0xf9d140de, 
+  0x7fd8878e, 0xf9b82684, 0x7fd74964, 0xf99f0c68, 0x7fd6064c, 0xf985f28a, 
+  0x7fd4be46, 0xf96cd8ed, 
+  0x7fd37153, 0xf953bf91, 0x7fd21f72, 0xf93aa676, 0x7fd0c8a3, 0xf9218d9e, 
+  0x7fcf6ce8, 0xf908750a, 
+  0x7fce0c3e, 0xf8ef5cbb, 0x7fcca6a7, 0xf8d644b2, 0x7fcb3c23, 0xf8bd2cef, 
+  0x7fc9ccb2, 0xf8a41574, 
+  0x7fc85854, 0xf88afe42, 0x7fc6df08, 0xf871e759, 0x7fc560cf, 0xf858d0bb, 
+  0x7fc3dda9, 0xf83fba68, 
+  0x7fc25596, 0xf826a462, 0x7fc0c896, 0xf80d8ea9, 0x7fbf36aa, 0xf7f4793e, 
+  0x7fbd9fd0, 0xf7db6423, 
+  0x7fbc040a, 0xf7c24f59, 0x7fba6357, 0xf7a93ae0, 0x7fb8bdb8, 0xf79026b9, 
+  0x7fb7132b, 0xf77712e5, 
+  0x7fb563b3, 0xf75dff66, 0x7fb3af4e, 0xf744ec3b, 0x7fb1f5fc, 0xf72bd967, 
+  0x7fb037bf, 0xf712c6ea, 
+  0x7fae7495, 0xf6f9b4c6, 0x7facac7f, 0xf6e0a2fa, 0x7faadf7c, 0xf6c79188, 
+  0x7fa90d8e, 0xf6ae8071, 
+  0x7fa736b4, 0xf6956fb7, 0x7fa55aee, 0xf67c5f59, 0x7fa37a3c, 0xf6634f59, 
+  0x7fa1949e, 0xf64a3fb8, 
+  0x7f9faa15, 0xf6313077, 0x7f9dbaa0, 0xf6182196, 0x7f9bc640, 0xf5ff1318, 
+  0x7f99ccf4, 0xf5e604fc, 
+  0x7f97cebd, 0xf5ccf743, 0x7f95cb9a, 0xf5b3e9f0, 0x7f93c38c, 0xf59add02, 
+  0x7f91b694, 0xf581d07b, 
+  0x7f8fa4b0, 0xf568c45b, 0x7f8d8de1, 0xf54fb8a4, 0x7f8b7227, 0xf536ad56, 
+  0x7f895182, 0xf51da273, 
+  0x7f872bf3, 0xf50497fb, 0x7f850179, 0xf4eb8def, 0x7f82d214, 0xf4d28451, 
+  0x7f809dc5, 0xf4b97b21, 
+  0x7f7e648c, 0xf4a07261, 0x7f7c2668, 0xf4876a10, 0x7f79e35a, 0xf46e6231, 
+  0x7f779b62, 0xf4555ac5, 
+  0x7f754e80, 0xf43c53cb, 0x7f72fcb4, 0xf4234d45, 0x7f70a5fe, 0xf40a4735, 
+  0x7f6e4a5e, 0xf3f1419a, 
+  0x7f6be9d4, 0xf3d83c77, 0x7f698461, 0xf3bf37cb, 0x7f671a05, 0xf3a63398, 
+  0x7f64aabf, 0xf38d2fe0, 
+  0x7f62368f, 0xf3742ca2, 0x7f5fbd77, 0xf35b29e0, 0x7f5d3f75, 0xf342279b, 
+  0x7f5abc8a, 0xf32925d3, 
+  0x7f5834b7, 0xf310248a, 0x7f55a7fa, 0xf2f723c1, 0x7f531655, 0xf2de2379, 
+  0x7f507fc7, 0xf2c523b2, 
+  0x7f4de451, 0xf2ac246e, 0x7f4b43f2, 0xf29325ad, 0x7f489eaa, 0xf27a2771, 
+  0x7f45f47b, 0xf26129ba, 
+  0x7f434563, 0xf2482c8a, 0x7f409164, 0xf22f2fe1, 0x7f3dd87c, 0xf21633c0, 
+  0x7f3b1aad, 0xf1fd3829, 
+  0x7f3857f6, 0xf1e43d1c, 0x7f359057, 0xf1cb429a, 0x7f32c3d1, 0xf1b248a5, 
+  0x7f2ff263, 0xf1994f3d, 
+  0x7f2d1c0e, 0xf1805662, 0x7f2a40d2, 0xf1675e17, 0x7f2760af, 0xf14e665c, 
+  0x7f247ba5, 0xf1356f32, 
+  0x7f2191b4, 0xf11c789a, 0x7f1ea2dc, 0xf1038295, 0x7f1baf1e, 0xf0ea8d24, 
+  0x7f18b679, 0xf0d19848, 
+  0x7f15b8ee, 0xf0b8a401, 0x7f12b67c, 0xf09fb051, 0x7f0faf25, 0xf086bd39, 
+  0x7f0ca2e7, 0xf06dcaba, 
+  0x7f0991c4, 0xf054d8d5, 0x7f067bba, 0xf03be78a, 0x7f0360cb, 0xf022f6da, 
+  0x7f0040f6, 0xf00a06c8, 
+  0x7efd1c3c, 0xeff11753, 0x7ef9f29d, 0xefd8287c, 0x7ef6c418, 0xefbf3a45, 
+  0x7ef390ae, 0xefa64cae, 
+  0x7ef05860, 0xef8d5fb8, 0x7eed1b2c, 0xef747365, 0x7ee9d914, 0xef5b87b5, 
+  0x7ee69217, 0xef429caa, 
+  0x7ee34636, 0xef29b243, 0x7edff570, 0xef10c883, 0x7edc9fc6, 0xeef7df6a, 
+  0x7ed94538, 0xeedef6f9, 
+  0x7ed5e5c6, 0xeec60f31, 0x7ed28171, 0xeead2813, 0x7ecf1837, 0xee9441a0, 
+  0x7ecbaa1a, 0xee7b5bd9, 
+  0x7ec8371a, 0xee6276bf, 0x7ec4bf36, 0xee499253, 0x7ec14270, 0xee30ae96, 
+  0x7ebdc0c6, 0xee17cb88, 
+  0x7eba3a39, 0xedfee92b, 0x7eb6aeca, 0xede60780, 0x7eb31e78, 0xedcd2687, 
+  0x7eaf8943, 0xedb44642, 
+  0x7eabef2c, 0xed9b66b2, 0x7ea85033, 0xed8287d7, 0x7ea4ac58, 0xed69a9b3, 
+  0x7ea1039b, 0xed50cc46, 
+  0x7e9d55fc, 0xed37ef91, 0x7e99a37c, 0xed1f1396, 0x7e95ec1a, 0xed063856, 
+  0x7e922fd6, 0xeced5dd0, 
+  0x7e8e6eb2, 0xecd48407, 0x7e8aa8ac, 0xecbbaafb, 0x7e86ddc6, 0xeca2d2ad, 
+  0x7e830dff, 0xec89fb1e, 
+  0x7e7f3957, 0xec71244f, 0x7e7b5fce, 0xec584e41, 0x7e778166, 0xec3f78f6, 
+  0x7e739e1d, 0xec26a46d, 
+  0x7e6fb5f4, 0xec0dd0a8, 0x7e6bc8eb, 0xebf4fda8, 0x7e67d703, 0xebdc2b6e, 
+  0x7e63e03b, 0xebc359fb, 
+  0x7e5fe493, 0xebaa894f, 0x7e5be40c, 0xeb91b96c, 0x7e57dea7, 0xeb78ea52, 
+  0x7e53d462, 0xeb601c04, 
+  0x7e4fc53e, 0xeb474e81, 0x7e4bb13c, 0xeb2e81ca, 0x7e47985b, 0xeb15b5e1, 
+  0x7e437a9c, 0xeafceac6, 
+  0x7e3f57ff, 0xeae4207a, 0x7e3b3083, 0xeacb56ff, 0x7e37042a, 0xeab28e56, 
+  0x7e32d2f4, 0xea99c67e, 
+  0x7e2e9cdf, 0xea80ff7a, 0x7e2a61ed, 0xea683949, 0x7e26221f, 0xea4f73ee, 
+  0x7e21dd73, 0xea36af69, 
+  0x7e1d93ea, 0xea1debbb, 0x7e194584, 0xea0528e5, 0x7e14f242, 0xe9ec66e8, 
+  0x7e109a24, 0xe9d3a5c5, 
+  0x7e0c3d29, 0xe9bae57d, 0x7e07db52, 0xe9a22610, 0x7e0374a0, 0xe9896781, 
+  0x7dff0911, 0xe970a9ce, 
+  0x7dfa98a8, 0xe957ecfb, 0x7df62362, 0xe93f3107, 0x7df1a942, 0xe92675f4, 
+  0x7ded2a47, 0xe90dbbc2, 
+  0x7de8a670, 0xe8f50273, 0x7de41dc0, 0xe8dc4a07, 0x7ddf9034, 0xe8c39280, 
+  0x7ddafdce, 0xe8aadbde, 
+  0x7dd6668f, 0xe8922622, 0x7dd1ca75, 0xe879714d, 0x7dcd2981, 0xe860bd61, 
+  0x7dc883b4, 0xe8480a5d, 
+  0x7dc3d90d, 0xe82f5844, 0x7dbf298d, 0xe816a716, 0x7dba7534, 0xe7fdf6d4, 
+  0x7db5bc02, 0xe7e5477f, 
+  0x7db0fdf8, 0xe7cc9917, 0x7dac3b15, 0xe7b3eb9f, 0x7da77359, 0xe79b3f16, 
+  0x7da2a6c6, 0xe782937e, 
+  0x7d9dd55a, 0xe769e8d8, 0x7d98ff17, 0xe7513f25, 0x7d9423fc, 0xe7389665, 
+  0x7d8f4409, 0xe71fee99, 
+  0x7d8a5f40, 0xe70747c4, 0x7d85759f, 0xe6eea1e4, 0x7d808728, 0xe6d5fcfc, 
+  0x7d7b93da, 0xe6bd590d, 
+  0x7d769bb5, 0xe6a4b616, 0x7d719eba, 0xe68c141a, 0x7d6c9ce9, 0xe6737319, 
+  0x7d679642, 0xe65ad315, 
+  0x7d628ac6, 0xe642340d, 0x7d5d7a74, 0xe6299604, 0x7d58654d, 0xe610f8f9, 
+  0x7d534b50, 0xe5f85cef, 
+  0x7d4e2c7f, 0xe5dfc1e5, 0x7d4908d9, 0xe5c727dd, 0x7d43e05e, 0xe5ae8ed8, 
+  0x7d3eb30f, 0xe595f6d7, 
+  0x7d3980ec, 0xe57d5fda, 0x7d3449f5, 0xe564c9e3, 0x7d2f0e2b, 0xe54c34f3, 
+  0x7d29cd8c, 0xe533a10a, 
+  0x7d24881b, 0xe51b0e2a, 0x7d1f3dd6, 0xe5027c53, 0x7d19eebf, 0xe4e9eb87, 
+  0x7d149ad5, 0xe4d15bc6, 
+  0x7d0f4218, 0xe4b8cd11, 0x7d09e489, 0xe4a03f69, 0x7d048228, 0xe487b2d0, 
+  0x7cff1af5, 0xe46f2745, 
+  0x7cf9aef0, 0xe4569ccb, 0x7cf43e1a, 0xe43e1362, 0x7ceec873, 0xe4258b0a, 
+  0x7ce94dfb, 0xe40d03c6, 
+  0x7ce3ceb2, 0xe3f47d96, 0x7cde4a98, 0xe3dbf87a, 0x7cd8c1ae, 0xe3c37474, 
+  0x7cd333f3, 0xe3aaf184, 
+  0x7ccda169, 0xe3926fad, 0x7cc80a0f, 0xe379eeed, 0x7cc26de5, 0xe3616f48, 
+  0x7cbcccec, 0xe348f0bd, 
+  0x7cb72724, 0xe330734d, 0x7cb17c8d, 0xe317f6fa, 0x7cabcd28, 0xe2ff7bc3, 
+  0x7ca618f3, 0xe2e701ac, 
+  0x7ca05ff1, 0xe2ce88b3, 0x7c9aa221, 0xe2b610da, 0x7c94df83, 0xe29d9a23, 
+  0x7c8f1817, 0xe285248d, 
+  0x7c894bde, 0xe26cb01b, 0x7c837ad8, 0xe2543ccc, 0x7c7da505, 0xe23bcaa2, 
+  0x7c77ca65, 0xe223599e, 
+  0x7c71eaf9, 0xe20ae9c1, 0x7c6c06c0, 0xe1f27b0b, 0x7c661dbc, 0xe1da0d7e, 
+  0x7c602fec, 0xe1c1a11b, 
+  0x7c5a3d50, 0xe1a935e2, 0x7c5445e9, 0xe190cbd4, 0x7c4e49b7, 0xe17862f3, 
+  0x7c4848ba, 0xe15ffb3f, 
+  0x7c4242f2, 0xe14794ba, 0x7c3c3860, 0xe12f2f63, 0x7c362904, 0xe116cb3d, 
+  0x7c3014de, 0xe0fe6848, 
+  0x7c29fbee, 0xe0e60685, 0x7c23de35, 0xe0cda5f5, 0x7c1dbbb3, 0xe0b54698, 
+  0x7c179467, 0xe09ce871, 
+  0x7c116853, 0xe0848b7f, 0x7c0b3777, 0xe06c2fc4, 0x7c0501d2, 0xe053d541, 
+  0x7bfec765, 0xe03b7bf6, 
+  0x7bf88830, 0xe02323e5, 0x7bf24434, 0xe00acd0e, 0x7bebfb70, 0xdff27773, 
+  0x7be5ade6, 0xdfda2314, 
+  0x7bdf5b94, 0xdfc1cff3, 0x7bd9047c, 0xdfa97e0f, 0x7bd2a89e, 0xdf912d6b, 
+  0x7bcc47fa, 0xdf78de07, 
+  0x7bc5e290, 0xdf608fe4, 0x7bbf7860, 0xdf484302, 0x7bb9096b, 0xdf2ff764, 
+  0x7bb295b0, 0xdf17ad0a, 
+  0x7bac1d31, 0xdeff63f4, 0x7ba59fee, 0xdee71c24, 0x7b9f1de6, 0xdeced59b, 
+  0x7b989719, 0xdeb69059, 
+  0x7b920b89, 0xde9e4c60, 0x7b8b7b36, 0xde8609b1, 0x7b84e61f, 0xde6dc84b, 
+  0x7b7e4c45, 0xde558831, 
+  0x7b77ada8, 0xde3d4964, 0x7b710a49, 0xde250be3, 0x7b6a6227, 0xde0ccfb1, 
+  0x7b63b543, 0xddf494ce, 
+  0x7b5d039e, 0xdddc5b3b, 0x7b564d36, 0xddc422f8, 0x7b4f920e, 0xddabec08, 
+  0x7b48d225, 0xdd93b66a, 
+  0x7b420d7a, 0xdd7b8220, 0x7b3b4410, 0xdd634f2b, 0x7b3475e5, 0xdd4b1d8c, 
+  0x7b2da2fa, 0xdd32ed43, 
+  0x7b26cb4f, 0xdd1abe51, 0x7b1feee5, 0xdd0290b8, 0x7b190dbc, 0xdcea6478, 
+  0x7b1227d3, 0xdcd23993, 
+  0x7b0b3d2c, 0xdcba1008, 0x7b044dc7, 0xdca1e7da, 0x7afd59a4, 0xdc89c109, 
+  0x7af660c2, 0xdc719b96, 
+  0x7aef6323, 0xdc597781, 0x7ae860c7, 0xdc4154cd, 0x7ae159ae, 0xdc293379, 
+  0x7ada4dd8, 0xdc111388, 
+  0x7ad33d45, 0xdbf8f4f8, 0x7acc27f7, 0xdbe0d7cd, 0x7ac50dec, 0xdbc8bc06, 
+  0x7abdef25, 0xdbb0a1a4, 
+  0x7ab6cba4, 0xdb9888a8, 0x7aafa367, 0xdb807114, 0x7aa8766f, 0xdb685ae9, 
+  0x7aa144bc, 0xdb504626, 
+  0x7a9a0e50, 0xdb3832cd, 0x7a92d329, 0xdb2020e0, 0x7a8b9348, 0xdb08105e, 
+  0x7a844eae, 0xdaf00149, 
+  0x7a7d055b, 0xdad7f3a2, 0x7a75b74f, 0xdabfe76a, 0x7a6e648a, 0xdaa7dca1, 
+  0x7a670d0d, 0xda8fd349, 
+  0x7a5fb0d8, 0xda77cb63, 0x7a584feb, 0xda5fc4ef, 0x7a50ea47, 0xda47bfee, 
+  0x7a497feb, 0xda2fbc61, 
+  0x7a4210d8, 0xda17ba4a, 0x7a3a9d0f, 0xd9ffb9a9, 0x7a332490, 0xd9e7ba7f, 
+  0x7a2ba75a, 0xd9cfbccd, 
+  0x7a24256f, 0xd9b7c094, 0x7a1c9ece, 0xd99fc5d4, 0x7a151378, 0xd987cc90, 
+  0x7a0d836d, 0xd96fd4c7, 
+  0x7a05eead, 0xd957de7a, 0x79fe5539, 0xd93fe9ab, 0x79f6b711, 0xd927f65b, 
+  0x79ef1436, 0xd910048a, 
+  0x79e76ca7, 0xd8f81439, 0x79dfc064, 0xd8e0256a, 0x79d80f6f, 0xd8c8381d, 
+  0x79d059c8, 0xd8b04c52, 
+  0x79c89f6e, 0xd898620c, 0x79c0e062, 0xd880794b, 0x79b91ca4, 0xd868920f, 
+  0x79b15435, 0xd850ac5a, 
+  0x79a98715, 0xd838c82d, 0x79a1b545, 0xd820e589, 0x7999dec4, 0xd809046e, 
+  0x79920392, 0xd7f124dd, 
+  0x798a23b1, 0xd7d946d8, 0x79823f20, 0xd7c16a5f, 0x797a55e0, 0xd7a98f73, 
+  0x797267f2, 0xd791b616, 
+  0x796a7554, 0xd779de47, 0x79627e08, 0xd7620808, 0x795a820e, 0xd74a335b, 
+  0x79528167, 0xd732603f, 
+  0x794a7c12, 0xd71a8eb5, 0x79427210, 0xd702bec0, 0x793a6361, 0xd6eaf05f, 
+  0x79325006, 0xd6d32393, 
+  0x792a37fe, 0xd6bb585e, 0x79221b4b, 0xd6a38ec0, 0x7919f9ec, 0xd68bc6ba, 
+  0x7911d3e2, 0xd674004e, 
+  0x7909a92d, 0xd65c3b7b, 0x790179cd, 0xd6447844, 0x78f945c3, 0xd62cb6a8, 
+  0x78f10d0f, 0xd614f6a9, 
+  0x78e8cfb2, 0xd5fd3848, 0x78e08dab, 0xd5e57b85, 0x78d846fb, 0xd5cdc062, 
+  0x78cffba3, 0xd5b606e0, 
+  0x78c7aba2, 0xd59e4eff, 0x78bf56f9, 0xd58698c0, 0x78b6fda8, 0xd56ee424, 
+  0x78ae9fb0, 0xd557312d, 
+  0x78a63d11, 0xd53f7fda, 0x789dd5cb, 0xd527d02e, 0x789569df, 0xd5102228, 
+  0x788cf94c, 0xd4f875ca, 
+  0x78848414, 0xd4e0cb15, 0x787c0a36, 0xd4c92209, 0x78738bb3, 0xd4b17aa8, 
+  0x786b088c, 0xd499d4f2, 
+  0x786280bf, 0xd48230e9, 0x7859f44f, 0xd46a8e8d, 0x7851633b, 0xd452eddf, 
+  0x7848cd83, 0xd43b4ee0, 
+  0x78403329, 0xd423b191, 0x7837942b, 0xd40c15f3, 0x782ef08b, 0xd3f47c06, 
+  0x78264849, 0xd3dce3cd, 
+  0x781d9b65, 0xd3c54d47, 0x7814e9df, 0xd3adb876, 0x780c33b8, 0xd396255a, 
+  0x780378f1, 0xd37e93f4, 
+  0x77fab989, 0xd3670446, 0x77f1f581, 0xd34f764f, 0x77e92cd9, 0xd337ea12, 
+  0x77e05f91, 0xd3205f8f, 
+  0x77d78daa, 0xd308d6c7, 0x77ceb725, 0xd2f14fba, 0x77c5dc01, 0xd2d9ca6a, 
+  0x77bcfc3f, 0xd2c246d8, 
+  0x77b417df, 0xd2aac504, 0x77ab2ee2, 0xd29344f0, 0x77a24148, 0xd27bc69c, 
+  0x77994f11, 0xd2644a0a, 
+  0x7790583e, 0xd24ccf39, 0x77875cce, 0xd235562b, 0x777e5cc3, 0xd21ddee2, 
+  0x7775581d, 0xd206695d, 
+  0x776c4edb, 0xd1eef59e, 0x776340ff, 0xd1d783a6, 0x775a2e89, 0xd1c01375, 
+  0x77511778, 0xd1a8a50d, 
+  0x7747fbce, 0xd191386e, 0x773edb8b, 0xd179cd99, 0x7735b6af, 0xd1626490, 
+  0x772c8d3a, 0xd14afd52, 
+  0x77235f2d, 0xd13397e2, 0x771a2c88, 0xd11c343f, 0x7710f54c, 0xd104d26b, 
+  0x7707b979, 0xd0ed7267, 
+  0x76fe790e, 0xd0d61434, 0x76f5340e, 0xd0beb7d2, 0x76ebea77, 0xd0a75d42, 
+  0x76e29c4b, 0xd0900486, 
+  0x76d94989, 0xd078ad9e, 0x76cff232, 0xd061588b, 0x76c69647, 0xd04a054e, 
+  0x76bd35c7, 0xd032b3e7, 
+  0x76b3d0b4, 0xd01b6459, 0x76aa670d, 0xd00416a3, 0x76a0f8d2, 0xcfeccac7, 
+  0x76978605, 0xcfd580c6, 
+  0x768e0ea6, 0xcfbe389f, 0x768492b4, 0xcfa6f255, 0x767b1231, 0xcf8fade9, 
+  0x76718d1c, 0xcf786b5a, 
+  0x76680376, 0xcf612aaa, 0x765e7540, 0xcf49ebda, 0x7654e279, 0xcf32aeeb, 
+  0x764b4b23, 0xcf1b73de, 
+  0x7641af3d, 0xcf043ab3, 0x76380ec8, 0xceed036b, 0x762e69c4, 0xced5ce08, 
+  0x7624c031, 0xcebe9a8a, 
+  0x761b1211, 0xcea768f2, 0x76115f63, 0xce903942, 0x7607a828, 0xce790b79, 
+  0x75fdec60, 0xce61df99, 
+  0x75f42c0b, 0xce4ab5a2, 0x75ea672a, 0xce338d97, 0x75e09dbd, 0xce1c6777, 
+  0x75d6cfc5, 0xce054343, 
+  0x75ccfd42, 0xcdee20fc, 0x75c32634, 0xcdd700a4, 0x75b94a9c, 0xcdbfe23a, 
+  0x75af6a7b, 0xcda8c5c1, 
+  0x75a585cf, 0xcd91ab39, 0x759b9c9b, 0xcd7a92a2, 0x7591aedd, 0xcd637bfe, 
+  0x7587bc98, 0xcd4c674d, 
+  0x757dc5ca, 0xcd355491, 0x7573ca75, 0xcd1e43ca, 0x7569ca99, 0xcd0734f9, 
+  0x755fc635, 0xccf0281f, 
+  0x7555bd4c, 0xccd91d3d, 0x754bafdc, 0xccc21455, 0x75419de7, 0xccab0d65, 
+  0x7537876c, 0xcc940871, 
+  0x752d6c6c, 0xcc7d0578, 0x75234ce8, 0xcc66047b, 0x751928e0, 0xcc4f057c, 
+  0x750f0054, 0xcc38087b, 
+  0x7504d345, 0xcc210d79, 0x74faa1b3, 0xcc0a1477, 0x74f06b9e, 0xcbf31d75, 
+  0x74e63108, 0xcbdc2876, 
+  0x74dbf1ef, 0xcbc53579, 0x74d1ae55, 0xcbae447f, 0x74c7663a, 0xcb97558a, 
+  0x74bd199f, 0xcb80689a, 
+  0x74b2c884, 0xcb697db0, 0x74a872e8, 0xcb5294ce, 0x749e18cd, 0xcb3badf3, 
+  0x7493ba34, 0xcb24c921, 
+  0x7489571c, 0xcb0de658, 0x747eef85, 0xcaf7059a, 0x74748371, 0xcae026e8, 
+  0x746a12df, 0xcac94a42, 
+  0x745f9dd1, 0xcab26fa9, 0x74552446, 0xca9b971e, 0x744aa63f, 0xca84c0a3, 
+  0x744023bc, 0xca6dec37, 
+  0x74359cbd, 0xca5719db, 0x742b1144, 0xca404992, 0x74208150, 0xca297b5a, 
+  0x7415ece2, 0xca12af37, 
+  0x740b53fb, 0xc9fbe527, 0x7400b69a, 0xc9e51d2d, 0x73f614c0, 0xc9ce5748, 
+  0x73eb6e6e, 0xc9b7937a, 
+  0x73e0c3a3, 0xc9a0d1c5, 0x73d61461, 0xc98a1227, 0x73cb60a8, 0xc97354a4, 
+  0x73c0a878, 0xc95c993a, 
+  0x73b5ebd1, 0xc945dfec, 0x73ab2ab4, 0xc92f28ba, 0x73a06522, 0xc91873a5, 
+  0x73959b1b, 0xc901c0ae, 
+  0x738acc9e, 0xc8eb0fd6, 0x737ff9ae, 0xc8d4611d, 0x73752249, 0xc8bdb485, 
+  0x736a4671, 0xc8a70a0e, 
+  0x735f6626, 0xc89061ba, 0x73548168, 0xc879bb89, 0x73499838, 0xc863177b, 
+  0x733eaa96, 0xc84c7593, 
+  0x7333b883, 0xc835d5d0, 0x7328c1ff, 0xc81f3834, 0x731dc70a, 0xc8089cbf, 
+  0x7312c7a5, 0xc7f20373, 
+  0x7307c3d0, 0xc7db6c50, 0x72fcbb8c, 0xc7c4d757, 0x72f1aed9, 0xc7ae4489, 
+  0x72e69db7, 0xc797b3e7, 
+  0x72db8828, 0xc7812572, 0x72d06e2b, 0xc76a992a, 0x72c54fc1, 0xc7540f11, 
+  0x72ba2cea, 0xc73d8727, 
+  0x72af05a7, 0xc727016d, 0x72a3d9f7, 0xc7107de4, 0x7298a9dd, 0xc6f9fc8d, 
+  0x728d7557, 0xc6e37d69, 
+  0x72823c67, 0xc6cd0079, 0x7276ff0d, 0xc6b685bd, 0x726bbd48, 0xc6a00d37, 
+  0x7260771b, 0xc68996e7, 
+  0x72552c85, 0xc67322ce, 0x7249dd86, 0xc65cb0ed, 0x723e8a20, 0xc6464144, 
+  0x72333251, 0xc62fd3d6, 
+  0x7227d61c, 0xc61968a2, 0x721c7580, 0xc602ffaa, 0x7211107e, 0xc5ec98ee, 
+  0x7205a716, 0xc5d6346f, 
+  0x71fa3949, 0xc5bfd22e, 0x71eec716, 0xc5a9722c, 0x71e35080, 0xc593146a, 
+  0x71d7d585, 0xc57cb8e9, 
+  0x71cc5626, 0xc5665fa9, 0x71c0d265, 0xc55008ab, 0x71b54a41, 0xc539b3f1, 
+  0x71a9bdba, 0xc523617a, 
+  0x719e2cd2, 0xc50d1149, 0x71929789, 0xc4f6c35d, 0x7186fdde, 0xc4e077b8, 
+  0x717b5fd3, 0xc4ca2e5b, 
+  0x716fbd68, 0xc4b3e746, 0x7164169d, 0xc49da27a, 0x71586b74, 0xc4875ff9, 
+  0x714cbbeb, 0xc4711fc2, 
+  0x71410805, 0xc45ae1d7, 0x71354fc0, 0xc444a639, 0x7129931f, 0xc42e6ce8, 
+  0x711dd220, 0xc41835e6, 
+  0x71120cc5, 0xc4020133, 0x7106430e, 0xc3ebced0, 0x70fa74fc, 0xc3d59ebe, 
+  0x70eea28e, 0xc3bf70fd, 
+  0x70e2cbc6, 0xc3a94590, 0x70d6f0a4, 0xc3931c76, 0x70cb1128, 0xc37cf5b0, 
+  0x70bf2d53, 0xc366d140, 
+  0x70b34525, 0xc350af26, 0x70a7589f, 0xc33a8f62, 0x709b67c0, 0xc32471f7, 
+  0x708f728b, 0xc30e56e4, 
+  0x708378ff, 0xc2f83e2a, 0x70777b1c, 0xc2e227cb, 0x706b78e3, 0xc2cc13c7, 
+  0x705f7255, 0xc2b6021f, 
+  0x70536771, 0xc29ff2d4, 0x70475839, 0xc289e5e7, 0x703b44ad, 0xc273db58, 
+  0x702f2ccd, 0xc25dd329, 
+  0x7023109a, 0xc247cd5a, 0x7016f014, 0xc231c9ec, 0x700acb3c, 0xc21bc8e1, 
+  0x6ffea212, 0xc205ca38, 
+  0x6ff27497, 0xc1efcdf3, 0x6fe642ca, 0xc1d9d412, 0x6fda0cae, 0xc1c3dc97, 
+  0x6fcdd241, 0xc1ade781, 
+  0x6fc19385, 0xc197f4d4, 0x6fb5507a, 0xc182048d, 0x6fa90921, 0xc16c16b0, 
+  0x6f9cbd79, 0xc1562b3d, 
+  0x6f906d84, 0xc1404233, 0x6f841942, 0xc12a5b95, 0x6f77c0b3, 0xc1147764, 
+  0x6f6b63d8, 0xc0fe959f, 
+  0x6f5f02b2, 0xc0e8b648, 0x6f529d40, 0xc0d2d960, 0x6f463383, 0xc0bcfee7, 
+  0x6f39c57d, 0xc0a726df, 
+  0x6f2d532c, 0xc0915148, 0x6f20dc92, 0xc07b7e23, 0x6f1461b0, 0xc065ad70, 
+  0x6f07e285, 0xc04fdf32, 
+  0x6efb5f12, 0xc03a1368, 0x6eeed758, 0xc0244a14, 0x6ee24b57, 0xc00e8336, 
+  0x6ed5bb10, 0xbff8bece, 
+  0x6ec92683, 0xbfe2fcdf, 0x6ebc8db0, 0xbfcd3d69, 0x6eaff099, 0xbfb7806c, 
+  0x6ea34f3d, 0xbfa1c5ea, 
+  0x6e96a99d, 0xbf8c0de3, 0x6e89ffb9, 0xbf765858, 0x6e7d5193, 0xbf60a54a, 
+  0x6e709f2a, 0xbf4af4ba, 
+  0x6e63e87f, 0xbf3546a8, 0x6e572d93, 0xbf1f9b16, 0x6e4a6e66, 0xbf09f205, 
+  0x6e3daaf8, 0xbef44b74, 
+  0x6e30e34a, 0xbedea765, 0x6e24175c, 0xbec905d9, 0x6e174730, 0xbeb366d1, 
+  0x6e0a72c5, 0xbe9dca4e, 
+  0x6dfd9a1c, 0xbe88304f, 0x6df0bd35, 0xbe7298d7, 0x6de3dc11, 0xbe5d03e6, 
+  0x6dd6f6b1, 0xbe47717c, 
+  0x6dca0d14, 0xbe31e19b, 0x6dbd1f3c, 0xbe1c5444, 0x6db02d29, 0xbe06c977, 
+  0x6da336dc, 0xbdf14135, 
+  0x6d963c54, 0xbddbbb7f, 0x6d893d93, 0xbdc63856, 0x6d7c3a98, 0xbdb0b7bb, 
+  0x6d6f3365, 0xbd9b39ad, 
+  0x6d6227fa, 0xbd85be30, 0x6d551858, 0xbd704542, 0x6d48047e, 0xbd5acee5, 
+  0x6d3aec6e, 0xbd455b1a, 
+  0x6d2dd027, 0xbd2fe9e2, 0x6d20afac, 0xbd1a7b3d, 0x6d138afb, 0xbd050f2c, 
+  0x6d066215, 0xbcefa5b0, 
+  0x6cf934fc, 0xbcda3ecb, 0x6cec03af, 0xbcc4da7b, 0x6cdece2f, 0xbcaf78c4, 
+  0x6cd1947c, 0xbc9a19a5, 
+  0x6cc45698, 0xbc84bd1f, 0x6cb71482, 0xbc6f6333, 0x6ca9ce3b, 0xbc5a0be2, 
+  0x6c9c83c3, 0xbc44b72c, 
+  0x6c8f351c, 0xbc2f6513, 0x6c81e245, 0xbc1a1598, 0x6c748b3f, 0xbc04c8ba, 
+  0x6c67300b, 0xbbef7e7c, 
+  0x6c59d0a9, 0xbbda36dd, 0x6c4c6d1a, 0xbbc4f1df, 0x6c3f055d, 0xbbafaf82, 
+  0x6c319975, 0xbb9a6fc7, 
+  0x6c242960, 0xbb8532b0, 0x6c16b521, 0xbb6ff83c, 0x6c093cb6, 0xbb5ac06d, 
+  0x6bfbc021, 0xbb458b43, 
+  0x6bee3f62, 0xbb3058c0, 0x6be0ba7b, 0xbb1b28e4, 0x6bd3316a, 0xbb05fbb0, 
+  0x6bc5a431, 0xbaf0d125, 
+  0x6bb812d1, 0xbadba943, 0x6baa7d49, 0xbac6840c, 0x6b9ce39b, 0xbab16180, 
+  0x6b8f45c7, 0xba9c41a0, 
+  0x6b81a3cd, 0xba87246d, 0x6b73fdae, 0xba7209e7, 0x6b66536b, 0xba5cf210, 
+  0x6b58a503, 0xba47dce8, 
+  0x6b4af279, 0xba32ca71, 0x6b3d3bcb, 0xba1dbaaa, 0x6b2f80fb, 0xba08ad95, 
+  0x6b21c208, 0xb9f3a332, 
+  0x6b13fef5, 0xb9de9b83, 0x6b0637c1, 0xb9c99688, 0x6af86c6c, 0xb9b49442, 
+  0x6aea9cf8, 0xb99f94b2, 
+  0x6adcc964, 0xb98a97d8, 0x6acef1b2, 0xb9759db6, 0x6ac115e2, 0xb960a64c, 
+  0x6ab335f4, 0xb94bb19b, 
+  0x6aa551e9, 0xb936bfa4, 0x6a9769c1, 0xb921d067, 0x6a897d7d, 0xb90ce3e6, 
+  0x6a7b8d1e, 0xb8f7fa21, 
+  0x6a6d98a4, 0xb8e31319, 0x6a5fa010, 0xb8ce2ecf, 0x6a51a361, 0xb8b94d44, 
+  0x6a43a29a, 0xb8a46e78, 
+  0x6a359db9, 0xb88f926d, 0x6a2794c1, 0xb87ab922, 0x6a1987b0, 0xb865e299, 
+  0x6a0b7689, 0xb8510ed4, 
+  0x69fd614a, 0xb83c3dd1, 0x69ef47f6, 0xb8276f93, 0x69e12a8c, 0xb812a41a, 
+  0x69d3090e, 0xb7fddb67, 
+  0x69c4e37a, 0xb7e9157a, 0x69b6b9d3, 0xb7d45255, 0x69a88c19, 0xb7bf91f8, 
+  0x699a5a4c, 0xb7aad465, 
+  0x698c246c, 0xb796199b, 0x697dea7b, 0xb781619c, 0x696fac78, 0xb76cac69, 
+  0x69616a65, 0xb757fa01, 
+  0x69532442, 0xb7434a67, 0x6944da10, 0xb72e9d9b, 0x69368bce, 0xb719f39e, 
+  0x6928397e, 0xb7054c6f, 
+  0x6919e320, 0xb6f0a812, 0x690b88b5, 0xb6dc0685, 0x68fd2a3d, 0xb6c767ca, 
+  0x68eec7b9, 0xb6b2cbe2, 
+  0x68e06129, 0xb69e32cd, 0x68d1f68f, 0xb6899c8d, 0x68c387e9, 0xb6750921, 
+  0x68b5153a, 0xb660788c, 
+  0x68a69e81, 0xb64beacd, 0x689823bf, 0xb6375fe5, 0x6889a4f6, 0xb622d7d6, 
+  0x687b2224, 0xb60e529f, 
+  0x686c9b4b, 0xb5f9d043, 0x685e106c, 0xb5e550c1, 0x684f8186, 0xb5d0d41a, 
+  0x6840ee9b, 0xb5bc5a50, 
+  0x683257ab, 0xb5a7e362, 0x6823bcb7, 0xb5936f53, 0x68151dbe, 0xb57efe22, 
+  0x68067ac3, 0xb56a8fd0, 
+  0x67f7d3c5, 0xb556245e, 0x67e928c5, 0xb541bbcd, 0x67da79c3, 0xb52d561e, 
+  0x67cbc6c0, 0xb518f351, 
+  0x67bd0fbd, 0xb5049368, 0x67ae54ba, 0xb4f03663, 0x679f95b7, 0xb4dbdc42, 
+  0x6790d2b6, 0xb4c78507, 
+  0x67820bb7, 0xb4b330b3, 0x677340ba, 0xb49edf45, 0x676471c0, 0xb48a90c0, 
+  0x67559eca, 0xb4764523, 
+  0x6746c7d8, 0xb461fc70, 0x6737ecea, 0xb44db6a8, 0x67290e02, 0xb43973ca, 
+  0x671a2b20, 0xb42533d8, 
+  0x670b4444, 0xb410f6d3, 0x66fc596f, 0xb3fcbcbb, 0x66ed6aa1, 0xb3e88592, 
+  0x66de77dc, 0xb3d45157, 
+  0x66cf8120, 0xb3c0200c, 0x66c0866d, 0xb3abf1b2, 0x66b187c3, 0xb397c649, 
+  0x66a28524, 0xb3839dd3, 
+  0x66937e91, 0xb36f784f, 0x66847408, 0xb35b55bf, 0x6675658c, 0xb3473623, 
+  0x6666531d, 0xb333197c, 
+  0x66573cbb, 0xb31effcc, 0x66482267, 0xb30ae912, 0x66390422, 0xb2f6d550, 
+  0x6629e1ec, 0xb2e2c486, 
+  0x661abbc5, 0xb2ceb6b5, 0x660b91af, 0xb2baabde, 0x65fc63a9, 0xb2a6a402, 
+  0x65ed31b5, 0xb2929f21, 
+  0x65ddfbd3, 0xb27e9d3c, 0x65cec204, 0xb26a9e54, 0x65bf8447, 0xb256a26a, 
+  0x65b0429f, 0xb242a97e, 
+  0x65a0fd0b, 0xb22eb392, 0x6591b38c, 0xb21ac0a6, 0x65826622, 0xb206d0ba, 
+  0x657314cf, 0xb1f2e3d0, 
+  0x6563bf92, 0xb1def9e9, 0x6554666d, 0xb1cb1304, 0x6545095f, 0xb1b72f23, 
+  0x6535a86b, 0xb1a34e47, 
+  0x6526438f, 0xb18f7071, 0x6516dacd, 0xb17b95a0, 0x65076e25, 0xb167bdd7, 
+  0x64f7fd98, 0xb153e915, 
+  0x64e88926, 0xb140175b, 0x64d910d1, 0xb12c48ab, 0x64c99498, 0xb1187d05, 
+  0x64ba147d, 0xb104b46a, 
+  0x64aa907f, 0xb0f0eeda, 0x649b08a0, 0xb0dd2c56, 0x648b7ce0, 0xb0c96ce0, 
+  0x647bed3f, 0xb0b5b077, 
+  0x646c59bf, 0xb0a1f71d, 0x645cc260, 0xb08e40d2, 0x644d2722, 0xb07a8d97, 
+  0x643d8806, 0xb066dd6d, 
+  0x642de50d, 0xb0533055, 0x641e3e38, 0xb03f864f, 0x640e9386, 0xb02bdf5c, 
+  0x63fee4f8, 0xb0183b7d, 
+  0x63ef3290, 0xb0049ab3, 0x63df7c4d, 0xaff0fcfe, 0x63cfc231, 0xafdd625f, 
+  0x63c0043b, 0xafc9cad7, 
+  0x63b0426d, 0xafb63667, 0x63a07cc7, 0xafa2a50f, 0x6390b34a, 0xaf8f16d1, 
+  0x6380e5f6, 0xaf7b8bac, 
+  0x637114cc, 0xaf6803a2, 0x63613fcd, 0xaf547eb3, 0x635166f9, 0xaf40fce1, 
+  0x63418a50, 0xaf2d7e2b, 
+  0x6331a9d4, 0xaf1a0293, 0x6321c585, 0xaf068a1a, 0x6311dd64, 0xaef314c0, 
+  0x6301f171, 0xaedfa285, 
+  0x62f201ac, 0xaecc336c, 0x62e20e17, 0xaeb8c774, 0x62d216b3, 0xaea55e9e, 
+  0x62c21b7e, 0xae91f8eb, 
+  0x62b21c7b, 0xae7e965b, 0x62a219aa, 0xae6b36f0, 0x6292130c, 0xae57daab, 
+  0x628208a1, 0xae44818b, 
+  0x6271fa69, 0xae312b92, 0x6261e866, 0xae1dd8c0, 0x6251d298, 0xae0a8916, 
+  0x6241b8ff, 0xadf73c96, 
+  0x62319b9d, 0xade3f33e, 0x62217a72, 0xadd0ad12, 0x6211557e, 0xadbd6a10, 
+  0x62012cc2, 0xadaa2a3b, 
+  0x61f1003f, 0xad96ed92, 0x61e0cff5, 0xad83b416, 0x61d09be5, 0xad707dc8, 
+  0x61c06410, 0xad5d4aaa, 
+  0x61b02876, 0xad4a1aba, 0x619fe918, 0xad36edfc, 0x618fa5f7, 0xad23c46e, 
+  0x617f5f12, 0xad109e12, 
+  0x616f146c, 0xacfd7ae8, 0x615ec603, 0xacea5af2, 0x614e73da, 0xacd73e30, 
+  0x613e1df0, 0xacc424a3, 
+  0x612dc447, 0xacb10e4b, 0x611d66de, 0xac9dfb29, 0x610d05b7, 0xac8aeb3e, 
+  0x60fca0d2, 0xac77de8b, 
+  0x60ec3830, 0xac64d510, 0x60dbcbd1, 0xac51cecf, 0x60cb5bb7, 0xac3ecbc7, 
+  0x60bae7e1, 0xac2bcbfa, 
+  0x60aa7050, 0xac18cf69, 0x6099f505, 0xac05d613, 0x60897601, 0xabf2dffb, 
+  0x6078f344, 0xabdfed1f, 
+  0x60686ccf, 0xabccfd83, 0x6057e2a2, 0xabba1125, 0x604754bf, 0xaba72807, 
+  0x6036c325, 0xab944229, 
+  0x60262dd6, 0xab815f8d, 0x601594d1, 0xab6e8032, 0x6004f819, 0xab5ba41a, 
+  0x5ff457ad, 0xab48cb46, 
+  0x5fe3b38d, 0xab35f5b5, 0x5fd30bbc, 0xab23236a, 0x5fc26038, 0xab105464, 
+  0x5fb1b104, 0xaafd88a4, 
+  0x5fa0fe1f, 0xaaeac02c, 0x5f90478a, 0xaad7fafb, 0x5f7f8d46, 0xaac53912, 
+  0x5f6ecf53, 0xaab27a73, 
+  0x5f5e0db3, 0xaa9fbf1e, 0x5f4d4865, 0xaa8d0713, 0x5f3c7f6b, 0xaa7a5253, 
+  0x5f2bb2c5, 0xaa67a0e0, 
+  0x5f1ae274, 0xaa54f2ba, 0x5f0a0e77, 0xaa4247e1, 0x5ef936d1, 0xaa2fa056, 
+  0x5ee85b82, 0xaa1cfc1a, 
+  0x5ed77c8a, 0xaa0a5b2e, 0x5ec699e9, 0xa9f7bd92, 0x5eb5b3a2, 0xa9e52347, 
+  0x5ea4c9b3, 0xa9d28c4e, 
+  0x5e93dc1f, 0xa9bff8a8, 0x5e82eae5, 0xa9ad6855, 0x5e71f606, 0xa99adb56, 
+  0x5e60fd84, 0xa98851ac, 
+  0x5e50015d, 0xa975cb57, 0x5e3f0194, 0xa9634858, 0x5e2dfe29, 0xa950c8b0, 
+  0x5e1cf71c, 0xa93e4c5f, 
+  0x5e0bec6e, 0xa92bd367, 0x5dfade20, 0xa9195dc7, 0x5de9cc33, 0xa906eb82, 
+  0x5dd8b6a7, 0xa8f47c97, 
+  0x5dc79d7c, 0xa8e21106, 0x5db680b4, 0xa8cfa8d2, 0x5da5604f, 0xa8bd43fa, 
+  0x5d943c4e, 0xa8aae280, 
+  0x5d8314b1, 0xa8988463, 0x5d71e979, 0xa88629a5, 0x5d60baa7, 0xa873d246, 
+  0x5d4f883b, 0xa8617e48, 
+  0x5d3e5237, 0xa84f2daa, 0x5d2d189a, 0xa83ce06e, 0x5d1bdb65, 0xa82a9693, 
+  0x5d0a9a9a, 0xa818501c, 
+  0x5cf95638, 0xa8060d08, 0x5ce80e41, 0xa7f3cd59, 0x5cd6c2b5, 0xa7e1910f, 
+  0x5cc57394, 0xa7cf582a, 
+  0x5cb420e0, 0xa7bd22ac, 0x5ca2ca99, 0xa7aaf094, 0x5c9170bf, 0xa798c1e5, 
+  0x5c801354, 0xa786969e, 
+  0x5c6eb258, 0xa7746ec0, 0x5c5d4dcc, 0xa7624a4d, 0x5c4be5b0, 0xa7502943, 
+  0x5c3a7a05, 0xa73e0ba5, 
+  0x5c290acc, 0xa72bf174, 0x5c179806, 0xa719daae, 0x5c0621b2, 0xa707c757, 
+  0x5bf4a7d2, 0xa6f5b76d, 
+  0x5be32a67, 0xa6e3aaf2, 0x5bd1a971, 0xa6d1a1e7, 0x5bc024f0, 0xa6bf9c4b, 
+  0x5bae9ce7, 0xa6ad9a21, 
+  0x5b9d1154, 0xa69b9b68, 0x5b8b8239, 0xa689a022, 0x5b79ef96, 0xa677a84e, 
+  0x5b68596d, 0xa665b3ee, 
+  0x5b56bfbd, 0xa653c303, 0x5b452288, 0xa641d58c, 0x5b3381ce, 0xa62feb8b, 
+  0x5b21dd90, 0xa61e0501, 
+  0x5b1035cf, 0xa60c21ee, 0x5afe8a8b, 0xa5fa4252, 0x5aecdbc5, 0xa5e8662f, 
+  0x5adb297d, 0xa5d68d85, 
+  0x5ac973b5, 0xa5c4b855, 0x5ab7ba6c, 0xa5b2e6a0, 0x5aa5fda5, 0xa5a11866, 
+  0x5a943d5e, 0xa58f4da8, 
+  0x5a82799a, 0xa57d8666, 0x5a70b258, 0xa56bc2a2, 0x5a5ee79a, 0xa55a025b, 
+  0x5a4d1960, 0xa5484594, 
+  0x5a3b47ab, 0xa5368c4b, 0x5a29727b, 0xa524d683, 0x5a1799d1, 0xa513243b, 
+  0x5a05bdae, 0xa5017575, 
+  0x59f3de12, 0xa4efca31, 0x59e1faff, 0xa4de2270, 0x59d01475, 0xa4cc7e32, 
+  0x59be2a74, 0xa4badd78, 
+  0x59ac3cfd, 0xa4a94043, 0x599a4c12, 0xa497a693, 0x598857b2, 0xa486106a, 
+  0x59765fde, 0xa4747dc7, 
+  0x59646498, 0xa462eeac, 0x595265df, 0xa4516319, 0x594063b5, 0xa43fdb10, 
+  0x592e5e19, 0xa42e568f, 
+  0x591c550e, 0xa41cd599, 0x590a4893, 0xa40b582e, 0x58f838a9, 0xa3f9de4e, 
+  0x58e62552, 0xa3e867fa, 
+  0x58d40e8c, 0xa3d6f534, 0x58c1f45b, 0xa3c585fb, 0x58afd6bd, 0xa3b41a50, 
+  0x589db5b3, 0xa3a2b234, 
+  0x588b9140, 0xa3914da8, 0x58796962, 0xa37fecac, 0x58673e1b, 0xa36e8f41, 
+  0x58550f6c, 0xa35d3567, 
+  0x5842dd54, 0xa34bdf20, 0x5830a7d6, 0xa33a8c6c, 0x581e6ef1, 0xa3293d4b, 
+  0x580c32a7, 0xa317f1bf, 
+  0x57f9f2f8, 0xa306a9c8, 0x57e7afe4, 0xa2f56566, 0x57d5696d, 0xa2e4249b, 
+  0x57c31f92, 0xa2d2e766, 
+  0x57b0d256, 0xa2c1adc9, 0x579e81b8, 0xa2b077c5, 0x578c2dba, 0xa29f4559, 
+  0x5779d65b, 0xa28e1687, 
+  0x57677b9d, 0xa27ceb4f, 0x57551d80, 0xa26bc3b2, 0x5742bc06, 0xa25a9fb1, 
+  0x5730572e, 0xa2497f4c, 
+  0x571deefa, 0xa2386284, 0x570b8369, 0xa2274959, 0x56f9147e, 0xa21633cd, 
+  0x56e6a239, 0xa20521e0, 
+  0x56d42c99, 0xa1f41392, 0x56c1b3a1, 0xa1e308e4, 0x56af3750, 0xa1d201d7, 
+  0x569cb7a8, 0xa1c0fe6c, 
+  0x568a34a9, 0xa1affea3, 0x5677ae54, 0xa19f027c, 0x566524aa, 0xa18e09fa, 
+  0x565297ab, 0xa17d151b, 
+  0x56400758, 0xa16c23e1, 0x562d73b2, 0xa15b364d, 0x561adcb9, 0xa14a4c5e, 
+  0x5608426e, 0xa1396617, 
+  0x55f5a4d2, 0xa1288376, 0x55e303e6, 0xa117a47e, 0x55d05faa, 0xa106c92f, 
+  0x55bdb81f, 0xa0f5f189, 
+  0x55ab0d46, 0xa0e51d8c, 0x55985f20, 0xa0d44d3b, 0x5585adad, 0xa0c38095, 
+  0x5572f8ed, 0xa0b2b79b, 
+  0x556040e2, 0xa0a1f24d, 0x554d858d, 0xa09130ad, 0x553ac6ee, 0xa08072ba, 
+  0x55280505, 0xa06fb876, 
+  0x55153fd4, 0xa05f01e1, 0x5502775c, 0xa04e4efc, 0x54efab9c, 0xa03d9fc8, 
+  0x54dcdc96, 0xa02cf444, 
+  0x54ca0a4b, 0xa01c4c73, 0x54b734ba, 0xa00ba853, 0x54a45be6, 0x9ffb07e7, 
+  0x54917fce, 0x9fea6b2f, 
+  0x547ea073, 0x9fd9d22a, 0x546bbdd7, 0x9fc93cdb, 0x5458d7f9, 0x9fb8ab41, 
+  0x5445eedb, 0x9fa81d5e, 
+  0x5433027d, 0x9f979331, 0x542012e1, 0x9f870cbc, 0x540d2005, 0x9f7689ff, 
+  0x53fa29ed, 0x9f660afb, 
+  0x53e73097, 0x9f558fb0, 0x53d43406, 0x9f45181f, 0x53c13439, 0x9f34a449, 
+  0x53ae3131, 0x9f24342f, 
+  0x539b2af0, 0x9f13c7d0, 0x53882175, 0x9f035f2e, 0x537514c2, 0x9ef2fa49, 
+  0x536204d7, 0x9ee29922, 
+  0x534ef1b5, 0x9ed23bb9, 0x533bdb5d, 0x9ec1e210, 0x5328c1d0, 0x9eb18c26, 
+  0x5315a50e, 0x9ea139fd, 
+  0x53028518, 0x9e90eb94, 0x52ef61ee, 0x9e80a0ee, 0x52dc3b92, 0x9e705a09, 
+  0x52c91204, 0x9e6016e8, 
+  0x52b5e546, 0x9e4fd78a, 0x52a2b556, 0x9e3f9bf0, 0x528f8238, 0x9e2f641b, 
+  0x527c4bea, 0x9e1f300b, 
+  0x5269126e, 0x9e0effc1, 0x5255d5c5, 0x9dfed33e, 0x524295f0, 0x9deeaa82, 
+  0x522f52ee, 0x9dde858e, 
+  0x521c0cc2, 0x9dce6463, 0x5208c36a, 0x9dbe4701, 0x51f576ea, 0x9dae2d68, 
+  0x51e22740, 0x9d9e179a, 
+  0x51ced46e, 0x9d8e0597, 0x51bb7e75, 0x9d7df75f, 0x51a82555, 0x9d6decf4, 
+  0x5194c910, 0x9d5de656, 
+  0x518169a5, 0x9d4de385, 0x516e0715, 0x9d3de482, 0x515aa162, 0x9d2de94d, 
+  0x5147388c, 0x9d1df1e9, 
+  0x5133cc94, 0x9d0dfe54, 0x51205d7b, 0x9cfe0e8f, 0x510ceb40, 0x9cee229c, 
+  0x50f975e6, 0x9cde3a7b, 
+  0x50e5fd6d, 0x9cce562c, 0x50d281d5, 0x9cbe75b0, 0x50bf031f, 0x9cae9907, 
+  0x50ab814d, 0x9c9ec033, 
+  0x5097fc5e, 0x9c8eeb34, 0x50847454, 0x9c7f1a0a, 0x5070e92f, 0x9c6f4cb6, 
+  0x505d5af1, 0x9c5f8339, 
+  0x5049c999, 0x9c4fbd93, 0x50363529, 0x9c3ffbc5, 0x50229da1, 0x9c303dcf, 
+  0x500f0302, 0x9c2083b3, 
+  0x4ffb654d, 0x9c10cd70, 0x4fe7c483, 0x9c011b08, 0x4fd420a4, 0x9bf16c7a, 
+  0x4fc079b1, 0x9be1c1c8, 
+  0x4faccfab, 0x9bd21af3, 0x4f992293, 0x9bc277fa, 0x4f857269, 0x9bb2d8de, 
+  0x4f71bf2e, 0x9ba33da0, 
+  0x4f5e08e3, 0x9b93a641, 0x4f4a4f89, 0x9b8412c1, 0x4f369320, 0x9b748320, 
+  0x4f22d3aa, 0x9b64f760, 
+  0x4f0f1126, 0x9b556f81, 0x4efb4b96, 0x9b45eb83, 0x4ee782fb, 0x9b366b68, 
+  0x4ed3b755, 0x9b26ef2f, 
+  0x4ebfe8a5, 0x9b1776da, 0x4eac16eb, 0x9b080268, 0x4e984229, 0x9af891db, 
+  0x4e846a60, 0x9ae92533, 
+  0x4e708f8f, 0x9ad9bc71, 0x4e5cb1b9, 0x9aca5795, 0x4e48d0dd, 0x9abaf6a1, 
+  0x4e34ecfc, 0x9aab9993, 
+  0x4e210617, 0x9a9c406e, 0x4e0d1c30, 0x9a8ceb31, 0x4df92f46, 0x9a7d99de, 
+  0x4de53f5a, 0x9a6e4c74, 
+  0x4dd14c6e, 0x9a5f02f5, 0x4dbd5682, 0x9a4fbd61, 0x4da95d96, 0x9a407bb9, 
+  0x4d9561ac, 0x9a313dfc, 
+  0x4d8162c4, 0x9a22042d, 0x4d6d60df, 0x9a12ce4b, 0x4d595bfe, 0x9a039c57, 
+  0x4d455422, 0x99f46e51, 
+  0x4d31494b, 0x99e5443b, 0x4d1d3b7a, 0x99d61e14, 0x4d092ab0, 0x99c6fbde, 
+  0x4cf516ee, 0x99b7dd99, 
+  0x4ce10034, 0x99a8c345, 0x4ccce684, 0x9999ace3, 0x4cb8c9dd, 0x998a9a74, 
+  0x4ca4aa41, 0x997b8bf8, 
+  0x4c9087b1, 0x996c816f, 0x4c7c622d, 0x995d7adc, 0x4c6839b7, 0x994e783d, 
+  0x4c540e4e, 0x993f7993, 
+  0x4c3fdff4, 0x99307ee0, 0x4c2baea9, 0x99218824, 0x4c177a6e, 0x9912955f, 
+  0x4c034345, 0x9903a691, 
+  0x4bef092d, 0x98f4bbbc, 0x4bdacc28, 0x98e5d4e0, 0x4bc68c36, 0x98d6f1fe, 
+  0x4bb24958, 0x98c81316, 
+  0x4b9e0390, 0x98b93828, 0x4b89badd, 0x98aa6136, 0x4b756f40, 0x989b8e40, 
+  0x4b6120bb, 0x988cbf46, 
+  0x4b4ccf4d, 0x987df449, 0x4b387af9, 0x986f2d4a, 0x4b2423be, 0x98606a49, 
+  0x4b0fc99d, 0x9851ab46, 
+  0x4afb6c98, 0x9842f043, 0x4ae70caf, 0x98343940, 0x4ad2a9e2, 0x9825863d, 
+  0x4abe4433, 0x9816d73b, 
+  0x4aa9dba2, 0x98082c3b, 0x4a957030, 0x97f9853d, 0x4a8101de, 0x97eae242, 
+  0x4a6c90ad, 0x97dc4349, 
+  0x4a581c9e, 0x97cda855, 0x4a43a5b0, 0x97bf1165, 0x4a2f2be6, 0x97b07e7a, 
+  0x4a1aaf3f, 0x97a1ef94, 
+  0x4a062fbd, 0x979364b5, 0x49f1ad61, 0x9784dddc, 0x49dd282a, 0x97765b0a, 
+  0x49c8a01b, 0x9767dc41, 
+  0x49b41533, 0x9759617f, 0x499f8774, 0x974aeac6, 0x498af6df, 0x973c7817, 
+  0x49766373, 0x972e0971, 
+  0x4961cd33, 0x971f9ed7, 0x494d341e, 0x97113847, 0x49389836, 0x9702d5c3, 
+  0x4923f97b, 0x96f4774b, 
+  0x490f57ee, 0x96e61ce0, 0x48fab391, 0x96d7c682, 0x48e60c62, 0x96c97432, 
+  0x48d16265, 0x96bb25f0, 
+  0x48bcb599, 0x96acdbbe, 0x48a805ff, 0x969e959b, 0x48935397, 0x96905388, 
+  0x487e9e64, 0x96821585, 
+  0x4869e665, 0x9673db94, 0x48552b9b, 0x9665a5b4, 0x48406e08, 0x965773e7, 
+  0x482badab, 0x9649462d, 
+  0x4816ea86, 0x963b1c86, 0x48022499, 0x962cf6f2, 0x47ed5be6, 0x961ed574, 
+  0x47d8906d, 0x9610b80a, 
+  0x47c3c22f, 0x96029eb6, 0x47aef12c, 0x95f48977, 0x479a1d67, 0x95e67850, 
+  0x478546de, 0x95d86b3f, 
+  0x47706d93, 0x95ca6247, 0x475b9188, 0x95bc5d66, 0x4746b2bc, 0x95ae5c9f, 
+  0x4731d131, 0x95a05ff0, 
+  0x471cece7, 0x9592675c, 0x470805df, 0x958472e2, 0x46f31c1a, 0x95768283, 
+  0x46de2f99, 0x9568963f, 
+  0x46c9405c, 0x955aae17, 0x46b44e65, 0x954cca0c, 0x469f59b4, 0x953eea1e, 
+  0x468a624a, 0x95310e4e, 
+  0x46756828, 0x9523369c, 0x46606b4e, 0x95156308, 0x464b6bbe, 0x95079394, 
+  0x46366978, 0x94f9c83f, 
+  0x4621647d, 0x94ec010b, 0x460c5cce, 0x94de3df8, 0x45f7526b, 0x94d07f05, 
+  0x45e24556, 0x94c2c435, 
+  0x45cd358f, 0x94b50d87, 0x45b82318, 0x94a75afd, 0x45a30df0, 0x9499ac95, 
+  0x458df619, 0x948c0252, 
+  0x4578db93, 0x947e5c33, 0x4563be60, 0x9470ba39, 0x454e9e80, 0x94631c65, 
+  0x45397bf4, 0x945582b7, 
+  0x452456bd, 0x9447ed2f, 0x450f2edb, 0x943a5bcf, 0x44fa0450, 0x942cce96, 
+  0x44e4d71c, 0x941f4585, 
+  0x44cfa740, 0x9411c09e, 0x44ba74bd, 0x94043fdf, 0x44a53f93, 0x93f6c34a, 
+  0x449007c4, 0x93e94adf, 
+  0x447acd50, 0x93dbd6a0, 0x44659039, 0x93ce668b, 0x4450507e, 0x93c0faa3, 
+  0x443b0e21, 0x93b392e6, 
+  0x4425c923, 0x93a62f57, 0x44108184, 0x9398cff5, 0x43fb3746, 0x938b74c1, 
+  0x43e5ea68, 0x937e1dbb, 
+  0x43d09aed, 0x9370cae4, 0x43bb48d4, 0x93637c3d, 0x43a5f41e, 0x935631c5, 
+  0x43909ccd, 0x9348eb7e, 
+  0x437b42e1, 0x933ba968, 0x4365e65b, 0x932e6b84, 0x4350873c, 0x932131d1, 
+  0x433b2585, 0x9313fc51, 
+  0x4325c135, 0x9306cb04, 0x43105a50, 0x92f99deb, 0x42faf0d4, 0x92ec7505, 
+  0x42e584c3, 0x92df5054, 
+  0x42d0161e, 0x92d22fd9, 0x42baa4e6, 0x92c51392, 0x42a5311b, 0x92b7fb82, 
+  0x428fbabe, 0x92aae7a8, 
+  0x427a41d0, 0x929dd806, 0x4264c653, 0x9290cc9b, 0x424f4845, 0x9283c568, 
+  0x4239c7aa, 0x9276c26d, 
+  0x42244481, 0x9269c3ac, 0x420ebecb, 0x925cc924, 0x41f93689, 0x924fd2d7, 
+  0x41e3abbc, 0x9242e0c4, 
+  0x41ce1e65, 0x9235f2ec, 0x41b88e84, 0x9229094f, 0x41a2fc1a, 0x921c23ef, 
+  0x418d6729, 0x920f42cb, 
+  0x4177cfb1, 0x920265e4, 0x416235b2, 0x91f58d3b, 0x414c992f, 0x91e8b8d0, 
+  0x4136fa27, 0x91dbe8a4, 
+  0x4121589b, 0x91cf1cb6, 0x410bb48c, 0x91c25508, 0x40f60dfb, 0x91b5919a, 
+  0x40e064ea, 0x91a8d26d, 
+  0x40cab958, 0x919c1781, 0x40b50b46, 0x918f60d6, 0x409f5ab6, 0x9182ae6d, 
+  0x4089a7a8, 0x91760047, 
+  0x4073f21d, 0x91695663, 0x405e3a16, 0x915cb0c3, 0x40487f94, 0x91500f67, 
+  0x4032c297, 0x91437250, 
+  0x401d0321, 0x9136d97d, 0x40074132, 0x912a44f0, 0x3ff17cca, 0x911db4a9, 
+  0x3fdbb5ec, 0x911128a8, 
+  0x3fc5ec98, 0x9104a0ee, 0x3fb020ce, 0x90f81d7b, 0x3f9a5290, 0x90eb9e50, 
+  0x3f8481dd, 0x90df236e, 
+  0x3f6eaeb8, 0x90d2acd4, 0x3f58d921, 0x90c63a83, 0x3f430119, 0x90b9cc7d, 
+  0x3f2d26a0, 0x90ad62c0, 
+  0x3f1749b8, 0x90a0fd4e, 0x3f016a61, 0x90949c28, 0x3eeb889c, 0x90883f4d, 
+  0x3ed5a46b, 0x907be6be, 
+  0x3ebfbdcd, 0x906f927c, 0x3ea9d4c3, 0x90634287, 0x3e93e950, 0x9056f6df, 
+  0x3e7dfb73, 0x904aaf86, 
+  0x3e680b2c, 0x903e6c7b, 0x3e52187f, 0x90322dbf, 0x3e3c2369, 0x9025f352, 
+  0x3e262bee, 0x9019bd36, 
+  0x3e10320d, 0x900d8b69, 0x3dfa35c8, 0x90015dee, 0x3de4371f, 0x8ff534c4, 
+  0x3dce3614, 0x8fe90fec, 
+  0x3db832a6, 0x8fdcef66, 0x3da22cd7, 0x8fd0d333, 0x3d8c24a8, 0x8fc4bb53, 
+  0x3d761a19, 0x8fb8a7c7, 
+  0x3d600d2c, 0x8fac988f, 0x3d49fde1, 0x8fa08dab, 0x3d33ec39, 0x8f94871d, 
+  0x3d1dd835, 0x8f8884e4, 
+  0x3d07c1d6, 0x8f7c8701, 0x3cf1a91c, 0x8f708d75, 0x3cdb8e09, 0x8f649840, 
+  0x3cc5709e, 0x8f58a761, 
+  0x3caf50da, 0x8f4cbadb, 0x3c992ec0, 0x8f40d2ad, 0x3c830a50, 0x8f34eed8, 
+  0x3c6ce38a, 0x8f290f5c, 
+  0x3c56ba70, 0x8f1d343a, 0x3c408f03, 0x8f115d72, 0x3c2a6142, 0x8f058b04, 
+  0x3c143130, 0x8ef9bcf2, 
+  0x3bfdfecd, 0x8eedf33b, 0x3be7ca1a, 0x8ee22de0, 0x3bd19318, 0x8ed66ce1, 
+  0x3bbb59c7, 0x8ecab040, 
+  0x3ba51e29, 0x8ebef7fb, 0x3b8ee03e, 0x8eb34415, 0x3b78a007, 0x8ea7948c, 
+  0x3b625d86, 0x8e9be963, 
+  0x3b4c18ba, 0x8e904298, 0x3b35d1a5, 0x8e84a02d, 0x3b1f8848, 0x8e790222, 
+  0x3b093ca3, 0x8e6d6877, 
+  0x3af2eeb7, 0x8e61d32e, 0x3adc9e86, 0x8e564246, 0x3ac64c0f, 0x8e4ab5bf, 
+  0x3aaff755, 0x8e3f2d9b, 
+  0x3a99a057, 0x8e33a9da, 0x3a834717, 0x8e282a7b, 0x3a6ceb96, 0x8e1caf80, 
+  0x3a568dd4, 0x8e1138ea, 
+  0x3a402dd2, 0x8e05c6b7, 0x3a29cb91, 0x8dfa58ea, 0x3a136712, 0x8deeef82, 
+  0x39fd0056, 0x8de38a80, 
+  0x39e6975e, 0x8dd829e4, 0x39d02c2a, 0x8dcccdaf, 0x39b9bebc, 0x8dc175e0, 
+  0x39a34f13, 0x8db6227a, 
+  0x398cdd32, 0x8daad37b, 0x39766919, 0x8d9f88e5, 0x395ff2c9, 0x8d9442b8, 
+  0x39497a43, 0x8d8900f3, 
+  0x3932ff87, 0x8d7dc399, 0x391c8297, 0x8d728aa9, 0x39060373, 0x8d675623, 
+  0x38ef821c, 0x8d5c2609, 
+  0x38d8fe93, 0x8d50fa59, 0x38c278d9, 0x8d45d316, 0x38abf0ef, 0x8d3ab03f, 
+  0x389566d6, 0x8d2f91d5, 
+  0x387eda8e, 0x8d2477d8, 0x38684c19, 0x8d196249, 0x3851bb77, 0x8d0e5127, 
+  0x383b28a9, 0x8d034474, 
+  0x382493b0, 0x8cf83c30, 0x380dfc8d, 0x8ced385b, 0x37f76341, 0x8ce238f6, 
+  0x37e0c7cc, 0x8cd73e01, 
+  0x37ca2a30, 0x8ccc477d, 0x37b38a6d, 0x8cc1556a, 0x379ce885, 0x8cb667c8, 
+  0x37864477, 0x8cab7e98, 
+  0x376f9e46, 0x8ca099da, 0x3758f5f2, 0x8c95b98f, 0x37424b7b, 0x8c8addb7, 
+  0x372b9ee3, 0x8c800652, 
+  0x3714f02a, 0x8c753362, 0x36fe3f52, 0x8c6a64e5, 0x36e78c5b, 0x8c5f9ade, 
+  0x36d0d746, 0x8c54d54c, 
+  0x36ba2014, 0x8c4a142f, 0x36a366c6, 0x8c3f5788, 0x368cab5c, 0x8c349f58, 
+  0x3675edd9, 0x8c29eb9f, 
+  0x365f2e3b, 0x8c1f3c5d, 0x36486c86, 0x8c149192, 0x3631a8b8, 0x8c09eb40, 
+  0x361ae2d3, 0x8bff4966, 
+  0x36041ad9, 0x8bf4ac05, 0x35ed50c9, 0x8bea131e, 0x35d684a6, 0x8bdf7eb0, 
+  0x35bfb66e, 0x8bd4eebc, 
+  0x35a8e625, 0x8bca6343, 0x359213c9, 0x8bbfdc44, 0x357b3f5d, 0x8bb559c1, 
+  0x356468e2, 0x8baadbba, 
+  0x354d9057, 0x8ba0622f, 0x3536b5be, 0x8b95ed21, 0x351fd918, 0x8b8b7c8f, 
+  0x3508fa66, 0x8b81107b, 
+  0x34f219a8, 0x8b76a8e4, 0x34db36df, 0x8b6c45cc, 0x34c4520d, 0x8b61e733, 
+  0x34ad6b32, 0x8b578d18, 
+  0x34968250, 0x8b4d377c, 0x347f9766, 0x8b42e661, 0x3468aa76, 0x8b3899c6, 
+  0x3451bb81, 0x8b2e51ab, 
+  0x343aca87, 0x8b240e11, 0x3423d78a, 0x8b19cef8, 0x340ce28b, 0x8b0f9462, 
+  0x33f5eb89, 0x8b055e4d, 
+  0x33def287, 0x8afb2cbb, 0x33c7f785, 0x8af0ffac, 0x33b0fa84, 0x8ae6d720, 
+  0x3399fb85, 0x8adcb318, 
+  0x3382fa88, 0x8ad29394, 0x336bf78f, 0x8ac87894, 0x3354f29b, 0x8abe6219, 
+  0x333debab, 0x8ab45024, 
+  0x3326e2c3, 0x8aaa42b4, 0x330fd7e1, 0x8aa039cb, 0x32f8cb07, 0x8a963567, 
+  0x32e1bc36, 0x8a8c358b, 
+  0x32caab6f, 0x8a823a36, 0x32b398b3, 0x8a784368, 0x329c8402, 0x8a6e5123, 
+  0x32856d5e, 0x8a646365, 
+  0x326e54c7, 0x8a5a7a31, 0x32573a3f, 0x8a509585, 0x32401dc6, 0x8a46b564, 
+  0x3228ff5c, 0x8a3cd9cc, 
+  0x3211df04, 0x8a3302be, 0x31fabcbd, 0x8a29303b, 0x31e39889, 0x8a1f6243, 
+  0x31cc7269, 0x8a1598d6, 
+  0x31b54a5e, 0x8a0bd3f5, 0x319e2067, 0x8a0213a0, 0x3186f487, 0x89f857d8, 
+  0x316fc6be, 0x89eea09d, 
+  0x3158970e, 0x89e4edef, 0x31416576, 0x89db3fcf, 0x312a31f8, 0x89d1963c, 
+  0x3112fc95, 0x89c7f138, 
+  0x30fbc54d, 0x89be50c3, 0x30e48c22, 0x89b4b4dd, 0x30cd5115, 0x89ab1d87, 
+  0x30b61426, 0x89a18ac0, 
+  0x309ed556, 0x8997fc8a, 0x308794a6, 0x898e72e4, 0x30705217, 0x8984edcf, 
+  0x30590dab, 0x897b6d4c, 
+  0x3041c761, 0x8971f15a, 0x302a7f3a, 0x896879fb, 0x30133539, 0x895f072e, 
+  0x2ffbe95d, 0x895598f3, 
+  0x2fe49ba7, 0x894c2f4c, 0x2fcd4c19, 0x8942ca39, 0x2fb5fab2, 0x893969b9, 
+  0x2f9ea775, 0x89300dce, 
+  0x2f875262, 0x8926b677, 0x2f6ffb7a, 0x891d63b5, 0x2f58a2be, 0x89141589, 
+  0x2f41482e, 0x890acbf2, 
+  0x2f29ebcc, 0x890186f2, 0x2f128d99, 0x88f84687, 0x2efb2d95, 0x88ef0ab4, 
+  0x2ee3cbc1, 0x88e5d378, 
+  0x2ecc681e, 0x88dca0d3, 0x2eb502ae, 0x88d372c6, 0x2e9d9b70, 0x88ca4951, 
+  0x2e863267, 0x88c12475, 
+  0x2e6ec792, 0x88b80432, 0x2e575af3, 0x88aee888, 0x2e3fec8b, 0x88a5d177, 
+  0x2e287c5a, 0x889cbf01, 
+  0x2e110a62, 0x8893b125, 0x2df996a3, 0x888aa7e3, 0x2de2211e, 0x8881a33d, 
+  0x2dcaa9d5, 0x8878a332, 
+  0x2db330c7, 0x886fa7c2, 0x2d9bb5f6, 0x8866b0ef, 0x2d843964, 0x885dbeb8, 
+  0x2d6cbb10, 0x8854d11e, 
+  0x2d553afc, 0x884be821, 0x2d3db928, 0x884303c1, 0x2d263596, 0x883a23ff, 
+  0x2d0eb046, 0x883148db, 
+  0x2cf72939, 0x88287256, 0x2cdfa071, 0x881fa06f, 0x2cc815ee, 0x8816d327, 
+  0x2cb089b1, 0x880e0a7f, 
+  0x2c98fbba, 0x88054677, 0x2c816c0c, 0x87fc870f, 0x2c69daa6, 0x87f3cc48, 
+  0x2c52478a, 0x87eb1621, 
+  0x2c3ab2b9, 0x87e2649b, 0x2c231c33, 0x87d9b7b7, 0x2c0b83fa, 0x87d10f75, 
+  0x2bf3ea0d, 0x87c86bd5, 
+  0x2bdc4e6f, 0x87bfccd7, 0x2bc4b120, 0x87b7327d, 0x2bad1221, 0x87ae9cc5, 
+  0x2b957173, 0x87a60bb1, 
+  0x2b7dcf17, 0x879d7f41, 0x2b662b0e, 0x8794f774, 0x2b4e8558, 0x878c744d, 
+  0x2b36ddf7, 0x8783f5ca, 
+  0x2b1f34eb, 0x877b7bec, 0x2b078a36, 0x877306b4, 0x2aefddd8, 0x876a9621, 
+  0x2ad82fd2, 0x87622a35, 
+  0x2ac08026, 0x8759c2ef, 0x2aa8ced3, 0x87516050, 0x2a911bdc, 0x87490258, 
+  0x2a796740, 0x8740a907, 
+  0x2a61b101, 0x8738545e, 0x2a49f920, 0x8730045d, 0x2a323f9e, 0x8727b905, 
+  0x2a1a847b, 0x871f7255, 
+  0x2a02c7b8, 0x8717304e, 0x29eb0957, 0x870ef2f1, 0x29d34958, 0x8706ba3d, 
+  0x29bb87bc, 0x86fe8633, 
+  0x29a3c485, 0x86f656d3, 0x298bffb2, 0x86ee2c1e, 0x29743946, 0x86e60614, 
+  0x295c7140, 0x86dde4b5, 
+  0x2944a7a2, 0x86d5c802, 0x292cdc6d, 0x86cdaffa, 0x29150fa1, 0x86c59c9f, 
+  0x28fd4140, 0x86bd8df0, 
+  0x28e5714b, 0x86b583ee, 0x28cd9fc1, 0x86ad7e99, 0x28b5cca5, 0x86a57df2, 
+  0x289df7f8, 0x869d81f8, 
+  0x288621b9, 0x86958aac, 0x286e49ea, 0x868d980e, 0x2856708d, 0x8685aa20, 
+  0x283e95a1, 0x867dc0e0, 
+  0x2826b928, 0x8675dc4f, 0x280edb23, 0x866dfc6e, 0x27f6fb92, 0x8666213c, 
+  0x27df1a77, 0x865e4abb, 
+  0x27c737d3, 0x865678eb, 0x27af53a6, 0x864eabcb, 0x27976df1, 0x8646e35c, 
+  0x277f86b5, 0x863f1f9e, 
+  0x27679df4, 0x86376092, 0x274fb3ae, 0x862fa638, 0x2737c7e3, 0x8627f091, 
+  0x271fda96, 0x86203f9c, 
+  0x2707ebc7, 0x86189359, 0x26effb76, 0x8610ebca, 0x26d809a5, 0x860948ef, 
+  0x26c01655, 0x8601aac7, 
+  0x26a82186, 0x85fa1153, 0x26902b39, 0x85f27c93, 0x26783370, 0x85eaec88, 
+  0x26603a2c, 0x85e36132, 
+  0x26483f6c, 0x85dbda91, 0x26304333, 0x85d458a6, 0x26184581, 0x85ccdb70, 
+  0x26004657, 0x85c562f1, 
+  0x25e845b6, 0x85bdef28, 0x25d0439f, 0x85b68015, 0x25b84012, 0x85af15b9, 
+  0x25a03b11, 0x85a7b015, 
+  0x2588349d, 0x85a04f28, 0x25702cb7, 0x8598f2f3, 0x2558235f, 0x85919b76, 
+  0x25401896, 0x858a48b1, 
+  0x25280c5e, 0x8582faa5, 0x250ffeb7, 0x857bb152, 0x24f7efa2, 0x85746cb8, 
+  0x24dfdf20, 0x856d2cd7, 
+  0x24c7cd33, 0x8565f1b0, 0x24afb9da, 0x855ebb44, 0x2497a517, 0x85578991, 
+  0x247f8eec, 0x85505c99, 
+  0x24677758, 0x8549345c, 0x244f5e5c, 0x854210db, 0x243743fa, 0x853af214, 
+  0x241f2833, 0x8533d809, 
+  0x24070b08, 0x852cc2bb, 0x23eeec78, 0x8525b228, 0x23d6cc87, 0x851ea652, 
+  0x23beab33, 0x85179f39, 
+  0x23a6887f, 0x85109cdd, 0x238e646a, 0x85099f3e, 0x23763ef7, 0x8502a65c, 
+  0x235e1826, 0x84fbb239, 
+  0x2345eff8, 0x84f4c2d4, 0x232dc66d, 0x84edd82d, 0x23159b88, 0x84e6f244, 
+  0x22fd6f48, 0x84e0111b, 
+  0x22e541af, 0x84d934b1, 0x22cd12bd, 0x84d25d06, 0x22b4e274, 0x84cb8a1b, 
+  0x229cb0d5, 0x84c4bbf0, 
+  0x22847de0, 0x84bdf286, 0x226c4996, 0x84b72ddb, 0x225413f8, 0x84b06df2, 
+  0x223bdd08, 0x84a9b2ca, 
+  0x2223a4c5, 0x84a2fc62, 0x220b6b32, 0x849c4abd, 0x21f3304f, 0x84959dd9, 
+  0x21daf41d, 0x848ef5b7, 
+  0x21c2b69c, 0x84885258, 0x21aa77cf, 0x8481b3bb, 0x219237b5, 0x847b19e1, 
+  0x2179f64f, 0x847484ca, 
+  0x2161b3a0, 0x846df477, 0x21496fa7, 0x846768e7, 0x21312a65, 0x8460e21a, 
+  0x2118e3dc, 0x845a6012, 
+  0x21009c0c, 0x8453e2cf, 0x20e852f6, 0x844d6a50, 0x20d0089c, 0x8446f695, 
+  0x20b7bcfe, 0x844087a0, 
+  0x209f701c, 0x843a1d70, 0x208721f9, 0x8433b806, 0x206ed295, 0x842d5762, 
+  0x205681f1, 0x8426fb84, 
+  0x203e300d, 0x8420a46c, 0x2025dcec, 0x841a521a, 0x200d888d, 0x84140490, 
+  0x1ff532f2, 0x840dbbcc, 
+  0x1fdcdc1b, 0x840777d0, 0x1fc4840a, 0x8401389b, 0x1fac2abf, 0x83fafe2e, 
+  0x1f93d03c, 0x83f4c889, 
+  0x1f7b7481, 0x83ee97ad, 0x1f63178f, 0x83e86b99, 0x1f4ab968, 0x83e2444d, 
+  0x1f325a0b, 0x83dc21cb, 
+  0x1f19f97b, 0x83d60412, 0x1f0197b8, 0x83cfeb22, 0x1ee934c3, 0x83c9d6fc, 
+  0x1ed0d09d, 0x83c3c7a0, 
+  0x1eb86b46, 0x83bdbd0e, 0x1ea004c1, 0x83b7b746, 0x1e879d0d, 0x83b1b649, 
+  0x1e6f342c, 0x83abba17, 
+  0x1e56ca1e, 0x83a5c2b0, 0x1e3e5ee5, 0x839fd014, 0x1e25f282, 0x8399e244, 
+  0x1e0d84f5, 0x8393f940, 
+  0x1df5163f, 0x838e1507, 0x1ddca662, 0x8388359b, 0x1dc4355e, 0x83825afb, 
+  0x1dabc334, 0x837c8528, 
+  0x1d934fe5, 0x8376b422, 0x1d7adb73, 0x8370e7e9, 0x1d6265dd, 0x836b207d, 
+  0x1d49ef26, 0x83655ddf, 
+  0x1d31774d, 0x835fa00f, 0x1d18fe54, 0x8359e70d, 0x1d00843d, 0x835432d8, 
+  0x1ce80906, 0x834e8373, 
+  0x1ccf8cb3, 0x8348d8dc, 0x1cb70f43, 0x83433314, 0x1c9e90b8, 0x833d921b, 
+  0x1c861113, 0x8337f5f1, 
+  0x1c6d9053, 0x83325e97, 0x1c550e7c, 0x832ccc0d, 0x1c3c8b8c, 0x83273e52, 
+  0x1c240786, 0x8321b568, 
+  0x1c0b826a, 0x831c314e, 0x1bf2fc3a, 0x8316b205, 0x1bda74f6, 0x8311378d, 
+  0x1bc1ec9e, 0x830bc1e6, 
+  0x1ba96335, 0x83065110, 0x1b90d8bb, 0x8300e50b, 0x1b784d30, 0x82fb7dd8, 
+  0x1b5fc097, 0x82f61b77, 
+  0x1b4732ef, 0x82f0bde8, 0x1b2ea43a, 0x82eb652b, 0x1b161479, 0x82e61141, 
+  0x1afd83ad, 0x82e0c22a, 
+  0x1ae4f1d6, 0x82db77e5, 0x1acc5ef6, 0x82d63274, 0x1ab3cb0d, 0x82d0f1d5, 
+  0x1a9b361d, 0x82cbb60b, 
+  0x1a82a026, 0x82c67f14, 0x1a6a0929, 0x82c14cf1, 0x1a517128, 0x82bc1fa2, 
+  0x1a38d823, 0x82b6f727, 
+  0x1a203e1b, 0x82b1d381, 0x1a07a311, 0x82acb4b0, 0x19ef0707, 0x82a79ab3, 
+  0x19d669fc, 0x82a2858c, 
+  0x19bdcbf3, 0x829d753a, 0x19a52ceb, 0x829869be, 0x198c8ce7, 0x82936317, 
+  0x1973ebe6, 0x828e6146, 
+  0x195b49ea, 0x8289644b, 0x1942a6f3, 0x82846c26, 0x192a0304, 0x827f78d8, 
+  0x19115e1c, 0x827a8a61, 
+  0x18f8b83c, 0x8275a0c0, 0x18e01167, 0x8270bbf7, 0x18c7699b, 0x826bdc04, 
+  0x18aec0db, 0x826700e9, 
+  0x18961728, 0x82622aa6, 0x187d6c82, 0x825d593a, 0x1864c0ea, 0x82588ca7, 
+  0x184c1461, 0x8253c4eb, 
+  0x183366e9, 0x824f0208, 0x181ab881, 0x824a43fe, 0x1802092c, 0x82458acc, 
+  0x17e958ea, 0x8240d673, 
+  0x17d0a7bc, 0x823c26f3, 0x17b7f5a3, 0x82377c4c, 0x179f429f, 0x8232d67f, 
+  0x17868eb3, 0x822e358b, 
+  0x176dd9de, 0x82299971, 0x17552422, 0x82250232, 0x173c6d80, 0x82206fcc, 
+  0x1723b5f9, 0x821be240, 
+  0x170afd8d, 0x82175990, 0x16f2443e, 0x8212d5b9, 0x16d98a0c, 0x820e56be, 
+  0x16c0cef9, 0x8209dc9e, 
+  0x16a81305, 0x82056758, 0x168f5632, 0x8200f6ef, 0x1676987f, 0x81fc8b60, 
+  0x165dd9f0, 0x81f824ae, 
+  0x16451a83, 0x81f3c2d7, 0x162c5a3b, 0x81ef65dc, 0x16139918, 0x81eb0dbe, 
+  0x15fad71b, 0x81e6ba7c, 
+  0x15e21445, 0x81e26c16, 0x15c95097, 0x81de228d, 0x15b08c12, 0x81d9dde1, 
+  0x1597c6b7, 0x81d59e13, 
+  0x157f0086, 0x81d16321, 0x15663982, 0x81cd2d0c, 0x154d71aa, 0x81c8fbd6, 
+  0x1534a901, 0x81c4cf7d, 
+  0x151bdf86, 0x81c0a801, 0x1503153a, 0x81bc8564, 0x14ea4a1f, 0x81b867a5, 
+  0x14d17e36, 0x81b44ec4, 
+  0x14b8b17f, 0x81b03ac2, 0x149fe3fc, 0x81ac2b9e, 0x148715ae, 0x81a82159, 
+  0x146e4694, 0x81a41bf4, 
+  0x145576b1, 0x81a01b6d, 0x143ca605, 0x819c1fc5, 0x1423d492, 0x819828fd, 
+  0x140b0258, 0x81943715, 
+  0x13f22f58, 0x81904a0c, 0x13d95b93, 0x818c61e3, 0x13c0870a, 0x81887e9a, 
+  0x13a7b1bf, 0x8184a032, 
+  0x138edbb1, 0x8180c6a9, 0x137604e2, 0x817cf201, 0x135d2d53, 0x8179223a, 
+  0x13445505, 0x81755754, 
+  0x132b7bf9, 0x8171914e, 0x1312a230, 0x816dd02a, 0x12f9c7aa, 0x816a13e6, 
+  0x12e0ec6a, 0x81665c84, 
+  0x12c8106f, 0x8162aa04, 0x12af33ba, 0x815efc65, 0x1296564d, 0x815b53a8, 
+  0x127d7829, 0x8157afcd, 
+  0x1264994e, 0x815410d4, 0x124bb9be, 0x815076bd, 0x1232d979, 0x814ce188, 
+  0x1219f880, 0x81495136, 
+  0x120116d5, 0x8145c5c7, 0x11e83478, 0x81423f3a, 0x11cf516a, 0x813ebd90, 
+  0x11b66dad, 0x813b40ca, 
+  0x119d8941, 0x8137c8e6, 0x1184a427, 0x813455e6, 0x116bbe60, 0x8130e7c9, 
+  0x1152d7ed, 0x812d7e8f, 
+  0x1139f0cf, 0x812a1a3a, 0x11210907, 0x8126bac8, 0x11082096, 0x8123603a, 
+  0x10ef377d, 0x81200a90, 
+  0x10d64dbd, 0x811cb9ca, 0x10bd6356, 0x81196de9, 0x10a4784b, 0x811626ec, 
+  0x108b8c9b, 0x8112e4d4, 
+  0x1072a048, 0x810fa7a0, 0x1059b352, 0x810c6f52, 0x1040c5bb, 0x81093be8, 
+  0x1027d784, 0x81060d63, 
+  0x100ee8ad, 0x8102e3c4, 0xff5f938, 0x80ffbf0a, 0xfdd0926, 0x80fc9f35, 
+  0xfc41876, 0x80f98446, 
+  0xfab272b, 0x80f66e3c, 0xf923546, 0x80f35d19, 0xf7942c7, 0x80f050db, 
+  0xf604faf, 0x80ed4984, 
+  0xf475bff, 0x80ea4712, 0xf2e67b8, 0x80e74987, 0xf1572dc, 0x80e450e2, 
+  0xefc7d6b, 0x80e15d24, 
+  0xee38766, 0x80de6e4c, 0xeca90ce, 0x80db845b, 0xeb199a4, 0x80d89f51, 
+  0xe98a1e9, 0x80d5bf2e, 
+  0xe7fa99e, 0x80d2e3f2, 0xe66b0c3, 0x80d00d9d, 0xe4db75b, 0x80cd3c2f, 
+  0xe34bd66, 0x80ca6fa9, 
+  0xe1bc2e4, 0x80c7a80a, 0xe02c7d7, 0x80c4e553, 0xde9cc40, 0x80c22784, 
+  0xdd0d01f, 0x80bf6e9c, 
+  0xdb7d376, 0x80bcba9d, 0xd9ed646, 0x80ba0b85, 0xd85d88f, 0x80b76156, 
+  0xd6cda53, 0x80b4bc0e, 
+  0xd53db92, 0x80b21baf, 0xd3adc4e, 0x80af8039, 0xd21dc87, 0x80ace9ab, 
+  0xd08dc3f, 0x80aa5806, 
+  0xcefdb76, 0x80a7cb49, 0xcd6da2d, 0x80a54376, 0xcbdd865, 0x80a2c08b, 
+  0xca4d620, 0x80a04289, 
+  0xc8bd35e, 0x809dc971, 0xc72d020, 0x809b5541, 0xc59cc68, 0x8098e5fb, 
+  0xc40c835, 0x80967b9f, 
+  0xc27c389, 0x8094162c, 0xc0ebe66, 0x8091b5a2, 0xbf5b8cb, 0x808f5a02, 
+  0xbdcb2bb, 0x808d034c, 
+  0xbc3ac35, 0x808ab180, 0xbaaa53b, 0x8088649e, 0xb919dcf, 0x80861ca6, 
+  0xb7895f0, 0x8083d998, 
+  0xb5f8d9f, 0x80819b74, 0xb4684df, 0x807f623b, 0xb2d7baf, 0x807d2dec, 
+  0xb147211, 0x807afe87, 
+  0xafb6805, 0x8078d40d, 0xae25d8d, 0x8076ae7e, 0xac952aa, 0x80748dd9, 
+  0xab0475c, 0x8072721f, 
+  0xa973ba5, 0x80705b50, 0xa7e2f85, 0x806e496c, 0xa6522fe, 0x806c3c74, 
+  0xa4c1610, 0x806a3466, 
+  0xa3308bd, 0x80683143, 0xa19fb04, 0x8066330c, 0xa00ece8, 0x806439c0, 
+  0x9e7de6a, 0x80624560, 
+  0x9cecf89, 0x806055eb, 0x9b5c048, 0x805e6b62, 0x99cb0a7, 0x805c85c4, 
+  0x983a0a7, 0x805aa512, 
+  0x96a9049, 0x8058c94c, 0x9517f8f, 0x8056f272, 0x9386e78, 0x80552084, 
+  0x91f5d06, 0x80535381, 
+  0x9064b3a, 0x80518b6b, 0x8ed3916, 0x804fc841, 0x8d42699, 0x804e0a04, 
+  0x8bb13c5, 0x804c50b2, 
+  0x8a2009a, 0x804a9c4d, 0x888ed1b, 0x8048ecd5, 0x86fd947, 0x80474248, 
+  0x856c520, 0x80459ca9, 
+  0x83db0a7, 0x8043fbf6, 0x8249bdd, 0x80426030, 0x80b86c2, 0x8040c956, 
+  0x7f27157, 0x803f376a, 
+  0x7d95b9e, 0x803daa6a, 0x7c04598, 0x803c2257, 0x7a72f45, 0x803a9f31, 
+  0x78e18a7, 0x803920f8, 
+  0x77501be, 0x8037a7ac, 0x75bea8c, 0x8036334e, 0x742d311, 0x8034c3dd, 
+  0x729bb4e, 0x80335959, 
+  0x710a345, 0x8031f3c2, 0x6f78af6, 0x80309318, 0x6de7262, 0x802f375d, 
+  0x6c5598a, 0x802de08e, 
+  0x6ac406f, 0x802c8ead, 0x6932713, 0x802b41ba, 0x67a0d76, 0x8029f9b4, 
+  0x660f398, 0x8028b69c, 
+  0x647d97c, 0x80277872, 0x62ebf22, 0x80263f36, 0x615a48b, 0x80250ae7, 
+  0x5fc89b8, 0x8023db86, 
+  0x5e36ea9, 0x8022b114, 0x5ca5361, 0x80218b8f, 0x5b137df, 0x80206af8, 
+  0x5981c26, 0x801f4f4f, 
+  0x57f0035, 0x801e3895, 0x565e40d, 0x801d26c8, 0x54cc7b1, 0x801c19ea, 
+  0x533ab20, 0x801b11fa, 
+  0x51a8e5c, 0x801a0ef8, 0x5017165, 0x801910e4, 0x4e8543e, 0x801817bf, 
+  0x4cf36e5, 0x80172388, 
+  0x4b6195d, 0x80163440, 0x49cfba7, 0x801549e6, 0x483ddc3, 0x8014647b, 
+  0x46abfb3, 0x801383fe, 
+  0x451a177, 0x8012a86f, 0x4388310, 0x8011d1d0, 0x41f6480, 0x8011001f, 
+  0x40645c7, 0x8010335c, 
+  0x3ed26e6, 0x800f6b88, 0x3d407df, 0x800ea8a3, 0x3bae8b2, 0x800deaad, 
+  0x3a1c960, 0x800d31a5, 
+  0x388a9ea, 0x800c7d8c, 0x36f8a51, 0x800bce63, 0x3566a96, 0x800b2427, 
+  0x33d4abb, 0x800a7edb, 
+  0x3242abf, 0x8009de7e, 0x30b0aa4, 0x80094310, 0x2f1ea6c, 0x8008ac90, 
+  0x2d8ca16, 0x80081b00, 
+  0x2bfa9a4, 0x80078e5e, 0x2a68917, 0x800706ac, 0x28d6870, 0x800683e8, 
+  0x27447b0, 0x80060614, 
+  0x25b26d7, 0x80058d2f, 0x24205e8, 0x80051939, 0x228e4e2, 0x8004aa32, 
+  0x20fc3c6, 0x8004401a, 
+  0x1f6a297, 0x8003daf1, 0x1dd8154, 0x80037ab7, 0x1c45ffe, 0x80031f6d, 
+  0x1ab3e97, 0x8002c912, 
+  0x1921d20, 0x800277a6, 0x178fb99, 0x80022b29, 0x15fda03, 0x8001e39b, 
+  0x146b860, 0x8001a0fd, 
+  0x12d96b1, 0x8001634e, 0x11474f6, 0x80012a8e, 0xfb5330, 0x8000f6bd, 
+  0xe23160, 0x8000c7dc, 
+  0xc90f88, 0x80009dea, 0xafeda8, 0x800078e7, 0x96cbc1, 0x800058d4, 0x7da9d4, 
+  0x80003daf, 
+  0x6487e3, 0x8000277a, 0x4b65ee, 0x80001635, 0x3243f5, 0x800009df, 0x1921fb, 
+  0x80000278, 
+}; 
+ 
+/**  
+* \par  
+* cosFactor tables are generated using the formula : <pre>cos_factors[n] = cos((2n+1)*pi/(4*N))</pre>  
+* \par  
+* C command to generate the table  
+* <pre>  
+* for(i = 0; i < N; i++)  
+* {  
+*   cos_factors[i] = cos((2*i+1)*c/2);  
+* } </pre>  
+* \par  
+* where <code>N</code> is the number of factors to generate and <code>c</code> is <code>pi/(2*N)</code>  
+* \par  
+* The values are then converted to q31 format by multiplying by 2^31 and saturating if required.  
+* \par  
+* Note: the stored entries carry no factor of 2; for example the first entry of the 128-point table,  
+* 0x7fff6216, is cos(pi/512) = 0.99998 in q31.  
+*/ 
+ 
+ 
+/* 
+ * Q31 cosine factors for N = 128 (128 entries, file-local, read-only). 
+ * Entry n holds cos((2n+1)*pi/(4*128)) scaled by 2^31, so the values fall 
+ * from 0x7fff6216 (~0.99998) at n = 0 down to 0xc90f88 (~0.00614) at n = 127. 
+ */ 
+static const q31_t cos_factorsQ31_128[128] = { 
+  0x7fff6216, 0x7ffa72d1, 0x7ff09478, 0x7fe1c76b, 0x7fce0c3e, 0x7fb563b3, 
+  0x7f97cebd, 0x7f754e80, 
+  0x7f4de451, 0x7f2191b4, 0x7ef05860, 0x7eba3a39, 0x7e7f3957, 0x7e3f57ff, 
+  0x7dfa98a8, 0x7db0fdf8, 
+  0x7d628ac6, 0x7d0f4218, 0x7cb72724, 0x7c5a3d50, 0x7bf88830, 0x7b920b89, 
+  0x7b26cb4f, 0x7ab6cba4, 
+  0x7a4210d8, 0x79c89f6e, 0x794a7c12, 0x78c7aba2, 0x78403329, 0x77b417df, 
+  0x77235f2d, 0x768e0ea6, 
+  0x75f42c0b, 0x7555bd4c, 0x74b2c884, 0x740b53fb, 0x735f6626, 0x72af05a7, 
+  0x71fa3949, 0x71410805, 
+  0x708378ff, 0x6fc19385, 0x6efb5f12, 0x6e30e34a, 0x6d6227fa, 0x6c8f351c, 
+  0x6bb812d1, 0x6adcc964, 
+  0x69fd614a, 0x6919e320, 0x683257ab, 0x6746c7d8, 0x66573cbb, 0x6563bf92, 
+  0x646c59bf, 0x637114cc, 
+  0x6271fa69, 0x616f146c, 0x60686ccf, 0x5f5e0db3, 0x5e50015d, 0x5d3e5237, 
+  0x5c290acc, 0x5b1035cf, 
+  0x59f3de12, 0x58d40e8c, 0x57b0d256, 0x568a34a9, 0x556040e2, 0x5433027d, 
+  0x53028518, 0x51ced46e, 
+  0x5097fc5e, 0x4f5e08e3, 0x4e210617, 0x4ce10034, 0x4b9e0390, 0x4a581c9e, 
+  0x490f57ee, 0x47c3c22f, 
+  0x46756828, 0x452456bd, 0x43d09aed, 0x427a41d0, 0x4121589b, 0x3fc5ec98, 
+  0x3e680b2c, 0x3d07c1d6, 
+  0x3ba51e29, 0x3a402dd2, 0x38d8fe93, 0x376f9e46, 0x36041ad9, 0x34968250, 
+  0x3326e2c3, 0x31b54a5e, 
+  0x3041c761, 0x2ecc681e, 0x2d553afc, 0x2bdc4e6f, 0x2a61b101, 0x28e5714b, 
+  0x27679df4, 0x25e845b6, 
+  0x24677758, 0x22e541af, 0x2161b3a0, 0x1fdcdc1b, 0x1e56ca1e, 0x1ccf8cb3, 
+  0x1b4732ef, 0x19bdcbf3, 
+  0x183366e9, 0x16a81305, 0x151bdf86, 0x138edbb1, 0x120116d5, 0x1072a048, 
+  0xee38766, 0xd53db92, 
+  0xbc3ac35, 0xa3308bd, 0x8a2009a, 0x710a345, 0x57f0035, 0x3ed26e6, 0x25b26d7, 
+  0xc90f88, 
+}; 
+ 
+/* 
+ * Q31 cosine factors for N = 512 (512 entries, file-local, read-only). 
+ * Entry n holds cos((2n+1)*pi/(4*512)) scaled by 2^31, so the values fall 
+ * from 0x7ffff621 (~0.999999) at n = 0 down to 0x3243f5 (~0.00153) at n = 511. 
+ */ 
+static const q31_t cos_factorsQ31_512[512] = { 
+  0x7ffff621, 0x7fffa72c, 0x7fff0943, 0x7ffe1c65, 0x7ffce093, 0x7ffb55ce, 
+  0x7ff97c18, 0x7ff75370, 
+  0x7ff4dbd9, 0x7ff21553, 0x7feeffe1, 0x7feb9b85, 0x7fe7e841, 0x7fe3e616, 
+  0x7fdf9508, 0x7fdaf519, 
+  0x7fd6064c, 0x7fd0c8a3, 0x7fcb3c23, 0x7fc560cf, 0x7fbf36aa, 0x7fb8bdb8, 
+  0x7fb1f5fc, 0x7faadf7c, 
+  0x7fa37a3c, 0x7f9bc640, 0x7f93c38c, 0x7f8b7227, 0x7f82d214, 0x7f79e35a, 
+  0x7f70a5fe, 0x7f671a05, 
+  0x7f5d3f75, 0x7f531655, 0x7f489eaa, 0x7f3dd87c, 0x7f32c3d1, 0x7f2760af, 
+  0x7f1baf1e, 0x7f0faf25, 
+  0x7f0360cb, 0x7ef6c418, 0x7ee9d914, 0x7edc9fc6, 0x7ecf1837, 0x7ec14270, 
+  0x7eb31e78, 0x7ea4ac58, 
+  0x7e95ec1a, 0x7e86ddc6, 0x7e778166, 0x7e67d703, 0x7e57dea7, 0x7e47985b, 
+  0x7e37042a, 0x7e26221f, 
+  0x7e14f242, 0x7e0374a0, 0x7df1a942, 0x7ddf9034, 0x7dcd2981, 0x7dba7534, 
+  0x7da77359, 0x7d9423fc, 
+  0x7d808728, 0x7d6c9ce9, 0x7d58654d, 0x7d43e05e, 0x7d2f0e2b, 0x7d19eebf, 
+  0x7d048228, 0x7ceec873, 
+  0x7cd8c1ae, 0x7cc26de5, 0x7cabcd28, 0x7c94df83, 0x7c7da505, 0x7c661dbc, 
+  0x7c4e49b7, 0x7c362904, 
+  0x7c1dbbb3, 0x7c0501d2, 0x7bebfb70, 0x7bd2a89e, 0x7bb9096b, 0x7b9f1de6, 
+  0x7b84e61f, 0x7b6a6227, 
+  0x7b4f920e, 0x7b3475e5, 0x7b190dbc, 0x7afd59a4, 0x7ae159ae, 0x7ac50dec, 
+  0x7aa8766f, 0x7a8b9348, 
+  0x7a6e648a, 0x7a50ea47, 0x7a332490, 0x7a151378, 0x79f6b711, 0x79d80f6f, 
+  0x79b91ca4, 0x7999dec4, 
+  0x797a55e0, 0x795a820e, 0x793a6361, 0x7919f9ec, 0x78f945c3, 0x78d846fb, 
+  0x78b6fda8, 0x789569df, 
+  0x78738bb3, 0x7851633b, 0x782ef08b, 0x780c33b8, 0x77e92cd9, 0x77c5dc01, 
+  0x77a24148, 0x777e5cc3, 
+  0x775a2e89, 0x7735b6af, 0x7710f54c, 0x76ebea77, 0x76c69647, 0x76a0f8d2, 
+  0x767b1231, 0x7654e279, 
+  0x762e69c4, 0x7607a828, 0x75e09dbd, 0x75b94a9c, 0x7591aedd, 0x7569ca99, 
+  0x75419de7, 0x751928e0, 
+  0x74f06b9e, 0x74c7663a, 0x749e18cd, 0x74748371, 0x744aa63f, 0x74208150, 
+  0x73f614c0, 0x73cb60a8, 
+  0x73a06522, 0x73752249, 0x73499838, 0x731dc70a, 0x72f1aed9, 0x72c54fc1, 
+  0x7298a9dd, 0x726bbd48, 
+  0x723e8a20, 0x7211107e, 0x71e35080, 0x71b54a41, 0x7186fdde, 0x71586b74, 
+  0x7129931f, 0x70fa74fc, 
+  0x70cb1128, 0x709b67c0, 0x706b78e3, 0x703b44ad, 0x700acb3c, 0x6fda0cae, 
+  0x6fa90921, 0x6f77c0b3, 
+  0x6f463383, 0x6f1461b0, 0x6ee24b57, 0x6eaff099, 0x6e7d5193, 0x6e4a6e66, 
+  0x6e174730, 0x6de3dc11, 
+  0x6db02d29, 0x6d7c3a98, 0x6d48047e, 0x6d138afb, 0x6cdece2f, 0x6ca9ce3b, 
+  0x6c748b3f, 0x6c3f055d, 
+  0x6c093cb6, 0x6bd3316a, 0x6b9ce39b, 0x6b66536b, 0x6b2f80fb, 0x6af86c6c, 
+  0x6ac115e2, 0x6a897d7d, 
+  0x6a51a361, 0x6a1987b0, 0x69e12a8c, 0x69a88c19, 0x696fac78, 0x69368bce, 
+  0x68fd2a3d, 0x68c387e9, 
+  0x6889a4f6, 0x684f8186, 0x68151dbe, 0x67da79c3, 0x679f95b7, 0x676471c0, 
+  0x67290e02, 0x66ed6aa1, 
+  0x66b187c3, 0x6675658c, 0x66390422, 0x65fc63a9, 0x65bf8447, 0x65826622, 
+  0x6545095f, 0x65076e25, 
+  0x64c99498, 0x648b7ce0, 0x644d2722, 0x640e9386, 0x63cfc231, 0x6390b34a, 
+  0x635166f9, 0x6311dd64, 
+  0x62d216b3, 0x6292130c, 0x6251d298, 0x6211557e, 0x61d09be5, 0x618fa5f7, 
+  0x614e73da, 0x610d05b7, 
+  0x60cb5bb7, 0x60897601, 0x604754bf, 0x6004f819, 0x5fc26038, 0x5f7f8d46, 
+  0x5f3c7f6b, 0x5ef936d1, 
+  0x5eb5b3a2, 0x5e71f606, 0x5e2dfe29, 0x5de9cc33, 0x5da5604f, 0x5d60baa7, 
+  0x5d1bdb65, 0x5cd6c2b5, 
+  0x5c9170bf, 0x5c4be5b0, 0x5c0621b2, 0x5bc024f0, 0x5b79ef96, 0x5b3381ce, 
+  0x5aecdbc5, 0x5aa5fda5, 
+  0x5a5ee79a, 0x5a1799d1, 0x59d01475, 0x598857b2, 0x594063b5, 0x58f838a9, 
+  0x58afd6bd, 0x58673e1b, 
+  0x581e6ef1, 0x57d5696d, 0x578c2dba, 0x5742bc06, 0x56f9147e, 0x56af3750, 
+  0x566524aa, 0x561adcb9, 
+  0x55d05faa, 0x5585adad, 0x553ac6ee, 0x54efab9c, 0x54a45be6, 0x5458d7f9, 
+  0x540d2005, 0x53c13439, 
+  0x537514c2, 0x5328c1d0, 0x52dc3b92, 0x528f8238, 0x524295f0, 0x51f576ea, 
+  0x51a82555, 0x515aa162, 
+  0x510ceb40, 0x50bf031f, 0x5070e92f, 0x50229da1, 0x4fd420a4, 0x4f857269, 
+  0x4f369320, 0x4ee782fb, 
+  0x4e984229, 0x4e48d0dd, 0x4df92f46, 0x4da95d96, 0x4d595bfe, 0x4d092ab0, 
+  0x4cb8c9dd, 0x4c6839b7, 
+  0x4c177a6e, 0x4bc68c36, 0x4b756f40, 0x4b2423be, 0x4ad2a9e2, 0x4a8101de, 
+  0x4a2f2be6, 0x49dd282a, 
+  0x498af6df, 0x49389836, 0x48e60c62, 0x48935397, 0x48406e08, 0x47ed5be6, 
+  0x479a1d67, 0x4746b2bc, 
+  0x46f31c1a, 0x469f59b4, 0x464b6bbe, 0x45f7526b, 0x45a30df0, 0x454e9e80, 
+  0x44fa0450, 0x44a53f93, 
+  0x4450507e, 0x43fb3746, 0x43a5f41e, 0x4350873c, 0x42faf0d4, 0x42a5311b, 
+  0x424f4845, 0x41f93689, 
+  0x41a2fc1a, 0x414c992f, 0x40f60dfb, 0x409f5ab6, 0x40487f94, 0x3ff17cca, 
+  0x3f9a5290, 0x3f430119, 
+  0x3eeb889c, 0x3e93e950, 0x3e3c2369, 0x3de4371f, 0x3d8c24a8, 0x3d33ec39, 
+  0x3cdb8e09, 0x3c830a50, 
+  0x3c2a6142, 0x3bd19318, 0x3b78a007, 0x3b1f8848, 0x3ac64c0f, 0x3a6ceb96, 
+  0x3a136712, 0x39b9bebc, 
+  0x395ff2c9, 0x39060373, 0x38abf0ef, 0x3851bb77, 0x37f76341, 0x379ce885, 
+  0x37424b7b, 0x36e78c5b, 
+  0x368cab5c, 0x3631a8b8, 0x35d684a6, 0x357b3f5d, 0x351fd918, 0x34c4520d, 
+  0x3468aa76, 0x340ce28b, 
+  0x33b0fa84, 0x3354f29b, 0x32f8cb07, 0x329c8402, 0x32401dc6, 0x31e39889, 
+  0x3186f487, 0x312a31f8, 
+  0x30cd5115, 0x30705217, 0x30133539, 0x2fb5fab2, 0x2f58a2be, 0x2efb2d95, 
+  0x2e9d9b70, 0x2e3fec8b, 
+  0x2de2211e, 0x2d843964, 0x2d263596, 0x2cc815ee, 0x2c69daa6, 0x2c0b83fa, 
+  0x2bad1221, 0x2b4e8558, 
+  0x2aefddd8, 0x2a911bdc, 0x2a323f9e, 0x29d34958, 0x29743946, 0x29150fa1, 
+  0x28b5cca5, 0x2856708d, 
+  0x27f6fb92, 0x27976df1, 0x2737c7e3, 0x26d809a5, 0x26783370, 0x26184581, 
+  0x25b84012, 0x2558235f, 
+  0x24f7efa2, 0x2497a517, 0x243743fa, 0x23d6cc87, 0x23763ef7, 0x23159b88, 
+  0x22b4e274, 0x225413f8, 
+  0x21f3304f, 0x219237b5, 0x21312a65, 0x20d0089c, 0x206ed295, 0x200d888d, 
+  0x1fac2abf, 0x1f4ab968, 
+  0x1ee934c3, 0x1e879d0d, 0x1e25f282, 0x1dc4355e, 0x1d6265dd, 0x1d00843d, 
+  0x1c9e90b8, 0x1c3c8b8c, 
+  0x1bda74f6, 0x1b784d30, 0x1b161479, 0x1ab3cb0d, 0x1a517128, 0x19ef0707, 
+  0x198c8ce7, 0x192a0304, 
+  0x18c7699b, 0x1864c0ea, 0x1802092c, 0x179f429f, 0x173c6d80, 0x16d98a0c, 
+  0x1676987f, 0x16139918, 
+  0x15b08c12, 0x154d71aa, 0x14ea4a1f, 0x148715ae, 0x1423d492, 0x13c0870a, 
+  0x135d2d53, 0x12f9c7aa, 
+  0x1296564d, 0x1232d979, 0x11cf516a, 0x116bbe60, 0x11082096, 0x10a4784b, 
+  0x1040c5bb, 0xfdd0926, 
+  0xf7942c7, 0xf1572dc, 0xeb199a4, 0xe4db75b, 0xde9cc40, 0xd85d88f, 0xd21dc87, 
+  0xcbdd865, 
+  0xc59cc68, 0xbf5b8cb, 0xb919dcf, 0xb2d7baf, 0xac952aa, 0xa6522fe, 0xa00ece8, 
+  0x99cb0a7, 
+  0x9386e78, 0x8d42699, 0x86fd947, 0x80b86c2, 0x7a72f45, 0x742d311, 0x6de7262, 
+  0x67a0d76, 
+  0x615a48b, 0x5b137df, 0x54cc7b1, 0x4e8543e, 0x483ddc3, 0x41f6480, 0x3bae8b2, 
+  0x3566a96, 
+  0x2f1ea6c, 0x28d6870, 0x228e4e2, 0x1c45ffe, 0x15fda03, 0xfb5330, 0x96cbc1, 
+  0x3243f5, 
+}; 
+ 
+static const q31_t cos_factorsQ31_2048[2048] = { 
+  0x7fffff62, 0x7ffffa73, 0x7ffff094, 0x7fffe1c6, 0x7fffce09, 0x7fffb55c, 
+  0x7fff97c1, 0x7fff7536, 
+  0x7fff4dbb, 0x7fff2151, 0x7ffeeff8, 0x7ffeb9b0, 0x7ffe7e79, 0x7ffe3e52, 
+  0x7ffdf93c, 0x7ffdaf37, 
+  0x7ffd6042, 0x7ffd0c5f, 0x7ffcb38c, 0x7ffc55ca, 0x7ffbf319, 0x7ffb8b78, 
+  0x7ffb1ee9, 0x7ffaad6a, 
+  0x7ffa36fc, 0x7ff9bba0, 0x7ff93b54, 0x7ff8b619, 0x7ff82bef, 0x7ff79cd6, 
+  0x7ff708ce, 0x7ff66fd7, 
+  0x7ff5d1f1, 0x7ff52f1d, 0x7ff48759, 0x7ff3daa6, 0x7ff32905, 0x7ff27275, 
+  0x7ff1b6f6, 0x7ff0f688, 
+  0x7ff0312c, 0x7fef66e1, 0x7fee97a7, 0x7fedc37e, 0x7fecea67, 0x7fec0c62, 
+  0x7feb296d, 0x7fea418b, 
+  0x7fe954ba, 0x7fe862fa, 0x7fe76c4c, 0x7fe670b0, 0x7fe57025, 0x7fe46aac, 
+  0x7fe36045, 0x7fe250ef, 
+  0x7fe13cac, 0x7fe0237a, 0x7fdf055a, 0x7fdde24d, 0x7fdcba51, 0x7fdb8d67, 
+  0x7fda5b8f, 0x7fd924ca, 
+  0x7fd7e917, 0x7fd6a875, 0x7fd562e7, 0x7fd4186a, 0x7fd2c900, 0x7fd174a8, 
+  0x7fd01b63, 0x7fcebd31, 
+  0x7fcd5a11, 0x7fcbf203, 0x7fca8508, 0x7fc91320, 0x7fc79c4b, 0x7fc62089, 
+  0x7fc49fda, 0x7fc31a3d, 
+  0x7fc18fb4, 0x7fc0003e, 0x7fbe6bdb, 0x7fbcd28b, 0x7fbb344e, 0x7fb99125, 
+  0x7fb7e90f, 0x7fb63c0d, 
+  0x7fb48a1e, 0x7fb2d343, 0x7fb1177b, 0x7faf56c7, 0x7fad9127, 0x7fabc69b, 
+  0x7fa9f723, 0x7fa822bf, 
+  0x7fa6496e, 0x7fa46b32, 0x7fa2880b, 0x7fa09ff7, 0x7f9eb2f8, 0x7f9cc10d, 
+  0x7f9aca37, 0x7f98ce76, 
+  0x7f96cdc9, 0x7f94c831, 0x7f92bdad, 0x7f90ae3f, 0x7f8e99e6, 0x7f8c80a1, 
+  0x7f8a6272, 0x7f883f58, 
+  0x7f861753, 0x7f83ea64, 0x7f81b88a, 0x7f7f81c6, 0x7f7d4617, 0x7f7b057e, 
+  0x7f78bffb, 0x7f76758e, 
+  0x7f742637, 0x7f71d1f6, 0x7f6f78cb, 0x7f6d1ab6, 0x7f6ab7b8, 0x7f684fd0, 
+  0x7f65e2ff, 0x7f637144, 
+  0x7f60faa0, 0x7f5e7f13, 0x7f5bfe9d, 0x7f59793e, 0x7f56eef5, 0x7f545fc5, 
+  0x7f51cbab, 0x7f4f32a9, 
+  0x7f4c94be, 0x7f49f1eb, 0x7f474a30, 0x7f449d8c, 0x7f41ec01, 0x7f3f358d, 
+  0x7f3c7a31, 0x7f39b9ee, 
+  0x7f36f4c3, 0x7f342ab1, 0x7f315bb7, 0x7f2e87d6, 0x7f2baf0d, 0x7f28d15d, 
+  0x7f25eec7, 0x7f230749, 
+  0x7f201ae5, 0x7f1d299a, 0x7f1a3368, 0x7f173850, 0x7f143852, 0x7f11336d, 
+  0x7f0e29a3, 0x7f0b1af2, 
+  0x7f08075c, 0x7f04eedf, 0x7f01d17d, 0x7efeaf36, 0x7efb8809, 0x7ef85bf7, 
+  0x7ef52b00, 0x7ef1f524, 
+  0x7eeeba62, 0x7eeb7abc, 0x7ee83632, 0x7ee4ecc3, 0x7ee19e6f, 0x7ede4b38, 
+  0x7edaf31c, 0x7ed7961c, 
+  0x7ed43438, 0x7ed0cd70, 0x7ecd61c5, 0x7ec9f137, 0x7ec67bc5, 0x7ec3016f, 
+  0x7ebf8237, 0x7ebbfe1c, 
+  0x7eb8751e, 0x7eb4e73d, 0x7eb1547a, 0x7eadbcd4, 0x7eaa204c, 0x7ea67ee2, 
+  0x7ea2d896, 0x7e9f2d68, 
+  0x7e9b7d58, 0x7e97c867, 0x7e940e94, 0x7e904fe0, 0x7e8c8c4b, 0x7e88c3d5, 
+  0x7e84f67e, 0x7e812447, 
+  0x7e7d4d2f, 0x7e797136, 0x7e75905d, 0x7e71aaa4, 0x7e6dc00c, 0x7e69d093, 
+  0x7e65dc3b, 0x7e61e303, 
+  0x7e5de4ec, 0x7e59e1f5, 0x7e55da20, 0x7e51cd6c, 0x7e4dbbd9, 0x7e49a567, 
+  0x7e458a17, 0x7e4169e9, 
+  0x7e3d44dd, 0x7e391af3, 0x7e34ec2b, 0x7e30b885, 0x7e2c8002, 0x7e2842a2, 
+  0x7e240064, 0x7e1fb94a, 
+  0x7e1b6d53, 0x7e171c7f, 0x7e12c6ce, 0x7e0e6c42, 0x7e0a0cd9, 0x7e05a894, 
+  0x7e013f74, 0x7dfcd178, 
+  0x7df85ea0, 0x7df3e6ee, 0x7def6a60, 0x7deae8f7, 0x7de662b3, 0x7de1d795, 
+  0x7ddd479d, 0x7dd8b2ca, 
+  0x7dd4191d, 0x7dcf7a96, 0x7dcad736, 0x7dc62efc, 0x7dc181e8, 0x7dbccffc, 
+  0x7db81936, 0x7db35d98, 
+  0x7dae9d21, 0x7da9d7d2, 0x7da50dab, 0x7da03eab, 0x7d9b6ad3, 0x7d969224, 
+  0x7d91b49e, 0x7d8cd240, 
+  0x7d87eb0a, 0x7d82fefe, 0x7d7e0e1c, 0x7d791862, 0x7d741dd2, 0x7d6f1e6c, 
+  0x7d6a1a31, 0x7d65111f, 
+  0x7d600338, 0x7d5af07b, 0x7d55d8e9, 0x7d50bc82, 0x7d4b9b46, 0x7d467536, 
+  0x7d414a51, 0x7d3c1a98, 
+  0x7d36e60b, 0x7d31acaa, 0x7d2c6e76, 0x7d272b6e, 0x7d21e393, 0x7d1c96e5, 
+  0x7d174564, 0x7d11ef11, 
+  0x7d0c93eb, 0x7d0733f3, 0x7d01cf29, 0x7cfc658d, 0x7cf6f720, 0x7cf183e1, 
+  0x7cec0bd1, 0x7ce68ef0, 
+  0x7ce10d3f, 0x7cdb86bd, 0x7cd5fb6a, 0x7cd06b48, 0x7ccad656, 0x7cc53c94, 
+  0x7cbf9e03, 0x7cb9faa2, 
+  0x7cb45272, 0x7caea574, 0x7ca8f3a7, 0x7ca33d0c, 0x7c9d81a3, 0x7c97c16b, 
+  0x7c91fc66, 0x7c8c3294, 
+  0x7c8663f4, 0x7c809088, 0x7c7ab84e, 0x7c74db48, 0x7c6ef976, 0x7c6912d7, 
+  0x7c63276d, 0x7c5d3737, 
+  0x7c574236, 0x7c514869, 0x7c4b49d2, 0x7c45466f, 0x7c3f3e42, 0x7c39314b, 
+  0x7c331f8a, 0x7c2d08ff, 
+  0x7c26edab, 0x7c20cd8d, 0x7c1aa8a6, 0x7c147ef6, 0x7c0e507e, 0x7c081d3d, 
+  0x7c01e534, 0x7bfba863, 
+  0x7bf566cb, 0x7bef206b, 0x7be8d544, 0x7be28556, 0x7bdc30a1, 0x7bd5d726, 
+  0x7bcf78e5, 0x7bc915dd, 
+  0x7bc2ae10, 0x7bbc417e, 0x7bb5d026, 0x7baf5a09, 0x7ba8df28, 0x7ba25f82, 
+  0x7b9bdb18, 0x7b9551ea, 
+  0x7b8ec3f8, 0x7b883143, 0x7b8199ca, 0x7b7afd8f, 0x7b745c91, 0x7b6db6d0, 
+  0x7b670c4d, 0x7b605d09, 
+  0x7b59a902, 0x7b52f03a, 0x7b4c32b1, 0x7b457068, 0x7b3ea95d, 0x7b37dd92, 
+  0x7b310d07, 0x7b2a37bc, 
+  0x7b235db2, 0x7b1c7ee8, 0x7b159b5f, 0x7b0eb318, 0x7b07c612, 0x7b00d44d, 
+  0x7af9ddcb, 0x7af2e28b, 
+  0x7aebe28d, 0x7ae4ddd2, 0x7addd45b, 0x7ad6c626, 0x7acfb336, 0x7ac89b89, 
+  0x7ac17f20, 0x7aba5dfc, 
+  0x7ab3381d, 0x7aac0d82, 0x7aa4de2d, 0x7a9daa1d, 0x7a967153, 0x7a8f33d0, 
+  0x7a87f192, 0x7a80aa9c, 
+  0x7a795eec, 0x7a720e84, 0x7a6ab963, 0x7a635f8a, 0x7a5c00f9, 0x7a549db0, 
+  0x7a4d35b0, 0x7a45c8f9, 
+  0x7a3e578b, 0x7a36e166, 0x7a2f668c, 0x7a27e6fb, 0x7a2062b5, 0x7a18d9b9, 
+  0x7a114c09, 0x7a09b9a4, 
+  0x7a02228a, 0x79fa86bc, 0x79f2e63a, 0x79eb4105, 0x79e3971c, 0x79dbe880, 
+  0x79d43532, 0x79cc7d31, 
+  0x79c4c07e, 0x79bcff19, 0x79b53903, 0x79ad6e3c, 0x79a59ec3, 0x799dca9a, 
+  0x7995f1c1, 0x798e1438, 
+  0x798631ff, 0x797e4b16, 0x79765f7f, 0x796e6f39, 0x79667a44, 0x795e80a1, 
+  0x79568250, 0x794e7f52, 
+  0x794677a6, 0x793e6b4e, 0x79365a49, 0x792e4497, 0x79262a3a, 0x791e0b31, 
+  0x7915e77c, 0x790dbf1d, 
+  0x79059212, 0x78fd605d, 0x78f529fe, 0x78eceef6, 0x78e4af44, 0x78dc6ae8, 
+  0x78d421e4, 0x78cbd437, 
+  0x78c381e2, 0x78bb2ae5, 0x78b2cf41, 0x78aa6ef5, 0x78a20a03, 0x7899a06a, 
+  0x7891322a, 0x7888bf45, 
+  0x788047ba, 0x7877cb89, 0x786f4ab4, 0x7866c53a, 0x785e3b1c, 0x7855ac5a, 
+  0x784d18f4, 0x784480ea, 
+  0x783be43e, 0x783342ef, 0x782a9cfe, 0x7821f26b, 0x78194336, 0x78108f60, 
+  0x7807d6e9, 0x77ff19d1, 
+  0x77f65819, 0x77ed91c0, 0x77e4c6c9, 0x77dbf732, 0x77d322fc, 0x77ca4a27, 
+  0x77c16cb4, 0x77b88aa3, 
+  0x77afa3f5, 0x77a6b8a9, 0x779dc8c0, 0x7794d43b, 0x778bdb19, 0x7782dd5c, 
+  0x7779db03, 0x7770d40f, 
+  0x7767c880, 0x775eb857, 0x7755a394, 0x774c8a36, 0x77436c40, 0x773a49b0, 
+  0x77312287, 0x7727f6c6, 
+  0x771ec66e, 0x7715917d, 0x770c57f5, 0x770319d6, 0x76f9d721, 0x76f08fd5, 
+  0x76e743f4, 0x76ddf37c, 
+  0x76d49e70, 0x76cb44cf, 0x76c1e699, 0x76b883d0, 0x76af1c72, 0x76a5b082, 
+  0x769c3ffe, 0x7692cae8, 
+  0x7689513f, 0x767fd304, 0x76765038, 0x766cc8db, 0x76633ced, 0x7659ac6f, 
+  0x76501760, 0x76467dc2, 
+  0x763cdf94, 0x76333cd8, 0x7629958c, 0x761fe9b3, 0x7616394c, 0x760c8457, 
+  0x7602cad5, 0x75f90cc7, 
+  0x75ef4a2c, 0x75e58305, 0x75dbb753, 0x75d1e715, 0x75c8124d, 0x75be38fa, 
+  0x75b45b1d, 0x75aa78b6, 
+  0x75a091c6, 0x7596a64d, 0x758cb64c, 0x7582c1c2, 0x7578c8b0, 0x756ecb18, 
+  0x7564c8f8, 0x755ac251, 
+  0x7550b725, 0x7546a772, 0x753c933a, 0x75327a7d, 0x75285d3b, 0x751e3b75, 
+  0x7514152b, 0x7509ea5d, 
+  0x74ffbb0d, 0x74f58739, 0x74eb4ee3, 0x74e1120c, 0x74d6d0b2, 0x74cc8ad8, 
+  0x74c2407d, 0x74b7f1a1, 
+  0x74ad9e46, 0x74a3466b, 0x7498ea11, 0x748e8938, 0x748423e0, 0x7479ba0b, 
+  0x746f4bb8, 0x7464d8e8, 
+  0x745a619b, 0x744fe5d2, 0x7445658d, 0x743ae0cc, 0x74305790, 0x7425c9da, 
+  0x741b37a9, 0x7410a0fe, 
+  0x740605d9, 0x73fb663c, 0x73f0c226, 0x73e61997, 0x73db6c91, 0x73d0bb13, 
+  0x73c6051f, 0x73bb4ab3, 
+  0x73b08bd1, 0x73a5c87a, 0x739b00ad, 0x7390346b, 0x738563b5, 0x737a8e8a, 
+  0x736fb4ec, 0x7364d6da, 
+  0x7359f456, 0x734f0d5f, 0x734421f6, 0x7339321b, 0x732e3dcf, 0x73234512, 
+  0x731847e5, 0x730d4648, 
+  0x7302403c, 0x72f735c0, 0x72ec26d6, 0x72e1137d, 0x72d5fbb7, 0x72cadf83, 
+  0x72bfbee3, 0x72b499d6, 
+  0x72a9705c, 0x729e4277, 0x72931027, 0x7287d96c, 0x727c9e47, 0x72715eb8, 
+  0x72661abf, 0x725ad25d, 
+  0x724f8593, 0x72443460, 0x7238dec5, 0x722d84c4, 0x7222265b, 0x7216c38c, 
+  0x720b5c57, 0x71fff0bc, 
+  0x71f480bc, 0x71e90c57, 0x71dd938f, 0x71d21662, 0x71c694d2, 0x71bb0edf, 
+  0x71af848a, 0x71a3f5d2, 
+  0x719862b9, 0x718ccb3f, 0x71812f65, 0x71758f29, 0x7169ea8f, 0x715e4194, 
+  0x7152943b, 0x7146e284, 
+  0x713b2c6e, 0x712f71fb, 0x7123b32b, 0x7117effe, 0x710c2875, 0x71005c90, 
+  0x70f48c50, 0x70e8b7b5, 
+  0x70dcdec0, 0x70d10171, 0x70c51fc8, 0x70b939c7, 0x70ad4f6d, 0x70a160ba, 
+  0x70956db1, 0x70897650, 
+  0x707d7a98, 0x70717a8a, 0x70657626, 0x70596d6d, 0x704d6060, 0x70414efd, 
+  0x70353947, 0x70291f3e, 
+  0x701d00e1, 0x7010de32, 0x7004b731, 0x6ff88bde, 0x6fec5c3b, 0x6fe02846, 
+  0x6fd3f001, 0x6fc7b36d, 
+  0x6fbb728a, 0x6faf2d57, 0x6fa2e3d7, 0x6f969608, 0x6f8a43ed, 0x6f7ded84, 
+  0x6f7192cf, 0x6f6533ce, 
+  0x6f58d082, 0x6f4c68eb, 0x6f3ffd09, 0x6f338cde, 0x6f271868, 0x6f1a9faa, 
+  0x6f0e22a3, 0x6f01a155, 
+  0x6ef51bbe, 0x6ee891e1, 0x6edc03bc, 0x6ecf7152, 0x6ec2daa2, 0x6eb63fad, 
+  0x6ea9a073, 0x6e9cfcf5, 
+  0x6e905534, 0x6e83a92f, 0x6e76f8e7, 0x6e6a445d, 0x6e5d8b91, 0x6e50ce84, 
+  0x6e440d37, 0x6e3747a9, 
+  0x6e2a7ddb, 0x6e1dafce, 0x6e10dd82, 0x6e0406f8, 0x6df72c30, 0x6dea4d2b, 
+  0x6ddd69e9, 0x6dd0826a, 
+  0x6dc396b0, 0x6db6a6ba, 0x6da9b28a, 0x6d9cba1f, 0x6d8fbd7a, 0x6d82bc9d, 
+  0x6d75b786, 0x6d68ae37, 
+  0x6d5ba0b0, 0x6d4e8ef2, 0x6d4178fd, 0x6d345ed1, 0x6d274070, 0x6d1a1dda, 
+  0x6d0cf70f, 0x6cffcc0f, 
+  0x6cf29cdc, 0x6ce56975, 0x6cd831dc, 0x6ccaf610, 0x6cbdb613, 0x6cb071e4, 
+  0x6ca32985, 0x6c95dcf6, 
+  0x6c888c36, 0x6c7b3748, 0x6c6dde2b, 0x6c6080e0, 0x6c531f67, 0x6c45b9c1, 
+  0x6c384fef, 0x6c2ae1f0, 
+  0x6c1d6fc6, 0x6c0ff971, 0x6c027ef1, 0x6bf50047, 0x6be77d74, 0x6bd9f677, 
+  0x6bcc6b53, 0x6bbedc06, 
+  0x6bb14892, 0x6ba3b0f7, 0x6b961536, 0x6b88754f, 0x6b7ad142, 0x6b6d2911, 
+  0x6b5f7cbc, 0x6b51cc42, 
+  0x6b4417a6, 0x6b365ee7, 0x6b28a206, 0x6b1ae103, 0x6b0d1bdf, 0x6aff529a, 
+  0x6af18536, 0x6ae3b3b2, 
+  0x6ad5de0f, 0x6ac8044e, 0x6aba266e, 0x6aac4472, 0x6a9e5e58, 0x6a907423, 
+  0x6a8285d1, 0x6a749365, 
+  0x6a669cdd, 0x6a58a23c, 0x6a4aa381, 0x6a3ca0ad, 0x6a2e99c0, 0x6a208ebb, 
+  0x6a127f9f, 0x6a046c6c, 
+  0x69f65523, 0x69e839c4, 0x69da1a50, 0x69cbf6c7, 0x69bdcf29, 0x69afa378, 
+  0x69a173b5, 0x69933fde, 
+  0x698507f6, 0x6976cbfc, 0x69688bf1, 0x695a47d6, 0x694bffab, 0x693db371, 
+  0x692f6328, 0x69210ed1, 
+  0x6912b66c, 0x690459fb, 0x68f5f97d, 0x68e794f3, 0x68d92c5d, 0x68cabfbd, 
+  0x68bc4f13, 0x68adda5f, 
+  0x689f61a1, 0x6890e4dc, 0x6882640e, 0x6873df38, 0x6865565c, 0x6856c979, 
+  0x68483891, 0x6839a3a4, 
+  0x682b0ab1, 0x681c6dbb, 0x680dccc1, 0x67ff27c4, 0x67f07ec5, 0x67e1d1c4, 
+  0x67d320c1, 0x67c46bbe, 
+  0x67b5b2bb, 0x67a6f5b8, 0x679834b6, 0x67896fb6, 0x677aa6b8, 0x676bd9bd, 
+  0x675d08c4, 0x674e33d0, 
+  0x673f5ae0, 0x67307df5, 0x67219d10, 0x6712b831, 0x6703cf58, 0x66f4e287, 
+  0x66e5f1be, 0x66d6fcfd, 
+  0x66c80445, 0x66b90797, 0x66aa06f3, 0x669b0259, 0x668bf9cb, 0x667ced49, 
+  0x666ddcd3, 0x665ec86b, 
+  0x664fb010, 0x664093c3, 0x66317385, 0x66224f56, 0x66132738, 0x6603fb2a, 
+  0x65f4cb2d, 0x65e59742, 
+  0x65d65f69, 0x65c723a3, 0x65b7e3f1, 0x65a8a052, 0x659958c9, 0x658a0d54, 
+  0x657abdf6, 0x656b6aae, 
+  0x655c137d, 0x654cb863, 0x653d5962, 0x652df679, 0x651e8faa, 0x650f24f5, 
+  0x64ffb65b, 0x64f043dc, 
+  0x64e0cd78, 0x64d15331, 0x64c1d507, 0x64b252fa, 0x64a2cd0c, 0x6493433c, 
+  0x6483b58c, 0x647423fb, 
+  0x64648e8c, 0x6454f53d, 0x64455810, 0x6435b706, 0x6426121e, 0x6416695a, 
+  0x6406bcba, 0x63f70c3f, 
+  0x63e757ea, 0x63d79fba, 0x63c7e3b1, 0x63b823cf, 0x63a86015, 0x63989884, 
+  0x6388cd1b, 0x6378fddc, 
+  0x63692ac7, 0x635953dd, 0x6349791f, 0x63399a8d, 0x6329b827, 0x6319d1ef, 
+  0x6309e7e4, 0x62f9fa09, 
+  0x62ea085c, 0x62da12df, 0x62ca1992, 0x62ba1c77, 0x62aa1b8d, 0x629a16d5, 
+  0x628a0e50, 0x627a01fe, 
+  0x6269f1e1, 0x6259ddf8, 0x6249c645, 0x6239aac7, 0x62298b81, 0x62196871, 
+  0x62094199, 0x61f916f9, 
+  0x61e8e893, 0x61d8b666, 0x61c88074, 0x61b846bc, 0x61a80940, 0x6197c800, 
+  0x618782fd, 0x61773a37, 
+  0x6166edb0, 0x61569d67, 0x6146495d, 0x6135f193, 0x6125960a, 0x611536c2, 
+  0x6104d3bc, 0x60f46cf9, 
+  0x60e40278, 0x60d3943b, 0x60c32243, 0x60b2ac8f, 0x60a23322, 0x6091b5fa, 
+  0x60813519, 0x6070b080, 
+  0x6060282f, 0x604f9c27, 0x603f0c69, 0x602e78f4, 0x601de1ca, 0x600d46ec, 
+  0x5ffca859, 0x5fec0613, 
+  0x5fdb601b, 0x5fcab670, 0x5fba0914, 0x5fa95807, 0x5f98a34a, 0x5f87eade, 
+  0x5f772ec2, 0x5f666ef9, 
+  0x5f55ab82, 0x5f44e45e, 0x5f34198e, 0x5f234b12, 0x5f1278eb, 0x5f01a31a, 
+  0x5ef0c99f, 0x5edfec7b, 
+  0x5ecf0baf, 0x5ebe273b, 0x5ead3f1f, 0x5e9c535e, 0x5e8b63f7, 0x5e7a70ea, 
+  0x5e697a39, 0x5e587fe5, 
+  0x5e4781ed, 0x5e368053, 0x5e257b17, 0x5e147239, 0x5e0365bb, 0x5df2559e, 
+  0x5de141e1, 0x5dd02a85, 
+  0x5dbf0f8c, 0x5dadf0f5, 0x5d9ccec2, 0x5d8ba8f3, 0x5d7a7f88, 0x5d695283, 
+  0x5d5821e4, 0x5d46edac, 
+  0x5d35b5db, 0x5d247a72, 0x5d133b72, 0x5d01f8dc, 0x5cf0b2af, 0x5cdf68ed, 
+  0x5cce1b97, 0x5cbccaac, 
+  0x5cab762f, 0x5c9a1e1e, 0x5c88c27c, 0x5c776348, 0x5c660084, 0x5c549a30, 
+  0x5c43304d, 0x5c31c2db, 
+  0x5c2051db, 0x5c0edd4e, 0x5bfd6534, 0x5bebe98e, 0x5bda6a5d, 0x5bc8e7a2, 
+  0x5bb7615d, 0x5ba5d78e, 
+  0x5b944a37, 0x5b82b958, 0x5b7124f2, 0x5b5f8d06, 0x5b4df193, 0x5b3c529c, 
+  0x5b2ab020, 0x5b190a20, 
+  0x5b07609d, 0x5af5b398, 0x5ae40311, 0x5ad24f09, 0x5ac09781, 0x5aaedc78, 
+  0x5a9d1df1, 0x5a8b5bec, 
+  0x5a799669, 0x5a67cd69, 0x5a5600ec, 0x5a4430f5, 0x5a325d82, 0x5a208695, 
+  0x5a0eac2e, 0x59fcce4f, 
+  0x59eaecf8, 0x59d90829, 0x59c71fe3, 0x59b53427, 0x59a344f6, 0x59915250, 
+  0x597f5c36, 0x596d62a9, 
+  0x595b65aa, 0x59496538, 0x59376155, 0x59255a02, 0x59134f3e, 0x5901410c, 
+  0x58ef2f6b, 0x58dd1a5d, 
+  0x58cb01e1, 0x58b8e5f9, 0x58a6c6a5, 0x5894a3e7, 0x58827dbe, 0x5870542c, 
+  0x585e2730, 0x584bf6cd, 
+  0x5839c302, 0x58278bd1, 0x58155139, 0x5803133c, 0x57f0d1da, 0x57de8d15, 
+  0x57cc44ec, 0x57b9f960, 
+  0x57a7aa73, 0x57955825, 0x57830276, 0x5770a968, 0x575e4cfa, 0x574bed2f, 
+  0x57398a05, 0x5727237f, 
+  0x5714b99d, 0x57024c5f, 0x56efdbc7, 0x56dd67d4, 0x56caf088, 0x56b875e4, 
+  0x56a5f7e7, 0x56937694, 
+  0x5680f1ea, 0x566e69ea, 0x565bde95, 0x56494fec, 0x5636bdef, 0x5624289f, 
+  0x56118ffe, 0x55fef40a, 
+  0x55ec54c6, 0x55d9b232, 0x55c70c4f, 0x55b4631d, 0x55a1b69d, 0x558f06d0, 
+  0x557c53b6, 0x55699d51, 
+  0x5556e3a1, 0x554426a7, 0x55316663, 0x551ea2d6, 0x550bdc01, 0x54f911e5, 
+  0x54e64482, 0x54d373d9, 
+  0x54c09feb, 0x54adc8b8, 0x549aee42, 0x54881089, 0x54752f8d, 0x54624b50, 
+  0x544f63d2, 0x543c7914, 
+  0x54298b17, 0x541699db, 0x5403a561, 0x53f0adaa, 0x53ddb2b6, 0x53cab486, 
+  0x53b7b31c, 0x53a4ae77, 
+  0x5391a699, 0x537e9b82, 0x536b8d33, 0x53587bad, 0x534566f0, 0x53324efd, 
+  0x531f33d5, 0x530c1579, 
+  0x52f8f3e9, 0x52e5cf27, 0x52d2a732, 0x52bf7c0b, 0x52ac4db4, 0x52991c2d, 
+  0x5285e777, 0x5272af92, 
+  0x525f7480, 0x524c3640, 0x5238f4d4, 0x5225b03d, 0x5212687b, 0x51ff1d8f, 
+  0x51ebcf7a, 0x51d87e3c, 
+  0x51c529d7, 0x51b1d24a, 0x519e7797, 0x518b19bf, 0x5177b8c2, 0x516454a0, 
+  0x5150ed5c, 0x513d82f4, 
+  0x512a156b, 0x5116a4c1, 0x510330f7, 0x50efba0d, 0x50dc4005, 0x50c8c2de, 
+  0x50b5429a, 0x50a1bf39, 
+  0x508e38bd, 0x507aaf25, 0x50672273, 0x505392a8, 0x503fffc4, 0x502c69c8, 
+  0x5018d0b4, 0x5005348a, 
+  0x4ff1954b, 0x4fddf2f6, 0x4fca4d8d, 0x4fb6a510, 0x4fa2f981, 0x4f8f4ae0, 
+  0x4f7b992d, 0x4f67e46a, 
+  0x4f542c98, 0x4f4071b6, 0x4f2cb3c7, 0x4f18f2c9, 0x4f052ec0, 0x4ef167aa, 
+  0x4edd9d89, 0x4ec9d05e, 
+  0x4eb60029, 0x4ea22ceb, 0x4e8e56a5, 0x4e7a7d58, 0x4e66a105, 0x4e52c1ab, 
+  0x4e3edf4d, 0x4e2af9ea, 
+  0x4e171184, 0x4e03261b, 0x4def37b0, 0x4ddb4644, 0x4dc751d8, 0x4db35a6c, 
+  0x4d9f6001, 0x4d8b6298, 
+  0x4d776231, 0x4d635ece, 0x4d4f5870, 0x4d3b4f16, 0x4d2742c2, 0x4d133374, 
+  0x4cff212e, 0x4ceb0bf0, 
+  0x4cd6f3bb, 0x4cc2d88f, 0x4caeba6e, 0x4c9a9958, 0x4c86754e, 0x4c724e50, 
+  0x4c5e2460, 0x4c49f77f, 
+  0x4c35c7ac, 0x4c2194e9, 0x4c0d5f37, 0x4bf92697, 0x4be4eb08, 0x4bd0ac8d, 
+  0x4bbc6b25, 0x4ba826d1, 
+  0x4b93df93, 0x4b7f956b, 0x4b6b485a, 0x4b56f861, 0x4b42a580, 0x4b2e4fb8, 
+  0x4b19f70a, 0x4b059b77, 
+  0x4af13d00, 0x4adcdba5, 0x4ac87767, 0x4ab41046, 0x4a9fa645, 0x4a8b3963, 
+  0x4a76c9a2, 0x4a625701, 
+  0x4a4de182, 0x4a396926, 0x4a24edee, 0x4a106fda, 0x49fbeeea, 0x49e76b21, 
+  0x49d2e47e, 0x49be5b02, 
+  0x49a9ceaf, 0x49953f84, 0x4980ad84, 0x496c18ae, 0x49578103, 0x4942e684, 
+  0x492e4933, 0x4919a90f, 
+  0x4905061a, 0x48f06054, 0x48dbb7be, 0x48c70c59, 0x48b25e25, 0x489dad25, 
+  0x4888f957, 0x487442be, 
+  0x485f8959, 0x484acd2a, 0x48360e32, 0x48214c71, 0x480c87e8, 0x47f7c099, 
+  0x47e2f682, 0x47ce29a7, 
+  0x47b95a06, 0x47a487a2, 0x478fb27b, 0x477ada91, 0x4765ffe6, 0x4751227a, 
+  0x473c424e, 0x47275f63, 
+  0x471279ba, 0x46fd9154, 0x46e8a631, 0x46d3b852, 0x46bec7b8, 0x46a9d464, 
+  0x4694de56, 0x467fe590, 
+  0x466aea12, 0x4655ebdd, 0x4640eaf2, 0x462be751, 0x4616e0fc, 0x4601d7f3, 
+  0x45eccc37, 0x45d7bdc9, 
+  0x45c2acaa, 0x45ad98da, 0x4598825a, 0x4583692c, 0x456e4d4f, 0x45592ec6, 
+  0x45440d90, 0x452ee9ae, 
+  0x4519c321, 0x450499eb, 0x44ef6e0b, 0x44da3f83, 0x44c50e53, 0x44afda7d, 
+  0x449aa400, 0x44856adf, 
+  0x44702f19, 0x445af0b0, 0x4445afa4, 0x44306bf6, 0x441b25a8, 0x4405dcb9, 
+  0x43f0912b, 0x43db42fe, 
+  0x43c5f234, 0x43b09ecc, 0x439b48c9, 0x4385f02a, 0x437094f1, 0x435b371f, 
+  0x4345d6b3, 0x433073b0, 
+  0x431b0e15, 0x4305a5e5, 0x42f03b1e, 0x42dacdc3, 0x42c55dd4, 0x42afeb53, 
+  0x429a763f, 0x4284fe99, 
+  0x426f8463, 0x425a079e, 0x42448849, 0x422f0667, 0x421981f7, 0x4203fafb, 
+  0x41ee7174, 0x41d8e561, 
+  0x41c356c5, 0x41adc5a0, 0x419831f3, 0x41829bbe, 0x416d0302, 0x415767c1, 
+  0x4141c9fb, 0x412c29b1, 
+  0x411686e4, 0x4100e194, 0x40eb39c3, 0x40d58f71, 0x40bfe29f, 0x40aa334e, 
+  0x4094817f, 0x407ecd32, 
+  0x40691669, 0x40535d24, 0x403da165, 0x4027e32b, 0x40122278, 0x3ffc5f4d, 
+  0x3fe699aa, 0x3fd0d191, 
+  0x3fbb0702, 0x3fa539fd, 0x3f8f6a85, 0x3f799899, 0x3f63c43b, 0x3f4ded6b, 
+  0x3f38142a, 0x3f22387a, 
+  0x3f0c5a5a, 0x3ef679cc, 0x3ee096d1, 0x3ecab169, 0x3eb4c995, 0x3e9edf57, 
+  0x3e88f2ae, 0x3e73039d, 
+  0x3e5d1222, 0x3e471e41, 0x3e3127f9, 0x3e1b2f4a, 0x3e053437, 0x3def36c0, 
+  0x3dd936e6, 0x3dc334a9, 
+  0x3dad300b, 0x3d97290b, 0x3d811fac, 0x3d6b13ee, 0x3d5505d2, 0x3d3ef559, 
+  0x3d28e282, 0x3d12cd51, 
+  0x3cfcb5c4, 0x3ce69bde, 0x3cd07f9f, 0x3cba6107, 0x3ca44018, 0x3c8e1cd3, 
+  0x3c77f737, 0x3c61cf48, 
+  0x3c4ba504, 0x3c35786d, 0x3c1f4983, 0x3c091849, 0x3bf2e4be, 0x3bdcaee3, 
+  0x3bc676b9, 0x3bb03c42, 
+  0x3b99ff7d, 0x3b83c06c, 0x3b6d7f10, 0x3b573b69, 0x3b40f579, 0x3b2aad3f, 
+  0x3b1462be, 0x3afe15f6, 
+  0x3ae7c6e7, 0x3ad17593, 0x3abb21fb, 0x3aa4cc1e, 0x3a8e7400, 0x3a78199f, 
+  0x3a61bcfd, 0x3a4b5e1b, 
+  0x3a34fcf9, 0x3a1e9999, 0x3a0833fc, 0x39f1cc21, 0x39db620b, 0x39c4f5ba, 
+  0x39ae872f, 0x3998166a, 
+  0x3981a36d, 0x396b2e38, 0x3954b6cd, 0x393e3d2c, 0x3927c155, 0x3911434b, 
+  0x38fac30e, 0x38e4409e, 
+  0x38cdbbfc, 0x38b7352a, 0x38a0ac29, 0x388a20f8, 0x38739399, 0x385d040d, 
+  0x38467255, 0x382fde72, 
+  0x38194864, 0x3802b02c, 0x37ec15cb, 0x37d57943, 0x37beda93, 0x37a839be, 
+  0x379196c3, 0x377af1a3, 
+  0x37644a60, 0x374da0fa, 0x3736f573, 0x372047ca, 0x37099802, 0x36f2e61a, 
+  0x36dc3214, 0x36c57bf0, 
+  0x36aec3b0, 0x36980954, 0x36814cde, 0x366a8e4d, 0x3653cda3, 0x363d0ae2, 
+  0x36264609, 0x360f7f19, 
+  0x35f8b614, 0x35e1eafa, 0x35cb1dcc, 0x35b44e8c, 0x359d7d39, 0x3586a9d5, 
+  0x356fd461, 0x3558fcde, 
+  0x3542234c, 0x352b47ad, 0x35146a00, 0x34fd8a48, 0x34e6a885, 0x34cfc4b7, 
+  0x34b8dee1, 0x34a1f702, 
+  0x348b0d1c, 0x3474212f, 0x345d333c, 0x34464345, 0x342f5149, 0x34185d4b, 
+  0x3401674a, 0x33ea6f48, 
+  0x33d37546, 0x33bc7944, 0x33a57b44, 0x338e7b46, 0x3377794b, 0x33607554, 
+  0x33496f62, 0x33326776, 
+  0x331b5d91, 0x330451b3, 0x32ed43de, 0x32d63412, 0x32bf2250, 0x32a80e99, 
+  0x3290f8ef, 0x3279e151, 
+  0x3262c7c1, 0x324bac40, 0x32348ecf, 0x321d6f6e, 0x32064e1e, 0x31ef2ae1, 
+  0x31d805b7, 0x31c0dea1, 
+  0x31a9b5a0, 0x31928ab4, 0x317b5de0, 0x31642f23, 0x314cfe7f, 0x3135cbf4, 
+  0x311e9783, 0x3107612e, 
+  0x30f028f4, 0x30d8eed8, 0x30c1b2da, 0x30aa74fa, 0x3093353a, 0x307bf39b, 
+  0x3064b01d, 0x304d6ac1, 
+  0x30362389, 0x301eda75, 0x30078f86, 0x2ff042bd, 0x2fd8f41b, 0x2fc1a3a0, 
+  0x2faa514f, 0x2f92fd26, 
+  0x2f7ba729, 0x2f644f56, 0x2f4cf5b0, 0x2f359a37, 0x2f1e3ced, 0x2f06ddd1, 
+  0x2eef7ce5, 0x2ed81a29, 
+  0x2ec0b5a0, 0x2ea94f49, 0x2e91e725, 0x2e7a7d36, 0x2e63117c, 0x2e4ba3f8, 
+  0x2e3434ac, 0x2e1cc397, 
+  0x2e0550bb, 0x2deddc19, 0x2dd665b2, 0x2dbeed86, 0x2da77397, 0x2d8ff7e5, 
+  0x2d787a72, 0x2d60fb3e, 
+  0x2d497a4a, 0x2d31f797, 0x2d1a7325, 0x2d02ecf7, 0x2ceb650d, 0x2cd3db67, 
+  0x2cbc5006, 0x2ca4c2ed, 
+  0x2c8d341a, 0x2c75a390, 0x2c5e114f, 0x2c467d58, 0x2c2ee7ad, 0x2c17504d, 
+  0x2bffb73a, 0x2be81c74, 
+  0x2bd07ffe, 0x2bb8e1d7, 0x2ba14200, 0x2b89a07b, 0x2b71fd48, 0x2b5a5868, 
+  0x2b42b1dd, 0x2b2b09a6, 
+  0x2b135fc6, 0x2afbb43c, 0x2ae4070a, 0x2acc5831, 0x2ab4a7b1, 0x2a9cf58c, 
+  0x2a8541c3, 0x2a6d8c55, 
+  0x2a55d545, 0x2a3e1c93, 0x2a266240, 0x2a0ea64d, 0x29f6e8bb, 0x29df298b, 
+  0x29c768be, 0x29afa654, 
+  0x2997e24f, 0x29801caf, 0x29685576, 0x29508ca4, 0x2938c23a, 0x2920f63a, 
+  0x290928a3, 0x28f15978, 
+  0x28d988b8, 0x28c1b666, 0x28a9e281, 0x28920d0a, 0x287a3604, 0x28625d6d, 
+  0x284a8349, 0x2832a796, 
+  0x281aca57, 0x2802eb8c, 0x27eb0b36, 0x27d32956, 0x27bb45ed, 0x27a360fc, 
+  0x278b7a84, 0x27739285, 
+  0x275ba901, 0x2743bdf9, 0x272bd16d, 0x2713e35f, 0x26fbf3ce, 0x26e402bd, 
+  0x26cc102d, 0x26b41c1d, 
+  0x269c268f, 0x26842f84, 0x266c36fe, 0x26543cfb, 0x263c417f, 0x26244489, 
+  0x260c461b, 0x25f44635, 
+  0x25dc44d9, 0x25c44207, 0x25ac3dc0, 0x25943806, 0x257c30d8, 0x25642839, 
+  0x254c1e28, 0x253412a8, 
+  0x251c05b8, 0x2503f75a, 0x24ebe78f, 0x24d3d657, 0x24bbc3b4, 0x24a3afa6, 
+  0x248b9a2f, 0x2473834f, 
+  0x245b6b07, 0x24435158, 0x242b3644, 0x241319ca, 0x23fafbec, 0x23e2dcac, 
+  0x23cabc09, 0x23b29a05, 
+  0x239a76a0, 0x238251dd, 0x236a2bba, 0x2352043b, 0x2339db5e, 0x2321b126, 
+  0x23098593, 0x22f158a7, 
+  0x22d92a61, 0x22c0fac4, 0x22a8c9cf, 0x22909785, 0x227863e5, 0x22602ef1, 
+  0x2247f8aa, 0x222fc111, 
+  0x22178826, 0x21ff4dea, 0x21e71260, 0x21ced586, 0x21b6975f, 0x219e57eb, 
+  0x2186172b, 0x216dd521, 
+  0x215591cc, 0x213d4d2f, 0x21250749, 0x210cc01d, 0x20f477aa, 0x20dc2df2, 
+  0x20c3e2f5, 0x20ab96b5, 
+  0x20934933, 0x207afa6f, 0x2062aa6b, 0x204a5927, 0x203206a4, 0x2019b2e4, 
+  0x20015de7, 0x1fe907ae, 
+  0x1fd0b03a, 0x1fb8578b, 0x1f9ffda4, 0x1f87a285, 0x1f6f462f, 0x1f56e8a2, 
+  0x1f3e89e0, 0x1f2629ea, 
+  0x1f0dc8c0, 0x1ef56664, 0x1edd02d6, 0x1ec49e17, 0x1eac3829, 0x1e93d10c, 
+  0x1e7b68c2, 0x1e62ff4a, 
+  0x1e4a94a7, 0x1e3228d9, 0x1e19bbe0, 0x1e014dbf, 0x1de8de75, 0x1dd06e04, 
+  0x1db7fc6d, 0x1d9f89b1, 
+  0x1d8715d0, 0x1d6ea0cc, 0x1d562aa6, 0x1d3db35e, 0x1d253af5, 0x1d0cc16c, 
+  0x1cf446c5, 0x1cdbcb00, 
+  0x1cc34e1f, 0x1caad021, 0x1c925109, 0x1c79d0d6, 0x1c614f8b, 0x1c48cd27, 
+  0x1c3049ac, 0x1c17c51b, 
+  0x1bff3f75, 0x1be6b8ba, 0x1bce30ec, 0x1bb5a80c, 0x1b9d1e1a, 0x1b849317, 
+  0x1b6c0705, 0x1b5379e5, 
+  0x1b3aebb6, 0x1b225c7b, 0x1b09cc34, 0x1af13ae3, 0x1ad8a887, 0x1ac01522, 
+  0x1aa780b6, 0x1a8eeb42, 
+  0x1a7654c8, 0x1a5dbd49, 0x1a4524c6, 0x1a2c8b3f, 0x1a13f0b6, 0x19fb552c, 
+  0x19e2b8a2, 0x19ca1b17, 
+  0x19b17c8f, 0x1998dd09, 0x19803c86, 0x19679b07, 0x194ef88e, 0x1936551b, 
+  0x191db0af, 0x19050b4b, 
+  0x18ec64f0, 0x18d3bda0, 0x18bb155a, 0x18a26c20, 0x1889c1f3, 0x187116d4, 
+  0x18586ac3, 0x183fbdc3, 
+  0x18270fd3, 0x180e60f4, 0x17f5b129, 0x17dd0070, 0x17c44ecd, 0x17ab9c3e, 
+  0x1792e8c6, 0x177a3466, 
+  0x17617f1d, 0x1748c8ee, 0x173011d9, 0x171759df, 0x16fea102, 0x16e5e741, 
+  0x16cd2c9f, 0x16b4711b, 
+  0x169bb4b7, 0x1682f774, 0x166a3953, 0x16517a55, 0x1638ba7a, 0x161ff9c4, 
+  0x16073834, 0x15ee75cb, 
+  0x15d5b288, 0x15bcee6f, 0x15a4297f, 0x158b63b9, 0x15729d1f, 0x1559d5b1, 
+  0x15410d70, 0x1528445d, 
+  0x150f7a7a, 0x14f6afc7, 0x14dde445, 0x14c517f4, 0x14ac4ad7, 0x14937cee, 
+  0x147aae3a, 0x1461debc, 
+  0x14490e74, 0x14303d65, 0x14176b8e, 0x13fe98f1, 0x13e5c58e, 0x13ccf167, 
+  0x13b41c7d, 0x139b46d0, 
+  0x13827062, 0x13699933, 0x1350c144, 0x1337e897, 0x131f0f2c, 0x13063505, 
+  0x12ed5a21, 0x12d47e83, 
+  0x12bba22b, 0x12a2c51b, 0x1289e752, 0x127108d2, 0x1258299c, 0x123f49b2, 
+  0x12266913, 0x120d87c1, 
+  0x11f4a5bd, 0x11dbc307, 0x11c2dfa2, 0x11a9fb8d, 0x119116c9, 0x11783159, 
+  0x115f4b3c, 0x11466473, 
+  0x112d7d00, 0x111494e4, 0x10fbac1e, 0x10e2c2b2, 0x10c9d89e, 0x10b0ede5, 
+  0x10980287, 0x107f1686, 
+  0x106629e1, 0x104d3c9b, 0x10344eb4, 0x101b602d, 0x10027107, 0xfe98143, 
+  0xfd090e1, 0xfb79fe4, 
+  0xf9eae4c, 0xf85bc19, 0xf6cc94e, 0xf53d5ea, 0xf3ae1ee, 0xf21ed5d, 0xf08f836, 
+  0xef0027b, 
+  0xed70c2c, 0xebe154b, 0xea51dd8, 0xe8c25d5, 0xe732d42, 0xe5a3421, 0xe413a72, 
+  0xe284036, 
+  0xe0f456f, 0xdf64a1c, 0xddd4e40, 0xdc451dc, 0xdab54ef, 0xd92577b, 0xd795982, 
+  0xd605b03, 
+  0xd475c00, 0xd2e5c7b, 0xd155c73, 0xcfc5bea, 0xce35ae1, 0xcca5959, 0xcb15752, 
+  0xc9854cf, 
+  0xc7f51cf, 0xc664e53, 0xc4d4a5d, 0xc3445ee, 0xc1b4107, 0xc023ba7, 0xbe935d2, 
+  0xbd02f87, 
+  0xbb728c7, 0xb9e2193, 0xb8519ed, 0xb6c11d5, 0xb53094d, 0xb3a0055, 0xb20f6ee, 
+  0xb07ed19, 
+  0xaeee2d7, 0xad5d829, 0xabccd11, 0xaa3c18e, 0xa8ab5a2, 0xa71a94f, 0xa589c94, 
+  0xa3f8f73, 
+  0xa2681ed, 0xa0d7403, 0x9f465b5, 0x9db5706, 0x9c247f5, 0x9a93884, 0x99028b3, 
+  0x9771884, 
+  0x95e07f8, 0x944f70f, 0x92be5ca, 0x912d42c, 0x8f9c233, 0x8e0afe2, 0x8c79d3a, 
+  0x8ae8a3a, 
+  0x89576e5, 0x87c633c, 0x8634f3e, 0x84a3aee, 0x831264c, 0x8181159, 0x7fefc16, 
+  0x7e5e685, 
+  0x7ccd0a5, 0x7b3ba78, 0x79aa400, 0x7818d3c, 0x768762e, 0x74f5ed7, 0x7364738, 
+  0x71d2f52, 
+  0x7041726, 0x6eafeb4, 0x6d1e5fe, 0x6b8cd05, 0x69fb3c9, 0x6869a4c, 0x66d808f, 
+  0x6546692, 
+  0x63b4c57, 0x62231de, 0x6091729, 0x5effc38, 0x5d6e10c, 0x5bdc5a7, 0x5a4aa09, 
+  0x58b8e34, 
+  0x5727228, 0x55955e6, 0x540396f, 0x5271cc4, 0x50dffe7, 0x4f4e2d8, 0x4dbc597, 
+  0x4c2a827, 
+  0x4a98a88, 0x4906cbb, 0x4774ec1, 0x45e309a, 0x4451249, 0x42bf3cd, 0x412d528, 
+  0x3f9b65b, 
+  0x3e09767, 0x3c7784d, 0x3ae590d, 0x39539a9, 0x37c1a22, 0x362fa78, 0x349daac, 
+  0x330bac1, 
+  0x3179ab5, 0x2fe7a8c, 0x2e55a44, 0x2cc39e1, 0x2b31961, 0x299f8c7, 0x280d813, 
+  0x267b747, 
+  0x24e9662, 0x2357567, 0x21c5457, 0x2033331, 0x1ea11f7, 0x1d0f0ab, 0x1b7cf4d, 
+  0x19eaddd, 
+  0x1858c5e, 0x16c6ad0, 0x1534934, 0x13a278a, 0x12105d5, 0x107e414, 0xeec249, 
+  0xd5a075, 
+  0xbc7e99, 0xa35cb5, 0x8a3acb, 0x7118dc, 0x57f6e9, 0x3ed4f2, 0x25b2f8, 
+  0xc90fe, 
+ 
+}; 
+ 
+/**  
+ * @brief  Initialization function for the Q31 DCT4/IDCT4. 
+ * @param[in,out] *S         points to an instance of Q31 DCT4/IDCT4 structure. 
+ * @param[in]     *S_RFFT    points to an instance of Q31 RFFT/RIFFT structure 
+ * @param[in]     *S_CFFT    points to an instance of Q31 CFFT/CIFFT structure 
+ * @param[in]     N          length of the DCT4. 
+ * @param[in]     Nby2       half of the length of the DCT4. 
+ * @param[in]     normalize  normalizing factor. 
+ * @return        arm_status function returns ARM_MATH_SUCCESS if initialization is successful, 
+ *                ARM_MATH_ARGUMENT_ERROR if <code>N</code> is not a supported transform length 
+ *                (128, 512 or 2048), or the error status returned by arm_rfft_init_q31() 
+ *                if the underlying RFFT instance could not be initialized. 
+ * \par Normalizing factor:  
+ * The normalizing factor is <code>sqrt(2/N)</code>, which depends on the size of transform <code>N</code>.  
+ * Normalizing factors in 1.31 format are mentioned in the table below for different DCT sizes:  
+ * \image html dct4NormalizingQ31Table.gif  
+ * \par Note:  
+ * <code>Nby2</code> is stored as supplied; it is not checked against <code>N</code>.  
+ * When <code>N</code> is not supported, the weight and cos factor table pointers of <code>S</code> are left untouched.  
+ */ 
+ 
+arm_status arm_dct4_init_q31( 
+  arm_dct4_instance_q31 * S, 
+  arm_rfft_instance_q31 * S_RFFT, 
+  arm_cfft_radix4_instance_q31 * S_CFFT, 
+  uint16_t N, 
+  uint16_t Nby2, 
+  q31_t normalize) 
+{ 
+  /* Status of the DCT4 length check; stays ARM_MATH_SUCCESS for a supported N */ 
+  arm_status status = ARM_MATH_SUCCESS; 
+ 
+  /* Status reported by the RFFT initialization */ 
+  arm_status rfftStatus; 
+ 
+  /* Store the DCT4 length and half of the DCT4 length */ 
+  S->N = N; 
+  S->Nby2 = Nby2; 
+ 
+  /* Store the DCT4 normalizing factor */ 
+  S->normalize = normalize; 
+ 
+  /* Attach the real and complex FFT instances used by the DCT4 */ 
+  S->pRfft = S_RFFT; 
+  S->pCfft = S_CFFT; 
+ 
+  /* Select the weight and cos factor tables matching the transform length */ 
+  switch (N) 
+  { 
+  case 2048u: 
+    S->pTwiddle = (q31_t *) WeightsQ31_2048; 
+    S->pCosFactor = (q31_t *) cos_factorsQ31_2048; 
+    break; 
+  case 512u: 
+    S->pTwiddle = (q31_t *) WeightsQ31_512; 
+    S->pCosFactor = (q31_t *) cos_factorsQ31_512; 
+    break; 
+  case 128u: 
+    S->pTwiddle = (q31_t *) WeightsQ31_128; 
+    S->pCosFactor = (q31_t *) cos_factorsQ31_128; 
+    break; 
+  default: 
+    /* Unsupported length: no table is selected */ 
+    status = ARM_MATH_ARGUMENT_ERROR; 
+    break; 
+  } 
+ 
+  /* Initialize the RFFT/RIFFT instance with the same arguments as before  
+   * (0, 1: forward transform with bit reversal, per the arm_rfft_init_q31 API).  
+   * The call is kept unconditional so its effect on S_RFFT/S_CFFT is unchanged. */ 
+  rfftStatus = arm_rfft_init_q31(S->pRfft, S->pCfft, S->N, 0, 1); 
+ 
+  /* Do not hide an RFFT initialization failure behind a successful length check. */ 
+  if(status == ARM_MATH_SUCCESS) 
+  { 
+    status = rfftStatus; 
+  } 
+ 
+  /* return the status of DCT4 Init function */ 
+  return (status); 
+} 
+ 
+/**  
+   * @} end of DCT4_IDCT4 group  
+   */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/TransformFunctions/arm_dct4_q15.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,265 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:	    arm_dct4_q15.c  
+*  
+* Description:	Processing function of DCT4 & IDCT4 Q15.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @addtogroup DCT4_IDCT4  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief Processing function for the Q15 DCT4/IDCT4. 
+ * @param[in]       *S             points to an instance of the Q15 DCT4 structure. 
+ * @param[in]       *pState        points to state buffer. 
+ * @param[in,out]   *pInlineBuffer points to the in-place input and output buffer. 
+ * @return none. 
+ *   
+ * \par Input and output formats:  
+ * Internally inputs are downscaled in the RFFT process function to avoid overflows.  
+ * The number of bits downscaled depends on the size of the transform.  
+ * The input and output formats for different DCT sizes and number of bits to upscale are mentioned in the table below:   
+ *  
+ * \image html dct4FormatsQ15Table.gif  
+ */ 
+ 
+void arm_dct4_q15( 
+  const arm_dct4_instance_q15 * S, 
+  q15_t * pState, 
+  q15_t * pInlineBuffer) 
+{ 
+  q15_t *pWeights = S->pTwiddle;                 /* Weights table */ 
+  q15_t *pCos = S->pCosFactor;                   /* Cos factors table */ 
+  q15_t *pFwd;                                   /* Ascending cursor into pState */ 
+  q15_t *pRev;                                   /* Descending cursor into pState */ 
+  q15_t *pData;                                  /* Cursor into pInlineBuffer */ 
+  q15_t acc;                                     /* Running value / scratch sample */ 
+  uint32_t blkCnt;                               /* Counter of 4-sample blocks */ 
+  uint32_t k;                                    /* Index inside one block */ 
+ 
+  /* Overview: the DCT4 is built on a DCT2, which is computed with the RFFT.  
+   *   (a) Pre-processing: r(n) = 2 * u(n) * cos(pi*(2*n+1)/(4*N)),  
+   *       u(n) being the source samples and r(n) the pre-processed samples.  
+   *   (b) DCT2 through the FFT:  
+   *         Step1: re-order even and odd samples,  
+   *         Step2: FFT of the re-ordered samples,  
+   *         Step3: multiply the FFT output by the weights and keep the real part.  
+   *   (c) Post-processing: Y4(k) = Y2(k) - Y4(k-1) with Y4(-1) = Y4(0),  
+   *       Y4 being the DCT4 output and Y2 the DCT2 output.  
+   *   (d) Scale the result by the normalizing factor sqrt(2/N).  
+   */ 
+ 
+  /* ---- (a) Pre-processing: multiply by the cos factors, then double ---- */ 
+  arm_mult_q15(pInlineBuffer, pCos, pInlineBuffer, S->N); 
+  arm_shift_q15(pInlineBuffer, 1, pInlineBuffer, S->N); 
+ 
+  /* ---- Step1: pState[i] = pInlineBuffer[2*i], pState[N-i-1] = pInlineBuffer[2*i+1] ---- */ 
+  pFwd = pState; 
+  pRev = pState + (S->N - 1u); 
+  pData = pInlineBuffer; 
+ 
+  /* Each block handles 4 even/odd pairs; the block count is (N/2) / 4 */ 
+  blkCnt = (uint32_t) S->Nby2 >> 2u; 
+ 
+  do 
+  { 
+    for(k = 0u; k < 4u; k++) 
+    { 
+      /* Even sample goes to the front, odd sample to the back */ 
+      *pFwd++ = *pData++; 
+      *pRev-- = *pData++; 
+    } 
+  } while(--blkCnt > 0u); 
+ 
+  /* Copy the re-ordered samples back into the in-place buffer, 4 per block */ 
+  pData = pInlineBuffer; 
+  pFwd = pState; 
+  blkCnt = (uint32_t) S->N >> 2u; 
+ 
+  do 
+  { 
+    for(k = 0u; k < 4u; k++) 
+    { 
+      *pData++ = *pFwd++; 
+    } 
+  } while(--blkCnt > 0u); 
+ 
+  /* ---- Step2: RFFT of the N real samples; pState receives the complex output (2N values) ---- */ 
+  arm_rfft_q15(S->pRfft, pInlineBuffer, pState); 
+ 
+  /* ---- Step3: complex multiply of the FFT output by the weights ---- */ 
+  arm_cmplx_mult_cmplx_q15(pState, pWeights, pState, S->N); 
+ 
+  /* The complex product is in 3.13 format; shifting the 2*N values left by 2 bits  
+   * brings them back to 1.15 format. */ 
+  arm_shift_q15(pState, 2, pState, S->N * 2); 
+ 
+  /* ---- (c) Post-processing: take the real parts and convert DCT2 to DCT4 ---- */ 
+ 
+  /* Number of 4-sample blocks among the N-1 outputs that follow Y4(0) */ 
+  blkCnt = ((uint32_t) S->N - 1u) >> 2u; 
+ 
+  pData = pInlineBuffer; 
+  pFwd = pState; 
+ 
+  /* Y4(0) = Y2(0)/2, written straight into the in-place buffer */ 
+  acc = *pFwd >> 1u; 
+  *pData++ = acc; 
+ 
+  /* Real parts sit at every second position of pState, so step by 2 */ 
+  pFwd += 2; 
+ 
+  do 
+  { 
+    for(k = 0u; k < 4u; k++) 
+    { 
+      /* Y4(k) = Y2(k) - Y4(k-1) */ 
+      acc = *pFwd - acc; 
+      *pData++ = acc; 
+      pFwd += 2; 
+    } 
+  } while(--blkCnt > 0u); 
+ 
+  /* Outputs left over after the 4-sample blocks, handled one at a time */ 
+  blkCnt = ((uint32_t) S->N - 1u) % 0x4u; 
+ 
+  while(blkCnt > 0u) 
+  { 
+    /* Y4(k) = Y2(k) - Y4(k-1) */ 
+    acc = *pFwd - acc; 
+    *pData++ = acc; 
+    pFwd += 2; 
+ 
+    blkCnt--; 
+  } 
+ 
+  /* ---- (d) Normalization: multiply every output by S->normalize ---- */ 
+  blkCnt = (uint32_t) S->N >> 2u; 
+  pData = pInlineBuffer; 
+ 
+  do 
+  { 
+    for(k = 0u; k < 4u; k++) 
+    { 
+      /* Product is taken in 32 bits, then shifted right by 15 back to q15_t */ 
+      acc = *pData; 
+      *pData++ = ((q15_t) (((q31_t) acc * S->normalize) >> 15)); 
+    } 
+  } while(--blkCnt > 0u); 
+ 
+} 
+ 
+/**  
+   * @} end of DCT4_IDCT4 group  
+   */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/TransformFunctions/arm_dct4_q31.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,266 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:	    arm_dct4_q31.c  
+*  
+* Description:	Processing function of DCT4 & IDCT4 Q31.  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @addtogroup DCT4_IDCT4  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief Processing function for the Q31 DCT4/IDCT4. 
+ * @param[in]       *S             points to an instance of the Q31 DCT4 structure. 
+ * @param[in]       *pState        points to state buffer. 
+ * @param[in,out]   *pInlineBuffer points to the in-place input and output buffer. 
+ * @return none. 
+ * \par Input and output formats:  
+ * Input samples need to be downscaled by 1 bit to avoid saturations in the Q31 DCT process,  
+ * as the conversion from DCT2 to DCT4 involves one subtraction.  
+ * Internally inputs are downscaled in the RFFT process function to avoid overflows.  
+ * The number of bits downscaled depends on the size of the transform.  
+ * The input and output formats for different DCT sizes and number of bits to upscale are mentioned in the table below:   
+ *  
+ * \image html dct4FormatsQ31Table.gif  
+ */ 
+ 
+void arm_dct4_q31( 
+  const arm_dct4_instance_q31 * S, 
+  q31_t * pState, 
+  q31_t * pInlineBuffer) 
+{ 
+  q31_t *pWeights = S->pTwiddle;                 /* Weights table */ 
+  q31_t *pCos = S->pCosFactor;                   /* Cos factors table */ 
+  q31_t *pFwd;                                   /* Ascending cursor into pState */ 
+  q31_t *pRev;                                   /* Descending cursor into pState */ 
+  q31_t *pData;                                  /* Cursor into pInlineBuffer */ 
+  q31_t acc;                                     /* Running value / scratch sample */ 
+  uint16_t blkCnt;                               /* Counter of 4-sample blocks */ 
+  uint16_t k;                                    /* Index inside one block */ 
+ 
+  /* Overview: the DCT4 is built on a DCT2, which is computed with the RFFT.  
+   *   (a) Pre-processing: r(n) = 2 * u(n) * cos(pi*(2*n+1)/(4*N)),  
+   *       u(n) being the source samples and r(n) the pre-processed samples.  
+   *   (b) DCT2 through the FFT:  
+   *         Step1: re-order even and odd samples,  
+   *         Step2: FFT of the re-ordered samples,  
+   *         Step3: multiply the FFT output by the weights and keep the real part.  
+   *   (c) Post-processing: Y4(k) = Y2(k) - Y4(k-1) with Y4(-1) = Y4(0),  
+   *       Y4 being the DCT4 output and Y2 the DCT2 output.  
+   *   (d) Scale the result by the normalizing factor sqrt(2/N).  
+   */ 
+ 
+  /* ---- (a) Pre-processing: multiply by the cos factors, then double ---- */ 
+  arm_mult_q31(pInlineBuffer, pCos, pInlineBuffer, S->N); 
+  arm_shift_q31(pInlineBuffer, 1, pInlineBuffer, S->N); 
+ 
+  /* ---- Step1: pState[i] = pInlineBuffer[2*i], pState[N-i-1] = pInlineBuffer[2*i+1] ---- */ 
+  pFwd = pState; 
+  pRev = pState + (S->N - 1u); 
+  pData = pInlineBuffer; 
+ 
+  /* Each block handles 4 even/odd pairs; the block count is (N/2) / 4 */ 
+  blkCnt = S->Nby2 >> 2u; 
+ 
+  do 
+  { 
+    for(k = 0u; k < 4u; k++) 
+    { 
+      /* Even sample goes to the front, odd sample to the back */ 
+      *pFwd++ = *pData++; 
+      *pRev-- = *pData++; 
+    } 
+  } while(--blkCnt > 0u); 
+ 
+  /* Copy the re-ordered samples back into the in-place buffer, 4 per block */ 
+  pData = pInlineBuffer; 
+  pFwd = pState; 
+  blkCnt = S->N >> 2u; 
+ 
+  do 
+  { 
+    for(k = 0u; k < 4u; k++) 
+    { 
+      *pData++ = *pFwd++; 
+    } 
+  } while(--blkCnt > 0u); 
+ 
+  /* ---- Step2: RFFT of the N real samples; pState receives the complex output (2N values) ---- */ 
+  arm_rfft_q31(S->pRfft, pInlineBuffer, pState); 
+ 
+  /* ---- Step3: complex multiply of the FFT output by the weights ---- */ 
+  arm_cmplx_mult_cmplx_q31(pState, pWeights, pState, S->N); 
+ 
+  /* The complex product is in 3.29 format; shifting the 2*N values left by 2 bits  
+   * brings them back to 1.31 format. */ 
+  arm_shift_q31(pState, 2, pState, S->N * 2); 
+ 
+  /* ---- (c) Post-processing: take the real parts and convert DCT2 to DCT4 ---- */ 
+ 
+  /* Number of 4-sample blocks among the N-1 outputs that follow Y4(0) */ 
+  blkCnt = (S->N - 1u) >> 2u; 
+ 
+  pData = pInlineBuffer; 
+  pFwd = pState; 
+ 
+  /* Y4(0) = Y2(0)/2, written straight into the in-place buffer */ 
+  acc = *pFwd >> 1u; 
+  *pData++ = acc; 
+ 
+  /* Real parts sit at every second position of pState, so step by 2 */ 
+  pFwd += 2; 
+ 
+  do 
+  { 
+    for(k = 0u; k < 4u; k++) 
+    { 
+      /* Y4(k) = Y2(k) - Y4(k-1) */ 
+      acc = *pFwd - acc; 
+      *pData++ = acc; 
+      pFwd += 2; 
+    } 
+  } while(--blkCnt > 0u); 
+ 
+  /* Outputs left over after the 4-sample blocks, handled one at a time */ 
+  blkCnt = (S->N - 1u) % 0x4u; 
+ 
+  while(blkCnt > 0u) 
+  { 
+    /* Y4(k) = Y2(k) - Y4(k-1) */ 
+    acc = *pFwd - acc; 
+    *pData++ = acc; 
+    pFwd += 2; 
+ 
+    blkCnt--; 
+  } 
+ 
+  /* ---- (d) Normalization: multiply every output by S->normalize ---- */ 
+  blkCnt = S->N >> 2u; 
+  pData = pInlineBuffer; 
+ 
+  do 
+  { 
+    for(k = 0u; k < 4u; k++) 
+    { 
+      /* Product is taken in 64 bits, then shifted right by 31 back to q31_t */ 
+      acc = *pData; 
+      *pData++ = ((q31_t) (((q63_t) acc * S->normalize) >> 31)); 
+    } 
+  } while(--blkCnt > 0u); 
+ 
+} 
+ 
+/**  
+   * @} end of DCT4_IDCT4 group  
+   */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/TransformFunctions/arm_rfft_f32.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,380 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:	    arm_rfft_f32.c  
+*  
+* Description:	RFFT & RIFFT Floating point process function  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupTransforms  
+ */ 
+ 
+/**  
+ * @defgroup RFFT_RIFFT Real FFT Functions  
+ *  
+ * \par  
+ * Complex FFT/IFFT typically assumes complex input and output. However many applications use real valued data in time domain.   
+ * Real FFT/IFFT functions process real-valued sequences efficiently, with the advantages of a lower memory requirement and lower complexity.  
+ *  
+ * \par  
+ * This set of functions implements Real Fast Fourier Transforms(RFFT) and Real Inverse Fast Fourier Transform(RIFFT)  
+ * for Q15, Q31, and floating-point data types.    
+ *  
+ *  
+ * \par Algorithm:  
+ *  
+ * <b>Real Fast Fourier Transform:</b>  
+ * \par  
+ * An N-point real FFT is calculated using an N/2-point CFFT followed by the split RFFT process, as shown in the figure below.  
+ * \par  
+ * \image html RFFT.gif "Real Fast Fourier Transform"  
+ * \par  
+ * The RFFT functions operate on blocks of input and output data and each call to the function processes  
+ * <code>fftLenR</code> samples through the transform.  <code>pSrc</code>  points to input array containing <code>fftLenR</code> values.  
+ * <code>pDst</code>  points to output array containing <code>2*fftLenR</code> values. \n 
+ * Input for real FFT is in the order of   
+ * <pre>{real[0], real[1], real[2], real[3], ..}</pre>  
+ * Output for real FFT is complex and are in the order of  
+ * <pre>{real(0), imag(0), real(1), imag(1), ...}</pre>   
+ *  
+ * <b>Real Inverse Fast Fourier Transform:</b>  
+ * \par  
+ * An N-point real IFFT is calculated using the split RIFFT process followed by an N/2-point CIFFT, as shown in the figure below.  
+ * \par  
+ * \image html RIFFT.gif "Real Inverse Fast Fourier Transform"  
+ * \par  
+ * The RIFFT functions operate on blocks of input and output data and each call to the function processes  
+ * <code>2*fftLenR</code> samples through the transform.  <code>pSrc</code>  points to input array containing <code>2*fftLenR</code> values.  
+ * <code>pDst</code>  points to output array containing <code>fftLenR</code> values. \n  
+ * Input for real IFFT is complex and are in the order of 
+ * <pre>{real(0), imag(0), real(1), imag(1), ...}</pre> 
+ *  Output for real IFFT is real and in the order of   
+ * <pre>{real[0], real[1], real[2], real[3], ..}</pre> 
+ *  
+ * \par Lengths supported by the transform: 
+ * \par  
+ * Real FFT/IFFT supports the lengths [128, 512, 2048], as it internally uses CFFT/CIFFT.  
+ *  
+ * \par Instance Structure  
+ * A separate instance structure must be defined for each Instance but the twiddle factors can be reused.  
+ * There are separate instance structure declarations for each of the 3 supported data types.  
+ *  
+ * \par Initialization Functions  
+ * There is also an associated initialization function for each data type.  
+ * The initialization function performs the following operations:  
+ * - Sets the values of the internal structure fields.  
+ * - Initializes twiddle factor tables. 
+ * - Initializes CFFT data structure fields.   
+ * \par  
+ * Use of the initialization function is optional.  
+ * However, if the initialization function is used, then the instance structure cannot be placed into a const data section.  
+ * To place an instance structure into a const data section, the instance structure must be manually initialized.  
+ * Manually initialize the instance structure as follows:  
+ * <pre>  
+ *arm_rfft_instance_f32 S = {fftLenReal, fftLenBy2, ifftFlagR, bitReverseFlagR, twidCoefRModifier, pTwiddleAReal, pTwiddleBReal, pCfft};  
+ *arm_rfft_instance_q31 S = {fftLenReal, fftLenBy2, ifftFlagR, bitReverseFlagR, twidCoefRModifier, pTwiddleAReal, pTwiddleBReal, pCfft};  
+ *arm_rfft_instance_q15 S = {fftLenReal, fftLenBy2, ifftFlagR, bitReverseFlagR, twidCoefRModifier, pTwiddleAReal, pTwiddleBReal, pCfft};  
+ * </pre>  
+ * where <code>fftLenReal</code> length of RFFT/RIFFT; <code>fftLenBy2</code> length of CFFT/CIFFT.   
+ * <code>ifftFlagR</code> Flag for selection of RFFT or RIFFT(Set ifftFlagR to calculate RIFFT otherwise calculates RFFT);  
+ * <code>bitReverseFlagR</code> Flag for selection of output order(Set bitReverseFlagR to output in normal order otherwise output in bit reversed order);   
+ * <code>twidCoefRModifier</code> modifier for twiddle factor table which supports 128, 512, 2048 RFFT lengths with same table;  
+ * <code>pTwiddleAReal</code>points to A array of twiddle coefficients; <code>pTwiddleBReal</code>points to B array of twiddle coefficients;  
+ * <code>pCfft</code> points to the CFFT Instance structure. The CFFT structure also needs to be initialized, refer to arm_cfft_radix4_f32() for details regarding  
+ * static initialization of cfft structure.  
+ *  
+ * \par Fixed-Point Behavior  
+ * Care must be taken when using the fixed-point versions of the RFFT/RIFFT function.  
+ * Refer to the function specific documentation below for usage guidelines.  
+ */ 
+ 
+/*--------------------------------------------------------------------  
+ *		Internal function prototypes (both functions are defined later in this file)  
+ *--------------------------------------------------------------------*/ 
+ 
+void arm_split_rfft_f32(        /* split stage that arm_rfft_f32() runs after the CFFT in the forward path */ 
+  float32_t * pSrc, 
+  uint32_t fftLen, 
+  float32_t * pATable, 
+  float32_t * pBTable, 
+  float32_t * pDst, 
+  uint32_t modifier); 
+void arm_split_rifft_f32(       /* split stage that arm_rfft_f32() runs before the CIFFT in the inverse path */ 
+  float32_t * pSrc, 
+  uint32_t fftLen, 
+  float32_t * pATable, 
+  float32_t * pBTable, 
+  float32_t * pDst, 
+  uint32_t modifier); 
+ 
+/**  
+ * @addtogroup RFFT_RIFFT  
+ * @{  
+ */ 
+ 
+/**
+ * @brief Processing function for the floating-point real FFT and real inverse FFT.
+ * @param[in]  *S    points to the RFFT/RIFFT instance; S->ifftFlagR == 1 selects the inverse path, any other value the forward path.
+ * @param[in]  *pSrc points to the input buffer; the forward path hands it to the CFFT stage (presumably processed in place - confirm).
+ * @param[out] *pDst points to the output buffer.
+ * @return none.
+ */
+
+void arm_rfft_f32(
+  const arm_rfft_instance_f32 * S,
+  float32_t * pSrc,
+  float32_t * pDst)
+{
+  /* Complex FFT instance used by both paths */
+  const arm_cfft_radix4_instance_f32 *pCfftInst = S->pCfft;
+
+  if(S->ifftFlagR != 1u)
+  {
+    /* Forward path: complex FFT over pSrc, then the split stage from pSrc into pDst */
+
+    /* Radix-4 butterfly stage */
+    arm_radix4_butterfly_f32(pSrc,
+                             pCfftInst->fftLen,
+                             pCfftInst->pTwiddle,
+                             pCfftInst->twidCoefModifier);
+
+    /* Bit reversal, only when requested by the instance */
+    if(S->bitReverseFlagR == 1u)
+    {
+      arm_bitreversal_f32(pSrc, pCfftInst->fftLen,
+                          pCfftInst->bitRevFactor, pCfftInst->pBitRevTable);
+    }
+
+    /* Real FFT split stage */
+    arm_split_rfft_f32(pSrc, S->fftLenBy2,
+                       S->pTwiddleAReal, S->pTwiddleBReal,
+                       pDst, S->twidCoefRModifier);
+  }
+  else
+  {
+    /* Inverse path: split stage from pSrc into pDst, then complex IFFT over pDst */
+    arm_split_rifft_f32(pSrc, S->fftLenBy2,
+                        S->pTwiddleAReal, S->pTwiddleBReal,
+                        pDst, S->twidCoefRModifier);
+
+    /* Inverse radix-4 butterfly stage */
+    arm_radix4_butterfly_inverse_f32(pDst,
+                                     pCfftInst->fftLen,
+                                     pCfftInst->pTwiddle,
+                                     pCfftInst->twidCoefModifier,
+                                     pCfftInst->onebyfftLen);
+
+    /* Bit reversal, only when requested by the instance */
+    if(S->bitReverseFlagR == 1u)
+    {
+      arm_bitreversal_f32(pDst, pCfftInst->fftLen,
+                          pCfftInst->bitRevFactor, pCfftInst->pBitRevTable);
+    }
+  }
+}
+ 
+/**  
+   * @} end of RFFT_RIFFT group  
+   */ 
+ 
+/**
+ * @brief  Core Real FFT process: builds the real FFT output in pDst from the complex FFT result in pSrc.
+ * @param[in]   *pSrc      points to the input buffer. Elements [0 .. 2*fftLen-1] are read; nothing is written to it.
+ * @param[in]   fftLen     length of FFT (arm_rfft_f32() passes S->fftLenBy2 here).
+ * @param[in]   *pATable   points to the twiddle Coef A buffer.
+ * @param[in]   *pBTable   points to the twiddle Coef B buffer.
+ * @param[out]  *pDst      points to the output buffer. Elements [0 .. 4*fftLen-1] are written.
+ * @param[in]   modifier   twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
+ * @return none.
+ */
+
+void arm_split_rfft_f32(
+  float32_t * pSrc,
+  uint32_t fftLen,
+  float32_t * pATable,
+  float32_t * pBTable,
+  float32_t * pDst,
+  uint32_t modifier)
+{
+  uint32_t i;                                    /* Loop Counter */
+  float32_t outR, outI;                          /* Temporary variables for output */
+  float32_t *pCoefA, *pCoefB;                    /* Temporary pointers for twiddle factors */
+  float32_t CoefA1, CoefA2, CoefB1;              /* Temporary variables for twiddle coefficients */
+  float32_t *pDst1 = &pDst[2], *pDst2 = &pDst[(4u * fftLen) - 1u];      /* ascending / descending write pointers into pDst */
+  float32_t *pSrc1 = &pSrc[2], *pSrc2 = &pSrc[(2u * fftLen) - 1u];      /* ascending / descending read pointers into pSrc */
+
+  /* pSrc[0] and pSrc[1] are deliberately NOT mirrored to pSrc[2 * fftLen] and pSrc[2 * fftLen + 1]:
+     nothing below reads those two elements, and storing to them would write past the end of
+     an input buffer that holds only 2 * fftLen values. */
+
+  /* Init coefficient pointers (the loop starts at the second pair; pDst[0..1] are filled in after the loop) */
+  pCoefA = &pATable[modifier * 2u];
+  pCoefB = &pBTable[modifier * 2u];
+
+  i = fftLen - 1u;
+
+  while(i > 0u)
+  {
+    /* With k = 1 .. fftLen - 1 counting the passes and t = k * modifier, the target is
+       outR = pSrc[2 * k] * pATable[2 * t] - pSrc[2 * k + 1] * pATable[2 * t + 1]
+            + pSrc[2 * fftLen - 2 * k] * pBTable[2 * t]
+            + pSrc[2 * fftLen - 2 * k + 1] * pBTable[2 * t + 1]
+       outI = pSrc[2 * k + 1] * pATable[2 * t] + pSrc[2 * k] * pATable[2 * t + 1]
+            + pSrc[2 * fftLen - 2 * k] * pBTable[2 * t + 1]
+            - pSrc[2 * fftLen - 2 * k + 1] * pBTable[2 * t]
+       The statements below use -CoefA2 wherever pBTable[2 * t + 1] appears
+       (presumably pBTable[2 * t + 1] == -pATable[2 * t + 1]; confirm against the B table). */
+
+    /* CoefA1 = pATable[2 * t]; the post-increment leaves pCoefA on pATable[2 * t + 1] */
+    CoefA1 = *pCoefA++;
+    /* CoefA2 = pATable[2 * t + 1] */
+    CoefA2 = *pCoefA;
+
+    /* pSrc[2 * k] * CoefA1 */
+    outR = *pSrc1 * CoefA1;
+    /* pSrc[2 * k] * CoefA2 */
+    outI = *pSrc1++ * CoefA2;
+
+    /* (pSrc[2 * k + 1] + pSrc[2 * fftLen - 2 * k + 1]) * CoefA2 */
+    outR -= (*pSrc1 + *pSrc2) * CoefA2;
+    /* pSrc[2 * k + 1] * CoefA1 */
+    outI += *pSrc1++ * CoefA1;
+
+    CoefB1 = *pCoefB;                            /* pBTable[2 * t] */
+
+    /* pSrc[2 * fftLen - 2 * k + 1] * CoefB1 */
+    outI -= *pSrc2-- * CoefB1;
+    /* pSrc[2 * fftLen - 2 * k] * CoefA2 */
+    outI -= *pSrc2 * CoefA2;
+
+    /* pSrc[2 * fftLen - 2 * k] * CoefB1 */
+    outR += *pSrc2-- * CoefB1;
+
+    /* write output pair k: pDst[2 * k], pDst[2 * k + 1] */
+    *pDst1++ = outR;
+    *pDst1++ = outI;
+
+    /* write the complex conjugate into the mirrored pair: pDst[4 * fftLen - 2 * k + 1], then pDst[4 * fftLen - 2 * k] */
+    *pDst2-- = -outI;
+    *pDst2-- = outR;
+
+    /* update coefficient pointers: both advance by 2 * modifier per pass (pCoefA was already moved by one above) */
+    pCoefB = pCoefB + (modifier * 2u);
+    pCoefA = pCoefA + ((modifier * 2u) - 1u);
+
+    i--;
+
+  }
+  /* Pairs that the loop does not produce: index fftLen and index 0, both with a zero imaginary part */
+  pDst[2u * fftLen] = pSrc[0] - pSrc[1];
+  pDst[(2u * fftLen) + 1u] = 0.0f;
+
+  pDst[0] = pSrc[0] + pSrc[1];
+  pDst[1] = 0.0f;
+
+}
+ 
+ 
+/**
+ * @brief  Core Real IFFT process: combines mirrored input pairs from pSrc into fftLen output pairs in pDst.
+ * @param[in]   *pSrc      points to the input buffer; elements [0 .. 2*fftLen + 1] are read.
+ * @param[in]   fftLen     length of FFT (arm_rfft_f32() passes S->fftLenBy2 here); also the number of passes.
+ * @param[in]   *pATable   points to the twiddle Coef A buffer.
+ * @param[in]   *pBTable   points to the twiddle Coef B buffer.
+ * @param[out]  *pDst      points to the output buffer; elements [0 .. 2*fftLen - 1] are written.
+ * @param[in]   modifier   twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
+ * @return none.
+ */
+
+void arm_split_rifft_f32(
+  float32_t * pSrc,
+  uint32_t fftLen,
+  float32_t * pATable,
+  float32_t * pBTable,
+  float32_t * pDst,
+  uint32_t modifier)
+{
+  float32_t sumR, sumI;                          /* accumulators for one output pair */
+  float32_t a1, a2, b1;                          /* twiddle coefficients of the current pass */
+  float32_t loRe, loIm, hiRe, hiIm;              /* input values of the current pass */
+  float32_t *pA = pATable;                       /* read position in the A table */
+  float32_t *pB = pBTable;                       /* read position in the B table */
+  float32_t *pLo = pSrc;                         /* ascending read position, starts at pSrc[0] */
+  float32_t *pHi = pSrc + ((2u * fftLen) + 1u);  /* descending read position, starts at pSrc[2 * fftLen + 1] */
+  uint32_t passesLeft;                           /* loop counter */
+
+  /*
+     With k = 0 .. fftLen - 1 counting the passes and t = k * modifier, each pass loads
+       loRe = pSrc[2 * k],                loIm = pSrc[2 * k + 1],
+       hiRe = pSrc[2 * fftLen - 2 * k],   hiIm = pSrc[2 * fftLen - 2 * k + 1],
+       a1 = pATable[2 * t],  a2 = pATable[2 * t + 1],  b1 = pBTable[2 * t]
+     and stores
+       pDst[2 * k]     =  loRe * a1 + (loIm + hiIm) * a2 + hiRe * b1
+       pDst[2 * k + 1] = -loRe * a2 + loIm * a1 - hiIm * b1 + hiRe * a2
+     accumulated term by term in the order of the statements below.
+     Note that the first pass (k = 0) reads pSrc[2 * fftLen] and pSrc[2 * fftLen + 1].
+   */
+
+  for(passesLeft = fftLen; passesLeft > 0u; passesLeft--)
+  {
+    /* coefficients; the post-increment leaves pA one element past the start of the pair */
+    a1 = *pA++;
+    a2 = *pA;
+    b1 = *pB;
+
+    /* inputs: the ascending pair and the descending (mirrored) pair */
+    loRe = pLo[0];
+    loIm = pLo[1];
+    hiIm = pHi[0];
+    hiRe = pHi[-1];
+
+    /* real part */
+    sumR = loRe * a1;
+    sumR += (loIm + hiIm) * a2;
+    sumR += hiRe * b1;
+
+    /* imaginary part */
+    sumI = -loRe * a2;
+    sumI += loIm * a1;
+    sumI -= hiIm * b1;
+    sumI += hiRe * a2;
+
+    /* store the output pair */
+    pDst[0] = sumR;
+    pDst[1] = sumI;
+    pDst += 2;
+
+    /* step both read positions by one pair */
+    pLo += 2;
+    pHi -= 2;
+
+    /* advance to the next coefficient pair (pA was already moved by one above) */
+    pB = pB + (modifier * 2u);
+    pA = pA + ((modifier * 2u) - 1u);
+  }
+
+}

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/TransformFunctions/arm_rfft_init_f32.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,1704 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:	    arm_rfft_init_f32.c  
+*  
+* Description:	RFFT & RIFFT Floating point initialisation function  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* -------------------------------------------------------------------- */ 
+ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupTransforms  
+ */ 
+ 
+/**  
+ * @addtogroup RFFT_RIFFT  
+ * @{  
+ */ 
+ 
+/**  
+* \par  
+* Generation of realCoefA array:  
+* \par  
+* 	n = 1024  
+* <pre>for (i = 0; i < n; i++)  
+*  {  
+*    pATable[2 * i] = 0.5 * (1.0 - sin (2 * PI / (double) (2 * n) * (double) i));  
+*    pATable[2 * i + 1] = 0.5 * (-1.0 * cos (2 * PI / (double) (2 * n) * (double) i));  
+*  } </pre>  
+*/ 
+ 
+ 
+ 
+static const float32_t realCoefA[2048] = { 
+  0.500000000000000000f, -0.500000000000000000f, 0.498466014862060550f, 
+  -0.499997645616531370f, 0.496932059526443480f, -0.499990582466125490f, 
+  0.495398133993148800f, -0.499978810548782350f, 
+  0.493864238262176510f, -0.499962359666824340f, 0.492330402135849000f, 
+  -0.499941170215606690f, 0.490796625614166260f, -0.499915301799774170f, 
+  0.489262968301773070f, -0.499884694814682010f, 
+  0.487729400396347050f, -0.499849408864974980f, 0.486195921897888180f, 
+  -0.499809414148330690f, 0.484662592411041260f, -0.499764710664749150f, 
+  0.483129411935806270f, -0.499715298414230350f, 
+  0.481596380472183230f, -0.499661177396774290f, 0.480063527822494510f, 
+  -0.499602377414703370f, 0.478530883789062500f, -0.499538868665695190f, 
+  0.476998418569564820f, -0.499470651149749760f, 
+  0.475466161966323850f, -0.499397724866867070f, 0.473934143781661990f, 
+  -0.499320119619369510f, 0.472402364015579220f, -0.499237775802612300f, 
+  0.470870882272720340f, -0.499150782823562620f, 
+  0.469339638948440550f, -0.499059051275253300f, 0.467808693647384640f, 
+  -0.498962640762329100f, 0.466278046369552610f, -0.498861521482467650f, 
+  0.464747726917266850f, -0.498755723237991330f, 
+  0.463217705488204960f, -0.498645216226577760f, 0.461688071489334110f, 
+  -0.498530030250549320f, 0.460158795118331910f, -0.498410135507583620f, 
+  0.458629876375198360f, -0.498285561800003050f, 
+  0.457101345062255860f, -0.498156309127807620f, 0.455573230981826780f, 
+  -0.498022347688674930f, 0.454045534133911130f, -0.497883707284927370f, 
+  0.452518254518508910f, -0.497740387916564940f, 
+  0.450991421937942500f, -0.497592359781265260f, 0.449465066194534300f, 
+  -0.497439652681350710f, 0.447939187288284300f, -0.497282296419143680f, 
+  0.446413785219192500f, -0.497120231389999390f, 
+  0.444888889789581300f, -0.496953487396240230f, 0.443364530801773070f, 
+  -0.496782064437866210f, 0.441840678453445430f, -0.496605962514877320f, 
+  0.440317392349243160f, -0.496425211429595950f, 
+  0.438794672489166260f, -0.496239781379699710f, 0.437272518873214720f, 
+  -0.496049642562866210f, 0.435750931501388550f, -0.495854884386062620f, 
+  0.434229999780654910f, -0.495655417442321780f, 
+  0.432709634304046630f, -0.495451331138610840f, 0.431189924478530880f, 
+  -0.495242536067962650f, 0.429670870304107670f, -0.495029091835021970f, 
+  0.428152471780776980f, -0.494810998439788820f, 
+  0.426634758710861210f, -0.494588255882263180f, 0.425117731094360350f, 
+  -0.494360834360122680f, 0.423601418733596800f, -0.494128793478012080f, 
+  0.422085791826248170f, -0.493892073631286620f, 
+  0.420570939779281620f, -0.493650704622268680f, 0.419056802988052370f, 
+  -0.493404686450958250f, 0.417543441057205200f, -0.493154048919677730f, 
+  0.416030853986740110f, -0.492898762226104740f, 
+  0.414519041776657100f, -0.492638826370239260f, 0.413008064031600950f, 
+  -0.492374241352081300f, 0.411497890949249270f, -0.492105036973953250f, 
+  0.409988552331924440f, -0.491831213235855100f, 
+  0.408480048179626460f, -0.491552740335464480f, 0.406972438097000120f, 
+  -0.491269648075103760f, 0.405465662479400630f, -0.490981936454772950f, 
+  0.403959810733795170f, -0.490689605474472050f, 
+  0.402454853057861330f, -0.490392625331878660f, 0.400950789451599120f, 
+  -0.490091055631637570f, 0.399447679519653320f, -0.489784896373748780f, 
+  0.397945523262023930f, -0.489474087953567500f, 
+  0.396444320678710940f, -0.489158689975738530f, 0.394944071769714360f, 
+  -0.488838672637939450f, 0.393444836139678960f, -0.488514065742492680f, 
+  0.391946613788604740f, -0.488184869289398190f, 
+  0.390449374914169310f, -0.487851053476333620f, 0.388953179121017460f, 
+  -0.487512677907943730f, 0.387458056211471560f, -0.487169682979583740f, 
+  0.385963946580886840f, -0.486822128295898440f, 
+  0.384470939636230470f, -0.486469984054565430f, 0.382979035377502440f, 
+  -0.486113250255584720f, 0.381488204002380370f, -0.485751956701278690f, 
+  0.379998475313186650f, -0.485386073589324950f, 
+  0.378509908914566040f, -0.485015630722045900f, 0.377022475004196170f, 
+  -0.484640628099441530f, 0.375536203384399410f, -0.484261035919189450f, 
+  0.374051094055175780f, -0.483876913785934450f, 
+  0.372567176818847660f, -0.483488231897354130f, 0.371084451675415040f, 
+  -0.483094990253448490f, 0.369602948427200320f, -0.482697218656539920f, 
+  0.368122667074203490f, -0.482294887304306030f, 
+  0.366643607616424560f, -0.481888025999069210f, 0.365165829658508300f, 
+  -0.481476634740829470f, 0.363689333200454710f, -0.481060713529586790f, 
+  0.362214088439941410f, -0.480640232563018800f, 
+  0.360740154981613160f, -0.480215251445770260f, 0.359267532825469970f, 
+  -0.479785770177841190f, 0.357796221971511840f, -0.479351729154586790f, 
+  0.356326282024383540f, -0.478913217782974240f, 
+  0.354857653379440310f, -0.478470176458358760f, 0.353390425443649290f, 
+  -0.478022634983062740f, 0.351924568414688110f, -0.477570593357086180f, 
+  0.350460082292556760f, -0.477114051580429080f, 
+  0.348997026681900020f, -0.476653009653091430f, 0.347535371780395510f, 
+  -0.476187497377395630f, 0.346075177192687990f, -0.475717514753341670f, 
+  0.344616413116455080f, -0.475243031978607180f, 
+  0.343159139156341550f, -0.474764078855514530f, 0.341703325510025020f, 
+  -0.474280685186386110f, 0.340248972177505490f, -0.473792791366577150f, 
+  0.338796168565750120f, -0.473300457000732420f, 
+  0.337344855070114140f, -0.472803652286529540f, 0.335895091295242310f, 
+  -0.472302407026290890f, 0.334446847438812260f, -0.471796721220016480f, 
+  0.333000183105468750f, -0.471286594867706300f, 
+  0.331555068492889400f, -0.470772027969360350f, 0.330111563205718990f, 
+  -0.470253020524978640f, 0.328669637441635130f, -0.469729602336883540f, 
+  0.327229350805282590f, -0.469201773405075070f, 
+  0.325790673494338990f, -0.468669503927230830f, 0.324353635311126710f, 
+  -0.468132823705673220f, 0.322918236255645750f, -0.467591762542724610f, 
+  0.321484506130218510f, -0.467046260833740230f, 
+  0.320052474737167360f, -0.466496407985687260f, 0.318622142076492310f, 
+  -0.465942144393920900f, 0.317193508148193360f, -0.465383470058441160f, 
+  0.315766572952270510f, -0.464820444583892820f, 
+  0.314341396093368530f, -0.464253038167953490f, 0.312917977571487430f, 
+  -0.463681250810623170f, 0.311496287584304810f, -0.463105112314224240f, 
+  0.310076385736465450f, -0.462524622678756710f, 
+  0.308658272027969360f, -0.461939752101898190f, 0.307241976261138920f, 
+  -0.461350560188293460f, 0.305827468633651730f, -0.460757017135620120f, 
+  0.304414808750152590f, -0.460159152746200560f, 
+  0.303003966808319090f, -0.459556937217712400f, 0.301595002412796020f, 
+  -0.458950400352478030f, 0.300187885761260990f, -0.458339542150497440f, 
+  0.298782676458358760f, -0.457724362611770630f, 
+  0.297379344701766970f, -0.457104891538620000f, 0.295977920293807980f, 
+  -0.456481099128723140f, 0.294578403234481810f, -0.455853015184402470f, 
+  0.293180853128433230f, -0.455220639705657960f, 
+  0.291785210371017460f, -0.454584002494812010f, 0.290391564369201660f, 
+  -0.453943043947219850f, 0.288999855518341060f, -0.453297853469848630f, 
+  0.287610173225402830f, -0.452648371458053590f, 
+  0.286222457885742190f, -0.451994657516479490f, 0.284836769104003910f, 
+  -0.451336652040481570f, 0.283453077077865600f, -0.450674414634704590f, 
+  0.282071471214294430f, -0.450007945299148560f, 
+  0.280691891908645630f, -0.449337244033813480f, 0.279314368963241580f, 
+  -0.448662281036376950f, 0.277938932180404660f, -0.447983115911483760f, 
+  0.276565581560134890f, -0.447299748659133910f, 
+  0.275194346904754640f, -0.446612149477005000f, 0.273825198411941530f, 
+  -0.445920348167419430f, 0.272458195686340330f, -0.445224374532699580f, 
+  0.271093338727951050f, -0.444524168968200680f, 
+  0.269730657339096070f, -0.443819820880889890f, 0.268370121717453000f, 
+  -0.443111270666122440f, 0.267011761665344240f, -0.442398548126220700f, 
+  0.265655577182769780f, -0.441681683063507080f, 
+  0.264301627874374390f, -0.440960645675659180f, 0.262949883937835690f, 
+  -0.440235435962677000f, 0.261600375175476070f, -0.439506113529205320f, 
+  0.260253131389617920f, -0.438772648572921750f, 
+  0.258908122777938840f, -0.438035041093826290f, 0.257565379142761230f, 
+  -0.437293320894241330f, 0.256224930286407470f, -0.436547487974166870f, 
+  0.254886746406555180f, -0.435797542333602910f, 
+  0.253550916910171510f, -0.435043483972549440f, 0.252217382192611690f, 
+  -0.434285342693328860f, 0.250886172056198120f, -0.433523118495941160f, 
+  0.249557301402091980f, -0.432756811380386350f, 
+  0.248230814933776860f, -0.431986421346664430f, 0.246906682848930360f, 
+  -0.431211978197097780f, 0.245584934949874880f, -0.430433481931686400f, 
+  0.244265571236610410f, -0.429650902748107910f, 
+  0.242948621511459350f, -0.428864300251007080f, 0.241634100675582890f, 
+  -0.428073674440383910f, 0.240322008728981020f, -0.427278995513916020f, 
+  0.239012360572814940f, -0.426480293273925780f, 
+  0.237705156207084660f, -0.425677597522735600f, 0.236400425434112550f, 
+  -0.424870878458023070f, 0.235098183155059810f, -0.424060165882110600f, 
+  0.233798429369926450f, -0.423245459794998170f, 
+  0.232501193881034850f, -0.422426789999008180f, 0.231206461787223820f, 
+  -0.421604126691818240f, 0.229914262890815730f, -0.420777499675750730f, 
+  0.228624612092971800f, -0.419946908950805660f, 
+  0.227337509393692020f, -0.419112354516983030f, 0.226052969694137570f, 
+  -0.418273866176605220f, 0.224771007895469670f, -0.417431443929672240f, 
+  0.223491653800010680f, -0.416585087776184080f, 
+  0.222214877605438230f, -0.415734797716140750f, 0.220940738916397090f, 
+  -0.414880603551864620f, 0.219669207930564880f, -0.414022535085678100f, 
+  0.218400329351425170f, -0.413160532712936400f, 
+  0.217134088277816770f, -0.412294656038284300f, 0.215870529413223270f, 
+  -0.411424905061721800f, 0.214609622955322270f, -0.410551249980926510f, 
+  0.213351413607597350f, -0.409673750400543210f, 
+  0.212095901370048520f, -0.408792406320571900f, 0.210843101143836980f, 
+  -0.407907217741012570f, 0.209593027830123900f, -0.407018154859542850f, 
+  0.208345666527748110f, -0.406125307083129880f, 
+  0.207101076841354370f, -0.405228585004806520f, 0.205859228968620300f, 
+  -0.404328078031539920f, 0.204620152711868290f, -0.403423786163330080f, 
+  0.203383848071098330f, -0.402515679597854610f, 
+  0.202150344848632810f, -0.401603758335113530f, 0.200919643044471740f, 
+  -0.400688081979751590f, 0.199691757559776310f, -0.399768620729446410f, 
+  0.198466703295707700f, -0.398845434188842770f, 
+  0.197244480252265930f, -0.397918462753295900f, 0.196025103330612180f, 
+  -0.396987736225128170f, 0.194808602333068850f, -0.396053284406661990f, 
+  0.193594962358474730f, -0.395115107297897340f, 
+  0.192384198307991030f, -0.394173204898834230f, 0.191176339983940120f, 
+  -0.393227607011795040f, 0.189971387386322020f, -0.392278283834457400f, 
+  0.188769355416297910f, -0.391325294971466060f, 
+  0.187570258975028990f, -0.390368610620498660f, 0.186374098062515260f, 
+  -0.389408260583877560f, 0.185180887579917910f, -0.388444244861602780f, 
+  0.183990627527236940f, -0.387476563453674320f, 
+  0.182803362607955930f, -0.386505216360092160f, 0.181619063019752500f, 
+  -0.385530263185501100f, 0.180437773466110230f, -0.384551674127578740f, 
+  0.179259493947029110f, -0.383569449186325070f, 
+  0.178084224462509160f, -0.382583618164062500f, 0.176911994814872740f, 
+  -0.381594210863113400f, 0.175742805004119870f, -0.380601197481155400f, 
+  0.174576655030250550f, -0.379604607820510860f, 
+  0.173413574695587160f, -0.378604412078857420f, 0.172253578901290890f, 
+  -0.377600699663162230f, 0.171096652746200560f, -0.376593410968780520f, 
+  0.169942826032638550f, -0.375582575798034670f, 
+  0.168792113661766050f, -0.374568194150924680f, 0.167644515633583070f, 
+  -0.373550295829772950f, 0.166500031948089600f, -0.372528880834579470f, 
+  0.165358707308769230f, -0.371503978967666630f, 
+  0.164220526814460750f, -0.370475560426712040f, 0.163085505366325380f, 
+  -0.369443655014038090f, 0.161953642964363100f, -0.368408292531967160f, 
+  0.160824984312057500f, -0.367369443178176880f, 
+  0.159699499607086180f, -0.366327136754989620f, 0.158577233552932740f, 
+  -0.365281373262405400f, 0.157458171248435970f, -0.364232182502746580f, 
+  0.156342327594757080f, -0.363179564476013180f, 
+  0.155229732394218440f, -0.362123548984527590f, 0.154120370745658870f, 
+  -0.361064106225967410f, 0.153014272451400760f, -0.360001266002655030f, 
+  0.151911437511444090f, -0.358935028314590450f, 
+  0.150811880826950070f, -0.357865422964096070f, 0.149715602397918700f, 
+  -0.356792420148849490f, 0.148622632026672360f, -0.355716109275817870f, 
+  0.147532954812049870f, -0.354636400938034060f, 
+  0.146446615457534790f, -0.353553384542465210f, 0.145363584160804750f, 
+  -0.352467030286788940f, 0.144283905625343320f, -0.351377367973327640f, 
+  0.143207564949989320f, -0.350284397602081300f, 
+  0.142134591937065120f, -0.349188119173049930f, 0.141064971685409550f, 
+  -0.348088562488555910f, 0.139998748898506160f, -0.346985727548599240f, 
+  0.138935908675193790f, -0.345879614353179930f, 
+  0.137876465916633610f, -0.344770282506942750f, 0.136820420622825620f, 
+  -0.343657672405242920f, 0.135767802596092220f, -0.342541843652725220f, 
+  0.134718611836433410f, -0.341422766447067260f, 
+  0.133672863245010380f, -0.340300500392913820f, 0.132630556821823120f, 
+  -0.339175015687942500f, 0.131591722369194030f, -0.338046342134475710f, 
+  0.130556344985961910f, -0.336914509534835820f, 
+  0.129524439573287960f, -0.335779488086700440f, 0.128496021032333370f, 
+  -0.334641307592391970f, 0.127471104264259340f, -0.333499968051910400f, 
+  0.126449704170227050f, -0.332355499267578130f, 
+  0.125431805849075320f, -0.331207901239395140f, 0.124417431652545930f, 
+  -0.330057173967361450f, 0.123406603932380680f, -0.328903347253799440f, 
+  0.122399315237998960f, -0.327746421098709110f, 
+  0.121395580470561980f, -0.326586425304412840f, 0.120395407080650330f, 
+  -0.325423330068588260f, 0.119398809969425200f, -0.324257194995880130f, 
+  0.118405789136886600f, -0.323088020086288450f, 
+  0.117416366934776310f, -0.321915775537490840f, 0.116430543363094330f, 
+  -0.320740520954132080f, 0.115448333323001860f, -0.319562226533889770f, 
+  0.114469736814498900f, -0.318380922079086300f, 
+  0.113494776189327240f, -0.317196637392044070f, 0.112523443996906280f, 
+  -0.316009372472763060f, 0.111555770039558410f, -0.314819127321243290f, 
+  0.110591746866703030f, -0.313625901937484740f, 
+  0.109631389379501340f, -0.312429755926132200f, 0.108674705028533940f, 
+  -0.311230629682540890f, 0.107721701264381410f, -0.310028612613677980f, 
+  0.106772392988204960f, -0.308823645114898680f, 
+  0.105826787650585170f, -0.307615786790847780f, 0.104884892702102660f, 
+  -0.306405037641525270f, 0.103946708142757420f, -0.305191397666931150f, 
+  0.103012263774871830f, -0.303974896669387820f, 
+  0.102081544697284700f, -0.302755534648895260f, 0.101154580712318420f, 
+  -0.301533311605453490f, 0.100231364369392400f, -0.300308227539062500f, 
+  0.099311910569667816f, -0.299080342054367070f, 
+  0.098396234214305878f, -0.297849655151367190f, 0.097484335303306580f, 
+  -0.296616137027740480f, 0.096576221287250519f, -0.295379847288131710f, 
+  0.095671907067298889f, -0.294140785932540890f, 
+  0.094771400094032288f, -0.292898923158645630f, 0.093874707818031311f, 
+  -0.291654318571090700f, 0.092981837689876556f, -0.290406972169876100f, 
+  0.092092797160148621f, -0.289156883955001830f, 
+  0.091207593679428101f, -0.287904083728790280f, 0.090326242148876190f, 
+  -0.286648571491241460f, 0.089448742568492889f, -0.285390377044677730f, 
+  0.088575109839439392f, -0.284129470586776730f, 
+  0.087705351412296295f, -0.282865911722183230f, 0.086839467287063599f, 
+  -0.281599670648574830f, 0.085977479815483093f, -0.280330777168273930f, 
+  0.085119381546974182f, -0.279059261083602910f, 
+  0.084265194833278656f, -0.277785122394561770f, 0.083414919674396515f, 
+  -0.276508361101150510f, 0.082568563520908356f, -0.275228977203369140f, 
+  0.081726133823394775f, -0.273947030305862430f, 
+  0.080887645483016968f, -0.272662490606307980f, 0.080053105950355530f, 
+  -0.271375387907028200f, 0.079222507774829865f, -0.270085722208023070f, 
+  0.078395880758762360f, -0.268793523311614990f, 
+  0.077573217451572418f, -0.267498821020126340f, 0.076754532754421234f, 
+  -0.266201555728912350f, 0.075939826667308807f, -0.264901816844940190f, 
+  0.075129114091396332f, -0.263599574565887450f, 
+  0.074322402477264404f, -0.262294828891754150f, 0.073519699275493622f, 
+  -0.260987639427185060f, 0.072721004486083984f, -0.259678006172180180f, 
+  0.071926333010196686f, -0.258365899324417110f, 
+  0.071135692298412323f, -0.257051378488540650f, 0.070349089801311493f, 
+  -0.255734413862228390f, 0.069566532969474792f, -0.254415065050125120f, 
+  0.068788021802902222f, -0.253093332052230830f, 
+  0.068013571202754974f, -0.251769185066223140f, 0.067243188619613647f, 
+  -0.250442683696746830f, 0.066476874053478241f, -0.249113827943801880f, 
+  0.065714649856090546f, -0.247782632708549500f, 
+  0.064956501126289368f, -0.246449097990989690f, 0.064202457666397095f, 
+  -0.245113238692283630f, 0.063452512025833130f, -0.243775084614753720f, 
+  0.062706671655178070f, -0.242434620857238770f, 
+  0.061964951455593109f, -0.241091892123222350f, 0.061227355152368546f, 
+  -0.239746883511543270f, 0.060493886470794678f, -0.238399609923362730f, 
+  0.059764556586742401f, -0.237050101161003110f, 
+  0.059039369225502014f, -0.235698372125625610f, 0.058318331837654114f, 
+  -0.234344407916069030f, 0.057601451873779297f, -0.232988253235816960f, 
+  0.056888736784458160f, -0.231629893183708190f, 
+  0.056180190294981003f, -0.230269357562065120f, 0.055475823581218719f, 
+  -0.228906646370887760f, 0.054775636643171310f, -0.227541789412498470f, 
+  0.054079644381999969f, -0.226174786686897280f, 
+  0.053387850522994995f, -0.224805667996406560f, 0.052700258791446686f, 
+  -0.223434418439865110f, 0.052016876637935638f, -0.222061067819595340f, 
+  0.051337707787752151f, -0.220685631036758420f, 
+  0.050662767142057419f, -0.219308122992515560f, 0.049992054700851440f, 
+  -0.217928543686866760f, 0.049325577914714813f, -0.216546908020973210f, 
+  0.048663340508937836f, -0.215163245797157290f, 
+  0.048005353659391403f, -0.213777542114257810f, 0.047351621091365814f, 
+  -0.212389841675758360f, 0.046702146530151367f, -0.211000129580497740f, 
+  0.046056941151618958f, -0.209608450531959530f, 
+  0.045416008681058884f, -0.208214774727821350f, 0.044779352843761444f, 
+  -0.206819161772727970f, 0.044146984815597534f, -0.205421581864356990f, 
+  0.043518904596567154f, -0.204022079706192020f, 
+  0.042895123362541199f, -0.202620655298233030f, 0.042275641113519669f, 
+  -0.201217323541641240f, 0.041660469025373459f, -0.199812099337577820f, 
+  0.041049610823392868f, -0.198404997587203980f, 
+  0.040443073958158493f, -0.196996018290519710f, 0.039840862154960632f, 
+  -0.195585191249847410f, 0.039242979139089584f, -0.194172516465187070f, 
+  0.038649436086416245f, -0.192758023738861080f, 
+  0.038060232996940613f, -0.191341713070869450f, 0.037475381046533585f, 
+  -0.189923599362373350f, 0.036894880235195160f, -0.188503712415695190f, 
+  0.036318738013505936f, -0.187082037329673770f, 
+  0.035746958106756210f, -0.185658603906631470f, 0.035179551690816879f, 
+  -0.184233412146568300f, 0.034616518765687943f, -0.182806491851806640f, 
+  0.034057866781949997f, -0.181377857923507690f, 
+  0.033503599464893341f, -0.179947525262832640f, 0.032953724265098572f, 
+  -0.178515478968620300f, 0.032408244907855988f, -0.177081763744354250f, 
+  0.031867165118455887f, -0.175646379590034480f, 
+  0.031330492347478867f, -0.174209341406822200f, 0.030798232182860374f, 
+  -0.172770664095878600f, 0.030270388349890709f, -0.171330362558364870f, 
+  0.029746964573860168f, -0.169888436794281010f, 
+  0.029227968305349350f, -0.168444931507110600f, 0.028713401407003403f, 
+  -0.166999831795692440f, 0.028203271329402924f, -0.165553152561187740f, 
+  0.027697581797838211f, -0.164104923605918880f, 
+  0.027196336537599564f, -0.162655144929885860f, 0.026699542999267578f, 
+  -0.161203846335411070f, 0.026207204908132553f, -0.159751012921333310f, 
+  0.025719324126839638f, -0.158296689391136170f, 
+  0.025235909968614578f, -0.156840875744819640f, 0.024756962433457375f, 
+  -0.155383571982383730f, 0.024282488971948624f, -0.153924822807312010f, 
+  0.023812493309378624f, -0.152464613318443300f, 
+  0.023346979171037674f, -0.151002973318099980f, 0.022885952144861221f, 
+  -0.149539917707443240f, 0.022429415956139565f, -0.148075446486473080f, 
+  0.021977374330163002f, -0.146609574556350710f, 
+  0.021529832854866982f, -0.145142331719398500f, 0.021086793392896652f, 
+  -0.143673732876777650f, 0.020648263394832611f, -0.142203763127326970f, 
+  0.020214242860674858f, -0.140732467174530030f, 
+  0.019784741103649139f, -0.139259845018386840f, 0.019359756261110306f, 
+  -0.137785911560058590f, 0.018939297646284103f, -0.136310681700706480f, 
+  0.018523367121815681f, -0.134834155440330510f, 
+  0.018111966550350189f, -0.133356377482414250f, 0.017705103382468224f, 
+  -0.131877332925796510f, 0.017302779480814934f, -0.130397051572799680f, 
+  0.016904998570680618f, -0.128915548324584960f, 
+  0.016511764377355576f, -0.127432823181152340f, 0.016123080626130104f, 
+  -0.125948905944824220f, 0.015738952904939651f, -0.124463804066181180f, 
+  0.015359382145106792f, -0.122977524995803830f, 
+  0.014984373003244400f, -0.121490091085433960f, 0.014613929204642773f, 
+  -0.120001509785652160f, 0.014248054474592209f, -0.118511803448200230f, 
+  0.013886751607060432f, -0.117020979523658750f, 
+  0.013530024327337742f, -0.115529052913188930f, 0.013177875429391861f, 
+  -0.114036038517951970f, 0.012830308638513088f, -0.112541958689689640f, 
+  0.012487327679991722f, -0.111046813428401950f, 
+  0.012148935347795486f, -0.109550617635250090f, 0.011815134435892105f, 
+  -0.108053401112556460f, 0.011485928669571877f, -0.106555156409740450f, 
+  0.011161320842802525f, -0.105055920779705050f, 
+  0.010841314680874348f, -0.103555686771869660f, 0.010525912046432495f, 
+  -0.102054484188556670f, 0.010215117596089840f, -0.100552320480346680f, 
+  0.009908932261168957f, -0.099049203097820282f, 
+  0.009607359766960144f, -0.097545161843299866f, 0.009310402907431126f, 
+  -0.096040196716785431f, 0.009018065407872200f, -0.094534330070018768f, 
+  0.008730349130928516f, -0.093027576804161072f, 
+  0.008447255939245224f, -0.091519944369792938f, 0.008168790489435196f, 
+  -0.090011447668075562f, 0.007894953712821007f, -0.088502109050750732f, 
+  0.007625748869031668f, -0.086991935968399048f, 
+  0.007361178752034903f, -0.085480943322181702f, 0.007101245224475861f, 
+  -0.083969146013259888f, 0.006845951545983553f, -0.082456558942794800f, 
+  0.006595299113541842f, -0.080943197011947632f, 
+  0.006349290721118450f, -0.079429075121879578f, 0.006107929162681103f, 
+  -0.077914200723171234f, 0.005871216300874949f, -0.076398596167564392f, 
+  0.005639153998345137f, -0.074882268905639648f, 
+  0.005411745049059391f, -0.073365233838558197f, 0.005188991315662861f, 
+  -0.071847513318061829f, 0.004970894660800695f, -0.070329122245311737f, 
+  0.004757457878440619f, -0.068810060620307922f, 
+  0.004548682365566492f, -0.067290350794792175f, 0.004344569984823465f, 
+  -0.065770015120506287f, 0.004145123064517975f, -0.064249053597450256f, 
+  0.003950343467295170f, -0.062727488577365875f, 
+  0.003760232590138912f, -0.061205338686704636f, 0.003574792761355639f, 
+  -0.059682607650756836f, 0.003394025377929211f, -0.058159314095973969f, 
+  0.003217932302504778f, -0.056635476648807526f, 
+  0.003046514932066202f, -0.055111102759838104f, 0.002879775362089276f, 
+  -0.053586211055517197f, 0.002717714523896575f, -0.052060816437005997f, 
+  0.002560334512963891f, -0.050534930080175400f, 
+  0.002407636726275086f, -0.049008570611476898f, 0.002259622327983379f, 
+  -0.047481749206781387f, 0.002116292715072632f, -0.045954477041959763f, 
+  0.001977649517357349f, -0.044426776468753815f, 
+  0.001843693898990750f, -0.042898654937744141f, 0.001714427140541375f, 
+  -0.041370131075382233f, 0.001589850406162441f, -0.039841219782829285f, 
+  0.001469964860007167f, -0.038311932235956192f, 
+  0.001354771666228771f, -0.036782283335924149f, 0.001244271872565150f, 
+  -0.035252287983894348f, 0.001138466643169522f, -0.033721961081027985f, 
+  0.001037356909364462f, -0.032191313803195953f, 
+  0.000940943544264883f, -0.030660368502140045f, 0.000849227537401021f, 
+  -0.029129132628440857f, 0.000762209703680128f, -0.027597622945904732f, 
+  0.000679890916217119f, -0.026065852493047714f, 
+  0.000602271873503923f, -0.024533838033676147f, 0.000529353390447795f, 
+  -0.023001590743660927f, 0.000461136136436835f, -0.021469129249453545f, 
+  0.000397620693547651f, -0.019936462864279747f, 
+  0.000338807702064514f, -0.018403612077236176f, 0.000284697714960203f, 
+  -0.016870586201548576f, 0.000235291256103665f, -0.015337402001023293f, 
+  0.000190588747500442f, -0.013804072514176369f, 
+  0.000150590654811822f, -0.012270614504814148f, 0.000115297327283770f, 
+  -0.010737040080130100f, 0.000084709099610336f, -0.009203365072607994f, 
+  0.000058826273743762f, -0.007669602986425161f, 
+  0.000037649078876711f, -0.006135769188404083f, 0.000021177724192967f, 
+  -0.004601877182722092f, 0.000009412358849659f, -0.003067942336201668f, 
+  0.000002353095169383f, -0.001533978385850787f, 
+  0.000000000000000000f, -0.000000000000023345f, 0.000002353095169383f, 
+  0.001533978385850787f, 0.000009412358849659f, 0.003067942336201668f, 
+  0.000021177724192967f, 0.004601877182722092f, 
+  0.000037649078876711f, 0.006135769188404083f, 0.000058826273743762f, 
+  0.007669602986425161f, 0.000084709099610336f, 0.009203365072607994f, 
+  0.000115297327283770f, 0.010737040080130100f, 
+  0.000150590654811822f, 0.012270614504814148f, 0.000190588747500442f, 
+  0.013804072514176369f, 0.000235291256103665f, 0.015337402001023293f, 
+  0.000284697714960203f, 0.016870586201548576f, 
+  0.000338807702064514f, 0.018403612077236176f, 0.000397620693547651f, 
+  0.019936462864279747f, 0.000461136136436835f, 0.021469129249453545f, 
+  0.000529353390447795f, 0.023001590743660927f, 
+  0.000602271873503923f, 0.024533838033676147f, 0.000679890916217119f, 
+  0.026065852493047714f, 0.000762209703680128f, 0.027597622945904732f, 
+  0.000849227537401021f, 0.029129132628440857f, 
+  0.000940943544264883f, 0.030660368502140045f, 0.001037356909364462f, 
+  0.032191313803195953f, 0.001138466643169522f, 0.033721961081027985f, 
+  0.001244271872565150f, 0.035252287983894348f, 
+  0.001354771666228771f, 0.036782283335924149f, 0.001469964860007167f, 
+  0.038311932235956192f, 0.001589850406162441f, 0.039841219782829285f, 
+  0.001714427140541375f, 0.041370131075382233f, 
+  0.001843693898990750f, 0.042898654937744141f, 0.001977649517357349f, 
+  0.044426776468753815f, 0.002116292715072632f, 0.045954477041959763f, 
+  0.002259622327983379f, 0.047481749206781387f, 
+  0.002407636726275086f, 0.049008570611476898f, 0.002560334512963891f, 
+  0.050534930080175400f, 0.002717714523896575f, 0.052060816437005997f, 
+  0.002879775362089276f, 0.053586211055517197f, 
+  0.003046514932066202f, 0.055111102759838104f, 0.003217932302504778f, 
+  0.056635476648807526f, 0.003394025377929211f, 0.058159314095973969f, 
+  0.003574792761355639f, 0.059682607650756836f, 
+  0.003760232590138912f, 0.061205338686704636f, 0.003950343467295170f, 
+  0.062727488577365875f, 0.004145123064517975f, 0.064249053597450256f, 
+  0.004344569984823465f, 0.065770015120506287f, 
+  0.004548682365566492f, 0.067290350794792175f, 0.004757457878440619f, 
+  0.068810060620307922f, 0.004970894660800695f, 0.070329122245311737f, 
+  0.005188991315662861f, 0.071847513318061829f, 
+  0.005411745049059391f, 0.073365233838558197f, 0.005639153998345137f, 
+  0.074882268905639648f, 0.005871216300874949f, 0.076398596167564392f, 
+  0.006107929162681103f, 0.077914200723171234f, 
+  0.006349290721118450f, 0.079429075121879578f, 0.006595299113541842f, 
+  0.080943197011947632f, 0.006845951545983553f, 0.082456558942794800f, 
+  0.007101245224475861f, 0.083969146013259888f, 
+  0.007361178752034903f, 0.085480943322181702f, 0.007625748869031668f, 
+  0.086991935968399048f, 0.007894953712821007f, 0.088502109050750732f, 
+  0.008168790489435196f, 0.090011447668075562f, 
+  0.008447255939245224f, 0.091519944369792938f, 0.008730349130928516f, 
+  0.093027576804161072f, 0.009018065407872200f, 0.094534330070018768f, 
+  0.009310402907431126f, 0.096040196716785431f, 
+  0.009607359766960144f, 0.097545161843299866f, 0.009908932261168957f, 
+  0.099049203097820282f, 0.010215117596089840f, 0.100552320480346680f, 
+  0.010525912046432495f, 0.102054484188556670f, 
+  0.010841314680874348f, 0.103555686771869660f, 0.011161320842802525f, 
+  0.105055920779705050f, 0.011485928669571877f, 0.106555156409740450f, 
+  0.011815134435892105f, 0.108053401112556460f, 
+  0.012148935347795486f, 0.109550617635250090f, 0.012487327679991722f, 
+  0.111046813428401950f, 0.012830308638513088f, 0.112541958689689640f, 
+  0.013177875429391861f, 0.114036038517951970f, 
+  0.013530024327337742f, 0.115529052913188930f, 0.013886751607060432f, 
+  0.117020979523658750f, 0.014248054474592209f, 0.118511803448200230f, 
+  0.014613929204642773f, 0.120001509785652160f, 
+  0.014984373003244400f, 0.121490091085433960f, 0.015359382145106792f, 
+  0.122977524995803830f, 0.015738952904939651f, 0.124463804066181180f, 
+  0.016123080626130104f, 0.125948905944824220f, 
+  0.016511764377355576f, 0.127432823181152340f, 0.016904998570680618f, 
+  0.128915548324584960f, 0.017302779480814934f, 0.130397051572799680f, 
+  0.017705103382468224f, 0.131877332925796510f, 
+  0.018111966550350189f, 0.133356377482414250f, 0.018523367121815681f, 
+  0.134834155440330510f, 0.018939297646284103f, 0.136310681700706480f, 
+  0.019359756261110306f, 0.137785911560058590f, 
+  0.019784741103649139f, 0.139259845018386840f, 0.020214242860674858f, 
+  0.140732467174530030f, 0.020648263394832611f, 0.142203763127326970f, 
+  0.021086793392896652f, 0.143673732876777650f, 
+  0.021529832854866982f, 0.145142331719398500f, 0.021977374330163002f, 
+  0.146609574556350710f, 0.022429415956139565f, 0.148075446486473080f, 
+  0.022885952144861221f, 0.149539917707443240f, 
+  0.023346979171037674f, 0.151002973318099980f, 0.023812493309378624f, 
+  0.152464613318443300f, 0.024282488971948624f, 0.153924822807312010f, 
+  0.024756962433457375f, 0.155383571982383730f, 
+  0.025235909968614578f, 0.156840875744819640f, 0.025719324126839638f, 
+  0.158296689391136170f, 0.026207204908132553f, 0.159751012921333310f, 
+  0.026699542999267578f, 0.161203846335411070f, 
+  0.027196336537599564f, 0.162655144929885860f, 0.027697581797838211f, 
+  0.164104923605918880f, 0.028203271329402924f, 0.165553152561187740f, 
+  0.028713401407003403f, 0.166999831795692440f, 
+  0.029227968305349350f, 0.168444931507110600f, 0.029746964573860168f, 
+  0.169888436794281010f, 0.030270388349890709f, 0.171330362558364870f, 
+  0.030798232182860374f, 0.172770664095878600f, 
+  0.031330492347478867f, 0.174209341406822200f, 0.031867165118455887f, 
+  0.175646379590034480f, 0.032408244907855988f, 0.177081763744354250f, 
+  0.032953724265098572f, 0.178515478968620300f, 
+  0.033503599464893341f, 0.179947525262832640f, 0.034057866781949997f, 
+  0.181377857923507690f, 0.034616518765687943f, 0.182806491851806640f, 
+  0.035179551690816879f, 0.184233412146568300f, 
+  0.035746958106756210f, 0.185658603906631470f, 0.036318738013505936f, 
+  0.187082037329673770f, 0.036894880235195160f, 0.188503712415695190f, 
+  0.037475381046533585f, 0.189923599362373350f, 
+  0.038060232996940613f, 0.191341713070869450f, 0.038649436086416245f, 
+  0.192758023738861080f, 0.039242979139089584f, 0.194172516465187070f, 
+  0.039840862154960632f, 0.195585191249847410f, 
+  0.040443073958158493f, 0.196996018290519710f, 0.041049610823392868f, 
+  0.198404997587203980f, 0.041660469025373459f, 0.199812099337577820f, 
+  0.042275641113519669f, 0.201217323541641240f, 
+  0.042895123362541199f, 0.202620655298233030f, 0.043518904596567154f, 
+  0.204022079706192020f, 0.044146984815597534f, 0.205421581864356990f, 
+  0.044779352843761444f, 0.206819161772727970f, 
+  0.045416008681058884f, 0.208214774727821350f, 0.046056941151618958f, 
+  0.209608450531959530f, 0.046702146530151367f, 0.211000129580497740f, 
+  0.047351621091365814f, 0.212389841675758360f, 
+  0.048005353659391403f, 0.213777542114257810f, 0.048663340508937836f, 
+  0.215163245797157290f, 0.049325577914714813f, 0.216546908020973210f, 
+  0.049992054700851440f, 0.217928543686866760f, 
+  0.050662767142057419f, 0.219308122992515560f, 0.051337707787752151f, 
+  0.220685631036758420f, 0.052016876637935638f, 0.222061067819595340f, 
+  0.052700258791446686f, 0.223434418439865110f, 
+  0.053387850522994995f, 0.224805667996406560f, 0.054079644381999969f, 
+  0.226174786686897280f, 0.054775636643171310f, 0.227541789412498470f, 
+  0.055475823581218719f, 0.228906646370887760f, 
+  0.056180190294981003f, 0.230269357562065120f, 0.056888736784458160f, 
+  0.231629893183708190f, 0.057601451873779297f, 0.232988253235816960f, 
+  0.058318331837654114f, 0.234344407916069030f, 
+  0.059039369225502014f, 0.235698372125625610f, 0.059764556586742401f, 
+  0.237050101161003110f, 0.060493886470794678f, 0.238399609923362730f, 
+  0.061227355152368546f, 0.239746883511543270f, 
+  0.061964951455593109f, 0.241091892123222350f, 0.062706671655178070f, 
+  0.242434620857238770f, 0.063452512025833130f, 0.243775084614753720f, 
+  0.064202457666397095f, 0.245113238692283630f, 
+  0.064956501126289368f, 0.246449097990989690f, 0.065714649856090546f, 
+  0.247782632708549500f, 0.066476874053478241f, 0.249113827943801880f, 
+  0.067243188619613647f, 0.250442683696746830f, 
+  0.068013571202754974f, 0.251769185066223140f, 0.068788021802902222f, 
+  0.253093332052230830f, 0.069566532969474792f, 0.254415065050125120f, 
+  0.070349089801311493f, 0.255734413862228390f, 
+  0.071135692298412323f, 0.257051378488540650f, 0.071926333010196686f, 
+  0.258365899324417110f, 0.072721004486083984f, 0.259678006172180180f, 
+  0.073519699275493622f, 0.260987639427185060f, 
+  0.074322402477264404f, 0.262294828891754150f, 0.075129114091396332f, 
+  0.263599574565887450f, 0.075939826667308807f, 0.264901816844940190f, 
+  0.076754532754421234f, 0.266201555728912350f, 
+  0.077573217451572418f, 0.267498821020126340f, 0.078395880758762360f, 
+  0.268793523311614990f, 0.079222507774829865f, 0.270085722208023070f, 
+  0.080053105950355530f, 0.271375387907028200f, 
+  0.080887645483016968f, 0.272662490606307980f, 0.081726133823394775f, 
+  0.273947030305862430f, 0.082568563520908356f, 0.275228977203369140f, 
+  0.083414919674396515f, 0.276508361101150510f, 
+  0.084265194833278656f, 0.277785122394561770f, 0.085119381546974182f, 
+  0.279059261083602910f, 0.085977479815483093f, 0.280330777168273930f, 
+  0.086839467287063599f, 0.281599670648574830f, 
+  0.087705351412296295f, 0.282865911722183230f, 0.088575109839439392f, 
+  0.284129470586776730f, 0.089448742568492889f, 0.285390377044677730f, 
+  0.090326242148876190f, 0.286648571491241460f, 
+  0.091207593679428101f, 0.287904083728790280f, 0.092092797160148621f, 
+  0.289156883955001830f, 0.092981837689876556f, 0.290406972169876100f, 
+  0.093874707818031311f, 0.291654318571090700f, 
+  0.094771400094032288f, 0.292898923158645630f, 0.095671907067298889f, 
+  0.294140785932540890f, 0.096576221287250519f, 0.295379847288131710f, 
+  0.097484335303306580f, 0.296616137027740480f, 
+  0.098396234214305878f, 0.297849655151367190f, 0.099311910569667816f, 
+  0.299080342054367070f, 0.100231364369392400f, 0.300308227539062500f, 
+  0.101154580712318420f, 0.301533311605453490f, 
+  0.102081544697284700f, 0.302755534648895260f, 0.103012263774871830f, 
+  0.303974896669387820f, 0.103946708142757420f, 0.305191397666931150f, 
+  0.104884892702102660f, 0.306405037641525270f, 
+  0.105826787650585170f, 0.307615786790847780f, 0.106772392988204960f, 
+  0.308823645114898680f, 0.107721701264381410f, 0.310028612613677980f, 
+  0.108674705028533940f, 0.311230629682540890f, 
+  0.109631389379501340f, 0.312429755926132200f, 0.110591746866703030f, 
+  0.313625901937484740f, 0.111555770039558410f, 0.314819127321243290f, 
+  0.112523443996906280f, 0.316009372472763060f, 
+  0.113494776189327240f, 0.317196637392044070f, 0.114469736814498900f, 
+  0.318380922079086300f, 0.115448333323001860f, 0.319562226533889770f, 
+  0.116430543363094330f, 0.320740520954132080f, 
+  0.117416366934776310f, 0.321915775537490840f, 0.118405789136886600f, 
+  0.323088020086288450f, 0.119398809969425200f, 0.324257194995880130f, 
+  0.120395407080650330f, 0.325423330068588260f, 
+  0.121395580470561980f, 0.326586425304412840f, 0.122399315237998960f, 
+  0.327746421098709110f, 0.123406603932380680f, 0.328903347253799440f, 
+  0.124417431652545930f, 0.330057173967361450f, 
+  0.125431805849075320f, 0.331207901239395140f, 0.126449704170227050f, 
+  0.332355499267578130f, 0.127471104264259340f, 0.333499968051910400f, 
+  0.128496021032333370f, 0.334641307592391970f, 
+  0.129524439573287960f, 0.335779488086700440f, 0.130556344985961910f, 
+  0.336914509534835820f, 0.131591722369194030f, 0.338046342134475710f, 
+  0.132630556821823120f, 0.339175015687942500f, 
+  0.133672863245010380f, 0.340300500392913820f, 0.134718611836433410f, 
+  0.341422766447067260f, 0.135767802596092220f, 0.342541843652725220f, 
+  0.136820420622825620f, 0.343657672405242920f, 
+  0.137876465916633610f, 0.344770282506942750f, 0.138935908675193790f, 
+  0.345879614353179930f, 0.139998748898506160f, 0.346985727548599240f, 
+  0.141064971685409550f, 0.348088562488555910f, 
+  0.142134591937065120f, 0.349188119173049930f, 0.143207564949989320f, 
+  0.350284397602081300f, 0.144283905625343320f, 0.351377367973327640f, 
+  0.145363584160804750f, 0.352467030286788940f, 
+  0.146446615457534790f, 0.353553384542465210f, 0.147532954812049870f, 
+  0.354636400938034060f, 0.148622632026672360f, 0.355716109275817870f, 
+  0.149715602397918700f, 0.356792420148849490f, 
+  0.150811880826950070f, 0.357865422964096070f, 0.151911437511444090f, 
+  0.358935028314590450f, 0.153014272451400760f, 0.360001266002655030f, 
+  0.154120370745658870f, 0.361064106225967410f, 
+  0.155229732394218440f, 0.362123548984527590f, 0.156342327594757080f, 
+  0.363179564476013180f, 0.157458171248435970f, 0.364232182502746580f, 
+  0.158577233552932740f, 0.365281373262405400f, 
+  0.159699499607086180f, 0.366327136754989620f, 0.160824984312057500f, 
+  0.367369443178176880f, 0.161953642964363100f, 0.368408292531967160f, 
+  0.163085505366325380f, 0.369443655014038090f, 
+  0.164220526814460750f, 0.370475560426712040f, 0.165358707308769230f, 
+  0.371503978967666630f, 0.166500031948089600f, 0.372528880834579470f, 
+  0.167644515633583070f, 0.373550295829772950f, 
+  0.168792113661766050f, 0.374568194150924680f, 0.169942826032638550f, 
+  0.375582575798034670f, 0.171096652746200560f, 0.376593410968780520f, 
+  0.172253578901290890f, 0.377600699663162230f, 
+  0.173413574695587160f, 0.378604412078857420f, 0.174576655030250550f, 
+  0.379604607820510860f, 0.175742805004119870f, 0.380601197481155400f, 
+  0.176911994814872740f, 0.381594210863113400f, 
+  0.178084224462509160f, 0.382583618164062500f, 0.179259493947029110f, 
+  0.383569449186325070f, 0.180437773466110230f, 0.384551674127578740f, 
+  0.181619063019752500f, 0.385530263185501100f, 
+  0.182803362607955930f, 0.386505216360092160f, 0.183990627527236940f, 
+  0.387476563453674320f, 0.185180887579917910f, 0.388444244861602780f, 
+  0.186374098062515260f, 0.389408260583877560f, 
+  0.187570258975028990f, 0.390368610620498660f, 0.188769355416297910f, 
+  0.391325294971466060f, 0.189971387386322020f, 0.392278283834457400f, 
+  0.191176339983940120f, 0.393227607011795040f, 
+  0.192384198307991030f, 0.394173204898834230f, 0.193594962358474730f, 
+  0.395115107297897340f, 0.194808602333068850f, 0.396053284406661990f, 
+  0.196025103330612180f, 0.396987736225128170f, 
+  0.197244480252265930f, 0.397918462753295900f, 0.198466703295707700f, 
+  0.398845434188842770f, 0.199691757559776310f, 0.399768620729446410f, 
+  0.200919643044471740f, 0.400688081979751590f, 
+  0.202150344848632810f, 0.401603758335113530f, 0.203383848071098330f, 
+  0.402515679597854610f, 0.204620152711868290f, 0.403423786163330080f, 
+  0.205859228968620300f, 0.404328078031539920f, 
+  0.207101076841354370f, 0.405228585004806520f, 0.208345666527748110f, 
+  0.406125307083129880f, 0.209593027830123900f, 0.407018154859542850f, 
+  0.210843101143836980f, 0.407907217741012570f, 
+  0.212095901370048520f, 0.408792406320571900f, 0.213351413607597350f, 
+  0.409673750400543210f, 0.214609622955322270f, 0.410551249980926510f, 
+  0.215870529413223270f, 0.411424905061721800f, 
+  0.217134088277816770f, 0.412294656038284300f, 0.218400329351425170f, 
+  0.413160532712936400f, 0.219669207930564880f, 0.414022535085678100f, 
+  0.220940738916397090f, 0.414880603551864620f, 
+  0.222214877605438230f, 0.415734797716140750f, 0.223491653800010680f, 
+  0.416585087776184080f, 0.224771007895469670f, 0.417431443929672240f, 
+  0.226052969694137570f, 0.418273866176605220f, 
+  0.227337509393692020f, 0.419112354516983030f, 0.228624612092971800f, 
+  0.419946908950805660f, 0.229914262890815730f, 0.420777499675750730f, 
+  0.231206461787223820f, 0.421604126691818240f, 
+  0.232501193881034850f, 0.422426789999008180f, 0.233798429369926450f, 
+  0.423245459794998170f, 0.235098183155059810f, 0.424060165882110600f, 
+  0.236400425434112550f, 0.424870878458023070f, 
+  0.237705156207084660f, 0.425677597522735600f, 0.239012360572814940f, 
+  0.426480293273925780f, 0.240322008728981020f, 0.427278995513916020f, 
+  0.241634100675582890f, 0.428073674440383910f, 
+  0.242948621511459350f, 0.428864300251007080f, 0.244265571236610410f, 
+  0.429650902748107910f, 0.245584934949874880f, 0.430433481931686400f, 
+  0.246906682848930360f, 0.431211978197097780f, 
+  0.248230814933776860f, 0.431986421346664430f, 0.249557301402091980f, 
+  0.432756811380386350f, 0.250886172056198120f, 0.433523118495941160f, 
+  0.252217382192611690f, 0.434285342693328860f, 
+  0.253550916910171510f, 0.435043483972549440f, 0.254886746406555180f, 
+  0.435797542333602910f, 0.256224930286407470f, 0.436547487974166870f, 
+  0.257565379142761230f, 0.437293320894241330f, 
+  0.258908122777938840f, 0.438035041093826290f, 0.260253131389617920f, 
+  0.438772648572921750f, 0.261600375175476070f, 0.439506113529205320f, 
+  0.262949883937835690f, 0.440235435962677000f, 
+  0.264301627874374390f, 0.440960645675659180f, 0.265655577182769780f, 
+  0.441681683063507080f, 0.267011761665344240f, 0.442398548126220700f, 
+  0.268370121717453000f, 0.443111270666122440f, 
+  0.269730657339096070f, 0.443819820880889890f, 0.271093338727951050f, 
+  0.444524168968200680f, 0.272458195686340330f, 0.445224374532699580f, 
+  0.273825198411941530f, 0.445920348167419430f, 
+  0.275194346904754640f, 0.446612149477005000f, 0.276565581560134890f, 
+  0.447299748659133910f, 0.277938932180404660f, 0.447983115911483760f, 
+  0.279314368963241580f, 0.448662281036376950f, 
+  0.280691891908645630f, 0.449337244033813480f, 0.282071471214294430f, 
+  0.450007945299148560f, 0.283453077077865600f, 0.450674414634704590f, 
+  0.284836769104003910f, 0.451336652040481570f, 
+  0.286222457885742190f, 0.451994657516479490f, 0.287610173225402830f, 
+  0.452648371458053590f, 0.288999855518341060f, 0.453297853469848630f, 
+  0.290391564369201660f, 0.453943043947219850f, 
+  0.291785210371017460f, 0.454584002494812010f, 0.293180853128433230f, 
+  0.455220639705657960f, 0.294578403234481810f, 0.455853015184402470f, 
+  0.295977920293807980f, 0.456481099128723140f, 
+  0.297379344701766970f, 0.457104891538620000f, 0.298782676458358760f, 
+  0.457724362611770630f, 0.300187885761260990f, 0.458339542150497440f, 
+  0.301595002412796020f, 0.458950400352478030f, 
+  0.303003966808319090f, 0.459556937217712400f, 0.304414808750152590f, 
+  0.460159152746200560f, 0.305827468633651730f, 0.460757017135620120f, 
+  0.307241976261138920f, 0.461350560188293460f, 
+  0.308658272027969360f, 0.461939752101898190f, 0.310076385736465450f, 
+  0.462524622678756710f, 0.311496287584304810f, 0.463105112314224240f, 
+  0.312917977571487430f, 0.463681250810623170f, 
+  0.314341396093368530f, 0.464253038167953490f, 0.315766572952270510f, 
+  0.464820444583892820f, 0.317193508148193360f, 0.465383470058441160f, 
+  0.318622142076492310f, 0.465942144393920900f, 
+  0.320052474737167360f, 0.466496407985687260f, 0.321484506130218510f, 
+  0.467046260833740230f, 0.322918236255645750f, 0.467591762542724610f, 
+  0.324353635311126710f, 0.468132823705673220f, 
+  0.325790673494338990f, 0.468669503927230830f, 0.327229350805282590f, 
+  0.469201773405075070f, 0.328669637441635130f, 0.469729602336883540f, 
+  0.330111563205718990f, 0.470253020524978640f, 
+  0.331555068492889400f, 0.470772027969360350f, 0.333000183105468750f, 
+  0.471286594867706300f, 0.334446847438812260f, 0.471796721220016480f, 
+  0.335895091295242310f, 0.472302407026290890f, 
+  0.337344855070114140f, 0.472803652286529540f, 0.338796168565750120f, 
+  0.473300457000732420f, 0.340248972177505490f, 0.473792791366577150f, 
+  0.341703325510025020f, 0.474280685186386110f, 
+  0.343159139156341550f, 0.474764078855514530f, 0.344616413116455080f, 
+  0.475243031978607180f, 0.346075177192687990f, 0.475717514753341670f, 
+  0.347535371780395510f, 0.476187497377395630f, 
+  0.348997026681900020f, 0.476653009653091430f, 0.350460082292556760f, 
+  0.477114051580429080f, 0.351924568414688110f, 0.477570593357086180f, 
+  0.353390425443649290f, 0.478022634983062740f, 
+  0.354857653379440310f, 0.478470176458358760f, 0.356326282024383540f, 
+  0.478913217782974240f, 0.357796221971511840f, 0.479351729154586790f, 
+  0.359267532825469970f, 0.479785770177841190f, 
+  0.360740154981613160f, 0.480215251445770260f, 0.362214088439941410f, 
+  0.480640232563018800f, 0.363689333200454710f, 0.481060713529586790f, 
+  0.365165829658508300f, 0.481476634740829470f, 
+  0.366643607616424560f, 0.481888025999069210f, 0.368122667074203490f, 
+  0.482294887304306030f, 0.369602948427200320f, 0.482697218656539920f, 
+  0.371084451675415040f, 0.483094990253448490f, 
+  0.372567176818847660f, 0.483488231897354130f, 0.374051094055175780f, 
+  0.483876913785934450f, 0.375536203384399410f, 0.484261035919189450f, 
+  0.377022475004196170f, 0.484640628099441530f, 
+  0.378509908914566040f, 0.485015630722045900f, 0.379998475313186650f, 
+  0.485386073589324950f, 0.381488204002380370f, 0.485751956701278690f, 
+  0.382979035377502440f, 0.486113250255584720f, 
+  0.384470939636230470f, 0.486469984054565430f, 0.385963946580886840f, 
+  0.486822128295898440f, 0.387458056211471560f, 0.487169682979583740f, 
+  0.388953179121017460f, 0.487512677907943730f, 
+  0.390449374914169310f, 0.487851053476333620f, 0.391946613788604740f, 
+  0.488184869289398190f, 0.393444836139678960f, 0.488514065742492680f, 
+  0.394944071769714360f, 0.488838672637939450f, 
+  0.396444320678710940f, 0.489158689975738530f, 0.397945523262023930f, 
+  0.489474087953567500f, 0.399447679519653320f, 0.489784896373748780f, 
+  0.400950789451599120f, 0.490091055631637570f, 
+  0.402454853057861330f, 0.490392625331878660f, 0.403959810733795170f, 
+  0.490689605474472050f, 0.405465662479400630f, 0.490981936454772950f, 
+  0.406972438097000120f, 0.491269648075103760f, 
+  0.408480048179626460f, 0.491552740335464480f, 0.409988552331924440f, 
+  0.491831213235855100f, 0.411497890949249270f, 0.492105036973953250f, 
+  0.413008064031600950f, 0.492374241352081300f, 
+  0.414519041776657100f, 0.492638826370239260f, 0.416030853986740110f, 
+  0.492898762226104740f, 0.417543441057205200f, 0.493154048919677730f, 
+  0.419056802988052370f, 0.493404686450958250f, 
+  0.420570939779281620f, 0.493650704622268680f, 0.422085791826248170f, 
+  0.493892073631286620f, 0.423601418733596800f, 0.494128793478012080f, 
+  0.425117731094360350f, 0.494360834360122680f, 
+  0.426634758710861210f, 0.494588255882263180f, 0.428152471780776980f, 
+  0.494810998439788820f, 0.429670870304107670f, 0.495029091835021970f, 
+  0.431189924478530880f, 0.495242536067962650f, 
+  0.432709634304046630f, 0.495451331138610840f, 0.434229999780654910f, 
+  0.495655417442321780f, 0.435750931501388550f, 0.495854884386062620f, 
+  0.437272518873214720f, 0.496049642562866210f, 
+  0.438794672489166260f, 0.496239781379699710f, 0.440317392349243160f, 
+  0.496425211429595950f, 0.441840678453445430f, 0.496605962514877320f, 
+  0.443364530801773070f, 0.496782064437866210f, 
+  0.444888889789581300f, 0.496953487396240230f, 0.446413785219192500f, 
+  0.497120231389999390f, 0.447939187288284300f, 0.497282296419143680f, 
+  0.449465066194534300f, 0.497439652681350710f, 
+  0.450991421937942500f, 0.497592359781265260f, 0.452518254518508910f, 
+  0.497740387916564940f, 0.454045534133911130f, 0.497883707284927370f, 
+  0.455573230981826780f, 0.498022347688674930f, 
+  0.457101345062255860f, 0.498156309127807620f, 0.458629876375198360f, 
+  0.498285561800003050f, 0.460158795118331910f, 0.498410135507583620f, 
+  0.461688071489334110f, 0.498530030250549320f, 
+  0.463217705488204960f, 0.498645216226577760f, 0.464747726917266850f, 
+  0.498755723237991330f, 0.466278046369552610f, 0.498861521482467650f, 
+  0.467808693647384640f, 0.498962640762329100f, 
+  0.469339638948440550f, 0.499059051275253300f, 0.470870882272720340f, 
+  0.499150782823562620f, 0.472402364015579220f, 0.499237775802612300f, 
+  0.473934143781661990f, 0.499320119619369510f, 
+  0.475466161966323850f, 0.499397724866867070f, 0.476998418569564820f, 
+  0.499470651149749760f, 0.478530883789062500f, 0.499538868665695190f, 
+  0.480063527822494510f, 0.499602377414703370f, 
+  0.481596380472183230f, 0.499661177396774290f, 0.483129411935806270f, 
+  0.499715298414230350f, 0.484662592411041260f, 0.499764710664749150f, 
+  0.486195921897888180f, 0.499809414148330690f, 
+  0.487729400396347050f, 0.499849408864974980f, 0.489262968301773070f, 
+  0.499884694814682010f, 0.490796625614166260f, 0.499915301799774170f, 
+  0.492330402135849000f, 0.499941170215606690f, 
+  0.493864238262176510f, 0.499962359666824340f, 0.495398133993148800f, 
+  0.499978810548782350f, 0.496932059526443480f, 0.499990582466125490f, 
+  0.498466014862060550f, 0.499997645616531370f 
+}; 
+ 
+ 
+/**  
+* \par 
+* Generation of realCoefB array:  
+* \par 
+* n = 1024  
+* <pre>for (i = 0; i < n; i++)  
+* {  
+*    pBTable[2 * i] = 0.5 * (1.0 + sin (2 * PI / (double) (2 * n) * (double) i));  
+*    pBTable[2 * i + 1] = 0.5 * cos (2 * PI / (double) (2 * n) * (double) i);  
+*  } </pre>  
+*  
+*/ 
+static const float32_t realCoefB[2048] = { 
+  0.500000000000000000f, 0.500000000000000000f, 0.501533985137939450f, 
+  0.499997645616531370f, 0.503067970275878910f, 0.499990582466125490f, 
+  0.504601895809173580f, 0.499978810548782350f, 
+  0.506135761737823490f, 0.499962359666824340f, 0.507669627666473390f, 
+  0.499941170215606690f, 0.509203374385833740f, 0.499915301799774170f, 
+  0.510737061500549320f, 0.499884694814682010f, 
+  0.512270629405975340f, 0.499849408864974980f, 0.513804078102111820f, 
+  0.499809414148330690f, 0.515337407588958740f, 0.499764710664749150f, 
+  0.516870558261871340f, 0.499715298414230350f, 
+  0.518403589725494380f, 0.499661177396774290f, 0.519936442375183110f, 
+  0.499602377414703370f, 0.521469116210937500f, 0.499538868665695190f, 
+  0.523001611232757570f, 0.499470651149749760f, 
+  0.524533808231353760f, 0.499397724866867070f, 0.526065826416015630f, 
+  0.499320119619369510f, 0.527597606182098390f, 0.499237775802612300f, 
+  0.529129147529602050f, 0.499150782823562620f, 
+  0.530660390853881840f, 0.499059051275253300f, 0.532191336154937740f, 
+  0.498962640762329100f, 0.533721983432769780f, 0.498861521482467650f, 
+  0.535252273082733150f, 0.498755723237991330f, 
+  0.536782264709472660f, 0.498645216226577760f, 0.538311958312988280f, 
+  0.498530030250549320f, 0.539841234683990480f, 0.498410135507583620f, 
+  0.541370153427124020f, 0.498285561800003050f, 
+  0.542898654937744140f, 0.498156309127807620f, 0.544426798820495610f, 
+  0.498022347688674930f, 0.545954465866088870f, 0.497883707284927370f, 
+  0.547481775283813480f, 0.497740387916564940f, 
+  0.549008548259735110f, 0.497592359781265260f, 0.550534904003143310f, 
+  0.497439652681350710f, 0.552060842514038090f, 0.497282296419143680f, 
+  0.553586184978485110f, 0.497120231389999390f, 
+  0.555111110210418700f, 0.496953487396240230f, 0.556635499000549320f, 
+  0.496782064437866210f, 0.558159291744232180f, 0.496605962514877320f, 
+  0.559682607650756840f, 0.496425211429595950f, 
+  0.561205327510833740f, 0.496239781379699710f, 0.562727510929107670f, 
+  0.496049642562866210f, 0.564249038696289060f, 0.495854884386062620f, 
+  0.565770030021667480f, 0.495655417442321780f, 
+  0.567290365695953370f, 0.495451331138610840f, 0.568810045719146730f, 
+  0.495242536067962650f, 0.570329129695892330f, 0.495029091835021970f, 
+  0.571847498416900630f, 0.494810998439788820f, 
+  0.573365211486816410f, 0.494588255882263180f, 0.574882268905639650f, 
+  0.494360834360122680f, 0.576398611068725590f, 0.494128793478012080f, 
+  0.577914178371429440f, 0.493892073631286620f, 
+  0.579429090023040770f, 0.493650704622268680f, 0.580943167209625240f, 
+  0.493404686450958250f, 0.582456588745117190f, 0.493154048919677730f, 
+  0.583969175815582280f, 0.492898762226104740f, 
+  0.585480928421020510f, 0.492638826370239260f, 0.586991965770721440f, 
+  0.492374241352081300f, 0.588502109050750730f, 0.492105036973953250f, 
+  0.590011477470397950f, 0.491831213235855100f, 
+  0.591519951820373540f, 0.491552740335464480f, 0.593027591705322270f, 
+  0.491269648075103760f, 0.594534337520599370f, 0.490981936454772950f, 
+  0.596040189266204830f, 0.490689605474472050f, 
+  0.597545146942138670f, 0.490392625331878660f, 0.599049210548400880f, 
+  0.490091055631637570f, 0.600552320480346680f, 0.489784896373748780f, 
+  0.602054476737976070f, 0.489474087953567500f, 
+  0.603555679321289060f, 0.489158689975738530f, 0.605055928230285640f, 
+  0.488838672637939450f, 0.606555163860321040f, 0.488514065742492680f, 
+  0.608053386211395260f, 0.488184869289398190f, 
+  0.609550595283508300f, 0.487851053476333620f, 0.611046791076660160f, 
+  0.487512677907943730f, 0.612541973590850830f, 0.487169682979583740f, 
+  0.614036023616790770f, 0.486822128295898440f, 
+  0.615529060363769530f, 0.486469984054565430f, 0.617020964622497560f, 
+  0.486113250255584720f, 0.618511795997619630f, 0.485751956701278690f, 
+  0.620001494884490970f, 0.485386073589324950f, 
+  0.621490061283111570f, 0.485015630722045900f, 0.622977554798126220f, 
+  0.484640628099441530f, 0.624463796615600590f, 0.484261035919189450f, 
+  0.625948905944824220f, 0.483876913785934450f, 
+  0.627432823181152340f, 0.483488231897354130f, 0.628915548324584960f, 
+  0.483094990253448490f, 0.630397081375122070f, 0.482697218656539920f, 
+  0.631877362728118900f, 0.482294887304306030f, 
+  0.633356392383575440f, 0.481888025999069210f, 0.634834170341491700f, 
+  0.481476634740829470f, 0.636310696601867680f, 0.481060713529586790f, 
+  0.637785911560058590f, 0.480640232563018800f, 
+  0.639259815216064450f, 0.480215251445770260f, 0.640732467174530030f, 
+  0.479785770177841190f, 0.642203748226165770f, 0.479351729154586790f, 
+  0.643673717975616460f, 0.478913217782974240f, 
+  0.645142316818237300f, 0.478470176458358760f, 0.646609604358673100f, 
+  0.478022634983062740f, 0.648075461387634280f, 0.477570593357086180f, 
+  0.649539887905120850f, 0.477114051580429080f, 
+  0.651003003120422360f, 0.476653009653091430f, 0.652464628219604490f, 
+  0.476187497377395630f, 0.653924822807312010f, 0.475717514753341670f, 
+  0.655383586883544920f, 0.475243031978607180f, 
+  0.656840860843658450f, 0.474764078855514530f, 0.658296704292297360f, 
+  0.474280685186386110f, 0.659750998020172120f, 0.473792791366577150f, 
+  0.661203861236572270f, 0.473300457000732420f, 
+  0.662655174732208250f, 0.472803652286529540f, 0.664104938507080080f, 
+  0.472302407026290890f, 0.665553152561187740f, 0.471796721220016480f, 
+  0.666999816894531250f, 0.471286594867706300f, 
+  0.668444931507110600f, 0.470772027969360350f, 0.669888436794281010f, 
+  0.470253020524978640f, 0.671330332756042480f, 0.469729602336883540f, 
+  0.672770678997039790f, 0.469201773405075070f, 
+  0.674209356307983400f, 0.468669503927230830f, 0.675646364688873290f, 
+  0.468132823705673220f, 0.677081763744354250f, 0.467591762542724610f, 
+  0.678515493869781490f, 0.467046260833740230f, 
+  0.679947495460510250f, 0.466496407985687260f, 0.681377887725830080f, 
+  0.465942144393920900f, 0.682806491851806640f, 0.465383470058441160f, 
+  0.684233427047729490f, 0.464820444583892820f, 
+  0.685658574104309080f, 0.464253038167953490f, 0.687082052230834960f, 
+  0.463681250810623170f, 0.688503682613372800f, 0.463105112314224240f, 
+  0.689923584461212160f, 0.462524622678756710f, 
+  0.691341698169708250f, 0.461939752101898190f, 0.692758023738861080f, 
+  0.461350560188293460f, 0.694172501564025880f, 0.460757017135620120f, 
+  0.695585191249847410f, 0.460159152746200560f, 
+  0.696996033191680910f, 0.459556937217712400f, 0.698404967784881590f, 
+  0.458950400352478030f, 0.699812114238739010f, 0.458339542150497440f, 
+  0.701217353343963620f, 0.457724362611770630f, 
+  0.702620685100555420f, 0.457104891538620000f, 0.704022109508514400f, 
+  0.456481099128723140f, 0.705421566963195800f, 0.455853015184402470f, 
+  0.706819176673889160f, 0.455220639705657960f, 
+  0.708214759826660160f, 0.454584002494812010f, 0.709608435630798340f, 
+  0.453943043947219850f, 0.711000144481658940f, 0.453297853469848630f, 
+  0.712389826774597170f, 0.452648371458053590f, 
+  0.713777542114257810f, 0.451994657516479490f, 0.715163230895996090f, 
+  0.451336652040481570f, 0.716546893119812010f, 0.450674414634704590f, 
+  0.717928528785705570f, 0.450007945299148560f, 
+  0.719308137893676760f, 0.449337244033813480f, 0.720685660839080810f, 
+  0.448662281036376950f, 0.722061097621917720f, 0.447983115911483760f, 
+  0.723434448242187500f, 0.447299748659133910f, 
+  0.724805653095245360f, 0.446612149477005000f, 0.726174771785736080f, 
+  0.445920348167419430f, 0.727541804313659670f, 0.445224374532699580f, 
+  0.728906631469726560f, 0.444524168968200680f, 
+  0.730269372463226320f, 0.443819820880889890f, 0.731629908084869380f, 
+  0.443111270666122440f, 0.732988238334655760f, 0.442398548126220700f, 
+  0.734344422817230220f, 0.441681683063507080f, 
+  0.735698342323303220f, 0.440960645675659180f, 0.737050116062164310f, 
+  0.440235435962677000f, 0.738399624824523930f, 0.439506113529205320f, 
+  0.739746868610382080f, 0.438772648572921750f, 
+  0.741091907024383540f, 0.438035041093826290f, 0.742434620857238770f, 
+  0.437293320894241330f, 0.743775069713592530f, 0.436547487974166870f, 
+  0.745113253593444820f, 0.435797542333602910f, 
+  0.746449112892150880f, 0.435043483972549440f, 0.747782647609710690f, 
+  0.434285342693328860f, 0.749113857746124270f, 0.433523118495941160f, 
+  0.750442683696746830f, 0.432756811380386350f, 
+  0.751769185066223140f, 0.431986421346664430f, 0.753093302249908450f, 
+  0.431211978197097780f, 0.754415094852447510f, 0.430433481931686400f, 
+  0.755734443664550780f, 0.429650902748107910f, 
+  0.757051348686218260f, 0.428864300251007080f, 0.758365929126739500f, 
+  0.428073674440383910f, 0.759678006172180180f, 0.427278995513916020f, 
+  0.760987639427185060f, 0.426480293273925780f, 
+  0.762294828891754150f, 0.425677597522735600f, 0.763599574565887450f, 
+  0.424870878458023070f, 0.764901816844940190f, 0.424060165882110600f, 
+  0.766201555728912350f, 0.423245459794998170f, 
+  0.767498791217803960f, 0.422426789999008180f, 0.768793523311614990f, 
+  0.421604126691818240f, 0.770085752010345460f, 0.420777499675750730f, 
+  0.771375417709350590f, 0.419946908950805660f, 
+  0.772662520408630370f, 0.419112354516983030f, 0.773947000503540040f, 
+  0.418273866176605220f, 0.775228977203369140f, 0.417431443929672240f, 
+  0.776508331298828130f, 0.416585087776184080f, 
+  0.777785122394561770f, 0.415734797716140750f, 0.779059290885925290f, 
+  0.414880603551864620f, 0.780330777168273930f, 0.414022535085678100f, 
+  0.781599700450897220f, 0.413160532712936400f, 
+  0.782865881919860840f, 0.412294656038284300f, 0.784129500389099120f, 
+  0.411424905061721800f, 0.785390377044677730f, 0.410551249980926510f, 
+  0.786648571491241460f, 0.409673750400543210f, 
+  0.787904083728790280f, 0.408792406320571900f, 0.789156913757324220f, 
+  0.407907217741012570f, 0.790407001972198490f, 0.407018154859542850f, 
+  0.791654348373413090f, 0.406125307083129880f, 
+  0.792898952960968020f, 0.405228585004806520f, 0.794140756130218510f, 
+  0.404328078031539920f, 0.795379877090454100f, 0.403423786163330080f, 
+  0.796616137027740480f, 0.402515679597854610f, 
+  0.797849655151367190f, 0.401603758335113530f, 0.799080371856689450f, 
+  0.400688081979751590f, 0.800308227539062500f, 0.399768620729446410f, 
+  0.801533281803131100f, 0.398845434188842770f, 
+  0.802755534648895260f, 0.397918462753295900f, 0.803974866867065430f, 
+  0.396987736225128170f, 0.805191397666931150f, 0.396053284406661990f, 
+  0.806405067443847660f, 0.395115107297897340f, 
+  0.807615816593170170f, 0.394173204898834230f, 0.808823645114898680f, 
+  0.393227607011795040f, 0.810028612613677980f, 0.392278283834457400f, 
+  0.811230659484863280f, 0.391325294971466060f, 
+  0.812429726123809810f, 0.390368610620498660f, 0.813625931739807130f, 
+  0.389408260583877560f, 0.814819097518920900f, 0.388444244861602780f, 
+  0.816009342670440670f, 0.387476563453674320f, 
+  0.817196667194366460f, 0.386505216360092160f, 0.818380951881408690f, 
+  0.385530263185501100f, 0.819562196731567380f, 0.384551674127578740f, 
+  0.820740520954132080f, 0.383569449186325070f, 
+  0.821915745735168460f, 0.382583618164062500f, 0.823087990283966060f, 
+  0.381594210863113400f, 0.824257194995880130f, 0.380601197481155400f, 
+  0.825423359870910640f, 0.379604607820510860f, 
+  0.826586425304412840f, 0.378604412078857420f, 0.827746450901031490f, 
+  0.377600699663162230f, 0.828903317451477050f, 0.376593410968780520f, 
+  0.830057144165039060f, 0.375582575798034670f, 
+  0.831207871437072750f, 0.374568194150924680f, 0.832355499267578130f, 
+  0.373550295829772950f, 0.833499968051910400f, 0.372528880834579470f, 
+  0.834641277790069580f, 0.371503978967666630f, 
+  0.835779488086700440f, 0.370475560426712040f, 0.836914479732513430f, 
+  0.369443655014038090f, 0.838046371936798100f, 0.368408292531967160f, 
+  0.839175045490264890f, 0.367369443178176880f, 
+  0.840300500392913820f, 0.366327136754989620f, 0.841422796249389650f, 
+  0.365281373262405400f, 0.842541813850402830f, 0.364232182502746580f, 
+  0.843657672405242920f, 0.363179564476013180f, 
+  0.844770252704620360f, 0.362123548984527590f, 0.845879614353179930f, 
+  0.361064106225967410f, 0.846985757350921630f, 0.360001266002655030f, 
+  0.848088562488555910f, 0.358935028314590450f, 
+  0.849188148975372310f, 0.357865422964096070f, 0.850284397602081300f, 
+  0.356792420148849490f, 0.851377367973327640f, 0.355716109275817870f, 
+  0.852467060089111330f, 0.354636400938034060f, 
+  0.853553414344787600f, 0.353553384542465210f, 0.854636430740356450f, 
+  0.352467030286788940f, 0.855716109275817870f, 0.351377367973327640f, 
+  0.856792449951171880f, 0.350284397602081300f, 
+  0.857865393161773680f, 0.349188119173049930f, 0.858934998512268070f, 
+  0.348088562488555910f, 0.860001266002655030f, 0.346985727548599240f, 
+  0.861064076423645020f, 0.345879614353179930f, 
+  0.862123548984527590f, 0.344770282506942750f, 0.863179564476013180f, 
+  0.343657672405242920f, 0.864232182502746580f, 0.342541843652725220f, 
+  0.865281403064727780f, 0.341422766447067260f, 
+  0.866327106952667240f, 0.340300500392913820f, 0.867369413375854490f, 
+  0.339175015687942500f, 0.868408262729644780f, 0.338046342134475710f, 
+  0.869443655014038090f, 0.336914509534835820f, 
+  0.870475590229034420f, 0.335779488086700440f, 0.871503949165344240f, 
+  0.334641307592391970f, 0.872528910636901860f, 0.333499968051910400f, 
+  0.873550295829772950f, 0.332355499267578130f, 
+  0.874568223953247070f, 0.331207901239395140f, 0.875582575798034670f, 
+  0.330057173967361450f, 0.876593410968780520f, 0.328903347253799440f, 
+  0.877600669860839840f, 0.327746421098709110f, 
+  0.878604412078857420f, 0.326586425304412840f, 0.879604578018188480f, 
+  0.325423330068588260f, 0.880601167678833010f, 0.324257194995880130f, 
+  0.881594181060791020f, 0.323088020086288450f, 
+  0.882583618164062500f, 0.321915775537490840f, 0.883569478988647460f, 
+  0.320740520954132080f, 0.884551644325256350f, 0.319562226533889770f, 
+  0.885530233383178710f, 0.318380922079086300f, 
+  0.886505246162414550f, 0.317196637392044070f, 0.887476563453674320f, 
+  0.316009372472763060f, 0.888444244861602780f, 0.314819127321243290f, 
+  0.889408230781555180f, 0.313625901937484740f, 
+  0.890368640422821040f, 0.312429755926132200f, 0.891325294971466060f, 
+  0.311230629682540890f, 0.892278313636779790f, 0.310028612613677980f, 
+  0.893227577209472660f, 0.308823645114898680f, 
+  0.894173204898834230f, 0.307615786790847780f, 0.895115137100219730f, 
+  0.306405037641525270f, 0.896053314208984380f, 0.305191397666931150f, 
+  0.896987736225128170f, 0.303974896669387820f, 
+  0.897918462753295900f, 0.302755534648895260f, 0.898845434188842770f, 
+  0.301533311605453490f, 0.899768650531768800f, 0.300308227539062500f, 
+  0.900688111782073970f, 0.299080342054367070f, 
+  0.901603758335113530f, 0.297849655151367190f, 0.902515649795532230f, 
+  0.296616137027740480f, 0.903423786163330080f, 0.295379847288131710f, 
+  0.904328107833862300f, 0.294140785932540890f, 
+  0.905228614807128910f, 0.292898923158645630f, 0.906125307083129880f, 
+  0.291654318571090700f, 0.907018184661865230f, 0.290406972169876100f, 
+  0.907907187938690190f, 0.289156883955001830f, 
+  0.908792436122894290f, 0.287904083728790280f, 0.909673750400543210f, 
+  0.286648571491241460f, 0.910551249980926510f, 0.285390377044677730f, 
+  0.911424875259399410f, 0.284129470586776730f, 
+  0.912294626235961910f, 0.282865911722183230f, 0.913160502910614010f, 
+  0.281599670648574830f, 0.914022505283355710f, 0.280330777168273930f, 
+  0.914880633354187010f, 0.279059261083602910f, 
+  0.915734827518463130f, 0.277785122394561770f, 0.916585087776184080f, 
+  0.276508361101150510f, 0.917431414127349850f, 0.275228977203369140f, 
+  0.918273866176605220f, 0.273947030305862430f, 
+  0.919112324714660640f, 0.272662490606307980f, 0.919946908950805660f, 
+  0.271375387907028200f, 0.920777499675750730f, 0.270085722208023070f, 
+  0.921604096889495850f, 0.268793523311614990f, 
+  0.922426760196685790f, 0.267498821020126340f, 0.923245489597320560f, 
+  0.266201555728912350f, 0.924060165882110600f, 0.264901816844940190f, 
+  0.924870908260345460f, 0.263599574565887450f, 
+  0.925677597522735600f, 0.262294828891754150f, 0.926480293273925780f, 
+  0.260987639427185060f, 0.927278995513916020f, 0.259678006172180180f, 
+  0.928073644638061520f, 0.258365899324417110f, 
+  0.928864300251007080f, 0.257051378488540650f, 0.929650902748107910f, 
+  0.255734413862228390f, 0.930433452129364010f, 0.254415065050125120f, 
+  0.931211948394775390f, 0.253093332052230830f, 
+  0.931986451148986820f, 0.251769185066223140f, 0.932756841182708740f, 
+  0.250442683696746830f, 0.933523118495941160f, 0.249113827943801880f, 
+  0.934285342693328860f, 0.247782632708549500f, 
+  0.935043513774871830f, 0.246449097990989690f, 0.935797572135925290f, 
+  0.245113238692283630f, 0.936547517776489260f, 0.243775084614753720f, 
+  0.937293350696563720f, 0.242434620857238770f, 
+  0.938035070896148680f, 0.241091892123222350f, 0.938772618770599370f, 
+  0.239746883511543270f, 0.939506113529205320f, 0.238399609923362730f, 
+  0.940235435962677000f, 0.237050101161003110f, 
+  0.940960645675659180f, 0.235698372125625610f, 0.941681683063507080f, 
+  0.234344407916069030f, 0.942398548126220700f, 0.232988253235816960f, 
+  0.943111240863800050f, 0.231629893183708190f, 
+  0.943819820880889890f, 0.230269357562065120f, 0.944524168968200680f, 
+  0.228906646370887760f, 0.945224344730377200f, 0.227541789412498470f, 
+  0.945920348167419430f, 0.226174786686897280f, 
+  0.946612179279327390f, 0.224805667996406560f, 0.947299718856811520f, 
+  0.223434418439865110f, 0.947983145713806150f, 0.222061067819595340f, 
+  0.948662281036376950f, 0.220685631036758420f, 
+  0.949337244033813480f, 0.219308122992515560f, 0.950007975101470950f, 
+  0.217928543686866760f, 0.950674414634704590f, 0.216546908020973210f, 
+  0.951336681842803960f, 0.215163245797157290f, 
+  0.951994657516479490f, 0.213777542114257810f, 0.952648401260375980f, 
+  0.212389841675758360f, 0.953297853469848630f, 0.211000129580497740f, 
+  0.953943073749542240f, 0.209608450531959530f, 
+  0.954584002494812010f, 0.208214774727821350f, 0.955220639705657960f, 
+  0.206819161772727970f, 0.955853044986724850f, 0.205421581864356990f, 
+  0.956481099128723140f, 0.204022079706192020f, 
+  0.957104861736297610f, 0.202620655298233030f, 0.957724332809448240f, 
+  0.201217323541641240f, 0.958339512348175050f, 0.199812099337577820f, 
+  0.958950400352478030f, 0.198404997587203980f, 
+  0.959556937217712400f, 0.196996018290519710f, 0.960159122943878170f, 
+  0.195585191249847410f, 0.960757017135620120f, 0.194172516465187070f, 
+  0.961350560188293460f, 0.192758023738861080f, 
+  0.961939752101898190f, 0.191341713070869450f, 0.962524592876434330f, 
+  0.189923599362373350f, 0.963105142116546630f, 0.188503712415695190f, 
+  0.963681280612945560f, 0.187082037329673770f, 
+  0.964253067970275880f, 0.185658603906631470f, 0.964820444583892820f, 
+  0.184233412146568300f, 0.965383470058441160f, 0.182806491851806640f, 
+  0.965942144393920900f, 0.181377857923507690f, 
+  0.966496407985687260f, 0.179947525262832640f, 0.967046260833740230f, 
+  0.178515478968620300f, 0.967591762542724610f, 0.177081763744354250f, 
+  0.968132853507995610f, 0.175646379590034480f, 
+  0.968669533729553220f, 0.174209341406822200f, 0.969201743602752690f, 
+  0.172770664095878600f, 0.969729602336883540f, 0.171330362558364870f, 
+  0.970253050327301030f, 0.169888436794281010f, 
+  0.970772027969360350f, 0.168444931507110600f, 0.971286594867706300f, 
+  0.166999831795692440f, 0.971796751022338870f, 0.165553152561187740f, 
+  0.972302436828613280f, 0.164104923605918880f, 
+  0.972803652286529540f, 0.162655144929885860f, 0.973300457000732420f, 
+  0.161203846335411070f, 0.973792791366577150f, 0.159751012921333310f, 
+  0.974280655384063720f, 0.158296689391136170f, 
+  0.974764108657836910f, 0.156840875744819640f, 0.975243031978607180f, 
+  0.155383571982383730f, 0.975717484951019290f, 0.153924822807312010f, 
+  0.976187527179718020f, 0.152464613318443300f, 
+  0.976653039455413820f, 0.151002973318099980f, 0.977114021778106690f, 
+  0.149539917707443240f, 0.977570593357086180f, 0.148075446486473080f, 
+  0.978022634983062740f, 0.146609574556350710f, 
+  0.978470146656036380f, 0.145142331719398500f, 0.978913187980651860f, 
+  0.143673732876777650f, 0.979351758956909180f, 0.142203763127326970f, 
+  0.979785740375518800f, 0.140732467174530030f, 
+  0.980215251445770260f, 0.139259845018386840f, 0.980640232563018800f, 
+  0.137785911560058590f, 0.981060683727264400f, 0.136310681700706480f, 
+  0.981476604938507080f, 0.134834155440330510f, 
+  0.981888055801391600f, 0.133356377482414250f, 0.982294917106628420f, 
+  0.131877332925796510f, 0.982697248458862300f, 0.130397051572799680f, 
+  0.983094990253448490f, 0.128915548324584960f, 
+  0.983488261699676510f, 0.127432823181152340f, 0.983876943588256840f, 
+  0.125948905944824220f, 0.984261035919189450f, 0.124463804066181180f, 
+  0.984640598297119140f, 0.122977524995803830f, 
+  0.985015630722045900f, 0.121490091085433960f, 0.985386073589324950f, 
+  0.120001509785652160f, 0.985751926898956300f, 0.118511803448200230f, 
+  0.986113250255584720f, 0.117020979523658750f, 
+  0.986469984054565430f, 0.115529052913188930f, 0.986822128295898440f, 
+  0.114036038517951970f, 0.987169682979583740f, 0.112541958689689640f, 
+  0.987512648105621340f, 0.111046813428401950f, 
+  0.987851083278656010f, 0.109550617635250090f, 0.988184869289398190f, 
+  0.108053401112556460f, 0.988514065742492680f, 0.106555156409740450f, 
+  0.988838672637939450f, 0.105055920779705050f, 
+  0.989158689975738530f, 0.103555686771869660f, 0.989474058151245120f, 
+  0.102054484188556670f, 0.989784896373748780f, 0.100552320480346680f, 
+  0.990091085433959960f, 0.099049203097820282f, 
+  0.990392625331878660f, 0.097545161843299866f, 0.990689575672149660f, 
+  0.096040196716785431f, 0.990981936454772950f, 0.094534330070018768f, 
+  0.991269648075103760f, 0.093027576804161072f, 
+  0.991552770137786870f, 0.091519944369792938f, 0.991831183433532710f, 
+  0.090011447668075562f, 0.992105066776275630f, 0.088502109050750732f, 
+  0.992374241352081300f, 0.086991935968399048f, 
+  0.992638826370239260f, 0.085480943322181702f, 0.992898762226104740f, 
+  0.083969146013259888f, 0.993154048919677730f, 0.082456558942794800f, 
+  0.993404686450958250f, 0.080943197011947632f, 
+  0.993650734424591060f, 0.079429075121879578f, 0.993892073631286620f, 
+  0.077914200723171234f, 0.994128763675689700f, 0.076398596167564392f, 
+  0.994360864162445070f, 0.074882268905639648f, 
+  0.994588255882263180f, 0.073365233838558197f, 0.994810998439788820f, 
+  0.071847513318061829f, 0.995029091835021970f, 0.070329122245311737f, 
+  0.995242536067962650f, 0.068810060620307922f, 
+  0.995451331138610840f, 0.067290350794792175f, 0.995655417442321780f, 
+  0.065770015120506287f, 0.995854854583740230f, 0.064249053597450256f, 
+  0.996049642562866210f, 0.062727488577365875f, 
+  0.996239781379699710f, 0.061205338686704636f, 0.996425211429595950f, 
+  0.059682607650756836f, 0.996605992317199710f, 0.058159314095973969f, 
+  0.996782064437866210f, 0.056635476648807526f, 
+  0.996953487396240230f, 0.055111102759838104f, 0.997120201587677000f, 
+  0.053586211055517197f, 0.997282266616821290f, 0.052060816437005997f, 
+  0.997439682483673100f, 0.050534930080175400f, 
+  0.997592389583587650f, 0.049008570611476898f, 0.997740387916564940f, 
+  0.047481749206781387f, 0.997883677482604980f, 0.045954477041959763f, 
+  0.998022377490997310f, 0.044426776468753815f, 
+  0.998156309127807620f, 0.042898654937744141f, 0.998285591602325440f, 
+  0.041370131075382233f, 0.998410165309906010f, 0.039841219782829285f, 
+  0.998530030250549320f, 0.038311932235956192f, 
+  0.998645246028900150f, 0.036782283335924149f, 0.998755753040313720f, 
+  0.035252287983894348f, 0.998861551284790040f, 0.033721961081027985f, 
+  0.998962640762329100f, 0.032191313803195953f, 
+  0.999059081077575680f, 0.030660368502140045f, 0.999150753021240230f, 
+  0.029129132628440857f, 0.999237775802612300f, 0.027597622945904732f, 
+  0.999320089817047120f, 0.026065852493047714f, 
+  0.999397754669189450f, 0.024533838033676147f, 0.999470651149749760f, 
+  0.023001590743660927f, 0.999538838863372800f, 0.021469129249453545f, 
+  0.999602377414703370f, 0.019936462864279747f, 
+  0.999661207199096680f, 0.018403612077236176f, 0.999715328216552730f, 
+  0.016870586201548576f, 0.999764680862426760f, 0.015337402001023293f, 
+  0.999809384346008300f, 0.013804072514176369f, 
+  0.999849438667297360f, 0.012270614504814148f, 0.999884724617004390f, 
+  0.010737040080130100f, 0.999915301799774170f, 0.009203365072607994f, 
+  0.999941170215606690f, 0.007669602986425161f, 
+  0.999962329864501950f, 0.006135769188404083f, 0.999978840351104740f, 
+  0.004601877182722092f, 0.999990582466125490f, 0.003067942336201668f, 
+  0.999997675418853760f, 0.001533978385850787f, 
+  1.000000000000000000f, 0.000000000000023345f, 0.999997675418853760f, 
+  -0.001533978385850787f, 0.999990582466125490f, -0.003067942336201668f, 
+  0.999978840351104740f, -0.004601877182722092f, 
+  0.999962329864501950f, -0.006135769188404083f, 0.999941170215606690f, 
+  -0.007669602986425161f, 0.999915301799774170f, -0.009203365072607994f, 
+  0.999884724617004390f, -0.010737040080130100f, 
+  0.999849438667297360f, -0.012270614504814148f, 0.999809384346008300f, 
+  -0.013804072514176369f, 0.999764680862426760f, -0.015337402001023293f, 
+  0.999715328216552730f, -0.016870586201548576f, 
+  0.999661207199096680f, -0.018403612077236176f, 0.999602377414703370f, 
+  -0.019936462864279747f, 0.999538838863372800f, -0.021469129249453545f, 
+  0.999470651149749760f, -0.023001590743660927f, 
+  0.999397754669189450f, -0.024533838033676147f, 0.999320089817047120f, 
+  -0.026065852493047714f, 0.999237775802612300f, -0.027597622945904732f, 
+  0.999150753021240230f, -0.029129132628440857f, 
+  0.999059081077575680f, -0.030660368502140045f, 0.998962640762329100f, 
+  -0.032191313803195953f, 0.998861551284790040f, -0.033721961081027985f, 
+  0.998755753040313720f, -0.035252287983894348f, 
+  0.998645246028900150f, -0.036782283335924149f, 0.998530030250549320f, 
+  -0.038311932235956192f, 0.998410165309906010f, -0.039841219782829285f, 
+  0.998285591602325440f, -0.041370131075382233f, 
+  0.998156309127807620f, -0.042898654937744141f, 0.998022377490997310f, 
+  -0.044426776468753815f, 0.997883677482604980f, -0.045954477041959763f, 
+  0.997740387916564940f, -0.047481749206781387f, 
+  0.997592389583587650f, -0.049008570611476898f, 0.997439682483673100f, 
+  -0.050534930080175400f, 0.997282266616821290f, -0.052060816437005997f, 
+  0.997120201587677000f, -0.053586211055517197f, 
+  0.996953487396240230f, -0.055111102759838104f, 0.996782064437866210f, 
+  -0.056635476648807526f, 0.996605992317199710f, -0.058159314095973969f, 
+  0.996425211429595950f, -0.059682607650756836f, 
+  0.996239781379699710f, -0.061205338686704636f, 0.996049642562866210f, 
+  -0.062727488577365875f, 0.995854854583740230f, -0.064249053597450256f, 
+  0.995655417442321780f, -0.065770015120506287f, 
+  0.995451331138610840f, -0.067290350794792175f, 0.995242536067962650f, 
+  -0.068810060620307922f, 0.995029091835021970f, -0.070329122245311737f, 
+  0.994810998439788820f, -0.071847513318061829f, 
+  0.994588255882263180f, -0.073365233838558197f, 0.994360864162445070f, 
+  -0.074882268905639648f, 0.994128763675689700f, -0.076398596167564392f, 
+  0.993892073631286620f, -0.077914200723171234f, 
+  0.993650734424591060f, -0.079429075121879578f, 0.993404686450958250f, 
+  -0.080943197011947632f, 0.993154048919677730f, -0.082456558942794800f, 
+  0.992898762226104740f, -0.083969146013259888f, 
+  0.992638826370239260f, -0.085480943322181702f, 0.992374241352081300f, 
+  -0.086991935968399048f, 0.992105066776275630f, -0.088502109050750732f, 
+  0.991831183433532710f, -0.090011447668075562f, 
+  0.991552770137786870f, -0.091519944369792938f, 0.991269648075103760f, 
+  -0.093027576804161072f, 0.990981936454772950f, -0.094534330070018768f, 
+  0.990689575672149660f, -0.096040196716785431f, 
+  0.990392625331878660f, -0.097545161843299866f, 0.990091085433959960f, 
+  -0.099049203097820282f, 0.989784896373748780f, -0.100552320480346680f, 
+  0.989474058151245120f, -0.102054484188556670f, 
+  0.989158689975738530f, -0.103555686771869660f, 0.988838672637939450f, 
+  -0.105055920779705050f, 0.988514065742492680f, -0.106555156409740450f, 
+  0.988184869289398190f, -0.108053401112556460f, 
+  0.987851083278656010f, -0.109550617635250090f, 0.987512648105621340f, 
+  -0.111046813428401950f, 0.987169682979583740f, -0.112541958689689640f, 
+  0.986822128295898440f, -0.114036038517951970f, 
+  0.986469984054565430f, -0.115529052913188930f, 0.986113250255584720f, 
+  -0.117020979523658750f, 0.985751926898956300f, -0.118511803448200230f, 
+  0.985386073589324950f, -0.120001509785652160f, 
+  0.985015630722045900f, -0.121490091085433960f, 0.984640598297119140f, 
+  -0.122977524995803830f, 0.984261035919189450f, -0.124463804066181180f, 
+  0.983876943588256840f, -0.125948905944824220f, 
+  0.983488261699676510f, -0.127432823181152340f, 0.983094990253448490f, 
+  -0.128915548324584960f, 0.982697248458862300f, -0.130397051572799680f, 
+  0.982294917106628420f, -0.131877332925796510f, 
+  0.981888055801391600f, -0.133356377482414250f, 0.981476604938507080f, 
+  -0.134834155440330510f, 0.981060683727264400f, -0.136310681700706480f, 
+  0.980640232563018800f, -0.137785911560058590f, 
+  0.980215251445770260f, -0.139259845018386840f, 0.979785740375518800f, 
+  -0.140732467174530030f, 0.979351758956909180f, -0.142203763127326970f, 
+  0.978913187980651860f, -0.143673732876777650f, 
+  0.978470146656036380f, -0.145142331719398500f, 0.978022634983062740f, 
+  -0.146609574556350710f, 0.977570593357086180f, -0.148075446486473080f, 
+  0.977114021778106690f, -0.149539917707443240f, 
+  0.976653039455413820f, -0.151002973318099980f, 0.976187527179718020f, 
+  -0.152464613318443300f, 0.975717484951019290f, -0.153924822807312010f, 
+  0.975243031978607180f, -0.155383571982383730f, 
+  0.974764108657836910f, -0.156840875744819640f, 0.974280655384063720f, 
+  -0.158296689391136170f, 0.973792791366577150f, -0.159751012921333310f, 
+  0.973300457000732420f, -0.161203846335411070f, 
+  0.972803652286529540f, -0.162655144929885860f, 0.972302436828613280f, 
+  -0.164104923605918880f, 0.971796751022338870f, -0.165553152561187740f, 
+  0.971286594867706300f, -0.166999831795692440f, 
+  0.970772027969360350f, -0.168444931507110600f, 0.970253050327301030f, 
+  -0.169888436794281010f, 0.969729602336883540f, -0.171330362558364870f, 
+  0.969201743602752690f, -0.172770664095878600f, 
+  0.968669533729553220f, -0.174209341406822200f, 0.968132853507995610f, 
+  -0.175646379590034480f, 0.967591762542724610f, -0.177081763744354250f, 
+  0.967046260833740230f, -0.178515478968620300f, 
+  0.966496407985687260f, -0.179947525262832640f, 0.965942144393920900f, 
+  -0.181377857923507690f, 0.965383470058441160f, -0.182806491851806640f, 
+  0.964820444583892820f, -0.184233412146568300f, 
+  0.964253067970275880f, -0.185658603906631470f, 0.963681280612945560f, 
+  -0.187082037329673770f, 0.963105142116546630f, -0.188503712415695190f, 
+  0.962524592876434330f, -0.189923599362373350f, 
+  0.961939752101898190f, -0.191341713070869450f, 0.961350560188293460f, 
+  -0.192758023738861080f, 0.960757017135620120f, -0.194172516465187070f, 
+  0.960159122943878170f, -0.195585191249847410f, 
+  0.959556937217712400f, -0.196996018290519710f, 0.958950400352478030f, 
+  -0.198404997587203980f, 0.958339512348175050f, -0.199812099337577820f, 
+  0.957724332809448240f, -0.201217323541641240f, 
+  0.957104861736297610f, -0.202620655298233030f, 0.956481099128723140f, 
+  -0.204022079706192020f, 0.955853044986724850f, -0.205421581864356990f, 
+  0.955220639705657960f, -0.206819161772727970f, 
+  0.954584002494812010f, -0.208214774727821350f, 0.953943073749542240f, 
+  -0.209608450531959530f, 0.953297853469848630f, -0.211000129580497740f, 
+  0.952648401260375980f, -0.212389841675758360f, 
+  0.951994657516479490f, -0.213777542114257810f, 0.951336681842803960f, 
+  -0.215163245797157290f, 0.950674414634704590f, -0.216546908020973210f, 
+  0.950007975101470950f, -0.217928543686866760f, 
+  0.949337244033813480f, -0.219308122992515560f, 0.948662281036376950f, 
+  -0.220685631036758420f, 0.947983145713806150f, -0.222061067819595340f, 
+  0.947299718856811520f, -0.223434418439865110f, 
+  0.946612179279327390f, -0.224805667996406560f, 0.945920348167419430f, 
+  -0.226174786686897280f, 0.945224344730377200f, -0.227541789412498470f, 
+  0.944524168968200680f, -0.228906646370887760f, 
+  0.943819820880889890f, -0.230269357562065120f, 0.943111240863800050f, 
+  -0.231629893183708190f, 0.942398548126220700f, -0.232988253235816960f, 
+  0.941681683063507080f, -0.234344407916069030f, 
+  0.940960645675659180f, -0.235698372125625610f, 0.940235435962677000f, 
+  -0.237050101161003110f, 0.939506113529205320f, -0.238399609923362730f, 
+  0.938772618770599370f, -0.239746883511543270f, 
+  0.938035070896148680f, -0.241091892123222350f, 0.937293350696563720f, 
+  -0.242434620857238770f, 0.936547517776489260f, -0.243775084614753720f, 
+  0.935797572135925290f, -0.245113238692283630f, 
+  0.935043513774871830f, -0.246449097990989690f, 0.934285342693328860f, 
+  -0.247782632708549500f, 0.933523118495941160f, -0.249113827943801880f, 
+  0.932756841182708740f, -0.250442683696746830f, 
+  0.931986451148986820f, -0.251769185066223140f, 0.931211948394775390f, 
+  -0.253093332052230830f, 0.930433452129364010f, -0.254415065050125120f, 
+  0.929650902748107910f, -0.255734413862228390f, 
+  0.928864300251007080f, -0.257051378488540650f, 0.928073644638061520f, 
+  -0.258365899324417110f, 0.927278995513916020f, -0.259678006172180180f, 
+  0.926480293273925780f, -0.260987639427185060f, 
+  0.925677597522735600f, -0.262294828891754150f, 0.924870908260345460f, 
+  -0.263599574565887450f, 0.924060165882110600f, -0.264901816844940190f, 
+  0.923245489597320560f, -0.266201555728912350f, 
+  0.922426760196685790f, -0.267498821020126340f, 0.921604096889495850f, 
+  -0.268793523311614990f, 0.920777499675750730f, -0.270085722208023070f, 
+  0.919946908950805660f, -0.271375387907028200f, 
+  0.919112324714660640f, -0.272662490606307980f, 0.918273866176605220f, 
+  -0.273947030305862430f, 0.917431414127349850f, -0.275228977203369140f, 
+  0.916585087776184080f, -0.276508361101150510f, 
+  0.915734827518463130f, -0.277785122394561770f, 0.914880633354187010f, 
+  -0.279059261083602910f, 0.914022505283355710f, -0.280330777168273930f, 
+  0.913160502910614010f, -0.281599670648574830f, 
+  0.912294626235961910f, -0.282865911722183230f, 0.911424875259399410f, 
+  -0.284129470586776730f, 0.910551249980926510f, -0.285390377044677730f, 
+  0.909673750400543210f, -0.286648571491241460f, 
+  0.908792436122894290f, -0.287904083728790280f, 0.907907187938690190f, 
+  -0.289156883955001830f, 0.907018184661865230f, -0.290406972169876100f, 
+  0.906125307083129880f, -0.291654318571090700f, 
+  0.905228614807128910f, -0.292898923158645630f, 0.904328107833862300f, 
+  -0.294140785932540890f, 0.903423786163330080f, -0.295379847288131710f, 
+  0.902515649795532230f, -0.296616137027740480f, 
+  0.901603758335113530f, -0.297849655151367190f, 0.900688111782073970f, 
+  -0.299080342054367070f, 0.899768650531768800f, -0.300308227539062500f, 
+  0.898845434188842770f, -0.301533311605453490f, 
+  0.897918462753295900f, -0.302755534648895260f, 0.896987736225128170f, 
+  -0.303974896669387820f, 0.896053314208984380f, -0.305191397666931150f, 
+  0.895115137100219730f, -0.306405037641525270f, 
+  0.894173204898834230f, -0.307615786790847780f, 0.893227577209472660f, 
+  -0.308823645114898680f, 0.892278313636779790f, -0.310028612613677980f, 
+  0.891325294971466060f, -0.311230629682540890f, 
+  0.890368640422821040f, -0.312429755926132200f, 0.889408230781555180f, 
+  -0.313625901937484740f, 0.888444244861602780f, -0.314819127321243290f, 
+  0.887476563453674320f, -0.316009372472763060f, 
+  0.886505246162414550f, -0.317196637392044070f, 0.885530233383178710f, 
+  -0.318380922079086300f, 0.884551644325256350f, -0.319562226533889770f, 
+  0.883569478988647460f, -0.320740520954132080f, 
+  0.882583618164062500f, -0.321915775537490840f, 0.881594181060791020f, 
+  -0.323088020086288450f, 0.880601167678833010f, -0.324257194995880130f, 
+  0.879604578018188480f, -0.325423330068588260f, 
+  0.878604412078857420f, -0.326586425304412840f, 0.877600669860839840f, 
+  -0.327746421098709110f, 0.876593410968780520f, -0.328903347253799440f, 
+  0.875582575798034670f, -0.330057173967361450f, 
+  0.874568223953247070f, -0.331207901239395140f, 0.873550295829772950f, 
+  -0.332355499267578130f, 0.872528910636901860f, -0.333499968051910400f, 
+  0.871503949165344240f, -0.334641307592391970f, 
+  0.870475590229034420f, -0.335779488086700440f, 0.869443655014038090f, 
+  -0.336914509534835820f, 0.868408262729644780f, -0.338046342134475710f, 
+  0.867369413375854490f, -0.339175015687942500f, 
+  0.866327106952667240f, -0.340300500392913820f, 0.865281403064727780f, 
+  -0.341422766447067260f, 0.864232182502746580f, -0.342541843652725220f, 
+  0.863179564476013180f, -0.343657672405242920f, 
+  0.862123548984527590f, -0.344770282506942750f, 0.861064076423645020f, 
+  -0.345879614353179930f, 0.860001266002655030f, -0.346985727548599240f, 
+  0.858934998512268070f, -0.348088562488555910f, 
+  0.857865393161773680f, -0.349188119173049930f, 0.856792449951171880f, 
+  -0.350284397602081300f, 0.855716109275817870f, -0.351377367973327640f, 
+  0.854636430740356450f, -0.352467030286788940f, 
+  0.853553414344787600f, -0.353553384542465210f, 0.852467060089111330f, 
+  -0.354636400938034060f, 0.851377367973327640f, -0.355716109275817870f, 
+  0.850284397602081300f, -0.356792420148849490f, 
+  0.849188148975372310f, -0.357865422964096070f, 0.848088562488555910f, 
+  -0.358935028314590450f, 0.846985757350921630f, -0.360001266002655030f, 
+  0.845879614353179930f, -0.361064106225967410f, 
+  0.844770252704620360f, -0.362123548984527590f, 0.843657672405242920f, 
+  -0.363179564476013180f, 0.842541813850402830f, -0.364232182502746580f, 
+  0.841422796249389650f, -0.365281373262405400f, 
+  0.840300500392913820f, -0.366327136754989620f, 0.839175045490264890f, 
+  -0.367369443178176880f, 0.838046371936798100f, -0.368408292531967160f, 
+  0.836914479732513430f, -0.369443655014038090f, 
+  0.835779488086700440f, -0.370475560426712040f, 0.834641277790069580f, 
+  -0.371503978967666630f, 0.833499968051910400f, -0.372528880834579470f, 
+  0.832355499267578130f, -0.373550295829772950f, 
+  0.831207871437072750f, -0.374568194150924680f, 0.830057144165039060f, 
+  -0.375582575798034670f, 0.828903317451477050f, -0.376593410968780520f, 
+  0.827746450901031490f, -0.377600699663162230f, 
+  0.826586425304412840f, -0.378604412078857420f, 0.825423359870910640f, 
+  -0.379604607820510860f, 0.824257194995880130f, -0.380601197481155400f, 
+  0.823087990283966060f, -0.381594210863113400f, 
+  0.821915745735168460f, -0.382583618164062500f, 0.820740520954132080f, 
+  -0.383569449186325070f, 0.819562196731567380f, -0.384551674127578740f, 
+  0.818380951881408690f, -0.385530263185501100f, 
+  0.817196667194366460f, -0.386505216360092160f, 0.816009342670440670f, 
+  -0.387476563453674320f, 0.814819097518920900f, -0.388444244861602780f, 
+  0.813625931739807130f, -0.389408260583877560f, 
+  0.812429726123809810f, -0.390368610620498660f, 0.811230659484863280f, 
+  -0.391325294971466060f, 0.810028612613677980f, -0.392278283834457400f, 
+  0.808823645114898680f, -0.393227607011795040f, 
+  0.807615816593170170f, -0.394173204898834230f, 0.806405067443847660f, 
+  -0.395115107297897340f, 0.805191397666931150f, -0.396053284406661990f, 
+  0.803974866867065430f, -0.396987736225128170f, 
+  0.802755534648895260f, -0.397918462753295900f, 0.801533281803131100f, 
+  -0.398845434188842770f, 0.800308227539062500f, -0.399768620729446410f, 
+  0.799080371856689450f, -0.400688081979751590f, 
+  0.797849655151367190f, -0.401603758335113530f, 0.796616137027740480f, 
+  -0.402515679597854610f, 0.795379877090454100f, -0.403423786163330080f, 
+  0.794140756130218510f, -0.404328078031539920f, 
+  0.792898952960968020f, -0.405228585004806520f, 0.791654348373413090f, 
+  -0.406125307083129880f, 0.790407001972198490f, -0.407018154859542850f, 
+  0.789156913757324220f, -0.407907217741012570f, 
+  0.787904083728790280f, -0.408792406320571900f, 0.786648571491241460f, 
+  -0.409673750400543210f, 0.785390377044677730f, -0.410551249980926510f, 
+  0.784129500389099120f, -0.411424905061721800f, 
+  0.782865881919860840f, -0.412294656038284300f, 0.781599700450897220f, 
+  -0.413160532712936400f, 0.780330777168273930f, -0.414022535085678100f, 
+  0.779059290885925290f, -0.414880603551864620f, 
+  0.777785122394561770f, -0.415734797716140750f, 0.776508331298828130f, 
+  -0.416585087776184080f, 0.775228977203369140f, -0.417431443929672240f, 
+  0.773947000503540040f, -0.418273866176605220f, 
+  0.772662520408630370f, -0.419112354516983030f, 0.771375417709350590f, 
+  -0.419946908950805660f, 0.770085752010345460f, -0.420777499675750730f, 
+  0.768793523311614990f, -0.421604126691818240f, 
+  0.767498791217803960f, -0.422426789999008180f, 0.766201555728912350f, 
+  -0.423245459794998170f, 0.764901816844940190f, -0.424060165882110600f, 
+  0.763599574565887450f, -0.424870878458023070f, 
+  0.762294828891754150f, -0.425677597522735600f, 0.760987639427185060f, 
+  -0.426480293273925780f, 0.759678006172180180f, -0.427278995513916020f, 
+  0.758365929126739500f, -0.428073674440383910f, 
+  0.757051348686218260f, -0.428864300251007080f, 0.755734443664550780f, 
+  -0.429650902748107910f, 0.754415094852447510f, -0.430433481931686400f, 
+  0.753093302249908450f, -0.431211978197097780f, 
+  0.751769185066223140f, -0.431986421346664430f, 0.750442683696746830f, 
+  -0.432756811380386350f, 0.749113857746124270f, -0.433523118495941160f, 
+  0.747782647609710690f, -0.434285342693328860f, 
+  0.746449112892150880f, -0.435043483972549440f, 0.745113253593444820f, 
+  -0.435797542333602910f, 0.743775069713592530f, -0.436547487974166870f, 
+  0.742434620857238770f, -0.437293320894241330f, 
+  0.741091907024383540f, -0.438035041093826290f, 0.739746868610382080f, 
+  -0.438772648572921750f, 0.738399624824523930f, -0.439506113529205320f, 
+  0.737050116062164310f, -0.440235435962677000f, 
+  0.735698342323303220f, -0.440960645675659180f, 0.734344422817230220f, 
+  -0.441681683063507080f, 0.732988238334655760f, -0.442398548126220700f, 
+  0.731629908084869380f, -0.443111270666122440f, 
+  0.730269372463226320f, -0.443819820880889890f, 0.728906631469726560f, 
+  -0.444524168968200680f, 0.727541804313659670f, -0.445224374532699580f, 
+  0.726174771785736080f, -0.445920348167419430f, 
+  0.724805653095245360f, -0.446612149477005000f, 0.723434448242187500f, 
+  -0.447299748659133910f, 0.722061097621917720f, -0.447983115911483760f, 
+  0.720685660839080810f, -0.448662281036376950f, 
+  0.719308137893676760f, -0.449337244033813480f, 0.717928528785705570f, 
+  -0.450007945299148560f, 0.716546893119812010f, -0.450674414634704590f, 
+  0.715163230895996090f, -0.451336652040481570f, 
+  0.713777542114257810f, -0.451994657516479490f, 0.712389826774597170f, 
+  -0.452648371458053590f, 0.711000144481658940f, -0.453297853469848630f, 
+  0.709608435630798340f, -0.453943043947219850f, 
+  0.708214759826660160f, -0.454584002494812010f, 0.706819176673889160f, 
+  -0.455220639705657960f, 0.705421566963195800f, -0.455853015184402470f, 
+  0.704022109508514400f, -0.456481099128723140f, 
+  0.702620685100555420f, -0.457104891538620000f, 0.701217353343963620f, 
+  -0.457724362611770630f, 0.699812114238739010f, -0.458339542150497440f, 
+  0.698404967784881590f, -0.458950400352478030f, 
+  0.696996033191680910f, -0.459556937217712400f, 0.695585191249847410f, 
+  -0.460159152746200560f, 0.694172501564025880f, -0.460757017135620120f, 
+  0.692758023738861080f, -0.461350560188293460f, 
+  0.691341698169708250f, -0.461939752101898190f, 0.689923584461212160f, 
+  -0.462524622678756710f, 0.688503682613372800f, -0.463105112314224240f, 
+  0.687082052230834960f, -0.463681250810623170f, 
+  0.685658574104309080f, -0.464253038167953490f, 0.684233427047729490f, 
+  -0.464820444583892820f, 0.682806491851806640f, -0.465383470058441160f, 
+  0.681377887725830080f, -0.465942144393920900f, 
+  0.679947495460510250f, -0.466496407985687260f, 0.678515493869781490f, 
+  -0.467046260833740230f, 0.677081763744354250f, -0.467591762542724610f, 
+  0.675646364688873290f, -0.468132823705673220f, 
+  0.674209356307983400f, -0.468669503927230830f, 0.672770678997039790f, 
+  -0.469201773405075070f, 0.671330332756042480f, -0.469729602336883540f, 
+  0.669888436794281010f, -0.470253020524978640f, 
+  0.668444931507110600f, -0.470772027969360350f, 0.666999816894531250f, 
+  -0.471286594867706300f, 0.665553152561187740f, -0.471796721220016480f, 
+  0.664104938507080080f, -0.472302407026290890f, 
+  0.662655174732208250f, -0.472803652286529540f, 0.661203861236572270f, 
+  -0.473300457000732420f, 0.659750998020172120f, -0.473792791366577150f, 
+  0.658296704292297360f, -0.474280685186386110f, 
+  0.656840860843658450f, -0.474764078855514530f, 0.655383586883544920f, 
+  -0.475243031978607180f, 0.653924822807312010f, -0.475717514753341670f, 
+  0.652464628219604490f, -0.476187497377395630f, 
+  0.651003003120422360f, -0.476653009653091430f, 0.649539887905120850f, 
+  -0.477114051580429080f, 0.648075461387634280f, -0.477570593357086180f, 
+  0.646609604358673100f, -0.478022634983062740f, 
+  0.645142316818237300f, -0.478470176458358760f, 0.643673717975616460f, 
+  -0.478913217782974240f, 0.642203748226165770f, -0.479351729154586790f, 
+  0.640732467174530030f, -0.479785770177841190f, 
+  0.639259815216064450f, -0.480215251445770260f, 0.637785911560058590f, 
+  -0.480640232563018800f, 0.636310696601867680f, -0.481060713529586790f, 
+  0.634834170341491700f, -0.481476634740829470f, 
+  0.633356392383575440f, -0.481888025999069210f, 0.631877362728118900f, 
+  -0.482294887304306030f, 0.630397081375122070f, -0.482697218656539920f, 
+  0.628915548324584960f, -0.483094990253448490f, 
+  0.627432823181152340f, -0.483488231897354130f, 0.625948905944824220f, 
+  -0.483876913785934450f, 0.624463796615600590f, -0.484261035919189450f, 
+  0.622977554798126220f, -0.484640628099441530f, 
+  0.621490061283111570f, -0.485015630722045900f, 0.620001494884490970f, 
+  -0.485386073589324950f, 0.618511795997619630f, -0.485751956701278690f, 
+  0.617020964622497560f, -0.486113250255584720f, 
+  0.615529060363769530f, -0.486469984054565430f, 0.614036023616790770f, 
+  -0.486822128295898440f, 0.612541973590850830f, -0.487169682979583740f, 
+  0.611046791076660160f, -0.487512677907943730f, 
+  0.609550595283508300f, -0.487851053476333620f, 0.608053386211395260f, 
+  -0.488184869289398190f, 0.606555163860321040f, -0.488514065742492680f, 
+  0.605055928230285640f, -0.488838672637939450f, 
+  0.603555679321289060f, -0.489158689975738530f, 0.602054476737976070f, 
+  -0.489474087953567500f, 0.600552320480346680f, -0.489784896373748780f, 
+  0.599049210548400880f, -0.490091055631637570f, 
+  0.597545146942138670f, -0.490392625331878660f, 0.596040189266204830f, 
+  -0.490689605474472050f, 0.594534337520599370f, -0.490981936454772950f, 
+  0.593027591705322270f, -0.491269648075103760f, 
+  0.591519951820373540f, -0.491552740335464480f, 0.590011477470397950f, 
+  -0.491831213235855100f, 0.588502109050750730f, -0.492105036973953250f, 
+  0.586991965770721440f, -0.492374241352081300f, 
+  0.585480928421020510f, -0.492638826370239260f, 0.583969175815582280f, 
+  -0.492898762226104740f, 0.582456588745117190f, -0.493154048919677730f, 
+  0.580943167209625240f, -0.493404686450958250f, 
+  0.579429090023040770f, -0.493650704622268680f, 0.577914178371429440f, 
+  -0.493892073631286620f, 0.576398611068725590f, -0.494128793478012080f, 
+  0.574882268905639650f, -0.494360834360122680f, 
+  0.573365211486816410f, -0.494588255882263180f, 0.571847498416900630f, 
+  -0.494810998439788820f, 0.570329129695892330f, -0.495029091835021970f, 
+  0.568810045719146730f, -0.495242536067962650f, 
+  0.567290365695953370f, -0.495451331138610840f, 0.565770030021667480f, 
+  -0.495655417442321780f, 0.564249038696289060f, -0.495854884386062620f, 
+  0.562727510929107670f, -0.496049642562866210f, 
+  0.561205327510833740f, -0.496239781379699710f, 0.559682607650756840f, 
+  -0.496425211429595950f, 0.558159291744232180f, -0.496605962514877320f, 
+  0.556635499000549320f, -0.496782064437866210f, 
+  0.555111110210418700f, -0.496953487396240230f, 0.553586184978485110f, 
+  -0.497120231389999390f, 0.552060842514038090f, -0.497282296419143680f, 
+  0.550534904003143310f, -0.497439652681350710f, 
+  0.549008548259735110f, -0.497592359781265260f, 0.547481775283813480f, 
+  -0.497740387916564940f, 0.545954465866088870f, -0.497883707284927370f, 
+  0.544426798820495610f, -0.498022347688674930f, 
+  0.542898654937744140f, -0.498156309127807620f, 0.541370153427124020f, 
+  -0.498285561800003050f, 0.539841234683990480f, -0.498410135507583620f, 
+  0.538311958312988280f, -0.498530030250549320f, 
+  0.536782264709472660f, -0.498645216226577760f, 0.535252273082733150f, 
+  -0.498755723237991330f, 0.533721983432769780f, -0.498861521482467650f, 
+  0.532191336154937740f, -0.498962640762329100f, 
+  0.530660390853881840f, -0.499059051275253300f, 0.529129147529602050f, 
+  -0.499150782823562620f, 0.527597606182098390f, -0.499237775802612300f, 
+  0.526065826416015630f, -0.499320119619369510f, 
+  0.524533808231353760f, -0.499397724866867070f, 0.523001611232757570f, 
+  -0.499470651149749760f, 0.521469116210937500f, -0.499538868665695190f, 
+  0.519936442375183110f, -0.499602377414703370f, 
+  0.518403589725494380f, -0.499661177396774290f, 0.516870558261871340f, 
+  -0.499715298414230350f, 0.515337407588958740f, -0.499764710664749150f, 
+  0.513804078102111820f, -0.499809414148330690f, 
+  0.512270629405975340f, -0.499849408864974980f, 0.510737061500549320f, 
+  -0.499884694814682010f, 0.509203374385833740f, -0.499915301799774170f, 
+  0.507669627666473390f, -0.499941170215606690f, 
+  0.506135761737823490f, -0.499962359666824340f, 0.504601895809173580f, 
+  -0.499978810548782350f, 0.503067970275878910f, -0.499990582466125490f, 
+  0.501533985137939450f, -0.499997645616531370f 
+}; 
+ 
+ 
+ 
+/**
+ * @brief  Initialization function for the floating-point RFFT/RIFFT.
+ * @param[in,out] *S             points to an instance of the floating-point RFFT/RIFFT structure.
+ * @param[in,out] *S_CFFT        points to an instance of the floating-point CFFT/CIFFT structure.
+ * @param[in]     fftLenReal     length of the real FFT.
+ * @param[in]     ifftFlagR      flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform.
+ * @param[in]     bitReverseFlag flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output.
+ * @return        ARM_MATH_SUCCESS if initialization succeeds; ARM_MATH_ARGUMENT_ERROR if
+ *                <code>fftLenReal</code> is not a supported value; otherwise the status
+ *                reported by arm_cfft_radix4_init_f32() for the half-length complex FFT.
+ *
+ * \par Description:
+ * \par
+ * The parameter <code>fftLenReal</code> specifies the length of the RFFT/RIFFT process.
+ * Supported FFT lengths are 128, 512 and 2048.
+ * \par
+ * The parameter <code>ifftFlagR</code> controls whether a forward or inverse transform is computed.
+ * Set(=1) ifftFlagR to calculate RIFFT, otherwise RFFT is calculated.
+ * \par
+ * The parameter <code>bitReverseFlag</code> controls whether output is in normal order or bit reversed order.
+ * Set(=1) bitReverseFlag for output to be in normal order otherwise output is in bit reversed order.
+ * \par
+ * This function also initializes the twiddle factor table pointers and the complex
+ * FFT instance <code>S_CFFT</code>, which is set up for <code>fftLenReal/2</code> points.
+ */
+
+arm_status arm_rfft_init_f32(
+  arm_rfft_instance_f32 * S,
+  arm_cfft_radix4_instance_f32 * S_CFFT,
+  uint32_t fftLenReal,
+  uint32_t ifftFlagR,
+  uint32_t bitReverseFlag)
+{
+  /*  Status of the real FFT length check */
+  arm_status status = ARM_MATH_SUCCESS;
+  /*  Status reported by the complex FFT initialisation */
+  arm_status cfftStatus;
+
+  /*  Initialize the Real FFT length */
+  S->fftLenReal = (uint16_t) fftLenReal;
+
+  /*  Initialize the Complex FFT length (half of the stored real length) */
+  S->fftLenBy2 = (uint16_t) (S->fftLenReal / 2u);
+
+  /*  Initialize the Twiddle coefficientA pointer */
+  S->pTwiddleAReal = (float32_t *) realCoefA;
+
+  /*  Initialize the Twiddle coefficientB pointer */
+  S->pTwiddleBReal = (float32_t *) realCoefB;
+
+  /*  Initialize the Flag for selection of RFFT or RIFFT */
+  S->ifftFlagR = (uint8_t) ifftFlagR;
+
+  /*  Initialize the Flag for calculation Bit reversal or not */
+  S->bitReverseFlagR = (uint8_t) bitReverseFlag;
+
+  /*  Select the twiddle table modifier from the caller's full 32-bit length, so a
+   *  value that only matches a supported size after 16-bit truncation is rejected */
+  switch (fftLenReal)
+  {
+  case 2048u:
+    S->twidCoefRModifier = 1u;
+    break;
+  case 512u:
+    S->twidCoefRModifier = 4u;
+    break;
+  case 128u:
+    S->twidCoefRModifier = 16u;
+    break;
+  default:
+    /*  Reporting argument error if fftLenReal is not valid value */
+    status = ARM_MATH_ARGUMENT_ERROR;
+    break;
+  }
+
+  /* Init Complex FFT Instance (CIFFT or CFFT) for fftLenReal/2 length */
+  S->pCfft = S_CFFT;
+  cfftStatus = arm_cfft_radix4_init_f32(S->pCfft, S->fftLenBy2,
+                                        (uint8_t) (S->ifftFlagR ? 1u : 0u), 0u);
+
+  /* A length error takes precedence; otherwise report the CFFT init status */
+  if(status == ARM_MATH_SUCCESS)
+  {
+    status = cfftStatus;
+  }
+
+  return (status);
+}
+ 
+  /**  
+   * @} end of RFFT_RIFFT group  
+   */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/TransformFunctions/arm_rfft_init_q15.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,685 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:	    arm_rfft_init_q15.c  
+*  
+* Description:	RFFT & RIFFT Q15 initialisation function  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* -------------------------------------------------------------------- */ 
+ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupTransforms  
+ */ 
+ 
+/**  
+ * @addtogroup RFFT_RIFFT  
+ * @{  
+ */ 
+ 
+ 
+ 
+/**  
+* \par  
+* Generation of the floating point realCoefA array:  
+* \par  
+* n = 1024  
+* <pre>for (i = 0; i < n; i++)  
+*  {  
+*    pATable[2 * i] = 0.5 * (1.0 - sin (2 * PI / (double) (2 * n) * (double) i));  
+*    pATable[2 * i + 1] = 0.5 * (-1.0 * cos (2 * PI / (double) (2 * n) * (double) i));  
+*  } </pre>  
+* \par  
+* Convert to fixed point Q15 format  
+*       round(pATable[i] * pow(2, 15))  
+*/ 
+ 
+ 
+static const q15_t realCoefAQ15[2048] = { 
+ 
+  0x4000, 0xc000, 0x3fce, 0xc000, 0x3f9b, 0xc000, 0x3f69, 0xc001, 
+  0x3f37, 0xc001, 0x3f05, 0xc002, 0x3ed2, 0xc003, 0x3ea0, 0xc004, 
+  0x3e6e, 0xc005, 0x3e3c, 0xc006, 0x3e09, 0xc008, 0x3dd7, 0xc009, 
+  0x3da5, 0xc00b, 0x3d73, 0xc00d, 0x3d40, 0xc00f, 0x3d0e, 0xc011, 
+  0x3cdc, 0xc014, 0x3caa, 0xc016, 0x3c78, 0xc019, 0x3c45, 0xc01c, 
+  0x3c13, 0xc01f, 0x3be1, 0xc022, 0x3baf, 0xc025, 0x3b7d, 0xc029, 
+  0x3b4b, 0xc02c, 0x3b19, 0xc030, 0x3ae6, 0xc034, 0x3ab4, 0xc038, 
+  0x3a82, 0xc03c, 0x3a50, 0xc041, 0x3a1e, 0xc045, 0x39ec, 0xc04a, 
+  0x39ba, 0xc04f, 0x3988, 0xc054, 0x3956, 0xc059, 0x3924, 0xc05e, 
+  0x38f2, 0xc064, 0x38c0, 0xc069, 0x388e, 0xc06f, 0x385c, 0xc075, 
+  0x382a, 0xc07b, 0x37f9, 0xc081, 0x37c7, 0xc088, 0x3795, 0xc08e, 
+  0x3763, 0xc095, 0x3731, 0xc09c, 0x36ff, 0xc0a3, 0x36ce, 0xc0aa, 
+  0x369c, 0xc0b1, 0x366a, 0xc0b9, 0x3639, 0xc0c0, 0x3607, 0xc0c8, 
+  0x35d5, 0xc0d0, 0x35a4, 0xc0d8, 0x3572, 0xc0e0, 0x3540, 0xc0e9, 
+  0x350f, 0xc0f1, 0x34dd, 0xc0fa, 0x34ac, 0xc103, 0x347b, 0xc10c, 
+  0x3449, 0xc115, 0x3418, 0xc11e, 0x33e6, 0xc128, 0x33b5, 0xc131, 
+  0x3384, 0xc13b, 0x3352, 0xc145, 0x3321, 0xc14f, 0x32f0, 0xc159, 
+  0x32bf, 0xc163, 0x328e, 0xc16e, 0x325c, 0xc178, 0x322b, 0xc183, 
+  0x31fa, 0xc18e, 0x31c9, 0xc199, 0x3198, 0xc1a4, 0x3167, 0xc1b0, 
+  0x3136, 0xc1bb, 0x3105, 0xc1c7, 0x30d5, 0xc1d3, 0x30a4, 0xc1df, 
+  0x3073, 0xc1eb, 0x3042, 0xc1f7, 0x3012, 0xc204, 0x2fe1, 0xc210, 
+  0x2fb0, 0xc21d, 0x2f80, 0xc22a, 0x2f4f, 0xc237, 0x2f1f, 0xc244, 
+  0x2eee, 0xc251, 0x2ebe, 0xc25f, 0x2e8d, 0xc26d, 0x2e5d, 0xc27a, 
+  0x2e2d, 0xc288, 0x2dfc, 0xc296, 0x2dcc, 0xc2a5, 0x2d9c, 0xc2b3, 
+  0x2d6c, 0xc2c1, 0x2d3c, 0xc2d0, 0x2d0c, 0xc2df, 0x2cdc, 0xc2ee, 
+  0x2cac, 0xc2fd, 0x2c7c, 0xc30c, 0x2c4c, 0xc31c, 0x2c1c, 0xc32b, 
+  0x2bed, 0xc33b, 0x2bbd, 0xc34b, 0x2b8d, 0xc35b, 0x2b5e, 0xc36b, 
+  0x2b2e, 0xc37b, 0x2aff, 0xc38c, 0x2acf, 0xc39c, 0x2aa0, 0xc3ad, 
+  0x2a70, 0xc3be, 0x2a41, 0xc3cf, 0x2a12, 0xc3e0, 0x29e3, 0xc3f1, 
+  0x29b4, 0xc403, 0x2984, 0xc414, 0x2955, 0xc426, 0x2926, 0xc438, 
+  0x28f7, 0xc44a, 0x28c9, 0xc45c, 0x289a, 0xc46e, 0x286b, 0xc481, 
+  0x283c, 0xc493, 0x280e, 0xc4a6, 0x27df, 0xc4b9, 0x27b1, 0xc4cc, 
+  0x2782, 0xc4df, 0x2754, 0xc4f2, 0x2725, 0xc506, 0x26f7, 0xc51a, 
+  0x26c9, 0xc52d, 0x269b, 0xc541, 0x266d, 0xc555, 0x263f, 0xc569, 
+  0x2611, 0xc57e, 0x25e3, 0xc592, 0x25b5, 0xc5a7, 0x2587, 0xc5bb, 
+  0x2559, 0xc5d0, 0x252c, 0xc5e5, 0x24fe, 0xc5fa, 0x24d0, 0xc610, 
+  0x24a3, 0xc625, 0x2476, 0xc63b, 0x2448, 0xc650, 0x241b, 0xc666, 
+  0x23ee, 0xc67c, 0x23c1, 0xc692, 0x2394, 0xc6a8, 0x2367, 0xc6bf, 
+  0x233a, 0xc6d5, 0x230d, 0xc6ec, 0x22e0, 0xc703, 0x22b3, 0xc71a, 
+  0x2287, 0xc731, 0x225a, 0xc748, 0x222d, 0xc75f, 0x2201, 0xc777, 
+  0x21d5, 0xc78f, 0x21a8, 0xc7a6, 0x217c, 0xc7be, 0x2150, 0xc7d6, 
+  0x2124, 0xc7ee, 0x20f8, 0xc807, 0x20cc, 0xc81f, 0x20a0, 0xc838, 
+  0x2074, 0xc850, 0x2049, 0xc869, 0x201d, 0xc882, 0x1ff1, 0xc89b, 
+  0x1fc6, 0xc8b5, 0x1f9b, 0xc8ce, 0x1f6f, 0xc8e8, 0x1f44, 0xc901, 
+  0x1f19, 0xc91b, 0x1eee, 0xc935, 0x1ec3, 0xc94f, 0x1e98, 0xc969, 
+  0x1e6d, 0xc983, 0x1e42, 0xc99e, 0x1e18, 0xc9b8, 0x1ded, 0xc9d3, 
+  0x1dc3, 0xc9ee, 0x1d98, 0xca09, 0x1d6e, 0xca24, 0x1d44, 0xca3f, 
+  0x1d19, 0xca5b, 0x1cef, 0xca76, 0x1cc5, 0xca92, 0x1c9b, 0xcaad, 
+  0x1c72, 0xcac9, 0x1c48, 0xcae5, 0x1c1e, 0xcb01, 0x1bf5, 0xcb1e, 
+  0x1bcb, 0xcb3a, 0x1ba2, 0xcb56, 0x1b78, 0xcb73, 0x1b4f, 0xcb90, 
+  0x1b26, 0xcbad, 0x1afd, 0xcbca, 0x1ad4, 0xcbe7, 0x1aab, 0xcc04, 
+  0x1a82, 0xcc21, 0x1a5a, 0xcc3f, 0x1a31, 0xcc5d, 0x1a08, 0xcc7a, 
+  0x19e0, 0xcc98, 0x19b8, 0xccb6, 0x198f, 0xccd4, 0x1967, 0xccf3, 
+  0x193f, 0xcd11, 0x1917, 0xcd30, 0x18ef, 0xcd4e, 0x18c8, 0xcd6d, 
+  0x18a0, 0xcd8c, 0x1878, 0xcdab, 0x1851, 0xcdca, 0x182a, 0xcde9, 
+  0x1802, 0xce08, 0x17db, 0xce28, 0x17b4, 0xce47, 0x178d, 0xce67, 
+  0x1766, 0xce87, 0x173f, 0xcea7, 0x1719, 0xcec7, 0x16f2, 0xcee7, 
+  0x16cb, 0xcf07, 0x16a5, 0xcf28, 0x167f, 0xcf48, 0x1659, 0xcf69, 
+  0x1632, 0xcf8a, 0x160c, 0xcfab, 0x15e6, 0xcfcc, 0x15c1, 0xcfed, 
+  0x159b, 0xd00e, 0x1575, 0xd030, 0x1550, 0xd051, 0x152a, 0xd073, 
+  0x1505, 0xd094, 0x14e0, 0xd0b6, 0x14bb, 0xd0d8, 0x1496, 0xd0fa, 
+  0x1471, 0xd11c, 0x144c, 0xd13e, 0x1428, 0xd161, 0x1403, 0xd183, 
+  0x13df, 0xd1a6, 0x13ba, 0xd1c9, 0x1396, 0xd1eb, 0x1372, 0xd20e, 
+  0x134e, 0xd231, 0x132a, 0xd255, 0x1306, 0xd278, 0x12e2, 0xd29b, 
+  0x12bf, 0xd2bf, 0x129b, 0xd2e2, 0x1278, 0xd306, 0x1255, 0xd32a, 
+  0x1231, 0xd34e, 0x120e, 0xd372, 0x11eb, 0xd396, 0x11c9, 0xd3ba, 
+  0x11a6, 0xd3df, 0x1183, 0xd403, 0x1161, 0xd428, 0x113e, 0xd44c, 
+  0x111c, 0xd471, 0x10fa, 0xd496, 0x10d8, 0xd4bb, 0x10b6, 0xd4e0, 
+  0x1094, 0xd505, 0x1073, 0xd52a, 0x1051, 0xd550, 0x1030, 0xd575, 
+  0x100e, 0xd59b, 0xfed, 0xd5c1, 0xfcc, 0xd5e6, 0xfab, 0xd60c, 
+  0xf8a, 0xd632, 0xf69, 0xd659, 0xf48, 0xd67f, 0xf28, 0xd6a5, 
+  0xf07, 0xd6cb, 0xee7, 0xd6f2, 0xec7, 0xd719, 0xea7, 0xd73f, 
+  0xe87, 0xd766, 0xe67, 0xd78d, 0xe47, 0xd7b4, 0xe28, 0xd7db, 
+  0xe08, 0xd802, 0xde9, 0xd82a, 0xdca, 0xd851, 0xdab, 0xd878, 
+  0xd8c, 0xd8a0, 0xd6d, 0xd8c8, 0xd4e, 0xd8ef, 0xd30, 0xd917, 
+  0xd11, 0xd93f, 0xcf3, 0xd967, 0xcd4, 0xd98f, 0xcb6, 0xd9b8, 
+  0xc98, 0xd9e0, 0xc7a, 0xda08, 0xc5d, 0xda31, 0xc3f, 0xda5a, 
+  0xc21, 0xda82, 0xc04, 0xdaab, 0xbe7, 0xdad4, 0xbca, 0xdafd, 
+  0xbad, 0xdb26, 0xb90, 0xdb4f, 0xb73, 0xdb78, 0xb56, 0xdba2, 
+  0xb3a, 0xdbcb, 0xb1e, 0xdbf5, 0xb01, 0xdc1e, 0xae5, 0xdc48, 
+  0xac9, 0xdc72, 0xaad, 0xdc9b, 0xa92, 0xdcc5, 0xa76, 0xdcef, 
+  0xa5b, 0xdd19, 0xa3f, 0xdd44, 0xa24, 0xdd6e, 0xa09, 0xdd98, 
+  0x9ee, 0xddc3, 0x9d3, 0xdded, 0x9b8, 0xde18, 0x99e, 0xde42, 
+  0x983, 0xde6d, 0x969, 0xde98, 0x94f, 0xdec3, 0x935, 0xdeee, 
+  0x91b, 0xdf19, 0x901, 0xdf44, 0x8e8, 0xdf6f, 0x8ce, 0xdf9b, 
+  0x8b5, 0xdfc6, 0x89b, 0xdff1, 0x882, 0xe01d, 0x869, 0xe049, 
+  0x850, 0xe074, 0x838, 0xe0a0, 0x81f, 0xe0cc, 0x807, 0xe0f8, 
+  0x7ee, 0xe124, 0x7d6, 0xe150, 0x7be, 0xe17c, 0x7a6, 0xe1a8, 
+  0x78f, 0xe1d5, 0x777, 0xe201, 0x75f, 0xe22d, 0x748, 0xe25a, 
+  0x731, 0xe287, 0x71a, 0xe2b3, 0x703, 0xe2e0, 0x6ec, 0xe30d, 
+  0x6d5, 0xe33a, 0x6bf, 0xe367, 0x6a8, 0xe394, 0x692, 0xe3c1, 
+  0x67c, 0xe3ee, 0x666, 0xe41b, 0x650, 0xe448, 0x63b, 0xe476, 
+  0x625, 0xe4a3, 0x610, 0xe4d0, 0x5fa, 0xe4fe, 0x5e5, 0xe52c, 
+  0x5d0, 0xe559, 0x5bb, 0xe587, 0x5a7, 0xe5b5, 0x592, 0xe5e3, 
+  0x57e, 0xe611, 0x569, 0xe63f, 0x555, 0xe66d, 0x541, 0xe69b, 
+  0x52d, 0xe6c9, 0x51a, 0xe6f7, 0x506, 0xe725, 0x4f2, 0xe754, 
+  0x4df, 0xe782, 0x4cc, 0xe7b1, 0x4b9, 0xe7df, 0x4a6, 0xe80e, 
+  0x493, 0xe83c, 0x481, 0xe86b, 0x46e, 0xe89a, 0x45c, 0xe8c9, 
+  0x44a, 0xe8f7, 0x438, 0xe926, 0x426, 0xe955, 0x414, 0xe984, 
+  0x403, 0xe9b4, 0x3f1, 0xe9e3, 0x3e0, 0xea12, 0x3cf, 0xea41, 
+  0x3be, 0xea70, 0x3ad, 0xeaa0, 0x39c, 0xeacf, 0x38c, 0xeaff, 
+  0x37b, 0xeb2e, 0x36b, 0xeb5e, 0x35b, 0xeb8d, 0x34b, 0xebbd, 
+  0x33b, 0xebed, 0x32b, 0xec1c, 0x31c, 0xec4c, 0x30c, 0xec7c, 
+  0x2fd, 0xecac, 0x2ee, 0xecdc, 0x2df, 0xed0c, 0x2d0, 0xed3c, 
+  0x2c1, 0xed6c, 0x2b3, 0xed9c, 0x2a5, 0xedcc, 0x296, 0xedfc, 
+  0x288, 0xee2d, 0x27a, 0xee5d, 0x26d, 0xee8d, 0x25f, 0xeebe, 
+  0x251, 0xeeee, 0x244, 0xef1f, 0x237, 0xef4f, 0x22a, 0xef80, 
+  0x21d, 0xefb0, 0x210, 0xefe1, 0x204, 0xf012, 0x1f7, 0xf042, 
+  0x1eb, 0xf073, 0x1df, 0xf0a4, 0x1d3, 0xf0d5, 0x1c7, 0xf105, 
+  0x1bb, 0xf136, 0x1b0, 0xf167, 0x1a4, 0xf198, 0x199, 0xf1c9, 
+  0x18e, 0xf1fa, 0x183, 0xf22b, 0x178, 0xf25c, 0x16e, 0xf28e, 
+  0x163, 0xf2bf, 0x159, 0xf2f0, 0x14f, 0xf321, 0x145, 0xf352, 
+  0x13b, 0xf384, 0x131, 0xf3b5, 0x128, 0xf3e6, 0x11e, 0xf418, 
+  0x115, 0xf449, 0x10c, 0xf47b, 0x103, 0xf4ac, 0xfa, 0xf4dd, 
+  0xf1, 0xf50f, 0xe9, 0xf540, 0xe0, 0xf572, 0xd8, 0xf5a4, 
+  0xd0, 0xf5d5, 0xc8, 0xf607, 0xc0, 0xf639, 0xb9, 0xf66a, 
+  0xb1, 0xf69c, 0xaa, 0xf6ce, 0xa3, 0xf6ff, 0x9c, 0xf731, 
+  0x95, 0xf763, 0x8e, 0xf795, 0x88, 0xf7c7, 0x81, 0xf7f9, 
+  0x7b, 0xf82a, 0x75, 0xf85c, 0x6f, 0xf88e, 0x69, 0xf8c0, 
+  0x64, 0xf8f2, 0x5e, 0xf924, 0x59, 0xf956, 0x54, 0xf988, 
+  0x4f, 0xf9ba, 0x4a, 0xf9ec, 0x45, 0xfa1e, 0x41, 0xfa50, 
+  0x3c, 0xfa82, 0x38, 0xfab4, 0x34, 0xfae6, 0x30, 0xfb19, 
+  0x2c, 0xfb4b, 0x29, 0xfb7d, 0x25, 0xfbaf, 0x22, 0xfbe1, 
+  0x1f, 0xfc13, 0x1c, 0xfc45, 0x19, 0xfc78, 0x16, 0xfcaa, 
+  0x14, 0xfcdc, 0x11, 0xfd0e, 0xf, 0xfd40, 0xd, 0xfd73, 
+  0xb, 0xfda5, 0x9, 0xfdd7, 0x8, 0xfe09, 0x6, 0xfe3c, 
+  0x5, 0xfe6e, 0x4, 0xfea0, 0x3, 0xfed2, 0x2, 0xff05, 
+  0x1, 0xff37, 0x1, 0xff69, 0x0, 0xff9b, 0x0, 0xffce, 
+  0x0, 0x0, 0x0, 0x32, 0x0, 0x65, 0x1, 0x97, 
+  0x1, 0xc9, 0x2, 0xfb, 0x3, 0x12e, 0x4, 0x160, 
+  0x5, 0x192, 0x6, 0x1c4, 0x8, 0x1f7, 0x9, 0x229, 
+  0xb, 0x25b, 0xd, 0x28d, 0xf, 0x2c0, 0x11, 0x2f2, 
+  0x14, 0x324, 0x16, 0x356, 0x19, 0x388, 0x1c, 0x3bb, 
+  0x1f, 0x3ed, 0x22, 0x41f, 0x25, 0x451, 0x29, 0x483, 
+  0x2c, 0x4b5, 0x30, 0x4e7, 0x34, 0x51a, 0x38, 0x54c, 
+  0x3c, 0x57e, 0x41, 0x5b0, 0x45, 0x5e2, 0x4a, 0x614, 
+  0x4f, 0x646, 0x54, 0x678, 0x59, 0x6aa, 0x5e, 0x6dc, 
+  0x64, 0x70e, 0x69, 0x740, 0x6f, 0x772, 0x75, 0x7a4, 
+  0x7b, 0x7d6, 0x81, 0x807, 0x88, 0x839, 0x8e, 0x86b, 
+  0x95, 0x89d, 0x9c, 0x8cf, 0xa3, 0x901, 0xaa, 0x932, 
+  0xb1, 0x964, 0xb9, 0x996, 0xc0, 0x9c7, 0xc8, 0x9f9, 
+  0xd0, 0xa2b, 0xd8, 0xa5c, 0xe0, 0xa8e, 0xe9, 0xac0, 
+  0xf1, 0xaf1, 0xfa, 0xb23, 0x103, 0xb54, 0x10c, 0xb85, 
+  0x115, 0xbb7, 0x11e, 0xbe8, 0x128, 0xc1a, 0x131, 0xc4b, 
+  0x13b, 0xc7c, 0x145, 0xcae, 0x14f, 0xcdf, 0x159, 0xd10, 
+  0x163, 0xd41, 0x16e, 0xd72, 0x178, 0xda4, 0x183, 0xdd5, 
+  0x18e, 0xe06, 0x199, 0xe37, 0x1a4, 0xe68, 0x1b0, 0xe99, 
+  0x1bb, 0xeca, 0x1c7, 0xefb, 0x1d3, 0xf2b, 0x1df, 0xf5c, 
+  0x1eb, 0xf8d, 0x1f7, 0xfbe, 0x204, 0xfee, 0x210, 0x101f, 
+  0x21d, 0x1050, 0x22a, 0x1080, 0x237, 0x10b1, 0x244, 0x10e1, 
+  0x251, 0x1112, 0x25f, 0x1142, 0x26d, 0x1173, 0x27a, 0x11a3, 
+  0x288, 0x11d3, 0x296, 0x1204, 0x2a5, 0x1234, 0x2b3, 0x1264, 
+  0x2c1, 0x1294, 0x2d0, 0x12c4, 0x2df, 0x12f4, 0x2ee, 0x1324, 
+  0x2fd, 0x1354, 0x30c, 0x1384, 0x31c, 0x13b4, 0x32b, 0x13e4, 
+  0x33b, 0x1413, 0x34b, 0x1443, 0x35b, 0x1473, 0x36b, 0x14a2, 
+  0x37b, 0x14d2, 0x38c, 0x1501, 0x39c, 0x1531, 0x3ad, 0x1560, 
+  0x3be, 0x1590, 0x3cf, 0x15bf, 0x3e0, 0x15ee, 0x3f1, 0x161d, 
+  0x403, 0x164c, 0x414, 0x167c, 0x426, 0x16ab, 0x438, 0x16da, 
+  0x44a, 0x1709, 0x45c, 0x1737, 0x46e, 0x1766, 0x481, 0x1795, 
+  0x493, 0x17c4, 0x4a6, 0x17f2, 0x4b9, 0x1821, 0x4cc, 0x184f, 
+  0x4df, 0x187e, 0x4f2, 0x18ac, 0x506, 0x18db, 0x51a, 0x1909, 
+  0x52d, 0x1937, 0x541, 0x1965, 0x555, 0x1993, 0x569, 0x19c1, 
+  0x57e, 0x19ef, 0x592, 0x1a1d, 0x5a7, 0x1a4b, 0x5bb, 0x1a79, 
+  0x5d0, 0x1aa7, 0x5e5, 0x1ad4, 0x5fa, 0x1b02, 0x610, 0x1b30, 
+  0x625, 0x1b5d, 0x63b, 0x1b8a, 0x650, 0x1bb8, 0x666, 0x1be5, 
+  0x67c, 0x1c12, 0x692, 0x1c3f, 0x6a8, 0x1c6c, 0x6bf, 0x1c99, 
+  0x6d5, 0x1cc6, 0x6ec, 0x1cf3, 0x703, 0x1d20, 0x71a, 0x1d4d, 
+  0x731, 0x1d79, 0x748, 0x1da6, 0x75f, 0x1dd3, 0x777, 0x1dff, 
+  0x78f, 0x1e2b, 0x7a6, 0x1e58, 0x7be, 0x1e84, 0x7d6, 0x1eb0, 
+  0x7ee, 0x1edc, 0x807, 0x1f08, 0x81f, 0x1f34, 0x838, 0x1f60, 
+  0x850, 0x1f8c, 0x869, 0x1fb7, 0x882, 0x1fe3, 0x89b, 0x200f, 
+  0x8b5, 0x203a, 0x8ce, 0x2065, 0x8e8, 0x2091, 0x901, 0x20bc, 
+  0x91b, 0x20e7, 0x935, 0x2112, 0x94f, 0x213d, 0x969, 0x2168, 
+  0x983, 0x2193, 0x99e, 0x21be, 0x9b8, 0x21e8, 0x9d3, 0x2213, 
+  0x9ee, 0x223d, 0xa09, 0x2268, 0xa24, 0x2292, 0xa3f, 0x22bc, 
+  0xa5b, 0x22e7, 0xa76, 0x2311, 0xa92, 0x233b, 0xaad, 0x2365, 
+  0xac9, 0x238e, 0xae5, 0x23b8, 0xb01, 0x23e2, 0xb1e, 0x240b, 
+  0xb3a, 0x2435, 0xb56, 0x245e, 0xb73, 0x2488, 0xb90, 0x24b1, 
+  0xbad, 0x24da, 0xbca, 0x2503, 0xbe7, 0x252c, 0xc04, 0x2555, 
+  0xc21, 0x257e, 0xc3f, 0x25a6, 0xc5d, 0x25cf, 0xc7a, 0x25f8, 
+  0xc98, 0x2620, 0xcb6, 0x2648, 0xcd4, 0x2671, 0xcf3, 0x2699, 
+  0xd11, 0x26c1, 0xd30, 0x26e9, 0xd4e, 0x2711, 0xd6d, 0x2738, 
+  0xd8c, 0x2760, 0xdab, 0x2788, 0xdca, 0x27af, 0xde9, 0x27d6, 
+  0xe08, 0x27fe, 0xe28, 0x2825, 0xe47, 0x284c, 0xe67, 0x2873, 
+  0xe87, 0x289a, 0xea7, 0x28c1, 0xec7, 0x28e7, 0xee7, 0x290e, 
+  0xf07, 0x2935, 0xf28, 0x295b, 0xf48, 0x2981, 0xf69, 0x29a7, 
+  0xf8a, 0x29ce, 0xfab, 0x29f4, 0xfcc, 0x2a1a, 0xfed, 0x2a3f, 
+  0x100e, 0x2a65, 0x1030, 0x2a8b, 0x1051, 0x2ab0, 0x1073, 0x2ad6, 
+  0x1094, 0x2afb, 0x10b6, 0x2b20, 0x10d8, 0x2b45, 0x10fa, 0x2b6a, 
+  0x111c, 0x2b8f, 0x113e, 0x2bb4, 0x1161, 0x2bd8, 0x1183, 0x2bfd, 
+  0x11a6, 0x2c21, 0x11c9, 0x2c46, 0x11eb, 0x2c6a, 0x120e, 0x2c8e, 
+  0x1231, 0x2cb2, 0x1255, 0x2cd6, 0x1278, 0x2cfa, 0x129b, 0x2d1e, 
+  0x12bf, 0x2d41, 0x12e2, 0x2d65, 0x1306, 0x2d88, 0x132a, 0x2dab, 
+  0x134e, 0x2dcf, 0x1372, 0x2df2, 0x1396, 0x2e15, 0x13ba, 0x2e37, 
+  0x13df, 0x2e5a, 0x1403, 0x2e7d, 0x1428, 0x2e9f, 0x144c, 0x2ec2, 
+  0x1471, 0x2ee4, 0x1496, 0x2f06, 0x14bb, 0x2f28, 0x14e0, 0x2f4a, 
+  0x1505, 0x2f6c, 0x152a, 0x2f8d, 0x1550, 0x2faf, 0x1575, 0x2fd0, 
+  0x159b, 0x2ff2, 0x15c1, 0x3013, 0x15e6, 0x3034, 0x160c, 0x3055, 
+  0x1632, 0x3076, 0x1659, 0x3097, 0x167f, 0x30b8, 0x16a5, 0x30d8, 
+  0x16cb, 0x30f9, 0x16f2, 0x3119, 0x1719, 0x3139, 0x173f, 0x3159, 
+  0x1766, 0x3179, 0x178d, 0x3199, 0x17b4, 0x31b9, 0x17db, 0x31d8, 
+  0x1802, 0x31f8, 0x182a, 0x3217, 0x1851, 0x3236, 0x1878, 0x3255, 
+  0x18a0, 0x3274, 0x18c8, 0x3293, 0x18ef, 0x32b2, 0x1917, 0x32d0, 
+  0x193f, 0x32ef, 0x1967, 0x330d, 0x198f, 0x332c, 0x19b8, 0x334a, 
+  0x19e0, 0x3368, 0x1a08, 0x3386, 0x1a31, 0x33a3, 0x1a5a, 0x33c1, 
+  0x1a82, 0x33df, 0x1aab, 0x33fc, 0x1ad4, 0x3419, 0x1afd, 0x3436, 
+  0x1b26, 0x3453, 0x1b4f, 0x3470, 0x1b78, 0x348d, 0x1ba2, 0x34aa, 
+  0x1bcb, 0x34c6, 0x1bf5, 0x34e2, 0x1c1e, 0x34ff, 0x1c48, 0x351b, 
+  0x1c72, 0x3537, 0x1c9b, 0x3553, 0x1cc5, 0x356e, 0x1cef, 0x358a, 
+  0x1d19, 0x35a5, 0x1d44, 0x35c1, 0x1d6e, 0x35dc, 0x1d98, 0x35f7, 
+  0x1dc3, 0x3612, 0x1ded, 0x362d, 0x1e18, 0x3648, 0x1e42, 0x3662, 
+  0x1e6d, 0x367d, 0x1e98, 0x3697, 0x1ec3, 0x36b1, 0x1eee, 0x36cb, 
+  0x1f19, 0x36e5, 0x1f44, 0x36ff, 0x1f6f, 0x3718, 0x1f9b, 0x3732, 
+  0x1fc6, 0x374b, 0x1ff1, 0x3765, 0x201d, 0x377e, 0x2049, 0x3797, 
+  0x2074, 0x37b0, 0x20a0, 0x37c8, 0x20cc, 0x37e1, 0x20f8, 0x37f9, 
+  0x2124, 0x3812, 0x2150, 0x382a, 0x217c, 0x3842, 0x21a8, 0x385a, 
+  0x21d5, 0x3871, 0x2201, 0x3889, 0x222d, 0x38a1, 0x225a, 0x38b8, 
+  0x2287, 0x38cf, 0x22b3, 0x38e6, 0x22e0, 0x38fd, 0x230d, 0x3914, 
+  0x233a, 0x392b, 0x2367, 0x3941, 0x2394, 0x3958, 0x23c1, 0x396e, 
+  0x23ee, 0x3984, 0x241b, 0x399a, 0x2448, 0x39b0, 0x2476, 0x39c5, 
+  0x24a3, 0x39db, 0x24d0, 0x39f0, 0x24fe, 0x3a06, 0x252c, 0x3a1b, 
+  0x2559, 0x3a30, 0x2587, 0x3a45, 0x25b5, 0x3a59, 0x25e3, 0x3a6e, 
+  0x2611, 0x3a82, 0x263f, 0x3a97, 0x266d, 0x3aab, 0x269b, 0x3abf, 
+  0x26c9, 0x3ad3, 0x26f7, 0x3ae6, 0x2725, 0x3afa, 0x2754, 0x3b0e, 
+  0x2782, 0x3b21, 0x27b1, 0x3b34, 0x27df, 0x3b47, 0x280e, 0x3b5a, 
+  0x283c, 0x3b6d, 0x286b, 0x3b7f, 0x289a, 0x3b92, 0x28c9, 0x3ba4, 
+  0x28f7, 0x3bb6, 0x2926, 0x3bc8, 0x2955, 0x3bda, 0x2984, 0x3bec, 
+  0x29b4, 0x3bfd, 0x29e3, 0x3c0f, 0x2a12, 0x3c20, 0x2a41, 0x3c31, 
+  0x2a70, 0x3c42, 0x2aa0, 0x3c53, 0x2acf, 0x3c64, 0x2aff, 0x3c74, 
+  0x2b2e, 0x3c85, 0x2b5e, 0x3c95, 0x2b8d, 0x3ca5, 0x2bbd, 0x3cb5, 
+  0x2bed, 0x3cc5, 0x2c1c, 0x3cd5, 0x2c4c, 0x3ce4, 0x2c7c, 0x3cf4, 
+  0x2cac, 0x3d03, 0x2cdc, 0x3d12, 0x2d0c, 0x3d21, 0x2d3c, 0x3d30, 
+  0x2d6c, 0x3d3f, 0x2d9c, 0x3d4d, 0x2dcc, 0x3d5b, 0x2dfc, 0x3d6a, 
+  0x2e2d, 0x3d78, 0x2e5d, 0x3d86, 0x2e8d, 0x3d93, 0x2ebe, 0x3da1, 
+  0x2eee, 0x3daf, 0x2f1f, 0x3dbc, 0x2f4f, 0x3dc9, 0x2f80, 0x3dd6, 
+  0x2fb0, 0x3de3, 0x2fe1, 0x3df0, 0x3012, 0x3dfc, 0x3042, 0x3e09, 
+  0x3073, 0x3e15, 0x30a4, 0x3e21, 0x30d5, 0x3e2d, 0x3105, 0x3e39, 
+  0x3136, 0x3e45, 0x3167, 0x3e50, 0x3198, 0x3e5c, 0x31c9, 0x3e67, 
+  0x31fa, 0x3e72, 0x322b, 0x3e7d, 0x325c, 0x3e88, 0x328e, 0x3e92, 
+  0x32bf, 0x3e9d, 0x32f0, 0x3ea7, 0x3321, 0x3eb1, 0x3352, 0x3ebb, 
+  0x3384, 0x3ec5, 0x33b5, 0x3ecf, 0x33e6, 0x3ed8, 0x3418, 0x3ee2, 
+  0x3449, 0x3eeb, 0x347b, 0x3ef4, 0x34ac, 0x3efd, 0x34dd, 0x3f06, 
+  0x350f, 0x3f0f, 0x3540, 0x3f17, 0x3572, 0x3f20, 0x35a4, 0x3f28, 
+  0x35d5, 0x3f30, 0x3607, 0x3f38, 0x3639, 0x3f40, 0x366a, 0x3f47, 
+  0x369c, 0x3f4f, 0x36ce, 0x3f56, 0x36ff, 0x3f5d, 0x3731, 0x3f64, 
+  0x3763, 0x3f6b, 0x3795, 0x3f72, 0x37c7, 0x3f78, 0x37f9, 0x3f7f, 
+  0x382a, 0x3f85, 0x385c, 0x3f8b, 0x388e, 0x3f91, 0x38c0, 0x3f97, 
+  0x38f2, 0x3f9c, 0x3924, 0x3fa2, 0x3956, 0x3fa7, 0x3988, 0x3fac, 
+  0x39ba, 0x3fb1, 0x39ec, 0x3fb6, 0x3a1e, 0x3fbb, 0x3a50, 0x3fbf, 
+  0x3a82, 0x3fc4, 0x3ab4, 0x3fc8, 0x3ae6, 0x3fcc, 0x3b19, 0x3fd0, 
+  0x3b4b, 0x3fd4, 0x3b7d, 0x3fd7, 0x3baf, 0x3fdb, 0x3be1, 0x3fde, 
+  0x3c13, 0x3fe1, 0x3c45, 0x3fe4, 0x3c78, 0x3fe7, 0x3caa, 0x3fea, 
+  0x3cdc, 0x3fec, 0x3d0e, 0x3fef, 0x3d40, 0x3ff1, 0x3d73, 0x3ff3, 
+  0x3da5, 0x3ff5, 0x3dd7, 0x3ff7, 0x3e09, 0x3ff8, 0x3e3c, 0x3ffa, 
+  0x3e6e, 0x3ffb, 0x3ea0, 0x3ffc, 0x3ed2, 0x3ffd, 0x3f05, 0x3ffe, 
+  0x3f37, 0x3fff, 0x3f69, 0x3fff, 0x3f9b, 0x4000, 0x3fce, 0x4000 
+}; 
+ 
+/**  
+* \par 
+* Generation of the floating point realCoefB array:  
+* \par  
+* n = 1024  
+* <pre>for (i = 0; i < n; i++)  
+*  {  
+*    pBTable[2 * i] = 0.5 * (1.0 + sin (2 * PI / (double) (2 * n) * (double) i));  
+*    pBTable[2 * i + 1] = 0.5 * (1.0 * cos (2 * PI / (double) (2 * n) * (double) i));  
+*  } </pre> 
+* \par  
+* Convert to fixed point Q15 format  
+*       round(pBTable[i] * pow(2, 15))  
+* Results that would reach 2^15 (i.e. +1.0) are stored saturated as 0x7fff.  
+*/ 
+ 
+static const q15_t realCoefBQ15[2048] = { 
+  0x4000, 0x4000, 0x4032, 0x4000, 0x4065, 0x4000, 0x4097, 0x3fff, 
+  0x40c9, 0x3fff, 0x40fb, 0x3ffe, 0x412e, 0x3ffd, 0x4160, 0x3ffc, 
+  0x4192, 0x3ffb, 0x41c4, 0x3ffa, 0x41f7, 0x3ff8, 0x4229, 0x3ff7, 
+  0x425b, 0x3ff5, 0x428d, 0x3ff3, 0x42c0, 0x3ff1, 0x42f2, 0x3fef, 
+  0x4324, 0x3fec, 0x4356, 0x3fea, 0x4388, 0x3fe7, 0x43bb, 0x3fe4, 
+  0x43ed, 0x3fe1, 0x441f, 0x3fde, 0x4451, 0x3fdb, 0x4483, 0x3fd7, 
+  0x44b5, 0x3fd4, 0x44e7, 0x3fd0, 0x451a, 0x3fcc, 0x454c, 0x3fc8, 
+  0x457e, 0x3fc4, 0x45b0, 0x3fbf, 0x45e2, 0x3fbb, 0x4614, 0x3fb6, 
+  0x4646, 0x3fb1, 0x4678, 0x3fac, 0x46aa, 0x3fa7, 0x46dc, 0x3fa2, 
+  0x470e, 0x3f9c, 0x4740, 0x3f97, 0x4772, 0x3f91, 0x47a4, 0x3f8b, 
+  0x47d6, 0x3f85, 0x4807, 0x3f7f, 0x4839, 0x3f78, 0x486b, 0x3f72, 
+  0x489d, 0x3f6b, 0x48cf, 0x3f64, 0x4901, 0x3f5d, 0x4932, 0x3f56, 
+  0x4964, 0x3f4f, 0x4996, 0x3f47, 0x49c7, 0x3f40, 0x49f9, 0x3f38, 
+  0x4a2b, 0x3f30, 0x4a5c, 0x3f28, 0x4a8e, 0x3f20, 0x4ac0, 0x3f17, 
+  0x4af1, 0x3f0f, 0x4b23, 0x3f06, 0x4b54, 0x3efd, 0x4b85, 0x3ef4, 
+  0x4bb7, 0x3eeb, 0x4be8, 0x3ee2, 0x4c1a, 0x3ed8, 0x4c4b, 0x3ecf, 
+  0x4c7c, 0x3ec5, 0x4cae, 0x3ebb, 0x4cdf, 0x3eb1, 0x4d10, 0x3ea7, 
+  0x4d41, 0x3e9d, 0x4d72, 0x3e92, 0x4da4, 0x3e88, 0x4dd5, 0x3e7d, 
+  0x4e06, 0x3e72, 0x4e37, 0x3e67, 0x4e68, 0x3e5c, 0x4e99, 0x3e50, 
+  0x4eca, 0x3e45, 0x4efb, 0x3e39, 0x4f2b, 0x3e2d, 0x4f5c, 0x3e21, 
+  0x4f8d, 0x3e15, 0x4fbe, 0x3e09, 0x4fee, 0x3dfc, 0x501f, 0x3df0, 
+  0x5050, 0x3de3, 0x5080, 0x3dd6, 0x50b1, 0x3dc9, 0x50e1, 0x3dbc, 
+  0x5112, 0x3daf, 0x5142, 0x3da1, 0x5173, 0x3d93, 0x51a3, 0x3d86, 
+  0x51d3, 0x3d78, 0x5204, 0x3d6a, 0x5234, 0x3d5b, 0x5264, 0x3d4d, 
+  0x5294, 0x3d3f, 0x52c4, 0x3d30, 0x52f4, 0x3d21, 0x5324, 0x3d12, 
+  0x5354, 0x3d03, 0x5384, 0x3cf4, 0x53b4, 0x3ce4, 0x53e4, 0x3cd5, 
+  0x5413, 0x3cc5, 0x5443, 0x3cb5, 0x5473, 0x3ca5, 0x54a2, 0x3c95, 
+  0x54d2, 0x3c85, 0x5501, 0x3c74, 0x5531, 0x3c64, 0x5560, 0x3c53, 
+  0x5590, 0x3c42, 0x55bf, 0x3c31, 0x55ee, 0x3c20, 0x561d, 0x3c0f, 
+  0x564c, 0x3bfd, 0x567c, 0x3bec, 0x56ab, 0x3bda, 0x56da, 0x3bc8, 
+  0x5709, 0x3bb6, 0x5737, 0x3ba4, 0x5766, 0x3b92, 0x5795, 0x3b7f, 
+  0x57c4, 0x3b6d, 0x57f2, 0x3b5a, 0x5821, 0x3b47, 0x584f, 0x3b34, 
+  0x587e, 0x3b21, 0x58ac, 0x3b0e, 0x58db, 0x3afa, 0x5909, 0x3ae6, 
+  0x5937, 0x3ad3, 0x5965, 0x3abf, 0x5993, 0x3aab, 0x59c1, 0x3a97, 
+  0x59ef, 0x3a82, 0x5a1d, 0x3a6e, 0x5a4b, 0x3a59, 0x5a79, 0x3a45, 
+  0x5aa7, 0x3a30, 0x5ad4, 0x3a1b, 0x5b02, 0x3a06, 0x5b30, 0x39f0, 
+  0x5b5d, 0x39db, 0x5b8a, 0x39c5, 0x5bb8, 0x39b0, 0x5be5, 0x399a, 
+  0x5c12, 0x3984, 0x5c3f, 0x396e, 0x5c6c, 0x3958, 0x5c99, 0x3941, 
+  0x5cc6, 0x392b, 0x5cf3, 0x3914, 0x5d20, 0x38fd, 0x5d4d, 0x38e6, 
+  0x5d79, 0x38cf, 0x5da6, 0x38b8, 0x5dd3, 0x38a1, 0x5dff, 0x3889, 
+  0x5e2b, 0x3871, 0x5e58, 0x385a, 0x5e84, 0x3842, 0x5eb0, 0x382a, 
+  0x5edc, 0x3812, 0x5f08, 0x37f9, 0x5f34, 0x37e1, 0x5f60, 0x37c8, 
+  0x5f8c, 0x37b0, 0x5fb7, 0x3797, 0x5fe3, 0x377e, 0x600f, 0x3765, 
+  0x603a, 0x374b, 0x6065, 0x3732, 0x6091, 0x3718, 0x60bc, 0x36ff, 
+  0x60e7, 0x36e5, 0x6112, 0x36cb, 0x613d, 0x36b1, 0x6168, 0x3697, 
+  0x6193, 0x367d, 0x61be, 0x3662, 0x61e8, 0x3648, 0x6213, 0x362d, 
+  0x623d, 0x3612, 0x6268, 0x35f7, 0x6292, 0x35dc, 0x62bc, 0x35c1, 
+  0x62e7, 0x35a5, 0x6311, 0x358a, 0x633b, 0x356e, 0x6365, 0x3553, 
+  0x638e, 0x3537, 0x63b8, 0x351b, 0x63e2, 0x34ff, 0x640b, 0x34e2, 
+  0x6435, 0x34c6, 0x645e, 0x34aa, 0x6488, 0x348d, 0x64b1, 0x3470, 
+  0x64da, 0x3453, 0x6503, 0x3436, 0x652c, 0x3419, 0x6555, 0x33fc, 
+  0x657e, 0x33df, 0x65a6, 0x33c1, 0x65cf, 0x33a3, 0x65f8, 0x3386, 
+  0x6620, 0x3368, 0x6648, 0x334a, 0x6671, 0x332c, 0x6699, 0x330d, 
+  0x66c1, 0x32ef, 0x66e9, 0x32d0, 0x6711, 0x32b2, 0x6738, 0x3293, 
+  0x6760, 0x3274, 0x6788, 0x3255, 0x67af, 0x3236, 0x67d6, 0x3217, 
+  0x67fe, 0x31f8, 0x6825, 0x31d8, 0x684c, 0x31b9, 0x6873, 0x3199, 
+  0x689a, 0x3179, 0x68c1, 0x3159, 0x68e7, 0x3139, 0x690e, 0x3119, 
+  0x6935, 0x30f9, 0x695b, 0x30d8, 0x6981, 0x30b8, 0x69a7, 0x3097, 
+  0x69ce, 0x3076, 0x69f4, 0x3055, 0x6a1a, 0x3034, 0x6a3f, 0x3013, 
+  0x6a65, 0x2ff2, 0x6a8b, 0x2fd0, 0x6ab0, 0x2faf, 0x6ad6, 0x2f8d, 
+  0x6afb, 0x2f6c, 0x6b20, 0x2f4a, 0x6b45, 0x2f28, 0x6b6a, 0x2f06, 
+  0x6b8f, 0x2ee4, 0x6bb4, 0x2ec2, 0x6bd8, 0x2e9f, 0x6bfd, 0x2e7d, 
+  0x6c21, 0x2e5a, 0x6c46, 0x2e37, 0x6c6a, 0x2e15, 0x6c8e, 0x2df2, 
+  0x6cb2, 0x2dcf, 0x6cd6, 0x2dab, 0x6cfa, 0x2d88, 0x6d1e, 0x2d65, 
+  0x6d41, 0x2d41, 0x6d65, 0x2d1e, 0x6d88, 0x2cfa, 0x6dab, 0x2cd6, 
+  0x6dcf, 0x2cb2, 0x6df2, 0x2c8e, 0x6e15, 0x2c6a, 0x6e37, 0x2c46, 
+  0x6e5a, 0x2c21, 0x6e7d, 0x2bfd, 0x6e9f, 0x2bd8, 0x6ec2, 0x2bb4, 
+  0x6ee4, 0x2b8f, 0x6f06, 0x2b6a, 0x6f28, 0x2b45, 0x6f4a, 0x2b20, 
+  0x6f6c, 0x2afb, 0x6f8d, 0x2ad6, 0x6faf, 0x2ab0, 0x6fd0, 0x2a8b, 
+  0x6ff2, 0x2a65, 0x7013, 0x2a3f, 0x7034, 0x2a1a, 0x7055, 0x29f4, 
+  0x7076, 0x29ce, 0x7097, 0x29a7, 0x70b8, 0x2981, 0x70d8, 0x295b, 
+  0x70f9, 0x2935, 0x7119, 0x290e, 0x7139, 0x28e7, 0x7159, 0x28c1, 
+  0x7179, 0x289a, 0x7199, 0x2873, 0x71b9, 0x284c, 0x71d8, 0x2825, 
+  0x71f8, 0x27fe, 0x7217, 0x27d6, 0x7236, 0x27af, 0x7255, 0x2788, 
+  0x7274, 0x2760, 0x7293, 0x2738, 0x72b2, 0x2711, 0x72d0, 0x26e9, 
+  0x72ef, 0x26c1, 0x730d, 0x2699, 0x732c, 0x2671, 0x734a, 0x2648, 
+  0x7368, 0x2620, 0x7386, 0x25f8, 0x73a3, 0x25cf, 0x73c1, 0x25a6, 
+  0x73df, 0x257e, 0x73fc, 0x2555, 0x7419, 0x252c, 0x7436, 0x2503, 
+  0x7453, 0x24da, 0x7470, 0x24b1, 0x748d, 0x2488, 0x74aa, 0x245e, 
+  0x74c6, 0x2435, 0x74e2, 0x240b, 0x74ff, 0x23e2, 0x751b, 0x23b8, 
+  0x7537, 0x238e, 0x7553, 0x2365, 0x756e, 0x233b, 0x758a, 0x2311, 
+  0x75a5, 0x22e7, 0x75c1, 0x22bc, 0x75dc, 0x2292, 0x75f7, 0x2268, 
+  0x7612, 0x223d, 0x762d, 0x2213, 0x7648, 0x21e8, 0x7662, 0x21be, 
+  0x767d, 0x2193, 0x7697, 0x2168, 0x76b1, 0x213d, 0x76cb, 0x2112, 
+  0x76e5, 0x20e7, 0x76ff, 0x20bc, 0x7718, 0x2091, 0x7732, 0x2065, 
+  0x774b, 0x203a, 0x7765, 0x200f, 0x777e, 0x1fe3, 0x7797, 0x1fb7, 
+  0x77b0, 0x1f8c, 0x77c8, 0x1f60, 0x77e1, 0x1f34, 0x77f9, 0x1f08, 
+  0x7812, 0x1edc, 0x782a, 0x1eb0, 0x7842, 0x1e84, 0x785a, 0x1e58, 
+  0x7871, 0x1e2b, 0x7889, 0x1dff, 0x78a1, 0x1dd3, 0x78b8, 0x1da6, 
+  0x78cf, 0x1d79, 0x78e6, 0x1d4d, 0x78fd, 0x1d20, 0x7914, 0x1cf3, 
+  0x792b, 0x1cc6, 0x7941, 0x1c99, 0x7958, 0x1c6c, 0x796e, 0x1c3f, 
+  0x7984, 0x1c12, 0x799a, 0x1be5, 0x79b0, 0x1bb8, 0x79c5, 0x1b8a, 
+  0x79db, 0x1b5d, 0x79f0, 0x1b30, 0x7a06, 0x1b02, 0x7a1b, 0x1ad4, 
+  0x7a30, 0x1aa7, 0x7a45, 0x1a79, 0x7a59, 0x1a4b, 0x7a6e, 0x1a1d, 
+  0x7a82, 0x19ef, 0x7a97, 0x19c1, 0x7aab, 0x1993, 0x7abf, 0x1965, 
+  0x7ad3, 0x1937, 0x7ae6, 0x1909, 0x7afa, 0x18db, 0x7b0e, 0x18ac, 
+  0x7b21, 0x187e, 0x7b34, 0x184f, 0x7b47, 0x1821, 0x7b5a, 0x17f2, 
+  0x7b6d, 0x17c4, 0x7b7f, 0x1795, 0x7b92, 0x1766, 0x7ba4, 0x1737, 
+  0x7bb6, 0x1709, 0x7bc8, 0x16da, 0x7bda, 0x16ab, 0x7bec, 0x167c, 
+  0x7bfd, 0x164c, 0x7c0f, 0x161d, 0x7c20, 0x15ee, 0x7c31, 0x15bf, 
+  0x7c42, 0x1590, 0x7c53, 0x1560, 0x7c64, 0x1531, 0x7c74, 0x1501, 
+  0x7c85, 0x14d2, 0x7c95, 0x14a2, 0x7ca5, 0x1473, 0x7cb5, 0x1443, 
+  0x7cc5, 0x1413, 0x7cd5, 0x13e4, 0x7ce4, 0x13b4, 0x7cf4, 0x1384, 
+  0x7d03, 0x1354, 0x7d12, 0x1324, 0x7d21, 0x12f4, 0x7d30, 0x12c4, 
+  0x7d3f, 0x1294, 0x7d4d, 0x1264, 0x7d5b, 0x1234, 0x7d6a, 0x1204, 
+  0x7d78, 0x11d3, 0x7d86, 0x11a3, 0x7d93, 0x1173, 0x7da1, 0x1142, 
+  0x7daf, 0x1112, 0x7dbc, 0x10e1, 0x7dc9, 0x10b1, 0x7dd6, 0x1080, 
+  0x7de3, 0x1050, 0x7df0, 0x101f, 0x7dfc, 0xfee, 0x7e09, 0xfbe, 
+  0x7e15, 0xf8d, 0x7e21, 0xf5c, 0x7e2d, 0xf2b, 0x7e39, 0xefb, 
+  0x7e45, 0xeca, 0x7e50, 0xe99, 0x7e5c, 0xe68, 0x7e67, 0xe37, 
+  0x7e72, 0xe06, 0x7e7d, 0xdd5, 0x7e88, 0xda4, 0x7e92, 0xd72, 
+  0x7e9d, 0xd41, 0x7ea7, 0xd10, 0x7eb1, 0xcdf, 0x7ebb, 0xcae, 
+  0x7ec5, 0xc7c, 0x7ecf, 0xc4b, 0x7ed8, 0xc1a, 0x7ee2, 0xbe8, 
+  0x7eeb, 0xbb7, 0x7ef4, 0xb85, 0x7efd, 0xb54, 0x7f06, 0xb23, 
+  0x7f0f, 0xaf1, 0x7f17, 0xac0, 0x7f20, 0xa8e, 0x7f28, 0xa5c, 
+  0x7f30, 0xa2b, 0x7f38, 0x9f9, 0x7f40, 0x9c7, 0x7f47, 0x996, 
+  0x7f4f, 0x964, 0x7f56, 0x932, 0x7f5d, 0x901, 0x7f64, 0x8cf, 
+  0x7f6b, 0x89d, 0x7f72, 0x86b, 0x7f78, 0x839, 0x7f7f, 0x807, 
+  0x7f85, 0x7d6, 0x7f8b, 0x7a4, 0x7f91, 0x772, 0x7f97, 0x740, 
+  0x7f9c, 0x70e, 0x7fa2, 0x6dc, 0x7fa7, 0x6aa, 0x7fac, 0x678, 
+  0x7fb1, 0x646, 0x7fb6, 0x614, 0x7fbb, 0x5e2, 0x7fbf, 0x5b0, 
+  0x7fc4, 0x57e, 0x7fc8, 0x54c, 0x7fcc, 0x51a, 0x7fd0, 0x4e7, 
+  0x7fd4, 0x4b5, 0x7fd7, 0x483, 0x7fdb, 0x451, 0x7fde, 0x41f, 
+  0x7fe1, 0x3ed, 0x7fe4, 0x3bb, 0x7fe7, 0x388, 0x7fea, 0x356, 
+  0x7fec, 0x324, 0x7fef, 0x2f2, 0x7ff1, 0x2c0, 0x7ff3, 0x28d, 
+  0x7ff5, 0x25b, 0x7ff7, 0x229, 0x7ff8, 0x1f7, 0x7ffa, 0x1c4, 
+  0x7ffb, 0x192, 0x7ffc, 0x160, 0x7ffd, 0x12e, 0x7ffe, 0xfb, 
+  0x7fff, 0xc9, 0x7fff, 0x97, 0x7fff, 0x65, 0x7fff, 0x32, 
+  0x7fff, 0x0, 0x7fff, 0xffce, 0x7fff, 0xff9b, 0x7fff, 0xff69, 
+  0x7fff, 0xff37, 0x7ffe, 0xff05, 0x7ffd, 0xfed2, 0x7ffc, 0xfea0, 
+  0x7ffb, 0xfe6e, 0x7ffa, 0xfe3c, 0x7ff8, 0xfe09, 0x7ff7, 0xfdd7, 
+  0x7ff5, 0xfda5, 0x7ff3, 0xfd73, 0x7ff1, 0xfd40, 0x7fef, 0xfd0e, 
+  0x7fec, 0xfcdc, 0x7fea, 0xfcaa, 0x7fe7, 0xfc78, 0x7fe4, 0xfc45, 
+  0x7fe1, 0xfc13, 0x7fde, 0xfbe1, 0x7fdb, 0xfbaf, 0x7fd7, 0xfb7d, 
+  0x7fd4, 0xfb4b, 0x7fd0, 0xfb19, 0x7fcc, 0xfae6, 0x7fc8, 0xfab4, 
+  0x7fc4, 0xfa82, 0x7fbf, 0xfa50, 0x7fbb, 0xfa1e, 0x7fb6, 0xf9ec, 
+  0x7fb1, 0xf9ba, 0x7fac, 0xf988, 0x7fa7, 0xf956, 0x7fa2, 0xf924, 
+  0x7f9c, 0xf8f2, 0x7f97, 0xf8c0, 0x7f91, 0xf88e, 0x7f8b, 0xf85c, 
+  0x7f85, 0xf82a, 0x7f7f, 0xf7f9, 0x7f78, 0xf7c7, 0x7f72, 0xf795, 
+  0x7f6b, 0xf763, 0x7f64, 0xf731, 0x7f5d, 0xf6ff, 0x7f56, 0xf6ce, 
+  0x7f4f, 0xf69c, 0x7f47, 0xf66a, 0x7f40, 0xf639, 0x7f38, 0xf607, 
+  0x7f30, 0xf5d5, 0x7f28, 0xf5a4, 0x7f20, 0xf572, 0x7f17, 0xf540, 
+  0x7f0f, 0xf50f, 0x7f06, 0xf4dd, 0x7efd, 0xf4ac, 0x7ef4, 0xf47b, 
+  0x7eeb, 0xf449, 0x7ee2, 0xf418, 0x7ed8, 0xf3e6, 0x7ecf, 0xf3b5, 
+  0x7ec5, 0xf384, 0x7ebb, 0xf352, 0x7eb1, 0xf321, 0x7ea7, 0xf2f0, 
+  0x7e9d, 0xf2bf, 0x7e92, 0xf28e, 0x7e88, 0xf25c, 0x7e7d, 0xf22b, 
+  0x7e72, 0xf1fa, 0x7e67, 0xf1c9, 0x7e5c, 0xf198, 0x7e50, 0xf167, 
+  0x7e45, 0xf136, 0x7e39, 0xf105, 0x7e2d, 0xf0d5, 0x7e21, 0xf0a4, 
+  0x7e15, 0xf073, 0x7e09, 0xf042, 0x7dfc, 0xf012, 0x7df0, 0xefe1, 
+  0x7de3, 0xefb0, 0x7dd6, 0xef80, 0x7dc9, 0xef4f, 0x7dbc, 0xef1f, 
+  0x7daf, 0xeeee, 0x7da1, 0xeebe, 0x7d93, 0xee8d, 0x7d86, 0xee5d, 
+  0x7d78, 0xee2d, 0x7d6a, 0xedfc, 0x7d5b, 0xedcc, 0x7d4d, 0xed9c, 
+  0x7d3f, 0xed6c, 0x7d30, 0xed3c, 0x7d21, 0xed0c, 0x7d12, 0xecdc, 
+  0x7d03, 0xecac, 0x7cf4, 0xec7c, 0x7ce4, 0xec4c, 0x7cd5, 0xec1c, 
+  0x7cc5, 0xebed, 0x7cb5, 0xebbd, 0x7ca5, 0xeb8d, 0x7c95, 0xeb5e, 
+  0x7c85, 0xeb2e, 0x7c74, 0xeaff, 0x7c64, 0xeacf, 0x7c53, 0xeaa0, 
+  0x7c42, 0xea70, 0x7c31, 0xea41, 0x7c20, 0xea12, 0x7c0f, 0xe9e3, 
+  0x7bfd, 0xe9b4, 0x7bec, 0xe984, 0x7bda, 0xe955, 0x7bc8, 0xe926, 
+  0x7bb6, 0xe8f7, 0x7ba4, 0xe8c9, 0x7b92, 0xe89a, 0x7b7f, 0xe86b, 
+  0x7b6d, 0xe83c, 0x7b5a, 0xe80e, 0x7b47, 0xe7df, 0x7b34, 0xe7b1, 
+  0x7b21, 0xe782, 0x7b0e, 0xe754, 0x7afa, 0xe725, 0x7ae6, 0xe6f7, 
+  0x7ad3, 0xe6c9, 0x7abf, 0xe69b, 0x7aab, 0xe66d, 0x7a97, 0xe63f, 
+  0x7a82, 0xe611, 0x7a6e, 0xe5e3, 0x7a59, 0xe5b5, 0x7a45, 0xe587, 
+  0x7a30, 0xe559, 0x7a1b, 0xe52c, 0x7a06, 0xe4fe, 0x79f0, 0xe4d0, 
+  0x79db, 0xe4a3, 0x79c5, 0xe476, 0x79b0, 0xe448, 0x799a, 0xe41b, 
+  0x7984, 0xe3ee, 0x796e, 0xe3c1, 0x7958, 0xe394, 0x7941, 0xe367, 
+  0x792b, 0xe33a, 0x7914, 0xe30d, 0x78fd, 0xe2e0, 0x78e6, 0xe2b3, 
+  0x78cf, 0xe287, 0x78b8, 0xe25a, 0x78a1, 0xe22d, 0x7889, 0xe201, 
+  0x7871, 0xe1d5, 0x785a, 0xe1a8, 0x7842, 0xe17c, 0x782a, 0xe150, 
+  0x7812, 0xe124, 0x77f9, 0xe0f8, 0x77e1, 0xe0cc, 0x77c8, 0xe0a0, 
+  0x77b0, 0xe074, 0x7797, 0xe049, 0x777e, 0xe01d, 0x7765, 0xdff1, 
+  0x774b, 0xdfc6, 0x7732, 0xdf9b, 0x7718, 0xdf6f, 0x76ff, 0xdf44, 
+  0x76e5, 0xdf19, 0x76cb, 0xdeee, 0x76b1, 0xdec3, 0x7697, 0xde98, 
+  0x767d, 0xde6d, 0x7662, 0xde42, 0x7648, 0xde18, 0x762d, 0xdded, 
+  0x7612, 0xddc3, 0x75f7, 0xdd98, 0x75dc, 0xdd6e, 0x75c1, 0xdd44, 
+  0x75a5, 0xdd19, 0x758a, 0xdcef, 0x756e, 0xdcc5, 0x7553, 0xdc9b, 
+  0x7537, 0xdc72, 0x751b, 0xdc48, 0x74ff, 0xdc1e, 0x74e2, 0xdbf5, 
+  0x74c6, 0xdbcb, 0x74aa, 0xdba2, 0x748d, 0xdb78, 0x7470, 0xdb4f, 
+  0x7453, 0xdb26, 0x7436, 0xdafd, 0x7419, 0xdad4, 0x73fc, 0xdaab, 
+  0x73df, 0xda82, 0x73c1, 0xda5a, 0x73a3, 0xda31, 0x7386, 0xda08, 
+  0x7368, 0xd9e0, 0x734a, 0xd9b8, 0x732c, 0xd98f, 0x730d, 0xd967, 
+  0x72ef, 0xd93f, 0x72d0, 0xd917, 0x72b2, 0xd8ef, 0x7293, 0xd8c8, 
+  0x7274, 0xd8a0, 0x7255, 0xd878, 0x7236, 0xd851, 0x7217, 0xd82a, 
+  0x71f8, 0xd802, 0x71d8, 0xd7db, 0x71b9, 0xd7b4, 0x7199, 0xd78d, 
+  0x7179, 0xd766, 0x7159, 0xd73f, 0x7139, 0xd719, 0x7119, 0xd6f2, 
+  0x70f9, 0xd6cb, 0x70d8, 0xd6a5, 0x70b8, 0xd67f, 0x7097, 0xd659, 
+  0x7076, 0xd632, 0x7055, 0xd60c, 0x7034, 0xd5e6, 0x7013, 0xd5c1, 
+  0x6ff2, 0xd59b, 0x6fd0, 0xd575, 0x6faf, 0xd550, 0x6f8d, 0xd52a, 
+  0x6f6c, 0xd505, 0x6f4a, 0xd4e0, 0x6f28, 0xd4bb, 0x6f06, 0xd496, 
+  0x6ee4, 0xd471, 0x6ec2, 0xd44c, 0x6e9f, 0xd428, 0x6e7d, 0xd403, 
+  0x6e5a, 0xd3df, 0x6e37, 0xd3ba, 0x6e15, 0xd396, 0x6df2, 0xd372, 
+  0x6dcf, 0xd34e, 0x6dab, 0xd32a, 0x6d88, 0xd306, 0x6d65, 0xd2e2, 
+  0x6d41, 0xd2bf, 0x6d1e, 0xd29b, 0x6cfa, 0xd278, 0x6cd6, 0xd255, 
+  0x6cb2, 0xd231, 0x6c8e, 0xd20e, 0x6c6a, 0xd1eb, 0x6c46, 0xd1c9, 
+  0x6c21, 0xd1a6, 0x6bfd, 0xd183, 0x6bd8, 0xd161, 0x6bb4, 0xd13e, 
+  0x6b8f, 0xd11c, 0x6b6a, 0xd0fa, 0x6b45, 0xd0d8, 0x6b20, 0xd0b6, 
+  0x6afb, 0xd094, 0x6ad6, 0xd073, 0x6ab0, 0xd051, 0x6a8b, 0xd030, 
+  0x6a65, 0xd00e, 0x6a3f, 0xcfed, 0x6a1a, 0xcfcc, 0x69f4, 0xcfab, 
+  0x69ce, 0xcf8a, 0x69a7, 0xcf69, 0x6981, 0xcf48, 0x695b, 0xcf28, 
+  0x6935, 0xcf07, 0x690e, 0xcee7, 0x68e7, 0xcec7, 0x68c1, 0xcea7, 
+  0x689a, 0xce87, 0x6873, 0xce67, 0x684c, 0xce47, 0x6825, 0xce28, 
+  0x67fe, 0xce08, 0x67d6, 0xcde9, 0x67af, 0xcdca, 0x6788, 0xcdab, 
+  0x6760, 0xcd8c, 0x6738, 0xcd6d, 0x6711, 0xcd4e, 0x66e9, 0xcd30, 
+  0x66c1, 0xcd11, 0x6699, 0xccf3, 0x6671, 0xccd4, 0x6648, 0xccb6, 
+  0x6620, 0xcc98, 0x65f8, 0xcc7a, 0x65cf, 0xcc5d, 0x65a6, 0xcc3f, 
+  0x657e, 0xcc21, 0x6555, 0xcc04, 0x652c, 0xcbe7, 0x6503, 0xcbca, 
+  0x64da, 0xcbad, 0x64b1, 0xcb90, 0x6488, 0xcb73, 0x645e, 0xcb56, 
+  0x6435, 0xcb3a, 0x640b, 0xcb1e, 0x63e2, 0xcb01, 0x63b8, 0xcae5, 
+  0x638e, 0xcac9, 0x6365, 0xcaad, 0x633b, 0xca92, 0x6311, 0xca76, 
+  0x62e7, 0xca5b, 0x62bc, 0xca3f, 0x6292, 0xca24, 0x6268, 0xca09, 
+  0x623d, 0xc9ee, 0x6213, 0xc9d3, 0x61e8, 0xc9b8, 0x61be, 0xc99e, 
+  0x6193, 0xc983, 0x6168, 0xc969, 0x613d, 0xc94f, 0x6112, 0xc935, 
+  0x60e7, 0xc91b, 0x60bc, 0xc901, 0x6091, 0xc8e8, 0x6065, 0xc8ce, 
+  0x603a, 0xc8b5, 0x600f, 0xc89b, 0x5fe3, 0xc882, 0x5fb7, 0xc869, 
+  0x5f8c, 0xc850, 0x5f60, 0xc838, 0x5f34, 0xc81f, 0x5f08, 0xc807, 
+  0x5edc, 0xc7ee, 0x5eb0, 0xc7d6, 0x5e84, 0xc7be, 0x5e58, 0xc7a6, 
+  0x5e2b, 0xc78f, 0x5dff, 0xc777, 0x5dd3, 0xc75f, 0x5da6, 0xc748, 
+  0x5d79, 0xc731, 0x5d4d, 0xc71a, 0x5d20, 0xc703, 0x5cf3, 0xc6ec, 
+  0x5cc6, 0xc6d5, 0x5c99, 0xc6bf, 0x5c6c, 0xc6a8, 0x5c3f, 0xc692, 
+  0x5c12, 0xc67c, 0x5be5, 0xc666, 0x5bb8, 0xc650, 0x5b8a, 0xc63b, 
+  0x5b5d, 0xc625, 0x5b30, 0xc610, 0x5b02, 0xc5fa, 0x5ad4, 0xc5e5, 
+  0x5aa7, 0xc5d0, 0x5a79, 0xc5bb, 0x5a4b, 0xc5a7, 0x5a1d, 0xc592, 
+  0x59ef, 0xc57e, 0x59c1, 0xc569, 0x5993, 0xc555, 0x5965, 0xc541, 
+  0x5937, 0xc52d, 0x5909, 0xc51a, 0x58db, 0xc506, 0x58ac, 0xc4f2, 
+  0x587e, 0xc4df, 0x584f, 0xc4cc, 0x5821, 0xc4b9, 0x57f2, 0xc4a6, 
+  0x57c4, 0xc493, 0x5795, 0xc481, 0x5766, 0xc46e, 0x5737, 0xc45c, 
+  0x5709, 0xc44a, 0x56da, 0xc438, 0x56ab, 0xc426, 0x567c, 0xc414, 
+  0x564c, 0xc403, 0x561d, 0xc3f1, 0x55ee, 0xc3e0, 0x55bf, 0xc3cf, 
+  0x5590, 0xc3be, 0x5560, 0xc3ad, 0x5531, 0xc39c, 0x5501, 0xc38c, 
+  0x54d2, 0xc37b, 0x54a2, 0xc36b, 0x5473, 0xc35b, 0x5443, 0xc34b, 
+  0x5413, 0xc33b, 0x53e4, 0xc32b, 0x53b4, 0xc31c, 0x5384, 0xc30c, 
+  0x5354, 0xc2fd, 0x5324, 0xc2ee, 0x52f4, 0xc2df, 0x52c4, 0xc2d0, 
+  0x5294, 0xc2c1, 0x5264, 0xc2b3, 0x5234, 0xc2a5, 0x5204, 0xc296, 
+  0x51d3, 0xc288, 0x51a3, 0xc27a, 0x5173, 0xc26d, 0x5142, 0xc25f, 
+  0x5112, 0xc251, 0x50e1, 0xc244, 0x50b1, 0xc237, 0x5080, 0xc22a, 
+  0x5050, 0xc21d, 0x501f, 0xc210, 0x4fee, 0xc204, 0x4fbe, 0xc1f7, 
+  0x4f8d, 0xc1eb, 0x4f5c, 0xc1df, 0x4f2b, 0xc1d3, 0x4efb, 0xc1c7, 
+  0x4eca, 0xc1bb, 0x4e99, 0xc1b0, 0x4e68, 0xc1a4, 0x4e37, 0xc199, 
+  0x4e06, 0xc18e, 0x4dd5, 0xc183, 0x4da4, 0xc178, 0x4d72, 0xc16e, 
+  0x4d41, 0xc163, 0x4d10, 0xc159, 0x4cdf, 0xc14f, 0x4cae, 0xc145, 
+  0x4c7c, 0xc13b, 0x4c4b, 0xc131, 0x4c1a, 0xc128, 0x4be8, 0xc11e, 
+  0x4bb7, 0xc115, 0x4b85, 0xc10c, 0x4b54, 0xc103, 0x4b23, 0xc0fa, 
+  0x4af1, 0xc0f1, 0x4ac0, 0xc0e9, 0x4a8e, 0xc0e0, 0x4a5c, 0xc0d8, 
+  0x4a2b, 0xc0d0, 0x49f9, 0xc0c8, 0x49c7, 0xc0c0, 0x4996, 0xc0b9, 
+  0x4964, 0xc0b1, 0x4932, 0xc0aa, 0x4901, 0xc0a3, 0x48cf, 0xc09c, 
+  0x489d, 0xc095, 0x486b, 0xc08e, 0x4839, 0xc088, 0x4807, 0xc081, 
+  0x47d6, 0xc07b, 0x47a4, 0xc075, 0x4772, 0xc06f, 0x4740, 0xc069, 
+  0x470e, 0xc064, 0x46dc, 0xc05e, 0x46aa, 0xc059, 0x4678, 0xc054, 
+  0x4646, 0xc04f, 0x4614, 0xc04a, 0x45e2, 0xc045, 0x45b0, 0xc041, 
+  0x457e, 0xc03c, 0x454c, 0xc038, 0x451a, 0xc034, 0x44e7, 0xc030, 
+  0x44b5, 0xc02c, 0x4483, 0xc029, 0x4451, 0xc025, 0x441f, 0xc022, 
+  0x43ed, 0xc01f, 0x43bb, 0xc01c, 0x4388, 0xc019, 0x4356, 0xc016, 
+  0x4324, 0xc014, 0x42f2, 0xc011, 0x42c0, 0xc00f, 0x428d, 0xc00d, 
+  0x425b, 0xc00b, 0x4229, 0xc009, 0x41f7, 0xc008, 0x41c4, 0xc006, 
+  0x4192, 0xc005, 0x4160, 0xc004, 0x412e, 0xc003, 0x40fb, 0xc002, 
+  0x40c9, 0xc001, 0x4097, 0xc001, 0x4065, 0xc000, 0x4032, 0xc000 
+}; 
+ 
+/**
+* @brief  Initialization function for the Q15 RFFT/RIFFT.
+* @param[in, out] *S             points to an instance of the Q15 RFFT/RIFFT structure.
+* @param[in]      *S_CFFT        points to an instance of the Q15 CFFT/CIFFT structure.
+* @param[in]      fftLenReal     length of the FFT.
+* @param[in]      ifftFlagR      flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform.
+* @param[in]      bitReverseFlag flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output.
+* @return		The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>fftLenReal</code> is not a supported value.
+*
+* \par Description:
+* \par
+* The parameter <code>fftLenReal</code> specifies the length of the RFFT/RIFFT process. Supported FFT lengths are 128, 512 and 2048.
+* The length is checked using the full 32-bit argument, so a value that only matches a supported
+* length after being narrowed to 16 bits (for example 0x10080) is reported as ARM_MATH_ARGUMENT_ERROR.
+* \par
+* The parameter <code>ifftFlagR</code> controls whether a forward or inverse transform is computed.
+* Set(=1) ifftFlagR to calculate RIFFT, otherwise RFFT is calculated.
+* \par
+* The parameter <code>bitReverseFlag</code> controls whether output is in normal order or bit reversed order.
+* Set(=1) bitReverseFlag for output to be in normal order otherwise output is in bit reversed order.
+* \par
+* This function also initializes the twiddle factor table pointers and the complex FFT instance
+* <code>S_CFFT</code> for a length of <code>fftLenReal/2</code>. All instance fields are written
+* even when ARM_MATH_ARGUMENT_ERROR is returned.
+*/
+
+arm_status arm_rfft_init_q15(
+  arm_rfft_instance_q15 * S,
+  arm_cfft_radix4_instance_q15 * S_CFFT,
+  uint32_t fftLenReal,
+  uint32_t ifftFlagR,
+  uint32_t bitReverseFlag)
+{
+
+  /*  Initialise the default arm status */
+  arm_status status = ARM_MATH_SUCCESS;
+
+  /*  Initialize the Real FFT length (narrowed to 16 bits by the cast) */
+  S->fftLenReal = (uint16_t) fftLenReal;
+
+  /*  Initialize the Complex FFT length: half of the (narrowed) real length */
+  S->fftLenBy2 = (uint16_t) fftLenReal / 2u;
+
+  /*  Initialize the Twiddle coefficientA pointer */
+  S->pTwiddleAReal = (q15_t *) realCoefAQ15;
+
+  /*  Initialize the Twiddle coefficientB pointer */
+  S->pTwiddleBReal = (q15_t *) realCoefBQ15;
+
+  /*  Initialize the Flag for selection of RFFT or RIFFT */
+  S->ifftFlagR = (uint8_t) ifftFlagR;
+
+  /*  Initialize the Flag for calculation Bit reversal or not */
+  S->bitReverseFlagR = (uint8_t) bitReverseFlag;
+
+  /*  Initialization of coef modifier depending on the FFT length.
+   *  The untruncated argument is tested (not the narrowed copy stored in S),
+   *  so an out-of-range length cannot alias onto a supported one. */
+  switch (fftLenReal)
+  {
+  case 2048u:
+    S->twidCoefRModifier = 1u;
+    break;
+  case 512u:
+    S->twidCoefRModifier = 4u;
+    break;
+  case 128u:
+    S->twidCoefRModifier = 16u;
+    break;
+  default:
+    /*  Reporting argument error if fftLenReal is not a supported value */
+    status = ARM_MATH_ARGUMENT_ERROR;
+    break;
+  }
+
+  /* Init Complex FFT Instance */
+  S->pCfft = S_CFFT;
+
+  /* The complex FFT is always initialised with bit reversal enabled (last
+   * argument 1u), independently of bitReverseFlag. Its return value is not
+   * checked here; the status returned below reflects only the fftLenReal check. */
+  if(S->ifftFlagR)
+  {
+    /* Initializes the CIFFT Module for fftLenreal/2 length */
+    arm_cfft_radix4_init_q15(S->pCfft, S->fftLenBy2, 1u, 1u);
+  }
+  else
+  {
+    /* Initializes the CFFT Module for fftLenreal/2 length */
+    arm_cfft_radix4_init_q15(S->pCfft, S->fftLenBy2, 0u, 1u);
+  }
+
+  /* return the status of RFFT Init function */
+  return (status);
+
+}
+ 
+  /**  
+   * @} end of RFFT_RIFFT group  
+   */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/TransformFunctions/arm_rfft_init_q31.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,1192 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:	    arm_rfft_init_q31.c  
+*  
+* Description:	RFFT & RIFFT Q31 initialisation function  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/**  
+ * @ingroup groupTransforms  
+ */ 
+ 
+/**  
+ * @addtogroup RFFT_RIFFT  
+ * @{  
+ */ 
+ 
+/**  
+* \par  
+* Generation of the floating-point realCoefAQ31 array:
+* \par  
+* n = 1024  
+* <pre>for (i = 0; i < n; i++)  
+* {  
+*    pATable[2 * i] = 0.5 * (1.0 - sin (2 * PI / (double) (2 * n) * (double) i));  
+*    pATable[2 * i + 1] = 0.5 * (-1.0 * cos (2 * PI / (double) (2 * n) * (double) i));  
+* }</pre>  
+* \par  
+* Convert to fixed point Q31 format  
+*     round(pATable[i] * pow(2, 31))  
+*/ 
+ 
+ 
+static const q31_t realCoefAQ31[2048] = { 
+  0x40000000, 0xc0000000, 0x3fcdbc0e, 0xc00013be, 0x3f9b783c, 0xc0004ef5, 
+  0x3f6934a7, 0xc000b1a7, 
+  0x3f36f170, 0xc0013bd3, 0x3f04aeb4, 0xc001ed79, 0x3ed26c94, 0xc002c698, 
+  0x3ea02b2d, 0xc003c730, 
+  0x3e6deaa0, 0xc004ef3f, 0x3e3bab0b, 0xc0063ec7, 0x3e096c8c, 0xc007b5c5, 
+  0x3dd72f44, 0xc0095438, 
+  0x3da4f351, 0xc00b1a21, 0x3d72b8d2, 0xc00d077c, 0x3d407fe5, 0xc00f1c4b, 
+  0x3d0e48ab, 0xc011588a, 
+  0x3cdc1341, 0xc013bc3a, 0x3ca9dfc8, 0xc0164757, 0x3c77ae5d, 0xc018f9e1, 
+  0x3c457f20, 0xc01bd3d7, 
+  0x3c135230, 0xc01ed535, 0x3be127ac, 0xc021fdfb, 0x3baeffb2, 0xc0254e27, 
+  0x3b7cda62, 0xc028c5b6, 
+  0x3b4ab7db, 0xc02c64a6, 0x3b18983b, 0xc0302af6, 0x3ae67ba1, 0xc03418a2, 
+  0x3ab4622d, 0xc0382da9, 
+  0x3a824bfd, 0xc03c6a07, 0x3a503930, 0xc040cdbb, 0x3a1e29e5, 0xc04558c1, 
+  0x39ec1e3b, 0xc04a0b16, 
+  0x39ba1650, 0xc04ee4b9, 0x39881245, 0xc053e5a5, 0x39561236, 0xc0590dd8, 
+  0x39241644, 0xc05e5d4f, 
+  0x38f21e8e, 0xc063d406, 0x38c02b31, 0xc06971f9, 0x388e3c4d, 0xc06f3727, 
+  0x385c5200, 0xc075238a, 
+  0x382a6c6a, 0xc07b371f, 0x37f88ba9, 0xc08171e2, 0x37c6afdc, 0xc087d3d1, 
+  0x3794d921, 0xc08e5ce6, 
+  0x37630798, 0xc0950d1d, 0x37313b5f, 0xc09be473, 0x36ff7495, 0xc0a2e2e4, 
+  0x36cdb359, 0xc0aa086a, 
+  0x369bf7c8, 0xc0b15502, 0x366a4203, 0xc0b8c8a8, 0x36389227, 0xc0c06355, 
+  0x3606e854, 0xc0c82507, 
+  0x35d544a7, 0xc0d00db7, 0x35a3a740, 0xc0d81d61, 0x3572103d, 0xc0e05401, 
+  0x35407fbc, 0xc0e8b191, 
+  0x350ef5dd, 0xc0f1360c, 0x34dd72be, 0xc0f9e16c, 0x34abf67d, 0xc102b3ad, 
+  0x347a8139, 0xc10bacc8, 
+  0x34491310, 0xc114ccb9, 0x3417ac22, 0xc11e137a, 0x33e64c8b, 0xc1278105, 
+  0x33b4f46c, 0xc1311553, 
+  0x3383a3e1, 0xc13ad061, 0x33525b0b, 0xc144b226, 0x33211a06, 0xc14eba9e, 
+  0x32efe0f2, 0xc158e9c1, 
+  0x32beafed, 0xc1633f8a, 0x328d8715, 0xc16dbbf3, 0x325c6688, 0xc1785ef5, 
+  0x322b4e65, 0xc1832888, 
+  0x31fa3eca, 0xc18e18a8, 0x31c937d6, 0xc1992f4c, 0x319839a6, 0xc1a46c6e, 
+  0x31674459, 0xc1afd008, 
+  0x3136580d, 0xc1bb5a12, 0x310574e0, 0xc1c70a84, 0x30d49af0, 0xc1d2e159, 
+  0x30a3ca5c, 0xc1dede87, 
+  0x30730342, 0xc1eb0209, 0x304245bf, 0xc1f74bd7, 0x301191f2, 0xc203bbe8, 
+  0x2fe0e7f9, 0xc2105236, 
+  0x2fb047f1, 0xc21d0eb9, 0x2f7fb1fa, 0xc229f168, 0x2f4f2630, 0xc236fa3c, 
+  0x2f1ea4b1, 0xc244292c, 
+  0x2eee2d9d, 0xc2517e32, 0x2ebdc110, 0xc25ef943, 0x2e8d5f28, 0xc26c9a59, 
+  0x2e5d0804, 0xc27a616a, 
+  0x2e2cbbc0, 0xc2884e6f, 0x2dfc7a7c, 0xc296615e, 0x2dcc4454, 0xc2a49a2f, 
+  0x2d9c1966, 0xc2b2f8d9, 
+  0x2d6bf9d1, 0xc2c17d53, 0x2d3be5b1, 0xc2d02794, 0x2d0bdd25, 0xc2def794, 
+  0x2cdbe049, 0xc2eded49, 
+  0x2cabef3d, 0xc2fd08aa, 0x2c7c0a1c, 0xc30c49ad, 0x2c4c3105, 0xc31bb04a, 
+  0x2c1c6416, 0xc32b3c76, 
+  0x2beca36b, 0xc33aee28, 0x2bbcef23, 0xc34ac556, 0x2b8d475a, 0xc35ac1f8, 
+  0x2b5dac2e, 0xc36ae401, 
+  0x2b2e1dbd, 0xc37b2b6a, 0x2afe9c23, 0xc38b9828, 0x2acf277f, 0xc39c2a30, 
+  0x2a9fbfed, 0xc3ace178, 
+  0x2a70658a, 0xc3bdbdf7, 0x2a411874, 0xc3cebfa1, 0x2a11d8c8, 0xc3dfe66c, 
+  0x29e2a6a3, 0xc3f1324e, 
+  0x29b38222, 0xc402a33c, 0x29846b63, 0xc414392b, 0x29556282, 0xc425f411, 
+  0x2926679c, 0xc437d3e2, 
+  0x28f77acf, 0xc449d893, 0x28c89c36, 0xc45c0219, 0x2899cbf0, 0xc46e506a, 
+  0x286b0a19, 0xc480c379, 
+  0x283c56ce, 0xc4935b3c, 0x280db22c, 0xc4a617a7, 0x27df1c4f, 0xc4b8f8ae, 
+  0x27b09555, 0xc4cbfe45, 
+  0x27821d59, 0xc4df2862, 0x2753b479, 0xc4f276f8, 0x27255ad1, 0xc505e9fb, 
+  0x26f7107e, 0xc519815f, 
+  0x26c8d59c, 0xc52d3d19, 0x269aaa48, 0xc5411d1b, 0x266c8e9e, 0xc555215b, 
+  0x263e82bb, 0xc56949ca, 
+  0x261086bc, 0xc57d965e, 0x25e29abc, 0xc5920709, 0x25b4bed8, 0xc5a69bbf, 
+  0x2586f32c, 0xc5bb5473, 
+  0x255937d4, 0xc5d03118, 0x252b8ced, 0xc5e531a2, 0x24fdf293, 0xc5fa5603, 
+  0x24d068e2, 0xc60f9e2f, 
+  0x24a2eff6, 0xc6250a18, 0x247587eb, 0xc63a99b1, 0x244830dc, 0xc6504ced, 
+  0x241aeae8, 0xc66623bf, 
+  0x23edb627, 0xc67c1e19, 0x23c092b8, 0xc6923bec, 0x239380b6, 0xc6a87d2d, 
+  0x2366803c, 0xc6bee1cd, 
+  0x23399166, 0xc6d569be, 0x230cb451, 0xc6ec14f2, 0x22dfe917, 0xc702e35c, 
+  0x22b32fd4, 0xc719d4ed, 
+  0x228688a4, 0xc730e997, 0x2259f3a3, 0xc748214d, 0x222d70eb, 0xc75f7bfe, 
+  0x22010099, 0xc776f99e, 
+  0x21d4a2c7, 0xc78e9a1d, 0x21a85792, 0xc7a65d6e, 0x217c1f15, 0xc7be4381, 
+  0x214ff96a, 0xc7d64c48, 
+  0x2123e6ad, 0xc7ee77b4, 0x20f7e6f9, 0xc806c5b5, 0x20cbfa69, 0xc81f363e, 
+  0x20a02119, 0xc837c93e, 
+  0x20745b24, 0xc8507ea8, 0x2048a8a3, 0xc869566a, 0x201d09b4, 0xc8825077, 
+  0x1ff17e6f, 0xc89b6cbf, 
+  0x1fc606f1, 0xc8b4ab32, 0x1f9aa354, 0xc8ce0bc1, 0x1f6f53b2, 0xc8e78e5c, 
+  0x1f441827, 0xc90132f3, 
+  0x1f18f0cd, 0xc91af976, 0x1eedddbf, 0xc934e1d7, 0x1ec2df17, 0xc94eec03, 
+  0x1e97f4f0, 0xc96917ed, 
+  0x1e6d1f65, 0xc9836583, 0x1e425e8f, 0xc99dd4b5, 0x1e17b289, 0xc9b86572, 
+  0x1ded1b6e, 0xc9d317ac, 
+  0x1dc29957, 0xc9edeb50, 0x1d982c60, 0xca08e04f, 0x1d6dd4a1, 0xca23f698, 
+  0x1d439236, 0xca3f2e1a, 
+  0x1d196538, 0xca5a86c4, 0x1cef4dc1, 0xca760086, 0x1cc54bec, 0xca919b4e, 
+  0x1c9b5fd1, 0xcaad570c, 
+  0x1c71898c, 0xcac933ae, 0x1c47c936, 0xcae53124, 0x1c1e1ee8, 0xcb014f5b, 
+  0x1bf48abd, 0xcb1d8e43, 
+  0x1bcb0ccd, 0xcb39edca, 0x1ba1a533, 0xcb566ddf, 0x1b785408, 0xcb730e70, 
+  0x1b4f1966, 0xcb8fcf6c, 
+  0x1b25f566, 0xcbacb0c0, 0x1afce821, 0xcbc9b25b, 0x1ad3f1b1, 0xcbe6d42b, 
+  0x1aab122f, 0xcc04161e, 
+  0x1a8249b4, 0xcc217822, 0x1a599859, 0xcc3efa25, 0x1a30fe38, 0xcc5c9c15, 
+  0x1a087b69, 0xcc7a5ddf, 
+  0x19e01006, 0xcc983f71, 0x19b7bc27, 0xccb640b8, 0x198f7fe5, 0xccd461a3, 
+  0x19675b5a, 0xccf2a21e, 
+  0x193f4e9d, 0xcd110217, 0x191759c8, 0xcd2f817b, 0x18ef7cf4, 0xcd4e2037, 
+  0x18c7b838, 0xcd6cde39, 
+  0x18a00bad, 0xcd8bbb6d, 0x1878776c, 0xcdaab7c1, 0x1850fb8e, 0xcdc9d321, 
+  0x1829982a, 0xcde90d7a, 
+  0x18024d59, 0xce0866b9, 0x17db1b33, 0xce27deca, 0x17b401d0, 0xce47759a, 
+  0x178d0149, 0xce672b16, 
+  0x176619b5, 0xce86ff2a, 0x173f4b2d, 0xcea6f1c3, 0x171895c8, 0xcec702cc, 
+  0x16f1f99f, 0xcee73232, 
+  0x16cb76c8, 0xcf077fe1, 0x16a50d5d, 0xcf27ebc5, 0x167ebd74, 0xcf4875cb, 
+  0x16588725, 0xcf691ddd, 
+  0x16326a88, 0xcf89e3e9, 0x160c67b4, 0xcfaac7d9, 0x15e67ec1, 0xcfcbc999, 
+  0x15c0afc6, 0xcfece916, 
+  0x159afada, 0xd00e263a, 0x15756015, 0xd02f80f1, 0x154fdf8e, 0xd050f927, 
+  0x152a795c, 0xd0728ec7, 
+  0x15052d96, 0xd09441bc, 0x14dffc54, 0xd0b611f1, 0x14bae5ab, 0xd0d7ff52, 
+  0x1495e9b3, 0xd0fa09c9, 
+  0x14710883, 0xd11c3142, 0x144c4231, 0xd13e75a8, 0x142796d4, 0xd160d6e5, 
+  0x14030684, 0xd18354e4, 
+  0x13de9155, 0xd1a5ef91, 0x13ba3760, 0xd1c8a6d4, 0x1395f8b9, 0xd1eb7a9a, 
+  0x1371d579, 0xd20e6acd, 
+  0x134dcdb4, 0xd2317757, 0x1329e181, 0xd254a022, 0x130610f6, 0xd277e519, 
+  0x12e25c2a, 0xd29b4626, 
+  0x12bec333, 0xd2bec334, 0x129b4625, 0xd2e25c2b, 0x1277e518, 0xd30610f7, 
+  0x1254a021, 0xd329e182, 
+  0x12317756, 0xd34dcdb5, 0x120e6acc, 0xd371d57a, 0x11eb7a99, 0xd395f8ba, 
+  0x11c8a6d3, 0xd3ba3761, 
+  0x11a5ef90, 0xd3de9156, 0x118354e3, 0xd4030685, 0x1160d6e4, 0xd42796d5, 
+  0x113e75a7, 0xd44c4232, 
+  0x111c3141, 0xd4710884, 0x10fa09c8, 0xd495e9b4, 0x10d7ff51, 0xd4bae5ac, 
+  0x10b611f0, 0xd4dffc55, 
+  0x109441bb, 0xd5052d97, 0x10728ec6, 0xd52a795d, 0x1050f926, 0xd54fdf8f, 
+  0x102f80f0, 0xd5756016, 
+  0x100e2639, 0xd59afadb, 0xfece915, 0xd5c0afc7, 0xfcbc998, 0xd5e67ec2, 
+  0xfaac7d8, 0xd60c67b5, 
+  0xf89e3e8, 0xd6326a89, 0xf691ddc, 0xd6588726, 0xf4875ca, 0xd67ebd75, 
+  0xf27ebc4, 0xd6a50d5e, 
+  0xf077fe0, 0xd6cb76c9, 0xee73231, 0xd6f1f9a0, 0xec702cb, 0xd71895c9, 
+  0xea6f1c2, 0xd73f4b2e, 
+  0xe86ff29, 0xd76619b6, 0xe672b15, 0xd78d014a, 0xe477599, 0xd7b401d1, 
+  0xe27dec9, 0xd7db1b34, 
+  0xe0866b8, 0xd8024d5a, 0xde90d79, 0xd829982b, 0xdc9d320, 0xd850fb8f, 
+  0xdaab7c0, 0xd878776d, 
+  0xd8bbb6c, 0xd8a00bae, 0xd6cde38, 0xd8c7b839, 0xd4e2036, 0xd8ef7cf5, 
+  0xd2f817a, 0xd91759c9, 
+  0xd110216, 0xd93f4e9e, 0xcf2a21d, 0xd9675b5b, 0xcd461a2, 0xd98f7fe6, 
+  0xcb640b7, 0xd9b7bc28, 
+  0xc983f70, 0xd9e01007, 0xc7a5dde, 0xda087b6a, 0xc5c9c14, 0xda30fe39, 
+  0xc3efa24, 0xda59985a, 
+  0xc217821, 0xda8249b5, 0xc04161d, 0xdaab1230, 0xbe6d42a, 0xdad3f1b2, 
+  0xbc9b25a, 0xdafce822, 
+  0xbacb0bf, 0xdb25f567, 0xb8fcf6b, 0xdb4f1967, 0xb730e6f, 0xdb785409, 
+  0xb566dde, 0xdba1a534, 
+  0xb39edc9, 0xdbcb0cce, 0xb1d8e42, 0xdbf48abe, 0xb014f5a, 0xdc1e1ee9, 
+  0xae53123, 0xdc47c937, 
+  0xac933ad, 0xdc71898d, 0xaad570b, 0xdc9b5fd2, 0xa919b4d, 0xdcc54bed, 
+  0xa760085, 0xdcef4dc2, 
+  0xa5a86c3, 0xdd196539, 0xa3f2e19, 0xdd439237, 0xa23f697, 0xdd6dd4a2, 
+  0xa08e04e, 0xdd982c61, 
+  0x9edeb4f, 0xddc29958, 0x9d317ab, 0xdded1b6f, 0x9b86571, 0xde17b28a, 
+  0x99dd4b4, 0xde425e90, 
+  0x9836582, 0xde6d1f66, 0x96917ec, 0xde97f4f1, 0x94eec02, 0xdec2df18, 
+  0x934e1d6, 0xdeedddc0, 
+  0x91af975, 0xdf18f0ce, 0x90132f2, 0xdf441828, 0x8e78e5b, 0xdf6f53b3, 
+  0x8ce0bc0, 0xdf9aa355, 
+  0x8b4ab31, 0xdfc606f2, 0x89b6cbe, 0xdff17e70, 0x8825076, 0xe01d09b5, 
+  0x8695669, 0xe048a8a4, 
+  0x8507ea7, 0xe0745b25, 0x837c93d, 0xe0a0211a, 0x81f363d, 0xe0cbfa6a, 
+  0x806c5b4, 0xe0f7e6fa, 
+  0x7ee77b3, 0xe123e6ae, 0x7d64c47, 0xe14ff96b, 0x7be4380, 0xe17c1f16, 
+  0x7a65d6d, 0xe1a85793, 
+  0x78e9a1c, 0xe1d4a2c8, 0x776f99d, 0xe201009a, 0x75f7bfd, 0xe22d70ec, 
+  0x748214c, 0xe259f3a4, 
+  0x730e996, 0xe28688a5, 0x719d4ec, 0xe2b32fd5, 0x702e35b, 0xe2dfe918, 
+  0x6ec14f1, 0xe30cb452, 
+  0x6d569bd, 0xe3399167, 0x6bee1cc, 0xe366803d, 0x6a87d2c, 0xe39380b7, 
+  0x6923beb, 0xe3c092b9, 
+  0x67c1e18, 0xe3edb628, 0x66623be, 0xe41aeae9, 0x6504cec, 0xe44830dd, 
+  0x63a99b0, 0xe47587ec, 
+  0x6250a17, 0xe4a2eff7, 0x60f9e2e, 0xe4d068e3, 0x5fa5602, 0xe4fdf294, 
+  0x5e531a1, 0xe52b8cee, 
+  0x5d03117, 0xe55937d5, 0x5bb5472, 0xe586f32d, 0x5a69bbe, 0xe5b4bed9, 
+  0x5920708, 0xe5e29abd, 
+  0x57d965d, 0xe61086bd, 0x56949c9, 0xe63e82bc, 0x555215a, 0xe66c8e9f, 
+  0x5411d1a, 0xe69aaa49, 
+  0x52d3d18, 0xe6c8d59d, 0x519815e, 0xe6f7107f, 0x505e9fa, 0xe7255ad2, 
+  0x4f276f7, 0xe753b47a, 
+  0x4df2861, 0xe7821d5a, 0x4cbfe44, 0xe7b09556, 0x4b8f8ad, 0xe7df1c50, 
+  0x4a617a6, 0xe80db22d, 
+  0x4935b3b, 0xe83c56cf, 0x480c378, 0xe86b0a1a, 0x46e5069, 0xe899cbf1, 
+  0x45c0218, 0xe8c89c37, 
+  0x449d892, 0xe8f77ad0, 0x437d3e1, 0xe926679d, 0x425f410, 0xe9556283, 
+  0x414392a, 0xe9846b64, 
+  0x402a33b, 0xe9b38223, 0x3f1324d, 0xe9e2a6a4, 0x3dfe66b, 0xea11d8c9, 
+  0x3cebfa0, 0xea411875, 
+  0x3bdbdf6, 0xea70658b, 0x3ace177, 0xea9fbfee, 0x39c2a2f, 0xeacf2780, 
+  0x38b9827, 0xeafe9c24, 
+  0x37b2b69, 0xeb2e1dbe, 0x36ae400, 0xeb5dac2f, 0x35ac1f7, 0xeb8d475b, 
+  0x34ac555, 0xebbcef24, 
+  0x33aee27, 0xebeca36c, 0x32b3c75, 0xec1c6417, 0x31bb049, 0xec4c3106, 
+  0x30c49ac, 0xec7c0a1d, 
+  0x2fd08a9, 0xecabef3e, 0x2eded48, 0xecdbe04a, 0x2def793, 0xed0bdd26, 
+  0x2d02793, 0xed3be5b2, 
+  0x2c17d52, 0xed6bf9d2, 0x2b2f8d8, 0xed9c1967, 0x2a49a2e, 0xedcc4455, 
+  0x296615d, 0xedfc7a7d, 
+  0x2884e6e, 0xee2cbbc1, 0x27a6169, 0xee5d0805, 0x26c9a58, 0xee8d5f29, 
+  0x25ef942, 0xeebdc111, 
+  0x2517e31, 0xeeee2d9e, 0x244292b, 0xef1ea4b2, 0x236fa3b, 0xef4f2631, 
+  0x229f167, 0xef7fb1fb, 
+  0x21d0eb8, 0xefb047f2, 0x2105235, 0xefe0e7fa, 0x203bbe7, 0xf01191f3, 
+  0x1f74bd6, 0xf04245c0, 
+  0x1eb0208, 0xf0730343, 0x1dede86, 0xf0a3ca5d, 0x1d2e158, 0xf0d49af1, 
+  0x1c70a83, 0xf10574e1, 
+  0x1bb5a11, 0xf136580e, 0x1afd007, 0xf167445a, 0x1a46c6d, 0xf19839a7, 
+  0x1992f4b, 0xf1c937d7, 
+  0x18e18a7, 0xf1fa3ecb, 0x1832887, 0xf22b4e66, 0x1785ef4, 0xf25c6689, 
+  0x16dbbf2, 0xf28d8716, 
+  0x1633f89, 0xf2beafee, 0x158e9c0, 0xf2efe0f3, 0x14eba9d, 0xf3211a07, 
+  0x144b225, 0xf3525b0c, 
+  0x13ad060, 0xf383a3e2, 0x1311552, 0xf3b4f46d, 0x1278104, 0xf3e64c8c, 
+  0x11e1379, 0xf417ac23, 
+  0x114ccb8, 0xf4491311, 0x10bacc7, 0xf47a813a, 0x102b3ac, 0xf4abf67e, 
+  0xf9e16b, 0xf4dd72bf, 
+  0xf1360b, 0xf50ef5de, 0xe8b190, 0xf5407fbd, 0xe05400, 0xf572103e, 0xd81d60, 
+  0xf5a3a741, 
+  0xd00db6, 0xf5d544a8, 0xc82506, 0xf606e855, 0xc06354, 0xf6389228, 0xb8c8a7, 
+  0xf66a4204, 
+  0xb15501, 0xf69bf7c9, 0xaa0869, 0xf6cdb35a, 0xa2e2e3, 0xf6ff7496, 0x9be472, 
+  0xf7313b60, 
+  0x950d1c, 0xf7630799, 0x8e5ce5, 0xf794d922, 0x87d3d0, 0xf7c6afdd, 0x8171e1, 
+  0xf7f88baa, 
+  0x7b371e, 0xf82a6c6b, 0x752389, 0xf85c5201, 0x6f3726, 0xf88e3c4e, 0x6971f8, 
+  0xf8c02b32, 
+  0x63d405, 0xf8f21e8f, 0x5e5d4e, 0xf9241645, 0x590dd7, 0xf9561237, 0x53e5a4, 
+  0xf9881246, 
+  0x4ee4b8, 0xf9ba1651, 0x4a0b15, 0xf9ec1e3c, 0x4558c0, 0xfa1e29e6, 0x40cdba, 
+  0xfa503931, 
+  0x3c6a06, 0xfa824bfe, 0x382da8, 0xfab4622e, 0x3418a1, 0xfae67ba2, 0x302af5, 
+  0xfb18983c, 
+  0x2c64a5, 0xfb4ab7dc, 0x28c5b5, 0xfb7cda63, 0x254e26, 0xfbaeffb3, 0x21fdfa, 
+  0xfbe127ad, 
+  0x1ed534, 0xfc135231, 0x1bd3d6, 0xfc457f21, 0x18f9e0, 0xfc77ae5e, 0x164756, 
+  0xfca9dfc9, 
+  0x13bc39, 0xfcdc1342, 0x115889, 0xfd0e48ac, 0xf1c4a, 0xfd407fe6, 0xd077b, 
+  0xfd72b8d3, 
+  0xb1a20, 0xfda4f352, 0x95437, 0xfdd72f45, 0x7b5c4, 0xfe096c8d, 0x63ec6, 
+  0xfe3bab0c, 
+  0x4ef3e, 0xfe6deaa1, 0x3c72f, 0xfea02b2e, 0x2c697, 0xfed26c95, 0x1ed78, 
+  0xff04aeb5, 
+  0x13bd2, 0xff36f171, 0xb1a6, 0xff6934a8, 0x4ef4, 0xff9b783d, 0x13bd, 
+  0xffcdbc0f, 
+  0x0, 0x0, 0x13bd, 0x3243f1, 0x4ef4, 0x6487c3, 0xb1a6, 0x96cb58, 
+  0x13bd2, 0xc90e8f, 0x1ed78, 0xfb514b, 0x2c697, 0x12d936b, 0x3c72f, 
+  0x15fd4d2, 
+  0x4ef3e, 0x192155f, 0x63ec6, 0x1c454f4, 0x7b5c4, 0x1f69373, 0x95437, 
+  0x228d0bb, 
+  0xb1a20, 0x25b0cae, 0xd077b, 0x28d472d, 0xf1c4a, 0x2bf801a, 0x115889, 
+  0x2f1b754, 
+  0x13bc39, 0x323ecbe, 0x164756, 0x3562037, 0x18f9e0, 0x38851a2, 0x1bd3d6, 
+  0x3ba80df, 
+  0x1ed534, 0x3ecadcf, 0x21fdfa, 0x41ed853, 0x254e26, 0x451004d, 0x28c5b5, 
+  0x483259d, 
+  0x2c64a5, 0x4b54824, 0x302af5, 0x4e767c4, 0x3418a1, 0x519845e, 0x382da8, 
+  0x54b9dd2, 
+  0x3c6a06, 0x57db402, 0x40cdba, 0x5afc6cf, 0x4558c0, 0x5e1d61a, 0x4a0b15, 
+  0x613e1c4, 
+  0x4ee4b8, 0x645e9af, 0x53e5a4, 0x677edba, 0x590dd7, 0x6a9edc9, 0x5e5d4e, 
+  0x6dbe9bb, 
+  0x63d405, 0x70de171, 0x6971f8, 0x73fd4ce, 0x6f3726, 0x771c3b2, 0x752389, 
+  0x7a3adff, 
+  0x7b371e, 0x7d59395, 0x8171e1, 0x8077456, 0x87d3d0, 0x8395023, 0x8e5ce5, 
+  0x86b26de, 
+  0x950d1c, 0x89cf867, 0x9be472, 0x8cec4a0, 0xa2e2e3, 0x9008b6a, 0xaa0869, 
+  0x9324ca6, 
+  0xb15501, 0x9640837, 0xb8c8a7, 0x995bdfc, 0xc06354, 0x9c76dd8, 0xc82506, 
+  0x9f917ab, 
+  0xd00db6, 0xa2abb58, 0xd81d60, 0xa5c58bf, 0xe05400, 0xa8defc2, 0xe8b190, 
+  0xabf8043, 
+  0xf1360b, 0xaf10a22, 0xf9e16b, 0xb228d41, 0x102b3ac, 0xb540982, 0x10bacc7, 
+  0xb857ec6, 
+  0x114ccb8, 0xbb6ecef, 0x11e1379, 0xbe853dd, 0x1278104, 0xc19b374, 0x1311552, 
+  0xc4b0b93, 
+  0x13ad060, 0xc7c5c1e, 0x144b225, 0xcada4f4, 0x14eba9d, 0xcdee5f9, 0x158e9c0, 
+  0xd101f0d, 
+  0x1633f89, 0xd415012, 0x16dbbf2, 0xd7278ea, 0x1785ef4, 0xda39977, 0x1832887, 
+  0xdd4b19a, 
+  0x18e18a7, 0xe05c135, 0x1992f4b, 0xe36c829, 0x1a46c6d, 0xe67c659, 0x1afd007, 
+  0xe98bba6, 
+  0x1bb5a11, 0xec9a7f2, 0x1c70a83, 0xefa8b1f, 0x1d2e158, 0xf2b650f, 0x1dede86, 
+  0xf5c35a3, 
+  0x1eb0208, 0xf8cfcbd, 0x1f74bd6, 0xfbdba40, 0x203bbe7, 0xfee6e0d, 0x2105235, 
+  0x101f1806, 
+  0x21d0eb8, 0x104fb80e, 0x229f167, 0x10804e05, 0x236fa3b, 0x10b0d9cf, 
+  0x244292b, 0x10e15b4e, 
+  0x2517e31, 0x1111d262, 0x25ef942, 0x11423eef, 0x26c9a58, 0x1172a0d7, 
+  0x27a6169, 0x11a2f7fb, 
+  0x2884e6e, 0x11d3443f, 0x296615d, 0x12038583, 0x2a49a2e, 0x1233bbab, 
+  0x2b2f8d8, 0x1263e699, 
+  0x2c17d52, 0x1294062e, 0x2d02793, 0x12c41a4e, 0x2def793, 0x12f422da, 
+  0x2eded48, 0x13241fb6, 
+  0x2fd08a9, 0x135410c2, 0x30c49ac, 0x1383f5e3, 0x31bb049, 0x13b3cefa, 
+  0x32b3c75, 0x13e39be9, 
+  0x33aee27, 0x14135c94, 0x34ac555, 0x144310dc, 0x35ac1f7, 0x1472b8a5, 
+  0x36ae400, 0x14a253d1, 
+  0x37b2b69, 0x14d1e242, 0x38b9827, 0x150163dc, 0x39c2a2f, 0x1530d880, 
+  0x3ace177, 0x15604012, 
+  0x3bdbdf6, 0x158f9a75, 0x3cebfa0, 0x15bee78b, 0x3dfe66b, 0x15ee2737, 
+  0x3f1324d, 0x161d595c, 
+  0x402a33b, 0x164c7ddd, 0x414392a, 0x167b949c, 0x425f410, 0x16aa9d7d, 
+  0x437d3e1, 0x16d99863, 
+  0x449d892, 0x17088530, 0x45c0218, 0x173763c9, 0x46e5069, 0x1766340f, 
+  0x480c378, 0x1794f5e6, 
+  0x4935b3b, 0x17c3a931, 0x4a617a6, 0x17f24dd3, 0x4b8f8ad, 0x1820e3b0, 
+  0x4cbfe44, 0x184f6aaa, 
+  0x4df2861, 0x187de2a6, 0x4f276f7, 0x18ac4b86, 0x505e9fa, 0x18daa52e, 
+  0x519815e, 0x1908ef81, 
+  0x52d3d18, 0x19372a63, 0x5411d1a, 0x196555b7, 0x555215a, 0x19937161, 
+  0x56949c9, 0x19c17d44, 
+  0x57d965d, 0x19ef7943, 0x5920708, 0x1a1d6543, 0x5a69bbe, 0x1a4b4127, 
+  0x5bb5472, 0x1a790cd3, 
+  0x5d03117, 0x1aa6c82b, 0x5e531a1, 0x1ad47312, 0x5fa5602, 0x1b020d6c, 
+  0x60f9e2e, 0x1b2f971d, 
+  0x6250a17, 0x1b5d1009, 0x63a99b0, 0x1b8a7814, 0x6504cec, 0x1bb7cf23, 
+  0x66623be, 0x1be51517, 
+  0x67c1e18, 0x1c1249d8, 0x6923beb, 0x1c3f6d47, 0x6a87d2c, 0x1c6c7f49, 
+  0x6bee1cc, 0x1c997fc3, 
+  0x6d569bd, 0x1cc66e99, 0x6ec14f1, 0x1cf34bae, 0x702e35b, 0x1d2016e8, 
+  0x719d4ec, 0x1d4cd02b, 
+  0x730e996, 0x1d79775b, 0x748214c, 0x1da60c5c, 0x75f7bfd, 0x1dd28f14, 
+  0x776f99d, 0x1dfeff66, 
+  0x78e9a1c, 0x1e2b5d38, 0x7a65d6d, 0x1e57a86d, 0x7be4380, 0x1e83e0ea, 
+  0x7d64c47, 0x1eb00695, 
+  0x7ee77b3, 0x1edc1952, 0x806c5b4, 0x1f081906, 0x81f363d, 0x1f340596, 
+  0x837c93d, 0x1f5fdee6, 
+  0x8507ea7, 0x1f8ba4db, 0x8695669, 0x1fb7575c, 0x8825076, 0x1fe2f64b, 
+  0x89b6cbe, 0x200e8190, 
+  0x8b4ab31, 0x2039f90e, 0x8ce0bc0, 0x20655cab, 0x8e78e5b, 0x2090ac4d, 
+  0x90132f2, 0x20bbe7d8, 
+  0x91af975, 0x20e70f32, 0x934e1d6, 0x21122240, 0x94eec02, 0x213d20e8, 
+  0x96917ec, 0x21680b0f, 
+  0x9836582, 0x2192e09a, 0x99dd4b4, 0x21bda170, 0x9b86571, 0x21e84d76, 
+  0x9d317ab, 0x2212e491, 
+  0x9edeb4f, 0x223d66a8, 0xa08e04e, 0x2267d39f, 0xa23f697, 0x22922b5e, 
+  0xa3f2e19, 0x22bc6dc9, 
+  0xa5a86c3, 0x22e69ac7, 0xa760085, 0x2310b23e, 0xa919b4d, 0x233ab413, 
+  0xaad570b, 0x2364a02e, 
+  0xac933ad, 0x238e7673, 0xae53123, 0x23b836c9, 0xb014f5a, 0x23e1e117, 
+  0xb1d8e42, 0x240b7542, 
+  0xb39edc9, 0x2434f332, 0xb566dde, 0x245e5acc, 0xb730e6f, 0x2487abf7, 
+  0xb8fcf6b, 0x24b0e699, 
+  0xbacb0bf, 0x24da0a99, 0xbc9b25a, 0x250317de, 0xbe6d42a, 0x252c0e4e, 
+  0xc04161d, 0x2554edd0, 
+  0xc217821, 0x257db64b, 0xc3efa24, 0x25a667a6, 0xc5c9c14, 0x25cf01c7, 
+  0xc7a5dde, 0x25f78496, 
+  0xc983f70, 0x261feff9, 0xcb640b7, 0x264843d8, 0xcd461a2, 0x2670801a, 
+  0xcf2a21d, 0x2698a4a5, 
+  0xd110216, 0x26c0b162, 0xd2f817a, 0x26e8a637, 0xd4e2036, 0x2710830b, 
+  0xd6cde38, 0x273847c7, 
+  0xd8bbb6c, 0x275ff452, 0xdaab7c0, 0x27878893, 0xdc9d320, 0x27af0471, 
+  0xde90d79, 0x27d667d5, 
+  0xe0866b8, 0x27fdb2a6, 0xe27dec9, 0x2824e4cc, 0xe477599, 0x284bfe2f, 
+  0xe672b15, 0x2872feb6, 
+  0xe86ff29, 0x2899e64a, 0xea6f1c2, 0x28c0b4d2, 0xec702cb, 0x28e76a37, 
+  0xee73231, 0x290e0660, 
+  0xf077fe0, 0x29348937, 0xf27ebc4, 0x295af2a2, 0xf4875ca, 0x2981428b, 
+  0xf691ddc, 0x29a778da, 
+  0xf89e3e8, 0x29cd9577, 0xfaac7d8, 0x29f3984b, 0xfcbc998, 0x2a19813e, 
+  0xfece915, 0x2a3f5039, 
+  0x100e2639, 0x2a650525, 0x102f80f0, 0x2a8a9fea, 0x1050f926, 0x2ab02071, 
+  0x10728ec6, 0x2ad586a3, 
+  0x109441bb, 0x2afad269, 0x10b611f0, 0x2b2003ab, 0x10d7ff51, 0x2b451a54, 
+  0x10fa09c8, 0x2b6a164c, 
+  0x111c3141, 0x2b8ef77c, 0x113e75a7, 0x2bb3bdce, 0x1160d6e4, 0x2bd8692b, 
+  0x118354e3, 0x2bfcf97b, 
+  0x11a5ef90, 0x2c216eaa, 0x11c8a6d3, 0x2c45c89f, 0x11eb7a99, 0x2c6a0746, 
+  0x120e6acc, 0x2c8e2a86, 
+  0x12317756, 0x2cb2324b, 0x1254a021, 0x2cd61e7e, 0x1277e518, 0x2cf9ef09, 
+  0x129b4625, 0x2d1da3d5, 
+  0x12bec333, 0x2d413ccc, 0x12e25c2a, 0x2d64b9da, 0x130610f6, 0x2d881ae7, 
+  0x1329e181, 0x2dab5fde, 
+  0x134dcdb4, 0x2dce88a9, 0x1371d579, 0x2df19533, 0x1395f8b9, 0x2e148566, 
+  0x13ba3760, 0x2e37592c, 
+  0x13de9155, 0x2e5a106f, 0x14030684, 0x2e7cab1c, 0x142796d4, 0x2e9f291b, 
+  0x144c4231, 0x2ec18a58, 
+  0x14710883, 0x2ee3cebe, 0x1495e9b3, 0x2f05f637, 0x14bae5ab, 0x2f2800ae, 
+  0x14dffc54, 0x2f49ee0f, 
+  0x15052d96, 0x2f6bbe44, 0x152a795c, 0x2f8d7139, 0x154fdf8e, 0x2faf06d9, 
+  0x15756015, 0x2fd07f0f, 
+  0x159afada, 0x2ff1d9c6, 0x15c0afc6, 0x301316ea, 0x15e67ec1, 0x30343667, 
+  0x160c67b4, 0x30553827, 
+  0x16326a88, 0x30761c17, 0x16588725, 0x3096e223, 0x167ebd74, 0x30b78a35, 
+  0x16a50d5d, 0x30d8143b, 
+  0x16cb76c8, 0x30f8801f, 0x16f1f99f, 0x3118cdce, 0x171895c8, 0x3138fd34, 
+  0x173f4b2d, 0x31590e3d, 
+  0x176619b5, 0x317900d6, 0x178d0149, 0x3198d4ea, 0x17b401d0, 0x31b88a66, 
+  0x17db1b33, 0x31d82136, 
+  0x18024d59, 0x31f79947, 0x1829982a, 0x3216f286, 0x1850fb8e, 0x32362cdf, 
+  0x1878776c, 0x3255483f, 
+  0x18a00bad, 0x32744493, 0x18c7b838, 0x329321c7, 0x18ef7cf4, 0x32b1dfc9, 
+  0x191759c8, 0x32d07e85, 
+  0x193f4e9d, 0x32eefde9, 0x19675b5a, 0x330d5de2, 0x198f7fe5, 0x332b9e5d, 
+  0x19b7bc27, 0x3349bf48, 
+  0x19e01006, 0x3367c08f, 0x1a087b69, 0x3385a221, 0x1a30fe38, 0x33a363eb, 
+  0x1a599859, 0x33c105db, 
+  0x1a8249b4, 0x33de87de, 0x1aab122f, 0x33fbe9e2, 0x1ad3f1b1, 0x34192bd5, 
+  0x1afce821, 0x34364da5, 
+  0x1b25f566, 0x34534f40, 0x1b4f1966, 0x34703094, 0x1b785408, 0x348cf190, 
+  0x1ba1a533, 0x34a99221, 
+  0x1bcb0ccd, 0x34c61236, 0x1bf48abd, 0x34e271bd, 0x1c1e1ee8, 0x34feb0a5, 
+  0x1c47c936, 0x351acedc, 
+  0x1c71898c, 0x3536cc52, 0x1c9b5fd1, 0x3552a8f4, 0x1cc54bec, 0x356e64b2, 
+  0x1cef4dc1, 0x3589ff7a, 
+  0x1d196538, 0x35a5793c, 0x1d439236, 0x35c0d1e6, 0x1d6dd4a1, 0x35dc0968, 
+  0x1d982c60, 0x35f71fb1, 
+  0x1dc29957, 0x361214b0, 0x1ded1b6e, 0x362ce854, 0x1e17b289, 0x36479a8e, 
+  0x1e425e8f, 0x36622b4b, 
+  0x1e6d1f65, 0x367c9a7d, 0x1e97f4f0, 0x3696e813, 0x1ec2df17, 0x36b113fd, 
+  0x1eedddbf, 0x36cb1e29, 
+  0x1f18f0cd, 0x36e5068a, 0x1f441827, 0x36fecd0d, 0x1f6f53b2, 0x371871a4, 
+  0x1f9aa354, 0x3731f43f, 
+  0x1fc606f1, 0x374b54ce, 0x1ff17e6f, 0x37649341, 0x201d09b4, 0x377daf89, 
+  0x2048a8a3, 0x3796a996, 
+  0x20745b24, 0x37af8158, 0x20a02119, 0x37c836c2, 0x20cbfa69, 0x37e0c9c2, 
+  0x20f7e6f9, 0x37f93a4b, 
+  0x2123e6ad, 0x3811884c, 0x214ff96a, 0x3829b3b8, 0x217c1f15, 0x3841bc7f, 
+  0x21a85792, 0x3859a292, 
+  0x21d4a2c7, 0x387165e3, 0x22010099, 0x38890662, 0x222d70eb, 0x38a08402, 
+  0x2259f3a3, 0x38b7deb3, 
+  0x228688a4, 0x38cf1669, 0x22b32fd4, 0x38e62b13, 0x22dfe917, 0x38fd1ca4, 
+  0x230cb451, 0x3913eb0e, 
+  0x23399166, 0x392a9642, 0x2366803c, 0x39411e33, 0x239380b6, 0x395782d3, 
+  0x23c092b8, 0x396dc414, 
+  0x23edb627, 0x3983e1e7, 0x241aeae8, 0x3999dc41, 0x244830dc, 0x39afb313, 
+  0x247587eb, 0x39c5664f, 
+  0x24a2eff6, 0x39daf5e8, 0x24d068e2, 0x39f061d1, 0x24fdf293, 0x3a05a9fd, 
+  0x252b8ced, 0x3a1ace5e, 
+  0x255937d4, 0x3a2fcee8, 0x2586f32c, 0x3a44ab8d, 0x25b4bed8, 0x3a596441, 
+  0x25e29abc, 0x3a6df8f7, 
+  0x261086bc, 0x3a8269a2, 0x263e82bb, 0x3a96b636, 0x266c8e9e, 0x3aaadea5, 
+  0x269aaa48, 0x3abee2e5, 
+  0x26c8d59c, 0x3ad2c2e7, 0x26f7107e, 0x3ae67ea1, 0x27255ad1, 0x3afa1605, 
+  0x2753b479, 0x3b0d8908, 
+  0x27821d59, 0x3b20d79e, 0x27b09555, 0x3b3401bb, 0x27df1c4f, 0x3b470752, 
+  0x280db22c, 0x3b59e859, 
+  0x283c56ce, 0x3b6ca4c4, 0x286b0a19, 0x3b7f3c87, 0x2899cbf0, 0x3b91af96, 
+  0x28c89c36, 0x3ba3fde7, 
+  0x28f77acf, 0x3bb6276d, 0x2926679c, 0x3bc82c1e, 0x29556282, 0x3bda0bef, 
+  0x29846b63, 0x3bebc6d5, 
+  0x29b38222, 0x3bfd5cc4, 0x29e2a6a3, 0x3c0ecdb2, 0x2a11d8c8, 0x3c201994, 
+  0x2a411874, 0x3c31405f, 
+  0x2a70658a, 0x3c424209, 0x2a9fbfed, 0x3c531e88, 0x2acf277f, 0x3c63d5d0, 
+  0x2afe9c23, 0x3c7467d8, 
+  0x2b2e1dbd, 0x3c84d496, 0x2b5dac2e, 0x3c951bff, 0x2b8d475a, 0x3ca53e08, 
+  0x2bbcef23, 0x3cb53aaa, 
+  0x2beca36b, 0x3cc511d8, 0x2c1c6416, 0x3cd4c38a, 0x2c4c3105, 0x3ce44fb6, 
+  0x2c7c0a1c, 0x3cf3b653, 
+  0x2cabef3d, 0x3d02f756, 0x2cdbe049, 0x3d1212b7, 0x2d0bdd25, 0x3d21086c, 
+  0x2d3be5b1, 0x3d2fd86c, 
+  0x2d6bf9d1, 0x3d3e82ad, 0x2d9c1966, 0x3d4d0727, 0x2dcc4454, 0x3d5b65d1, 
+  0x2dfc7a7c, 0x3d699ea2, 
+  0x2e2cbbc0, 0x3d77b191, 0x2e5d0804, 0x3d859e96, 0x2e8d5f28, 0x3d9365a7, 
+  0x2ebdc110, 0x3da106bd, 
+  0x2eee2d9d, 0x3dae81ce, 0x2f1ea4b1, 0x3dbbd6d4, 0x2f4f2630, 0x3dc905c4, 
+  0x2f7fb1fa, 0x3dd60e98, 
+  0x2fb047f1, 0x3de2f147, 0x2fe0e7f9, 0x3defadca, 0x301191f2, 0x3dfc4418, 
+  0x304245bf, 0x3e08b429, 
+  0x30730342, 0x3e14fdf7, 0x30a3ca5c, 0x3e212179, 0x30d49af0, 0x3e2d1ea7, 
+  0x310574e0, 0x3e38f57c, 
+  0x3136580d, 0x3e44a5ee, 0x31674459, 0x3e502ff8, 0x319839a6, 0x3e5b9392, 
+  0x31c937d6, 0x3e66d0b4, 
+  0x31fa3eca, 0x3e71e758, 0x322b4e65, 0x3e7cd778, 0x325c6688, 0x3e87a10b, 
+  0x328d8715, 0x3e92440d, 
+  0x32beafed, 0x3e9cc076, 0x32efe0f2, 0x3ea7163f, 0x33211a06, 0x3eb14562, 
+  0x33525b0b, 0x3ebb4dda, 
+  0x3383a3e1, 0x3ec52f9f, 0x33b4f46c, 0x3eceeaad, 0x33e64c8b, 0x3ed87efb, 
+  0x3417ac22, 0x3ee1ec86, 
+  0x34491310, 0x3eeb3347, 0x347a8139, 0x3ef45338, 0x34abf67d, 0x3efd4c53, 
+  0x34dd72be, 0x3f061e94, 
+  0x350ef5dd, 0x3f0ec9f4, 0x35407fbc, 0x3f174e6f, 0x3572103d, 0x3f1fabff, 
+  0x35a3a740, 0x3f27e29f, 
+  0x35d544a7, 0x3f2ff249, 0x3606e854, 0x3f37daf9, 0x36389227, 0x3f3f9cab, 
+  0x366a4203, 0x3f473758, 
+  0x369bf7c8, 0x3f4eaafe, 0x36cdb359, 0x3f55f796, 0x36ff7495, 0x3f5d1d1c, 
+  0x37313b5f, 0x3f641b8d, 
+  0x37630798, 0x3f6af2e3, 0x3794d921, 0x3f71a31a, 0x37c6afdc, 0x3f782c2f, 
+  0x37f88ba9, 0x3f7e8e1e, 
+  0x382a6c6a, 0x3f84c8e1, 0x385c5200, 0x3f8adc76, 0x388e3c4d, 0x3f90c8d9, 
+  0x38c02b31, 0x3f968e07, 
+  0x38f21e8e, 0x3f9c2bfa, 0x39241644, 0x3fa1a2b1, 0x39561236, 0x3fa6f228, 
+  0x39881245, 0x3fac1a5b, 
+  0x39ba1650, 0x3fb11b47, 0x39ec1e3b, 0x3fb5f4ea, 0x3a1e29e5, 0x3fbaa73f, 
+  0x3a503930, 0x3fbf3245, 
+  0x3a824bfd, 0x3fc395f9, 0x3ab4622d, 0x3fc7d257, 0x3ae67ba1, 0x3fcbe75e, 
+  0x3b18983b, 0x3fcfd50a, 
+  0x3b4ab7db, 0x3fd39b5a, 0x3b7cda62, 0x3fd73a4a, 0x3baeffb2, 0x3fdab1d9, 
+  0x3be127ac, 0x3fde0205, 
+  0x3c135230, 0x3fe12acb, 0x3c457f20, 0x3fe42c29, 0x3c77ae5d, 0x3fe7061f, 
+  0x3ca9dfc8, 0x3fe9b8a9, 
+  0x3cdc1341, 0x3fec43c6, 0x3d0e48ab, 0x3feea776, 0x3d407fe5, 0x3ff0e3b5, 
+  0x3d72b8d2, 0x3ff2f884, 
+  0x3da4f351, 0x3ff4e5df, 0x3dd72f44, 0x3ff6abc8, 0x3e096c8c, 0x3ff84a3b, 
+  0x3e3bab0b, 0x3ff9c139, 
+  0x3e6deaa0, 0x3ffb10c1, 0x3ea02b2d, 0x3ffc38d0, 0x3ed26c94, 0x3ffd3968, 
+  0x3f04aeb4, 0x3ffe1287, 
+  0x3f36f170, 0x3ffec42d, 0x3f6934a7, 0x3fff4e59, 0x3f9b783c, 0x3fffb10b, 
+  0x3fcdbc0e, 0x3fffec42 
+}; 
+ 
+ 
+/**  
+* \par 
+* Generation of realCoefBQ31 array:  
+* \par  
+*  n = 1024  
+* <pre>for (i = 0; i < n; i++)  
+* {  
+*    pBTable[2 * i] = 0.5 * (1.0 + sin (2 * PI / (double) (2 * n) * (double) i));  
+*    pBTable[2 * i + 1] = 0.5 * (1.0 * cos (2 * PI / (double) (2 * n) * (double) i));  
+* } </pre>  
+* \par  
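+* Convert to fixed point Q31 format  
+*     round(pBTable[i] * pow(2, 31))  
+* \par  
+* The table stores the 2 * n = 2048 results interleaved: entry 2 * i is the sine-based term  
+* and entry 2 * i + 1 is the cosine-based term. A result of exactly 1.0 (the sine-based term  
+* at i = n / 2) does not fit in Q31 and is stored as 0x7fffffff, the largest Q31 value.  
+*/ 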
+ 
+static const q31_t realCoefBQ31[2048] = { 
+  0x40000000, 0x40000000, 0x403243f1, 0x3fffec42, 0x406487c3, 0x3fffb10b, 
+  0x4096cb58, 0x3fff4e59, 
+  0x40c90e8f, 0x3ffec42d, 0x40fb514b, 0x3ffe1287, 0x412d936b, 0x3ffd3968, 
+  0x415fd4d2, 0x3ffc38d0, 
+  0x4192155f, 0x3ffb10c1, 0x41c454f4, 0x3ff9c139, 0x41f69373, 0x3ff84a3b, 
+  0x4228d0bb, 0x3ff6abc8, 
+  0x425b0cae, 0x3ff4e5df, 0x428d472d, 0x3ff2f884, 0x42bf801a, 0x3ff0e3b5, 
+  0x42f1b754, 0x3feea776, 
+  0x4323ecbe, 0x3fec43c6, 0x43562037, 0x3fe9b8a9, 0x438851a2, 0x3fe7061f, 
+  0x43ba80df, 0x3fe42c29, 
+  0x43ecadcf, 0x3fe12acb, 0x441ed853, 0x3fde0205, 0x4451004d, 0x3fdab1d9, 
+  0x4483259d, 0x3fd73a4a, 
+  0x44b54824, 0x3fd39b5a, 0x44e767c4, 0x3fcfd50a, 0x4519845e, 0x3fcbe75e, 
+  0x454b9dd2, 0x3fc7d257, 
+  0x457db402, 0x3fc395f9, 0x45afc6cf, 0x3fbf3245, 0x45e1d61a, 0x3fbaa73f, 
+  0x4613e1c4, 0x3fb5f4ea, 
+  0x4645e9af, 0x3fb11b47, 0x4677edba, 0x3fac1a5b, 0x46a9edc9, 0x3fa6f228, 
+  0x46dbe9bb, 0x3fa1a2b1, 
+  0x470de171, 0x3f9c2bfa, 0x473fd4ce, 0x3f968e07, 0x4771c3b2, 0x3f90c8d9, 
+  0x47a3adff, 0x3f8adc76, 
+  0x47d59395, 0x3f84c8e1, 0x48077456, 0x3f7e8e1e, 0x48395023, 0x3f782c2f, 
+  0x486b26de, 0x3f71a31a, 
+  0x489cf867, 0x3f6af2e3, 0x48cec4a0, 0x3f641b8d, 0x49008b6a, 0x3f5d1d1c, 
+  0x49324ca6, 0x3f55f796, 
+  0x49640837, 0x3f4eaafe, 0x4995bdfc, 0x3f473758, 0x49c76dd8, 0x3f3f9cab, 
+  0x49f917ab, 0x3f37daf9, 
+  0x4a2abb58, 0x3f2ff249, 0x4a5c58bf, 0x3f27e29f, 0x4a8defc2, 0x3f1fabff, 
+  0x4abf8043, 0x3f174e6f, 
+  0x4af10a22, 0x3f0ec9f4, 0x4b228d41, 0x3f061e94, 0x4b540982, 0x3efd4c53, 
+  0x4b857ec6, 0x3ef45338, 
+  0x4bb6ecef, 0x3eeb3347, 0x4be853dd, 0x3ee1ec86, 0x4c19b374, 0x3ed87efb, 
+  0x4c4b0b93, 0x3eceeaad, 
+  0x4c7c5c1e, 0x3ec52f9f, 0x4cada4f4, 0x3ebb4dda, 0x4cdee5f9, 0x3eb14562, 
+  0x4d101f0d, 0x3ea7163f, 
+  0x4d415012, 0x3e9cc076, 0x4d7278ea, 0x3e92440d, 0x4da39977, 0x3e87a10b, 
+  0x4dd4b19a, 0x3e7cd778, 
+  0x4e05c135, 0x3e71e758, 0x4e36c829, 0x3e66d0b4, 0x4e67c659, 0x3e5b9392, 
+  0x4e98bba6, 0x3e502ff8, 
+  0x4ec9a7f2, 0x3e44a5ee, 0x4efa8b1f, 0x3e38f57c, 0x4f2b650f, 0x3e2d1ea7, 
+  0x4f5c35a3, 0x3e212179, 
+  0x4f8cfcbd, 0x3e14fdf7, 0x4fbdba40, 0x3e08b429, 0x4fee6e0d, 0x3dfc4418, 
+  0x501f1806, 0x3defadca, 
+  0x504fb80e, 0x3de2f147, 0x50804e05, 0x3dd60e98, 0x50b0d9cf, 0x3dc905c4, 
+  0x50e15b4e, 0x3dbbd6d4, 
+  0x5111d262, 0x3dae81ce, 0x51423eef, 0x3da106bd, 0x5172a0d7, 0x3d9365a7, 
+  0x51a2f7fb, 0x3d859e96, 
+  0x51d3443f, 0x3d77b191, 0x52038583, 0x3d699ea2, 0x5233bbab, 0x3d5b65d1, 
+  0x5263e699, 0x3d4d0727, 
+  0x5294062e, 0x3d3e82ad, 0x52c41a4e, 0x3d2fd86c, 0x52f422da, 0x3d21086c, 
+  0x53241fb6, 0x3d1212b7, 
+  0x535410c2, 0x3d02f756, 0x5383f5e3, 0x3cf3b653, 0x53b3cefa, 0x3ce44fb6, 
+  0x53e39be9, 0x3cd4c38a, 
+  0x54135c94, 0x3cc511d8, 0x544310dc, 0x3cb53aaa, 0x5472b8a5, 0x3ca53e08, 
+  0x54a253d1, 0x3c951bff, 
+  0x54d1e242, 0x3c84d496, 0x550163dc, 0x3c7467d8, 0x5530d880, 0x3c63d5d0, 
+  0x55604012, 0x3c531e88, 
+  0x558f9a75, 0x3c424209, 0x55bee78b, 0x3c31405f, 0x55ee2737, 0x3c201994, 
+  0x561d595c, 0x3c0ecdb2, 
+  0x564c7ddd, 0x3bfd5cc4, 0x567b949c, 0x3bebc6d5, 0x56aa9d7d, 0x3bda0bef, 
+  0x56d99863, 0x3bc82c1e, 
+  0x57088530, 0x3bb6276d, 0x573763c9, 0x3ba3fde7, 0x5766340f, 0x3b91af96, 
+  0x5794f5e6, 0x3b7f3c87, 
+  0x57c3a931, 0x3b6ca4c4, 0x57f24dd3, 0x3b59e859, 0x5820e3b0, 0x3b470752, 
+  0x584f6aaa, 0x3b3401bb, 
+  0x587de2a6, 0x3b20d79e, 0x58ac4b86, 0x3b0d8908, 0x58daa52e, 0x3afa1605, 
+  0x5908ef81, 0x3ae67ea1, 
+  0x59372a63, 0x3ad2c2e7, 0x596555b7, 0x3abee2e5, 0x59937161, 0x3aaadea5, 
+  0x59c17d44, 0x3a96b636, 
+  0x59ef7943, 0x3a8269a2, 0x5a1d6543, 0x3a6df8f7, 0x5a4b4127, 0x3a596441, 
+  0x5a790cd3, 0x3a44ab8d, 
+  0x5aa6c82b, 0x3a2fcee8, 0x5ad47312, 0x3a1ace5e, 0x5b020d6c, 0x3a05a9fd, 
+  0x5b2f971d, 0x39f061d1, 
+  0x5b5d1009, 0x39daf5e8, 0x5b8a7814, 0x39c5664f, 0x5bb7cf23, 0x39afb313, 
+  0x5be51517, 0x3999dc41, 
+  0x5c1249d8, 0x3983e1e7, 0x5c3f6d47, 0x396dc414, 0x5c6c7f49, 0x395782d3, 
+  0x5c997fc3, 0x39411e33, 
+  0x5cc66e99, 0x392a9642, 0x5cf34bae, 0x3913eb0e, 0x5d2016e8, 0x38fd1ca4, 
+  0x5d4cd02b, 0x38e62b13, 
+  0x5d79775b, 0x38cf1669, 0x5da60c5c, 0x38b7deb3, 0x5dd28f14, 0x38a08402, 
+  0x5dfeff66, 0x38890662, 
+  0x5e2b5d38, 0x387165e3, 0x5e57a86d, 0x3859a292, 0x5e83e0ea, 0x3841bc7f, 
+  0x5eb00695, 0x3829b3b8, 
+  0x5edc1952, 0x3811884c, 0x5f081906, 0x37f93a4b, 0x5f340596, 0x37e0c9c2, 
+  0x5f5fdee6, 0x37c836c2, 
+  0x5f8ba4db, 0x37af8158, 0x5fb7575c, 0x3796a996, 0x5fe2f64b, 0x377daf89, 
+  0x600e8190, 0x37649341, 
+  0x6039f90e, 0x374b54ce, 0x60655cab, 0x3731f43f, 0x6090ac4d, 0x371871a4, 
+  0x60bbe7d8, 0x36fecd0d, 
+  0x60e70f32, 0x36e5068a, 0x61122240, 0x36cb1e29, 0x613d20e8, 0x36b113fd, 
+  0x61680b0f, 0x3696e813, 
+  0x6192e09a, 0x367c9a7d, 0x61bda170, 0x36622b4b, 0x61e84d76, 0x36479a8e, 
+  0x6212e491, 0x362ce854, 
+  0x623d66a8, 0x361214b0, 0x6267d39f, 0x35f71fb1, 0x62922b5e, 0x35dc0968, 
+  0x62bc6dc9, 0x35c0d1e6, 
+  0x62e69ac7, 0x35a5793c, 0x6310b23e, 0x3589ff7a, 0x633ab413, 0x356e64b2, 
+  0x6364a02e, 0x3552a8f4, 
+  0x638e7673, 0x3536cc52, 0x63b836c9, 0x351acedc, 0x63e1e117, 0x34feb0a5, 
+  0x640b7542, 0x34e271bd, 
+  0x6434f332, 0x34c61236, 0x645e5acc, 0x34a99221, 0x6487abf7, 0x348cf190, 
+  0x64b0e699, 0x34703094, 
+  0x64da0a99, 0x34534f40, 0x650317de, 0x34364da5, 0x652c0e4e, 0x34192bd5, 
+  0x6554edd0, 0x33fbe9e2, 
+  0x657db64b, 0x33de87de, 0x65a667a6, 0x33c105db, 0x65cf01c7, 0x33a363eb, 
+  0x65f78496, 0x3385a221, 
+  0x661feff9, 0x3367c08f, 0x664843d8, 0x3349bf48, 0x6670801a, 0x332b9e5d, 
+  0x6698a4a5, 0x330d5de2, 
+  0x66c0b162, 0x32eefde9, 0x66e8a637, 0x32d07e85, 0x6710830b, 0x32b1dfc9, 
+  0x673847c7, 0x329321c7, 
+  0x675ff452, 0x32744493, 0x67878893, 0x3255483f, 0x67af0471, 0x32362cdf, 
+  0x67d667d5, 0x3216f286, 
+  0x67fdb2a6, 0x31f79947, 0x6824e4cc, 0x31d82136, 0x684bfe2f, 0x31b88a66, 
+  0x6872feb6, 0x3198d4ea, 
+  0x6899e64a, 0x317900d6, 0x68c0b4d2, 0x31590e3d, 0x68e76a37, 0x3138fd34, 
+  0x690e0660, 0x3118cdce, 
+  0x69348937, 0x30f8801f, 0x695af2a2, 0x30d8143b, 0x6981428b, 0x30b78a35, 
+  0x69a778da, 0x3096e223, 
+  0x69cd9577, 0x30761c17, 0x69f3984b, 0x30553827, 0x6a19813e, 0x30343667, 
+  0x6a3f5039, 0x301316ea, 
+  0x6a650525, 0x2ff1d9c6, 0x6a8a9fea, 0x2fd07f0f, 0x6ab02071, 0x2faf06d9, 
+  0x6ad586a3, 0x2f8d7139, 
+  0x6afad269, 0x2f6bbe44, 0x6b2003ab, 0x2f49ee0f, 0x6b451a54, 0x2f2800ae, 
+  0x6b6a164c, 0x2f05f637, 
+  0x6b8ef77c, 0x2ee3cebe, 0x6bb3bdce, 0x2ec18a58, 0x6bd8692b, 0x2e9f291b, 
+  0x6bfcf97b, 0x2e7cab1c, 
+  0x6c216eaa, 0x2e5a106f, 0x6c45c89f, 0x2e37592c, 0x6c6a0746, 0x2e148566, 
+  0x6c8e2a86, 0x2df19533, 
+  0x6cb2324b, 0x2dce88a9, 0x6cd61e7e, 0x2dab5fde, 0x6cf9ef09, 0x2d881ae7, 
+  0x6d1da3d5, 0x2d64b9da, 
+  0x6d413ccc, 0x2d413ccc, 0x6d64b9da, 0x2d1da3d5, 0x6d881ae7, 0x2cf9ef09, 
+  0x6dab5fde, 0x2cd61e7e, 
+  0x6dce88a9, 0x2cb2324b, 0x6df19533, 0x2c8e2a86, 0x6e148566, 0x2c6a0746, 
+  0x6e37592c, 0x2c45c89f, 
+  0x6e5a106f, 0x2c216eaa, 0x6e7cab1c, 0x2bfcf97b, 0x6e9f291b, 0x2bd8692b, 
+  0x6ec18a58, 0x2bb3bdce, 
+  0x6ee3cebe, 0x2b8ef77c, 0x6f05f637, 0x2b6a164c, 0x6f2800ae, 0x2b451a54, 
+  0x6f49ee0f, 0x2b2003ab, 
+  0x6f6bbe44, 0x2afad269, 0x6f8d7139, 0x2ad586a3, 0x6faf06d9, 0x2ab02071, 
+  0x6fd07f0f, 0x2a8a9fea, 
+  0x6ff1d9c6, 0x2a650525, 0x701316ea, 0x2a3f5039, 0x70343667, 0x2a19813e, 
+  0x70553827, 0x29f3984b, 
+  0x70761c17, 0x29cd9577, 0x7096e223, 0x29a778da, 0x70b78a35, 0x2981428b, 
+  0x70d8143b, 0x295af2a2, 
+  0x70f8801f, 0x29348937, 0x7118cdce, 0x290e0660, 0x7138fd34, 0x28e76a37, 
+  0x71590e3d, 0x28c0b4d2, 
+  0x717900d6, 0x2899e64a, 0x7198d4ea, 0x2872feb6, 0x71b88a66, 0x284bfe2f, 
+  0x71d82136, 0x2824e4cc, 
+  0x71f79947, 0x27fdb2a6, 0x7216f286, 0x27d667d5, 0x72362cdf, 0x27af0471, 
+  0x7255483f, 0x27878893, 
+  0x72744493, 0x275ff452, 0x729321c7, 0x273847c7, 0x72b1dfc9, 0x2710830b, 
+  0x72d07e85, 0x26e8a637, 
+  0x72eefde9, 0x26c0b162, 0x730d5de2, 0x2698a4a5, 0x732b9e5d, 0x2670801a, 
+  0x7349bf48, 0x264843d8, 
+  0x7367c08f, 0x261feff9, 0x7385a221, 0x25f78496, 0x73a363eb, 0x25cf01c7, 
+  0x73c105db, 0x25a667a6, 
+  0x73de87de, 0x257db64b, 0x73fbe9e2, 0x2554edd0, 0x74192bd5, 0x252c0e4e, 
+  0x74364da5, 0x250317de, 
+  0x74534f40, 0x24da0a99, 0x74703094, 0x24b0e699, 0x748cf190, 0x2487abf7, 
+  0x74a99221, 0x245e5acc, 
+  0x74c61236, 0x2434f332, 0x74e271bd, 0x240b7542, 0x74feb0a5, 0x23e1e117, 
+  0x751acedc, 0x23b836c9, 
+  0x7536cc52, 0x238e7673, 0x7552a8f4, 0x2364a02e, 0x756e64b2, 0x233ab413, 
+  0x7589ff7a, 0x2310b23e, 
+  0x75a5793c, 0x22e69ac7, 0x75c0d1e6, 0x22bc6dc9, 0x75dc0968, 0x22922b5e, 
+  0x75f71fb1, 0x2267d39f, 
+  0x761214b0, 0x223d66a8, 0x762ce854, 0x2212e491, 0x76479a8e, 0x21e84d76, 
+  0x76622b4b, 0x21bda170, 
+  0x767c9a7d, 0x2192e09a, 0x7696e813, 0x21680b0f, 0x76b113fd, 0x213d20e8, 
+  0x76cb1e29, 0x21122240, 
+  0x76e5068a, 0x20e70f32, 0x76fecd0d, 0x20bbe7d8, 0x771871a4, 0x2090ac4d, 
+  0x7731f43f, 0x20655cab, 
+  0x774b54ce, 0x2039f90e, 0x77649341, 0x200e8190, 0x777daf89, 0x1fe2f64b, 
+  0x7796a996, 0x1fb7575c, 
+  0x77af8158, 0x1f8ba4db, 0x77c836c2, 0x1f5fdee6, 0x77e0c9c2, 0x1f340596, 
+  0x77f93a4b, 0x1f081906, 
+  0x7811884c, 0x1edc1952, 0x7829b3b8, 0x1eb00695, 0x7841bc7f, 0x1e83e0ea, 
+  0x7859a292, 0x1e57a86d, 
+  0x787165e3, 0x1e2b5d38, 0x78890662, 0x1dfeff66, 0x78a08402, 0x1dd28f14, 
+  0x78b7deb3, 0x1da60c5c, 
+  0x78cf1669, 0x1d79775b, 0x78e62b13, 0x1d4cd02b, 0x78fd1ca4, 0x1d2016e8, 
+  0x7913eb0e, 0x1cf34bae, 
+  0x792a9642, 0x1cc66e99, 0x79411e33, 0x1c997fc3, 0x795782d3, 0x1c6c7f49, 
+  0x796dc414, 0x1c3f6d47, 
+  0x7983e1e7, 0x1c1249d8, 0x7999dc41, 0x1be51517, 0x79afb313, 0x1bb7cf23, 
+  0x79c5664f, 0x1b8a7814, 
+  0x79daf5e8, 0x1b5d1009, 0x79f061d1, 0x1b2f971d, 0x7a05a9fd, 0x1b020d6c, 
+  0x7a1ace5e, 0x1ad47312, 
+  0x7a2fcee8, 0x1aa6c82b, 0x7a44ab8d, 0x1a790cd3, 0x7a596441, 0x1a4b4127, 
+  0x7a6df8f7, 0x1a1d6543, 
+  0x7a8269a2, 0x19ef7943, 0x7a96b636, 0x19c17d44, 0x7aaadea5, 0x19937161, 
+  0x7abee2e5, 0x196555b7, 
+  0x7ad2c2e7, 0x19372a63, 0x7ae67ea1, 0x1908ef81, 0x7afa1605, 0x18daa52e, 
+  0x7b0d8908, 0x18ac4b86, 
+  0x7b20d79e, 0x187de2a6, 0x7b3401bb, 0x184f6aaa, 0x7b470752, 0x1820e3b0, 
+  0x7b59e859, 0x17f24dd3, 
+  0x7b6ca4c4, 0x17c3a931, 0x7b7f3c87, 0x1794f5e6, 0x7b91af96, 0x1766340f, 
+  0x7ba3fde7, 0x173763c9, 
+  0x7bb6276d, 0x17088530, 0x7bc82c1e, 0x16d99863, 0x7bda0bef, 0x16aa9d7d, 
+  0x7bebc6d5, 0x167b949c, 
+  0x7bfd5cc4, 0x164c7ddd, 0x7c0ecdb2, 0x161d595c, 0x7c201994, 0x15ee2737, 
+  0x7c31405f, 0x15bee78b, 
+  0x7c424209, 0x158f9a75, 0x7c531e88, 0x15604012, 0x7c63d5d0, 0x1530d880, 
+  0x7c7467d8, 0x150163dc, 
+  0x7c84d496, 0x14d1e242, 0x7c951bff, 0x14a253d1, 0x7ca53e08, 0x1472b8a5, 
+  0x7cb53aaa, 0x144310dc, 
+  0x7cc511d8, 0x14135c94, 0x7cd4c38a, 0x13e39be9, 0x7ce44fb6, 0x13b3cefa, 
+  0x7cf3b653, 0x1383f5e3, 
+  0x7d02f756, 0x135410c2, 0x7d1212b7, 0x13241fb6, 0x7d21086c, 0x12f422da, 
+  0x7d2fd86c, 0x12c41a4e, 
+  0x7d3e82ad, 0x1294062e, 0x7d4d0727, 0x1263e699, 0x7d5b65d1, 0x1233bbab, 
+  0x7d699ea2, 0x12038583, 
+  0x7d77b191, 0x11d3443f, 0x7d859e96, 0x11a2f7fb, 0x7d9365a7, 0x1172a0d7, 
+  0x7da106bd, 0x11423eef, 
+  0x7dae81ce, 0x1111d262, 0x7dbbd6d4, 0x10e15b4e, 0x7dc905c4, 0x10b0d9cf, 
+  0x7dd60e98, 0x10804e05, 
+  0x7de2f147, 0x104fb80e, 0x7defadca, 0x101f1806, 0x7dfc4418, 0xfee6e0d, 
+  0x7e08b429, 0xfbdba40, 
+  0x7e14fdf7, 0xf8cfcbd, 0x7e212179, 0xf5c35a3, 0x7e2d1ea7, 0xf2b650f, 
+  0x7e38f57c, 0xefa8b1f, 
+  0x7e44a5ee, 0xec9a7f2, 0x7e502ff8, 0xe98bba6, 0x7e5b9392, 0xe67c659, 
+  0x7e66d0b4, 0xe36c829, 
+  0x7e71e758, 0xe05c135, 0x7e7cd778, 0xdd4b19a, 0x7e87a10b, 0xda39977, 
+  0x7e92440d, 0xd7278ea, 
+  0x7e9cc076, 0xd415012, 0x7ea7163f, 0xd101f0d, 0x7eb14562, 0xcdee5f9, 
+  0x7ebb4dda, 0xcada4f4, 
+  0x7ec52f9f, 0xc7c5c1e, 0x7eceeaad, 0xc4b0b93, 0x7ed87efb, 0xc19b374, 
+  0x7ee1ec86, 0xbe853dd, 
+  0x7eeb3347, 0xbb6ecef, 0x7ef45338, 0xb857ec6, 0x7efd4c53, 0xb540982, 
+  0x7f061e94, 0xb228d41, 
+  0x7f0ec9f4, 0xaf10a22, 0x7f174e6f, 0xabf8043, 0x7f1fabff, 0xa8defc2, 
+  0x7f27e29f, 0xa5c58bf, 
+  0x7f2ff249, 0xa2abb58, 0x7f37daf9, 0x9f917ab, 0x7f3f9cab, 0x9c76dd8, 
+  0x7f473758, 0x995bdfc, 
+  0x7f4eaafe, 0x9640837, 0x7f55f796, 0x9324ca6, 0x7f5d1d1c, 0x9008b6a, 
+  0x7f641b8d, 0x8cec4a0, 
+  0x7f6af2e3, 0x89cf867, 0x7f71a31a, 0x86b26de, 0x7f782c2f, 0x8395023, 
+  0x7f7e8e1e, 0x8077456, 
+  0x7f84c8e1, 0x7d59395, 0x7f8adc76, 0x7a3adff, 0x7f90c8d9, 0x771c3b2, 
+  0x7f968e07, 0x73fd4ce, 
+  0x7f9c2bfa, 0x70de171, 0x7fa1a2b1, 0x6dbe9bb, 0x7fa6f228, 0x6a9edc9, 
+  0x7fac1a5b, 0x677edba, 
+  0x7fb11b47, 0x645e9af, 0x7fb5f4ea, 0x613e1c4, 0x7fbaa73f, 0x5e1d61a, 
+  0x7fbf3245, 0x5afc6cf, 
+  0x7fc395f9, 0x57db402, 0x7fc7d257, 0x54b9dd2, 0x7fcbe75e, 0x519845e, 
+  0x7fcfd50a, 0x4e767c4, 
+  0x7fd39b5a, 0x4b54824, 0x7fd73a4a, 0x483259d, 0x7fdab1d9, 0x451004d, 
+  0x7fde0205, 0x41ed853, 
+  0x7fe12acb, 0x3ecadcf, 0x7fe42c29, 0x3ba80df, 0x7fe7061f, 0x38851a2, 
+  0x7fe9b8a9, 0x3562037, 
+  0x7fec43c6, 0x323ecbe, 0x7feea776, 0x2f1b754, 0x7ff0e3b5, 0x2bf801a, 
+  0x7ff2f884, 0x28d472d, 
+  0x7ff4e5df, 0x25b0cae, 0x7ff6abc8, 0x228d0bb, 0x7ff84a3b, 0x1f69373, 
+  0x7ff9c139, 0x1c454f4, 
+  0x7ffb10c1, 0x192155f, 0x7ffc38d0, 0x15fd4d2, 0x7ffd3968, 0x12d936b, 
+  0x7ffe1287, 0xfb514b, 
+  0x7ffec42d, 0xc90e8f, 0x7fff4e59, 0x96cb58, 0x7fffb10b, 0x6487c3, 
+  0x7fffec42, 0x3243f1, 
+  0x7fffffff, 0x0, 0x7fffec42, 0xffcdbc0f, 0x7fffb10b, 0xff9b783d, 0x7fff4e59, 
+  0xff6934a8, 
+  0x7ffec42d, 0xff36f171, 0x7ffe1287, 0xff04aeb5, 0x7ffd3968, 0xfed26c95, 
+  0x7ffc38d0, 0xfea02b2e, 
+  0x7ffb10c1, 0xfe6deaa1, 0x7ff9c139, 0xfe3bab0c, 0x7ff84a3b, 0xfe096c8d, 
+  0x7ff6abc8, 0xfdd72f45, 
+  0x7ff4e5df, 0xfda4f352, 0x7ff2f884, 0xfd72b8d3, 0x7ff0e3b5, 0xfd407fe6, 
+  0x7feea776, 0xfd0e48ac, 
+  0x7fec43c6, 0xfcdc1342, 0x7fe9b8a9, 0xfca9dfc9, 0x7fe7061f, 0xfc77ae5e, 
+  0x7fe42c29, 0xfc457f21, 
+  0x7fe12acb, 0xfc135231, 0x7fde0205, 0xfbe127ad, 0x7fdab1d9, 0xfbaeffb3, 
+  0x7fd73a4a, 0xfb7cda63, 
+  0x7fd39b5a, 0xfb4ab7dc, 0x7fcfd50a, 0xfb18983c, 0x7fcbe75e, 0xfae67ba2, 
+  0x7fc7d257, 0xfab4622e, 
+  0x7fc395f9, 0xfa824bfe, 0x7fbf3245, 0xfa503931, 0x7fbaa73f, 0xfa1e29e6, 
+  0x7fb5f4ea, 0xf9ec1e3c, 
+  0x7fb11b47, 0xf9ba1651, 0x7fac1a5b, 0xf9881246, 0x7fa6f228, 0xf9561237, 
+  0x7fa1a2b1, 0xf9241645, 
+  0x7f9c2bfa, 0xf8f21e8f, 0x7f968e07, 0xf8c02b32, 0x7f90c8d9, 0xf88e3c4e, 
+  0x7f8adc76, 0xf85c5201, 
+  0x7f84c8e1, 0xf82a6c6b, 0x7f7e8e1e, 0xf7f88baa, 0x7f782c2f, 0xf7c6afdd, 
+  0x7f71a31a, 0xf794d922, 
+  0x7f6af2e3, 0xf7630799, 0x7f641b8d, 0xf7313b60, 0x7f5d1d1c, 0xf6ff7496, 
+  0x7f55f796, 0xf6cdb35a, 
+  0x7f4eaafe, 0xf69bf7c9, 0x7f473758, 0xf66a4204, 0x7f3f9cab, 0xf6389228, 
+  0x7f37daf9, 0xf606e855, 
+  0x7f2ff249, 0xf5d544a8, 0x7f27e29f, 0xf5a3a741, 0x7f1fabff, 0xf572103e, 
+  0x7f174e6f, 0xf5407fbd, 
+  0x7f0ec9f4, 0xf50ef5de, 0x7f061e94, 0xf4dd72bf, 0x7efd4c53, 0xf4abf67e, 
+  0x7ef45338, 0xf47a813a, 
+  0x7eeb3347, 0xf4491311, 0x7ee1ec86, 0xf417ac23, 0x7ed87efb, 0xf3e64c8c, 
+  0x7eceeaad, 0xf3b4f46d, 
+  0x7ec52f9f, 0xf383a3e2, 0x7ebb4dda, 0xf3525b0c, 0x7eb14562, 0xf3211a07, 
+  0x7ea7163f, 0xf2efe0f3, 
+  0x7e9cc076, 0xf2beafee, 0x7e92440d, 0xf28d8716, 0x7e87a10b, 0xf25c6689, 
+  0x7e7cd778, 0xf22b4e66, 
+  0x7e71e758, 0xf1fa3ecb, 0x7e66d0b4, 0xf1c937d7, 0x7e5b9392, 0xf19839a7, 
+  0x7e502ff8, 0xf167445a, 
+  0x7e44a5ee, 0xf136580e, 0x7e38f57c, 0xf10574e1, 0x7e2d1ea7, 0xf0d49af1, 
+  0x7e212179, 0xf0a3ca5d, 
+  0x7e14fdf7, 0xf0730343, 0x7e08b429, 0xf04245c0, 0x7dfc4418, 0xf01191f3, 
+  0x7defadca, 0xefe0e7fa, 
+  0x7de2f147, 0xefb047f2, 0x7dd60e98, 0xef7fb1fb, 0x7dc905c4, 0xef4f2631, 
+  0x7dbbd6d4, 0xef1ea4b2, 
+  0x7dae81ce, 0xeeee2d9e, 0x7da106bd, 0xeebdc111, 0x7d9365a7, 0xee8d5f29, 
+  0x7d859e96, 0xee5d0805, 
+  0x7d77b191, 0xee2cbbc1, 0x7d699ea2, 0xedfc7a7d, 0x7d5b65d1, 0xedcc4455, 
+  0x7d4d0727, 0xed9c1967, 
+  0x7d3e82ad, 0xed6bf9d2, 0x7d2fd86c, 0xed3be5b2, 0x7d21086c, 0xed0bdd26, 
+  0x7d1212b7, 0xecdbe04a, 
+  0x7d02f756, 0xecabef3e, 0x7cf3b653, 0xec7c0a1d, 0x7ce44fb6, 0xec4c3106, 
+  0x7cd4c38a, 0xec1c6417, 
+  0x7cc511d8, 0xebeca36c, 0x7cb53aaa, 0xebbcef24, 0x7ca53e08, 0xeb8d475b, 
+  0x7c951bff, 0xeb5dac2f, 
+  0x7c84d496, 0xeb2e1dbe, 0x7c7467d8, 0xeafe9c24, 0x7c63d5d0, 0xeacf2780, 
+  0x7c531e88, 0xea9fbfee, 
+  0x7c424209, 0xea70658b, 0x7c31405f, 0xea411875, 0x7c201994, 0xea11d8c9, 
+  0x7c0ecdb2, 0xe9e2a6a4, 
+  0x7bfd5cc4, 0xe9b38223, 0x7bebc6d5, 0xe9846b64, 0x7bda0bef, 0xe9556283, 
+  0x7bc82c1e, 0xe926679d, 
+  0x7bb6276d, 0xe8f77ad0, 0x7ba3fde7, 0xe8c89c37, 0x7b91af96, 0xe899cbf1, 
+  0x7b7f3c87, 0xe86b0a1a, 
+  0x7b6ca4c4, 0xe83c56cf, 0x7b59e859, 0xe80db22d, 0x7b470752, 0xe7df1c50, 
+  0x7b3401bb, 0xe7b09556, 
+  0x7b20d79e, 0xe7821d5a, 0x7b0d8908, 0xe753b47a, 0x7afa1605, 0xe7255ad2, 
+  0x7ae67ea1, 0xe6f7107f, 
+  0x7ad2c2e7, 0xe6c8d59d, 0x7abee2e5, 0xe69aaa49, 0x7aaadea5, 0xe66c8e9f, 
+  0x7a96b636, 0xe63e82bc, 
+  0x7a8269a2, 0xe61086bd, 0x7a6df8f7, 0xe5e29abd, 0x7a596441, 0xe5b4bed9, 
+  0x7a44ab8d, 0xe586f32d, 
+  0x7a2fcee8, 0xe55937d5, 0x7a1ace5e, 0xe52b8cee, 0x7a05a9fd, 0xe4fdf294, 
+  0x79f061d1, 0xe4d068e3, 
+  0x79daf5e8, 0xe4a2eff7, 0x79c5664f, 0xe47587ec, 0x79afb313, 0xe44830dd, 
+  0x7999dc41, 0xe41aeae9, 
+  0x7983e1e7, 0xe3edb628, 0x796dc414, 0xe3c092b9, 0x795782d3, 0xe39380b7, 
+  0x79411e33, 0xe366803d, 
+  0x792a9642, 0xe3399167, 0x7913eb0e, 0xe30cb452, 0x78fd1ca4, 0xe2dfe918, 
+  0x78e62b13, 0xe2b32fd5, 
+  0x78cf1669, 0xe28688a5, 0x78b7deb3, 0xe259f3a4, 0x78a08402, 0xe22d70ec, 
+  0x78890662, 0xe201009a, 
+  0x787165e3, 0xe1d4a2c8, 0x7859a292, 0xe1a85793, 0x7841bc7f, 0xe17c1f16, 
+  0x7829b3b8, 0xe14ff96b, 
+  0x7811884c, 0xe123e6ae, 0x77f93a4b, 0xe0f7e6fa, 0x77e0c9c2, 0xe0cbfa6a, 
+  0x77c836c2, 0xe0a0211a, 
+  0x77af8158, 0xe0745b25, 0x7796a996, 0xe048a8a4, 0x777daf89, 0xe01d09b5, 
+  0x77649341, 0xdff17e70, 
+  0x774b54ce, 0xdfc606f2, 0x7731f43f, 0xdf9aa355, 0x771871a4, 0xdf6f53b3, 
+  0x76fecd0d, 0xdf441828, 
+  0x76e5068a, 0xdf18f0ce, 0x76cb1e29, 0xdeedddc0, 0x76b113fd, 0xdec2df18, 
+  0x7696e813, 0xde97f4f1, 
+  0x767c9a7d, 0xde6d1f66, 0x76622b4b, 0xde425e90, 0x76479a8e, 0xde17b28a, 
+  0x762ce854, 0xdded1b6f, 
+  0x761214b0, 0xddc29958, 0x75f71fb1, 0xdd982c61, 0x75dc0968, 0xdd6dd4a2, 
+  0x75c0d1e6, 0xdd439237, 
+  0x75a5793c, 0xdd196539, 0x7589ff7a, 0xdcef4dc2, 0x756e64b2, 0xdcc54bed, 
+  0x7552a8f4, 0xdc9b5fd2, 
+  0x7536cc52, 0xdc71898d, 0x751acedc, 0xdc47c937, 0x74feb0a5, 0xdc1e1ee9, 
+  0x74e271bd, 0xdbf48abe, 
+  0x74c61236, 0xdbcb0cce, 0x74a99221, 0xdba1a534, 0x748cf190, 0xdb785409, 
+  0x74703094, 0xdb4f1967, 
+  0x74534f40, 0xdb25f567, 0x74364da5, 0xdafce822, 0x74192bd5, 0xdad3f1b2, 
+  0x73fbe9e2, 0xdaab1230, 
+  0x73de87de, 0xda8249b5, 0x73c105db, 0xda59985a, 0x73a363eb, 0xda30fe39, 
+  0x7385a221, 0xda087b6a, 
+  0x7367c08f, 0xd9e01007, 0x7349bf48, 0xd9b7bc28, 0x732b9e5d, 0xd98f7fe6, 
+  0x730d5de2, 0xd9675b5b, 
+  0x72eefde9, 0xd93f4e9e, 0x72d07e85, 0xd91759c9, 0x72b1dfc9, 0xd8ef7cf5, 
+  0x729321c7, 0xd8c7b839, 
+  0x72744493, 0xd8a00bae, 0x7255483f, 0xd878776d, 0x72362cdf, 0xd850fb8f, 
+  0x7216f286, 0xd829982b, 
+  0x71f79947, 0xd8024d5a, 0x71d82136, 0xd7db1b34, 0x71b88a66, 0xd7b401d1, 
+  0x7198d4ea, 0xd78d014a, 
+  0x717900d6, 0xd76619b6, 0x71590e3d, 0xd73f4b2e, 0x7138fd34, 0xd71895c9, 
+  0x7118cdce, 0xd6f1f9a0, 
+  0x70f8801f, 0xd6cb76c9, 0x70d8143b, 0xd6a50d5e, 0x70b78a35, 0xd67ebd75, 
+  0x7096e223, 0xd6588726, 
+  0x70761c17, 0xd6326a89, 0x70553827, 0xd60c67b5, 0x70343667, 0xd5e67ec2, 
+  0x701316ea, 0xd5c0afc7, 
+  0x6ff1d9c6, 0xd59afadb, 0x6fd07f0f, 0xd5756016, 0x6faf06d9, 0xd54fdf8f, 
+  0x6f8d7139, 0xd52a795d, 
+  0x6f6bbe44, 0xd5052d97, 0x6f49ee0f, 0xd4dffc55, 0x6f2800ae, 0xd4bae5ac, 
+  0x6f05f637, 0xd495e9b4, 
+  0x6ee3cebe, 0xd4710884, 0x6ec18a58, 0xd44c4232, 0x6e9f291b, 0xd42796d5, 
+  0x6e7cab1c, 0xd4030685, 
+  0x6e5a106f, 0xd3de9156, 0x6e37592c, 0xd3ba3761, 0x6e148566, 0xd395f8ba, 
+  0x6df19533, 0xd371d57a, 
+  0x6dce88a9, 0xd34dcdb5, 0x6dab5fde, 0xd329e182, 0x6d881ae7, 0xd30610f7, 
+  0x6d64b9da, 0xd2e25c2b, 
+  0x6d413ccc, 0xd2bec334, 0x6d1da3d5, 0xd29b4626, 0x6cf9ef09, 0xd277e519, 
+  0x6cd61e7e, 0xd254a022, 
+  0x6cb2324b, 0xd2317757, 0x6c8e2a86, 0xd20e6acd, 0x6c6a0746, 0xd1eb7a9a, 
+  0x6c45c89f, 0xd1c8a6d4, 
+  0x6c216eaa, 0xd1a5ef91, 0x6bfcf97b, 0xd18354e4, 0x6bd8692b, 0xd160d6e5, 
+  0x6bb3bdce, 0xd13e75a8, 
+  0x6b8ef77c, 0xd11c3142, 0x6b6a164c, 0xd0fa09c9, 0x6b451a54, 0xd0d7ff52, 
+  0x6b2003ab, 0xd0b611f1, 
+  0x6afad269, 0xd09441bc, 0x6ad586a3, 0xd0728ec7, 0x6ab02071, 0xd050f927, 
+  0x6a8a9fea, 0xd02f80f1, 
+  0x6a650525, 0xd00e263a, 0x6a3f5039, 0xcfece916, 0x6a19813e, 0xcfcbc999, 
+  0x69f3984b, 0xcfaac7d9, 
+  0x69cd9577, 0xcf89e3e9, 0x69a778da, 0xcf691ddd, 0x6981428b, 0xcf4875cb, 
+  0x695af2a2, 0xcf27ebc5, 
+  0x69348937, 0xcf077fe1, 0x690e0660, 0xcee73232, 0x68e76a37, 0xcec702cc, 
+  0x68c0b4d2, 0xcea6f1c3, 
+  0x6899e64a, 0xce86ff2a, 0x6872feb6, 0xce672b16, 0x684bfe2f, 0xce47759a, 
+  0x6824e4cc, 0xce27deca, 
+  0x67fdb2a6, 0xce0866b9, 0x67d667d5, 0xcde90d7a, 0x67af0471, 0xcdc9d321, 
+  0x67878893, 0xcdaab7c1, 
+  0x675ff452, 0xcd8bbb6d, 0x673847c7, 0xcd6cde39, 0x6710830b, 0xcd4e2037, 
+  0x66e8a637, 0xcd2f817b, 
+  0x66c0b162, 0xcd110217, 0x6698a4a5, 0xccf2a21e, 0x6670801a, 0xccd461a3, 
+  0x664843d8, 0xccb640b8, 
+  0x661feff9, 0xcc983f71, 0x65f78496, 0xcc7a5ddf, 0x65cf01c7, 0xcc5c9c15, 
+  0x65a667a6, 0xcc3efa25, 
+  0x657db64b, 0xcc217822, 0x6554edd0, 0xcc04161e, 0x652c0e4e, 0xcbe6d42b, 
+  0x650317de, 0xcbc9b25b, 
+  0x64da0a99, 0xcbacb0c0, 0x64b0e699, 0xcb8fcf6c, 0x6487abf7, 0xcb730e70, 
+  0x645e5acc, 0xcb566ddf, 
+  0x6434f332, 0xcb39edca, 0x640b7542, 0xcb1d8e43, 0x63e1e117, 0xcb014f5b, 
+  0x63b836c9, 0xcae53124, 
+  0x638e7673, 0xcac933ae, 0x6364a02e, 0xcaad570c, 0x633ab413, 0xca919b4e, 
+  0x6310b23e, 0xca760086, 
+  0x62e69ac7, 0xca5a86c4, 0x62bc6dc9, 0xca3f2e1a, 0x62922b5e, 0xca23f698, 
+  0x6267d39f, 0xca08e04f, 
+  0x623d66a8, 0xc9edeb50, 0x6212e491, 0xc9d317ac, 0x61e84d76, 0xc9b86572, 
+  0x61bda170, 0xc99dd4b5, 
+  0x6192e09a, 0xc9836583, 0x61680b0f, 0xc96917ed, 0x613d20e8, 0xc94eec03, 
+  0x61122240, 0xc934e1d7, 
+  0x60e70f32, 0xc91af976, 0x60bbe7d8, 0xc90132f3, 0x6090ac4d, 0xc8e78e5c, 
+  0x60655cab, 0xc8ce0bc1, 
+  0x6039f90e, 0xc8b4ab32, 0x600e8190, 0xc89b6cbf, 0x5fe2f64b, 0xc8825077, 
+  0x5fb7575c, 0xc869566a, 
+  0x5f8ba4db, 0xc8507ea8, 0x5f5fdee6, 0xc837c93e, 0x5f340596, 0xc81f363e, 
+  0x5f081906, 0xc806c5b5, 
+  0x5edc1952, 0xc7ee77b4, 0x5eb00695, 0xc7d64c48, 0x5e83e0ea, 0xc7be4381, 
+  0x5e57a86d, 0xc7a65d6e, 
+  0x5e2b5d38, 0xc78e9a1d, 0x5dfeff66, 0xc776f99e, 0x5dd28f14, 0xc75f7bfe, 
+  0x5da60c5c, 0xc748214d, 
+  0x5d79775b, 0xc730e997, 0x5d4cd02b, 0xc719d4ed, 0x5d2016e8, 0xc702e35c, 
+  0x5cf34bae, 0xc6ec14f2, 
+  0x5cc66e99, 0xc6d569be, 0x5c997fc3, 0xc6bee1cd, 0x5c6c7f49, 0xc6a87d2d, 
+  0x5c3f6d47, 0xc6923bec, 
+  0x5c1249d8, 0xc67c1e19, 0x5be51517, 0xc66623bf, 0x5bb7cf23, 0xc6504ced, 
+  0x5b8a7814, 0xc63a99b1, 
+  0x5b5d1009, 0xc6250a18, 0x5b2f971d, 0xc60f9e2f, 0x5b020d6c, 0xc5fa5603, 
+  0x5ad47312, 0xc5e531a2, 
+  0x5aa6c82b, 0xc5d03118, 0x5a790cd3, 0xc5bb5473, 0x5a4b4127, 0xc5a69bbf, 
+  0x5a1d6543, 0xc5920709, 
+  0x59ef7943, 0xc57d965e, 0x59c17d44, 0xc56949ca, 0x59937161, 0xc555215b, 
+  0x596555b7, 0xc5411d1b, 
+  0x59372a63, 0xc52d3d19, 0x5908ef81, 0xc519815f, 0x58daa52e, 0xc505e9fb, 
+  0x58ac4b86, 0xc4f276f8, 
+  0x587de2a6, 0xc4df2862, 0x584f6aaa, 0xc4cbfe45, 0x5820e3b0, 0xc4b8f8ae, 
+  0x57f24dd3, 0xc4a617a7, 
+  0x57c3a931, 0xc4935b3c, 0x5794f5e6, 0xc480c379, 0x5766340f, 0xc46e506a, 
+  0x573763c9, 0xc45c0219, 
+  0x57088530, 0xc449d893, 0x56d99863, 0xc437d3e2, 0x56aa9d7d, 0xc425f411, 
+  0x567b949c, 0xc414392b, 
+  0x564c7ddd, 0xc402a33c, 0x561d595c, 0xc3f1324e, 0x55ee2737, 0xc3dfe66c, 
+  0x55bee78b, 0xc3cebfa1, 
+  0x558f9a75, 0xc3bdbdf7, 0x55604012, 0xc3ace178, 0x5530d880, 0xc39c2a30, 
+  0x550163dc, 0xc38b9828, 
+  0x54d1e242, 0xc37b2b6a, 0x54a253d1, 0xc36ae401, 0x5472b8a5, 0xc35ac1f8, 
+  0x544310dc, 0xc34ac556, 
+  0x54135c94, 0xc33aee28, 0x53e39be9, 0xc32b3c76, 0x53b3cefa, 0xc31bb04a, 
+  0x5383f5e3, 0xc30c49ad, 
+  0x535410c2, 0xc2fd08aa, 0x53241fb6, 0xc2eded49, 0x52f422da, 0xc2def794, 
+  0x52c41a4e, 0xc2d02794, 
+  0x5294062e, 0xc2c17d53, 0x5263e699, 0xc2b2f8d9, 0x5233bbab, 0xc2a49a2f, 
+  0x52038583, 0xc296615e, 
+  0x51d3443f, 0xc2884e6f, 0x51a2f7fb, 0xc27a616a, 0x5172a0d7, 0xc26c9a59, 
+  0x51423eef, 0xc25ef943, 
+  0x5111d262, 0xc2517e32, 0x50e15b4e, 0xc244292c, 0x50b0d9cf, 0xc236fa3c, 
+  0x50804e05, 0xc229f168, 
+  0x504fb80e, 0xc21d0eb9, 0x501f1806, 0xc2105236, 0x4fee6e0d, 0xc203bbe8, 
+  0x4fbdba40, 0xc1f74bd7, 
+  0x4f8cfcbd, 0xc1eb0209, 0x4f5c35a3, 0xc1dede87, 0x4f2b650f, 0xc1d2e159, 
+  0x4efa8b1f, 0xc1c70a84, 
+  0x4ec9a7f2, 0xc1bb5a12, 0x4e98bba6, 0xc1afd008, 0x4e67c659, 0xc1a46c6e, 
+  0x4e36c829, 0xc1992f4c, 
+  0x4e05c135, 0xc18e18a8, 0x4dd4b19a, 0xc1832888, 0x4da39977, 0xc1785ef5, 
+  0x4d7278ea, 0xc16dbbf3, 
+  0x4d415012, 0xc1633f8a, 0x4d101f0d, 0xc158e9c1, 0x4cdee5f9, 0xc14eba9e, 
+  0x4cada4f4, 0xc144b226, 
+  0x4c7c5c1e, 0xc13ad061, 0x4c4b0b93, 0xc1311553, 0x4c19b374, 0xc1278105, 
+  0x4be853dd, 0xc11e137a, 
+  0x4bb6ecef, 0xc114ccb9, 0x4b857ec6, 0xc10bacc8, 0x4b540982, 0xc102b3ad, 
+  0x4b228d41, 0xc0f9e16c, 
+  0x4af10a22, 0xc0f1360c, 0x4abf8043, 0xc0e8b191, 0x4a8defc2, 0xc0e05401, 
+  0x4a5c58bf, 0xc0d81d61, 
+  0x4a2abb58, 0xc0d00db7, 0x49f917ab, 0xc0c82507, 0x49c76dd8, 0xc0c06355, 
+  0x4995bdfc, 0xc0b8c8a8, 
+  0x49640837, 0xc0b15502, 0x49324ca6, 0xc0aa086a, 0x49008b6a, 0xc0a2e2e4, 
+  0x48cec4a0, 0xc09be473, 
+  0x489cf867, 0xc0950d1d, 0x486b26de, 0xc08e5ce6, 0x48395023, 0xc087d3d1, 
+  0x48077456, 0xc08171e2, 
+  0x47d59395, 0xc07b371f, 0x47a3adff, 0xc075238a, 0x4771c3b2, 0xc06f3727, 
+  0x473fd4ce, 0xc06971f9, 
+  0x470de171, 0xc063d406, 0x46dbe9bb, 0xc05e5d4f, 0x46a9edc9, 0xc0590dd8, 
+  0x4677edba, 0xc053e5a5, 
+  0x4645e9af, 0xc04ee4b9, 0x4613e1c4, 0xc04a0b16, 0x45e1d61a, 0xc04558c1, 
+  0x45afc6cf, 0xc040cdbb, 
+  0x457db402, 0xc03c6a07, 0x454b9dd2, 0xc0382da9, 0x4519845e, 0xc03418a2, 
+  0x44e767c4, 0xc0302af6, 
+  0x44b54824, 0xc02c64a6, 0x4483259d, 0xc028c5b6, 0x4451004d, 0xc0254e27, 
+  0x441ed853, 0xc021fdfb, 
+  0x43ecadcf, 0xc01ed535, 0x43ba80df, 0xc01bd3d7, 0x438851a2, 0xc018f9e1, 
+  0x43562037, 0xc0164757, 
+  0x4323ecbe, 0xc013bc3a, 0x42f1b754, 0xc011588a, 0x42bf801a, 0xc00f1c4b, 
+  0x428d472d, 0xc00d077c, 
+  0x425b0cae, 0xc00b1a21, 0x4228d0bb, 0xc0095438, 0x41f69373, 0xc007b5c5, 
+  0x41c454f4, 0xc0063ec7, 
+  0x4192155f, 0xc004ef3f, 0x415fd4d2, 0xc003c730, 0x412d936b, 0xc002c698, 
+  0x40fb514b, 0xc001ed79, 
+  0x40c90e8f, 0xc0013bd3, 0x4096cb58, 0xc000b1a7, 0x406487c3, 0xc0004ef5, 
+  0x403243f1, 0xc00013be 
+}; 
+ 
+/**
+* @brief  Initialization function for the Q31 RFFT/RIFFT.
+* @param[in, out] *S             points to an instance of the Q31 RFFT/RIFFT structure.
+* @param[in, out] *S_CFFT        points to an instance of the Q31 CFFT/CIFFT structure.
+* @param[in]      fftLenReal     length of the FFT.
+* @param[in]      ifftFlagR      flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform.
+* @param[in]      bitReverseFlag flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output.
+* @return         ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>fftLenReal</code> is not a supported value.
+*
+* \par Description:
+* \par
+* The parameter <code>fftLenReal</code> specifies the length of the RFFT/RIFFT process. Supported FFT lengths are 128, 512 and 2048.
+* The length is checked at its full 32-bit width, so a value that only becomes a supported length
+* after being narrowed to 16 bits (for example 0x10800) is rejected with ARM_MATH_ARGUMENT_ERROR.
+* \par
+* The parameter <code>ifftFlagR</code> controls whether a forward or inverse transform is computed.
+* Set(=1) ifftFlagR to calculate RIFFT, otherwise RFFT is calculated.
+* \par
+* The parameter <code>bitReverseFlag</code> controls whether output is in normal order or bit reversed order.
+* Set(=1) bitReverseFlag for output to be in normal order otherwise output is in bit reversed order.
+* \par
+* This function also stores the pointers to the realCoefAQ31 and realCoefBQ31 twiddle factor tables
+* in the instance and initializes the complex FFT instance <code>S_CFFT</code> for fftLenReal/2 points.
+* \par
+* The instance fields and the complex FFT instance are written even when ARM_MATH_ARGUMENT_ERROR
+* is returned; in that case <code>twidCoefRModifier</code> is left unmodified.
+*/
+
+arm_status arm_rfft_init_q31(
+  arm_rfft_instance_q31 * S,
+  arm_cfft_radix4_instance_q31 * S_CFFT,
+  uint32_t fftLenReal,
+  uint32_t ifftFlagR,
+  uint32_t bitReverseFlag)
+{
+  /*  Initialise the default arm status */
+  arm_status status = ARM_MATH_SUCCESS;
+
+  /*  Initialize the Real FFT length */
+  S->fftLenReal = (uint16_t) fftLenReal;
+
+  /*  Initialize the Complex FFT length (half of the narrowed real length) */
+  S->fftLenBy2 = (uint16_t) fftLenReal / 2u;
+
+  /*  Initialize the Twiddle coefficientA pointer */
+  S->pTwiddleAReal = (q31_t *) realCoefAQ31;
+
+  /*  Initialize the Twiddle coefficientB pointer */
+  S->pTwiddleBReal = (q31_t *) realCoefBQ31;
+
+  /*  Initialize the Flag for selection of RFFT or RIFFT */
+  S->ifftFlagR = (uint8_t) ifftFlagR;
+
+  /*  Initialize the Flag for calculation Bit reversal or not */
+  S->bitReverseFlagR = (uint8_t) bitReverseFlag;
+
+  /*  Initialization of coef modifier depending on the FFT length.
+   *  The switch uses the caller's full-width value rather than the
+   *  narrowed copy stored above, so that an out-of-range length cannot
+   *  alias onto a supported one and be reported as success. */
+  switch (fftLenReal)
+  {
+  case 2048u:
+    S->twidCoefRModifier = 1u;
+    break;
+  case 512u:
+    S->twidCoefRModifier = 4u;
+    break;
+  case 128u:
+    S->twidCoefRModifier = 16u;
+    break;
+  default:
+    /*  Reporting argument error if rfftSize is not valid value */
+    status = ARM_MATH_ARGUMENT_ERROR;
+    break;
+  }
+
+  /* Init Complex FFT Instance */
+  S->pCfft = S_CFFT;
+
+  if(S->ifftFlagR)
+  {
+    /* Initializes the CIFFT Module for fftLenreal/2 length */
+    arm_cfft_radix4_init_q31(S->pCfft, (uint16_t) S->fftLenBy2, 1u, 1u);
+  }
+  else
+  {
+    /* Initializes the CFFT Module for fftLenreal/2 length */
+    arm_cfft_radix4_init_q31(S->pCfft, (uint16_t) S->fftLenBy2, 0u, 1u);
+  }
+
+  /* return the status of RFFT Init function */
+  return (status);
+
+}
+ 
+  /**  
+   * @} end of RFFT_RIFFT group  
+   */

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/TransformFunctions/arm_rfft_q15.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,292 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:	    arm_rfft_q15.c  
+*  
+* Description:	RFFT & RIFFT Q15 process function  
+*  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* -------------------------------------------------------------------- */ 
+ 
+ 
+#include "arm_math.h" 
+ 
+/*--------------------------------------------------------------------  
+*		Internal functions prototypes  
+*  
+*		Forward declarations of the two split (pre/post-processing) stages  
+*		that arm_rfft_q15() calls; both are defined further down in this file.  
+--------------------------------------------------------------------*/ 
+ 
+void arm_split_rfft_q15( 
+  q15_t * pSrc, 
+  uint32_t fftLen, 
+  q15_t * pATable, 
+  q15_t * pBTable, 
+  q15_t * pDst, 
+  uint32_t modifier); 
+ 
+void arm_split_rifft_q15( 
+  q15_t * pSrc, 
+  uint32_t fftLen, 
+  q15_t * pATable, 
+  q15_t * pBTable, 
+  q15_t * pDst, 
+  uint32_t modifier); 
+ 
+/**  
+ * @addtogroup RFFT_RIFFT  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief Processing function for the Q15 RFFT/RIFFT. 
+ * @param[in]  *S    points to an instance of the Q15 RFFT/RIFFT structure. 
+ * @param[in]  *pSrc points to the input buffer. 
+ * @param[out] *pDst points to the output buffer. 
+ * @return none. 
+ *  
+ * \par Input and output formats: 
+ * \par  
+ * Internally input is downscaled by 2 for every stage to avoid saturations inside CFFT/CIFFT process.  
+ * Hence the output format is different for different RFFT sizes.  
+ * The input and output formats for different RFFT sizes and number of bits to upscale are mentioned in the tables below for RFFT and RIFFT: 
+ * \par  
+ * \image html RFFTQ15.gif "Input and Output Formats for Q15 RFFT"  
+ * \par  
+ * \image html RIFFTQ15.gif "Input and Output Formats for Q15 RIFFT"  
+ * \par Processing order and buffer usage: 
+ * \par  
+ * When S->ifftFlagR == 1 the split stage runs first (pSrc to pDst), followed by the inverse radix-4 butterfly and the optional bit reversal, both on pDst.  
+ * For any other flag value the radix-4 butterfly and the optional bit reversal are applied to pSrc itself, and the split stage then writes pDst.  
+ * \par  
+ * In the forward case pSrc is therefore not left untouched: it is handed to the butterfly and bit-reversal routines, and arm_split_rfft_q15() stores two extra values at  
+ * pSrc[2 * fftLenBy2] and pSrc[2 * fftLenBy2 + 1], so the buffer needs room for at least 2 * fftLenBy2 + 2 elements.  
+ * The forward split stage writes pDst up to index 4 * fftLenBy2 - 1.  
+ * \par  
+ * Bit reversal is performed only when S->bitReverseFlagR is exactly 1.  
+ */ 
+ 
+void arm_rfft_q15( 
+  const arm_rfft_instance_q15 * S, 
+  q15_t * pSrc, 
+  q15_t * pDst) 
+{ 
+  const arm_cfft_radix4_instance_q15 *S_CFFT = S->pCfft; /* complex FFT instance attached to S */ 
+ 
+  /* Calculation of RIFFT of input */ 
+  if(S->ifftFlagR == 1u) 
+  { 
+    /*  Real IFFT core process */ 
+    arm_split_rifft_q15(pSrc, S->fftLenBy2, S->pTwiddleAReal, 
+                        S->pTwiddleBReal, pDst, S->twidCoefRModifier); 
+ 
+    /* Complex radix-4 IFFT process, operating on the split-stage output */ 
+    arm_radix4_butterfly_inverse_q15(pDst, S_CFFT->fftLen, 
+                                     S_CFFT->pTwiddle, 
+                                     S_CFFT->twidCoefModifier); 
+ 
+    /* Bit reversal process */ 
+    if(S->bitReverseFlagR == 1u) 
+    { 
+      arm_bitreversal_q15(pDst, S_CFFT->fftLen, 
+                          S_CFFT->bitRevFactor, S_CFFT->pBitRevTable); 
+    } 
+  } 
+  else 
+  { 
+    /* Calculation of RFFT of input */ 
+ 
+    /* Complex radix-4 FFT process, applied to the caller's input buffer */ 
+    arm_radix4_butterfly_q15(pSrc, S_CFFT->fftLen, 
+                             S_CFFT->pTwiddle, S_CFFT->twidCoefModifier); 
+ 
+    /* Bit reversal process */ 
+    if(S->bitReverseFlagR == 1u) 
+    { 
+      arm_bitreversal_q15(pSrc, S_CFFT->fftLen, 
+                          S_CFFT->bitRevFactor, S_CFFT->pBitRevTable); 
+    } 
+ 
+    arm_split_rfft_q15(pSrc, S->fftLenBy2, S->pTwiddleAReal, 
+                       S->pTwiddleBReal, pDst, S->twidCoefRModifier); /* Real FFT core process */ 
+  } 
+ 
+} 
+ 
+  /**  
+   * @} end of RFFT_RIFFT group  
+   */ 
+ 
+/**  
+ * @brief  Core Real FFT process  
+ * @param  *pSrc 				points to the input buffer. 
+ * @param  fftLen  				length of FFT. 
+ * @param  *pATable 			points to the A twiddle Coef buffer.  
+ * @param  *pBTable 			points to the B twiddle Coef buffer. 
+ * @param  *pDst 				points to the output buffer. 
+ * @param  modifier 	        twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. 
+ * @return none.  
+ * The function implements a Real FFT  
+ *  
+ * Below, n denotes fftLen and buffers are interleaved (real, imaginary) q15_t pairs.  
+ * The loop handles bins i = 1 .. n - 1: each result is stored at pDst[2 * i], pDst[2 * i + 1]  
+ * and again, with the imaginary part negated, at pDst[4 * n - 2 * i], pDst[4 * n - 2 * i + 1].  
+ * Bins 0 and n are written after the loop from pSrc[0] and pSrc[1] with a zero imaginary part.  
+ * The highest pDst index written is 4 * n - 1. pSrc is also written: elements 2 * n and 2 * n + 1.  
+ */ 
+ 
+void arm_split_rfft_q15( 
+  q15_t * pSrc, 
+  uint32_t fftLen, 
+  q15_t * pATable, 
+  q15_t * pBTable, 
+  q15_t * pDst, 
+  uint32_t modifier) 
+{ 
+  uint32_t i;                                    /* Loop Counter */ 
+  q31_t outR, outI;                              /* Temporary variables for output */ 
+  q15_t *pCoefA, *pCoefB;                        /* Temporary pointers for twiddle factors */ 
+  q15_t *pSrc1, *pSrc2;                          /* Ascending / descending read pointers into pSrc */ 
+ 
+ 
+  pSrc[2u * fftLen] = pSrc[0];                   /* copy sample 0 to just past the last sample */ 
+  pSrc[(2u * fftLen) + 1u] = pSrc[1];            /* NOTE(review): the loop below only reads indices 2 .. 2n-1, so these two copies are not read back in this function */ 
+ 
+  pCoefA = &pATable[modifier * 2u];              /* coefficient pair for bin 1 */ 
+  pCoefB = &pBTable[modifier * 2u]; 
+ 
+  pSrc1 = &pSrc[2];                              /* sample 1, moves upwards */ 
+  pSrc2 = &pSrc[(2u * fftLen) - 2u];             /* sample n - 1, moves downwards */ 
+ 
+  i = 1u; 
+ 
+  while(i < fftLen) 
+  { 
+    /*  
+       outR = (pSrc[2 * i] * pATable[2 * i] - pSrc[2 * i + 1] * pATable[2 * i + 1]  
+       + pSrc[2 * n - 2 * i] * pBTable[2 * i] +  
+       pSrc[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);  
+     */ 
+ 
+    /* outI = (pIn[2 * i + 1] * pATable[2 * i] + pIn[2 * i] * pATable[2 * i + 1] +  
+       pIn[2 * n - 2 * i] * pBTable[2 * i + 1] -  
+       pIn[2 * n - 2 * i + 1] * pBTable[2 * i]); */ 
+ 
+    /* pSrc[2 * i] * pATable[2 * i] - pSrc[2 * i + 1] * pATable[2 * i + 1]  
+       (presumably __SIMD32 views two adjacent q15_t values as one 32-bit word  
+       for the dual 16-bit multiply intrinsics -- macro not visible here) */ 
+    outR = __SMUSD(*__SIMD32(pSrc1), *__SIMD32(pCoefA)); 
+ 
+    /* pSrc[2 * n - 2 * i] * pBTable[2 * i] +  
+       pSrc[2 * n - 2 * i + 1] * pBTable[2 * i + 1])  
+       The accumulated sum is scaled back by 15 bits in the same statement. */ 
+    outR = __SMLAD(*__SIMD32(pSrc2), *__SIMD32(pCoefB), outR) >> 15u; 
+ 
+    /* pIn[2 * n - 2 * i] * pBTable[2 * i + 1] -  
+       pIn[2 * n - 2 * i + 1] * pBTable[2 * i]  
+       The post-decrement moves pSrc2 back by one 32-bit word, i.e. one  
+       (real, imaginary) pair, after the read. */ 
+    outI = __SMUSDX(*__SIMD32(pSrc2)--, *__SIMD32(pCoefB)); 
+ 
+    /* (pIn[2 * i + 1] * pATable[2 * i] + pIn[2 * i] * pATable[2 * i + 1]  
+       The post-increment advances pSrc1 to the next pair after the read. */ 
+    outI = __SMLADX(*__SIMD32(pSrc1)++, *__SIMD32(pCoefA), outI); 
+ 
+    /* write output (outI is scaled by 15 bits here, outR was scaled above) */ 
+    pDst[2u * i] = (q15_t) outR; 
+    pDst[(2u * i) + 1u] = outI >> 15u; 
+ 
+    /* write complex conjugate output into the mirrored bin 2 * n - i */ 
+    pDst[(4u * fftLen) - (2u * i)] = (q15_t) outR; 
+    pDst[((4u * fftLen) - (2u * i)) + 1u] = -(outI >> 15u); 
+ 
+    /* update coefficient pointer: step 2 * modifier elements per bin */ 
+    pCoefB = pCoefB + (2u * modifier); 
+    pCoefA = pCoefA + (2u * modifier); 
+ 
+    i++; 
+ 
+  } 
+ 
+  pDst[2u * fftLen] = pSrc[0] - pSrc[1];         /* bin n: real part, imaginary part forced to 0 */ 
+  pDst[(2u * fftLen) + 1u] = 0; 
+ 
+  pDst[0] = pSrc[0] + pSrc[1];                   /* bin 0: real part, imaginary part forced to 0 */ 
+  pDst[1] = 0; 
+ 
+} 
+ 
+ 
+/**  
+ * @brief  Core Real IFFT process  
+ * @param[in]   *pSrc 				points to the input buffer.  
+ * @param[in]   fftLen  		    length of FFT. 
+ * @param[in]   *pATable 			points to the twiddle Coef A buffer. 
+ * @param[in]   *pBTable 			points to the twiddle Coef B buffer.  
+ * @param[out]  *pDst 				points to the output buffer. 
+ * @param[in]   modifier 	        twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. 
+ * @return none.  
+ * The function implements a Real IFFT  
+ *  
+ * Below, n denotes fftLen and buffers are interleaved (real, imaginary) q15_t pairs.  
+ * The loop runs n times, producing one output pair per pass, so 2 * n elements of pDst are written.  
+ * Reading starts at pSrc[0] (ascending) and at pSrc[2 * n] (descending), so the input must  
+ * provide at least 2 * n + 2 elements. Both coefficient tables are read from index 0 in steps of 2 * modifier.  
+ */ 
+ 
+void arm_split_rifft_q15( 
+  q15_t * pSrc, 
+  uint32_t fftLen, 
+  q15_t * pATable, 
+  q15_t * pBTable, 
+  q15_t * pDst, 
+  uint32_t modifier) 
+{ 
+  uint32_t i;                                    /* Loop Counter */ 
+  q31_t outR, outI;                              /* Temporary variables for output */ 
+  q15_t *pCoefA, *pCoefB;                        /* Temporary pointers for twiddle factors */ 
+  q15_t *pSrc1, *pSrc2;                          /* Ascending / descending read pointers into pSrc */ 
+  q15_t *pDst1 = &pDst[0];                       /* Output write pointer */ 
+ 
+  pCoefA = &pATable[0]; 
+  pCoefB = &pBTable[0]; 
+ 
+  pSrc1 = &pSrc[0]; 
+  pSrc2 = &pSrc[2u * fftLen]; 
+ 
+  i = fftLen; 
+ 
+  while(i > 0u) 
+  { 
+ 
+    /*  
+       outR = (pIn[2 * i] * pATable[2 * i] + pIn[2 * i + 1] * pATable[2 * i + 1] +  
+       pIn[2 * n - 2 * i] * pBTable[2 * i] -  
+       pIn[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);  
+ 
+       outI = (pIn[2 * i + 1] * pATable[2 * i] - pIn[2 * i] * pATable[2 * i + 1] -  
+       pIn[2 * n - 2 * i] * pBTable[2 * i + 1] -  
+       pIn[2 * n - 2 * i + 1] * pBTable[2 * i]);  
+ 
+       (the "i" in these formulas is the ascending bin index, not the  
+       down-counting loop variable)  
+     */ 
+ 
+    /* pIn[2 * n - 2 * i] * pBTable[2 * i] -  
+       pIn[2 * n - 2 * i + 1] * pBTable[2 * i + 1]) */ 
+    outR = __SMUSD(*__SIMD32(pSrc2), *__SIMD32(pCoefB)); 
+ 
+    /* pIn[2 * i] * pATable[2 * i] + pIn[2 * i + 1] * pATable[2 * i + 1] +  
+       pIn[2 * n - 2 * i] * pBTable[2 * i]  
+       The accumulated sum is scaled back by 15 bits in the same statement. */ 
+    outR = __SMLAD(*__SIMD32(pSrc1), *__SIMD32(pCoefA), outR) >> 15u; 
+ 
+    /*  
+       -pIn[2 * n - 2 * i] * pBTable[2 * i + 1] +  
+       pIn[2 * n - 2 * i + 1] * pBTable[2 * i]  
+       The post-decrement moves pSrc2 back by one 32-bit word after the read;  
+       the sum is negated when passed as the accumulator below. */ 
+    outI = __SMUADX(*__SIMD32(pSrc2)--, *__SIMD32(pCoefB)); 
+ 
+    /* pIn[2 * i + 1] * pATable[2 * i] - pIn[2 * i] * pATable[2 * i + 1]  
+       The post-increment advances pSrc1 to the next pair after the read. */ 
+    outI = __SMLSDX(*__SIMD32(pCoefA), *__SIMD32(pSrc1)++, -outI); 
+ 
+    /* write output: both results go out in one 32-bit store.  
+       (outI << 1) & 0xFFFF0000 places bits 15..30 of outI in the upper  
+       half-word (the same bits a 15-bit right shift would keep), and the low  
+       16 bits of the already-scaled outR fill the lower half-word.  
+       NOTE(review): which half-word lands at the lower q15_t address depends  
+       on byte order; presumably little-endian, giving (real, imaginary). */ 
+    *__SIMD32(pDst1)++ = 
+      (q31_t) ((outI << 1u) & 0xFFFF0000) | (outR & 0x0000FFFF); 
+ 
+    /* update coefficient pointer: step 2 * modifier elements per pass */ 
+    pCoefB = pCoefB + (2u * modifier); 
+    pCoefA = pCoefA + (2u * modifier); 
+ 
+    i--; 
+ 
+  } 
+ 
+}

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Cortex-M4-M3/TransformFunctions/arm_rfft_q31.c	Thu Mar 10 15:07:50 2011 +0000
@@ -0,0 +1,323 @@
+/* ----------------------------------------------------------------------  
+* Copyright (C) 2010 ARM Limited. All rights reserved.  
+*  
+* $Date:        29. November 2010  
+* $Revision: 	V1.0.3  
+*  
+* Project: 	    CMSIS DSP Library  
+* Title:	    arm_rfft_q31.c  
+*  
+* Description:	RFFT & RIFFT Q31 process function  
+*  
+*  
+* Target Processor: Cortex-M4/Cortex-M3
+*  
+* Version 1.0.3 2010/11/29 
+*    Re-organized the CMSIS folders and updated documentation.  
+*   
+* Version 1.0.2 2010/11/11  
+*    Documentation updated.   
+*  
+* Version 1.0.1 2010/10/05   
+*    Production release and review comments incorporated.  
+*  
+* Version 1.0.0 2010/09/20   
+*    Production release and review comments incorporated.  
+*  
+* Version 0.0.7  2010/06/10   
+*    Misra-C changes done  
+* -------------------------------------------------------------------- */ 
+ 
+#include "arm_math.h" 
+ 
+/*--------------------------------------------------------------------  
+*		Internal functions prototypes  
+*  
+*		Forward declarations of the two split (pre/post-processing) stages  
+*		that arm_rfft_q31() calls; both are defined further down in this file.  
+--------------------------------------------------------------------*/ 
+ 
+void arm_split_rfft_q31( 
+  q31_t * pSrc, 
+  uint32_t fftLen, 
+  q31_t * pATable, 
+  q31_t * pBTable, 
+  q31_t * pDst, 
+  uint32_t modifier); 
+ 
+void arm_split_rifft_q31( 
+  q31_t * pSrc, 
+  uint32_t fftLen, 
+  q31_t * pATable, 
+  q31_t * pBTable, 
+  q31_t * pDst, 
+  uint32_t modifier); 
+ 
+/**  
+ * @addtogroup RFFT_RIFFT  
+ * @{  
+ */ 
+ 
+/**  
+ * @brief Processing function for the Q31 RFFT/RIFFT. 
+ * @param[in]  *S    points to an instance of the Q31 RFFT/RIFFT structure. 
+ * @param[in]  *pSrc points to the input buffer. 
+ * @param[out] *pDst points to the output buffer. 
+ * @return none. 
+ *  
+ * \par Input and output formats: 
+ * \par  
+ * Internally input is downscaled by 2 for every stage to avoid saturations inside CFFT/CIFFT process. 
+ * Hence the output format is different for different RFFT sizes.  
+ * The input and output formats for different RFFT sizes and number of bits to upscale are mentioned in the tables below for RFFT and RIFFT: 
+ * \par  
+ * \image html RFFTQ31.gif "Input and Output Formats for Q31 RFFT"  
+ *  
+ * \par  
+ * \image html RIFFTQ31.gif "Input and Output Formats for Q31 RIFFT"  
+ * \par Processing order and buffer usage: 
+ * \par  
+ * When S->ifftFlagR == 1 the split stage runs first (pSrc to pDst), followed by the inverse radix-4 butterfly and the optional bit reversal, both on pDst.  
+ * For any other flag value the radix-4 butterfly and the optional bit reversal are applied to pSrc itself, and the split stage then writes pDst.  
+ * \par  
+ * In the forward case pSrc is therefore not left untouched: it is handed to the butterfly and bit-reversal routines, and arm_split_rfft_q31() stores two extra values at  
+ * pSrc[2 * fftLenBy2] and pSrc[2 * fftLenBy2 + 1], so the buffer needs room for at least 2 * fftLenBy2 + 2 elements.  
+ * The forward split stage writes pDst up to index 4 * fftLenBy2 - 1.  
+ * In the inverse case arm_split_rifft_q31() reads pSrc up to index 2 * fftLenBy2 + 1.  
+ * \par  
+ * Bit reversal is performed only when S->bitReverseFlagR is exactly 1.  
+ * Note that arm_rfft_init_q31() tests ifftFlagR for non-zero while this function tests it for equality with 1.  
+ */ 
+ 
+void arm_rfft_q31( 
+  const arm_rfft_instance_q31 * S, 
+  q31_t * pSrc, 
+  q31_t * pDst) 
+{ 
+  const arm_cfft_radix4_instance_q31 *S_CFFT = S->pCfft; /* complex FFT instance attached to S */ 
+ 
+  /* Calculation of RIFFT of input */ 
+  if(S->ifftFlagR == 1u) 
+  { 
+    /*  Real IFFT core process */ 
+    arm_split_rifft_q31(pSrc, S->fftLenBy2, S->pTwiddleAReal, 
+                        S->pTwiddleBReal, pDst, S->twidCoefRModifier); 
+ 
+    /* Complex radix-4 IFFT process, operating on the split-stage output */ 
+    arm_radix4_butterfly_inverse_q31(pDst, S_CFFT->fftLen, 
+                                     S_CFFT->pTwiddle, 
+                                     S_CFFT->twidCoefModifier); 
+    /* Bit reversal process */ 
+    if(S->bitReverseFlagR == 1u) 
+    { 
+      arm_bitreversal_q31(pDst, S_CFFT->fftLen, 
+                          S_CFFT->bitRevFactor, S_CFFT->pBitRevTable); 
+    } 
+  } 
+  else 
+  { 
+    /* Calculation of RFFT of input */ 
+ 
+    /* Complex radix-4 FFT process, applied to the caller's input buffer */ 
+    arm_radix4_butterfly_q31(pSrc, S_CFFT->fftLen, 
+                             S_CFFT->pTwiddle, S_CFFT->twidCoefModifier); 
+ 
+    /* Bit reversal process */ 
+    if(S->bitReverseFlagR == 1u) 
+    { 
+      arm_bitreversal_q31(pSrc, S_CFFT->fftLen, 
+                          S_CFFT->bitRevFactor, S_CFFT->pBitRevTable); 
+    } 
+ 
+    /*  Real FFT core process */ 
+    arm_split_rfft_q31(pSrc, S->fftLenBy2, S->pTwiddleAReal, 
+                       S->pTwiddleBReal, pDst, S->twidCoefRModifier); 
+  } 
+ 
+} 
+ 
+ 
+  /**  
+   * @} end of RFFT_RIFFT group  
+   */ 
+ 
+/**  
+ * @brief  Core Real FFT process  
+ * @param[in]   *pSrc 				points to the input buffer.  
+ * @param[in]   fftLen  			length of FFT. 
+ * @param[in]   *pATable 			points to the twiddle Coef A buffer.  
+ * @param[in]   *pBTable 			points to the twiddle Coef B buffer.  
+ * @param[out]  *pDst 				points to the output buffer.  
+ * @param[in]   modifier 	        twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. 
+ * @return none.  
+ *  
+ * Below, n denotes fftLen and buffers are interleaved (real, imaginary) q31_t pairs.  
+ * The loop runs n - 1 times. Results are written upwards from pDst[2] and, as mirrored  
+ * conjugates, downwards from pDst[4 * n - 1]; bins 0 and n are written after the loop  
+ * from pSrc[0] and pSrc[1] with a zero imaginary part.  
+ * pSrc is also written: elements 2 * n and 2 * n + 1 receive copies of elements 0 and 1.  
+ *  
+ * Only the even-indexed entry of pBTable is read. Wherever the reference formulas use  
+ * pBTable[2 * i + 1] the code substitutes -pATable[2 * i + 1], which presumably relies  
+ * on the two tables satisfying that relation -- TODO confirm against the table data.  
+ *  
+ * Each multiply-accumulate keeps the running sum in the upper 32 bits of a 64-bit value.  
+ * NOTE(review): "(q63_t) x << 32" left-shifts a signed value that may be negative.  
+ */ 
+ 
+void arm_split_rfft_q31( 
+  q31_t * pSrc, 
+  uint32_t fftLen, 
+  q31_t * pATable, 
+  q31_t * pBTable, 
+  q31_t * pDst, 
+  uint32_t modifier) 
+{ 
+  uint32_t i;                                    /* Loop Counter */ 
+  q31_t outR, outI;                              /* Temporary variables for output */ 
+  q31_t *pCoefA, *pCoefB;                        /* Temporary pointers for twiddle factors */ 
+  q31_t CoefA1, CoefA2, CoefB1;                  /* Temporary variables for twiddle coefficients */ 
+  q31_t *pOut1 = &pDst[2], *pOut2 = &pDst[(4u * fftLen) - 1u];  /* ascending / descending write pointers */ 
+  q31_t *pIn1 = &pSrc[2], *pIn2 = &pSrc[(2u * fftLen) - 1u];    /* ascending / descending read pointers; pIn2 starts on an imaginary part */ 
+ 
+  pSrc[2u * fftLen] = pSrc[0];                   /* copy sample 0 to just past the last sample */ 
+  pSrc[(2u * fftLen) + 1u] = pSrc[1]; 
+ 
+  /* Init coefficient pointers (coefficient pair for bin 1) */ 
+  pCoefA = &pATable[modifier * 2u]; 
+  pCoefB = &pBTable[modifier * 2u]; 
+ 
+  i = fftLen - 1u; 
+ 
+  while(i > 0u) 
+  { 
+    /*  
+       outR = (pSrc[2 * i] * pATable[2 * i] - pSrc[2 * i + 1] * pATable[2 * i + 1]  
+       + pSrc[2 * n - 2 * i] * pBTable[2 * i] +  
+       pSrc[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);  
+     */ 
+ 
+    /* outI = (pIn[2 * i + 1] * pATable[2 * i] + pIn[2 * i] * pATable[2 * i + 1] +  
+       pIn[2 * n - 2 * i] * pBTable[2 * i + 1] -  
+       pIn[2 * n - 2 * i + 1] * pBTable[2 * i]);  
+       (the "i" in these formulas is the ascending bin index, not the  
+       down-counting loop variable) */ 
+ 
+    CoefA1 = *pCoefA++;                          /* pATable[2 * i] */ 
+    CoefA2 = *pCoefA;                            /* pATable[2 * i + 1]; pointer stays on this element */ 
+ 
+    /* outR = (pSrc[2 * i] * pATable[2 * i] */ 
+    outR = ((int32_t) (((q63_t) * pIn1 * CoefA1) >> 32)); 
+ 
+    /* outI = pIn[2 * i] * pATable[2 * i + 1]; pIn1 then moves to the imaginary part */ 
+    outI = ((int32_t) (((q63_t) * pIn1++ * CoefA2) >> 32)); 
+ 
+    /* - pSrc[2 * i + 1] * pATable[2 * i + 1] */ 
+    outR = 
+      (q31_t) ((((q63_t) outR << 32) + ((q63_t) * pIn1 * (-CoefA2))) >> 32); 
+ 
+    /* (pIn[2 * i + 1] * pATable[2 * i]; pIn1 then moves to the next sample */ 
+    outI = 
+      (q31_t) ((((q63_t) outI << 32) + ((q63_t) * pIn1++ * (CoefA1))) >> 32); 
+ 
+    /* pIn2 addresses the imaginary part pSrc[2 * n - 2 * i + 1] here:  
+       adds pSrc[2 * n - 2 * i + 1] * (-pATable[2 * i + 1]), standing in for the  
+       formula term pSrc[2 * n - 2 * i + 1] * pBTable[2 * i + 1] */ 
+    outR = 
+      (q31_t) ((((q63_t) outR << 32) + ((q63_t) * pIn2 * (-CoefA2))) >> 32); 
+    CoefB1 = *pCoefB;                            /* pBTable[2 * i] */ 
+ 
+    /* - pIn[2 * n - 2 * i + 1] * pBTable[2 * i]; pIn2 then steps back to the real part */ 
+    outI = 
+      (q31_t) ((((q63_t) outI << 32) + ((q63_t) * pIn2-- * (-CoefB1))) >> 32); 
+ 
+    /* pSrc[2 * n - 2 * i] * pBTable[2 * i] */ 
+    outR = 
+      (q31_t) ((((q63_t) outR << 32) + ((q63_t) * pIn2 * (CoefB1))) >> 32); 
+ 
+    /* adds pIn[2 * n - 2 * i] * (-pATable[2 * i + 1]), standing in for the  
+       formula term pIn[2 * n - 2 * i] * pBTable[2 * i + 1];  
+       pIn2 then steps back to the imaginary part of the previous sample */ 
+    outI = 
+      (q31_t) ((((q63_t) outI << 32) + ((q63_t) * pIn2-- * (-CoefA2))) >> 32); 
+ 
+    /* write output (doubled, since the sums were formed with a 32-bit shift) */ 
+    *pOut1++ = (outR << 1u); 
+    *pOut1++ = (outI << 1u); 
+ 
+    /* write complex conjugate output: imaginary part first because pOut2 descends */ 
+    *pOut2-- = -(outI << 1u); 
+    *pOut2-- = (outR << 1u); 
+ 
+    /* update coefficient pointer: net step is 2 * modifier for both tables  
+       (pCoefA was already advanced by one element above) */ 
+    pCoefB = pCoefB + (modifier * 2u); 
+    pCoefA = pCoefA + ((modifier * 2u) - 1u); 
+ 
+    i--; 
+ 
+  } 
+ 
+  pDst[2u * fftLen] = pSrc[0] - pSrc[1];         /* bin n: real part, imaginary part forced to 0 */ 
+  pDst[(2u * fftLen) + 1u] = 0; 
+ 
+  pDst[0] = pSrc[0] + pSrc[1];                   /* bin 0: real part, imaginary part forced to 0 */ 
+  pDst[1] = 0; 
+ 
+} 
+ 
+ 
+/**  
+ * @brief  Core Real IFFT process  
+ * @param[in]   *pSrc 				points to the input buffer. 
+ * @param[in]   fftLen  			length of FFT.  
+ * @param[in]   *pATable 			points to the twiddle Coef A buffer. 
+ * @param[in]   *pBTable 			points to the twiddle Coef B buffer.  
+ * @param[out]  *pDst 				points to the output buffer. 
+ * @param[in]   modifier 	        twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. 
+ * @return none.  
+ *  
+ * Below, n denotes the initial fftLen and buffers are interleaved (real, imaginary) q31_t pairs.  
+ * The loop runs n times, using the fftLen parameter itself as the down-counter, and writes  
+ * one output pair per pass, so 2 * n elements of pDst are written.  
+ * Reading starts at pSrc[0] (ascending) and at pSrc[2 * n + 1] (descending), so the input  
+ * must provide at least 2 * n + 2 elements. Both tables are read from index 0 in steps of 2 * modifier.  
+ *  
+ * Only the even-indexed entry of pBTable is read. Wherever the reference formulas use  
+ * pBTable[2 * i + 1] the code substitutes -pATable[2 * i + 1], which presumably relies  
+ * on the two tables satisfying that relation -- TODO confirm against the table data.  
+ *  
+ * NOTE(review): "(q63_t) x << 32" left-shifts a signed value that may be negative.  
+ */ 
+ 
+void arm_split_rifft_q31( 
+  q31_t * pSrc, 
+  uint32_t fftLen, 
+  q31_t * pATable, 
+  q31_t * pBTable, 
+  q31_t * pDst, 
+  uint32_t modifier) 
+{ 
+  q31_t outR, outI;                              /* Temporary variables for output */ 
+  q31_t *pCoefA, *pCoefB;                        /* Temporary pointers for twiddle factors */ 
+  q31_t CoefA1, CoefA2, CoefB1;                  /* Temporary variables for twiddle coefficients */ 
+  q31_t *pIn1 = &pSrc[0], *pIn2 = &pSrc[(2u * fftLen) + 1u];  /* ascending / descending read pointers; pIn2 starts on an imaginary part */ 
+ 
+  pCoefA = &pATable[0]; 
+  pCoefB = &pBTable[0]; 
+ 
+  while(fftLen > 0u) 
+  { 
+    /*  
+       outR = (pIn[2 * i] * pATable[2 * i] + pIn[2 * i + 1] * pATable[2 * i + 1] +  
+       pIn[2 * n - 2 * i] * pBTable[2 * i] -  
+       pIn[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);  
+ 
+       outI = (pIn[2 * i + 1] * pATable[2 * i] - pIn[2 * i] * pATable[2 * i + 1] -  
+       pIn[2 * n - 2 * i] * pBTable[2 * i + 1] -  
+       pIn[2 * n - 2 * i + 1] * pBTable[2 * i]);  
+ 
+       (the "i" in these formulas is the ascending bin index, starting at 0)  
+     */ 
+    CoefA1 = *pCoefA++;                          /* pATable[2 * i] */ 
+    CoefA2 = *pCoefA;                            /* pATable[2 * i + 1]; pointer stays on this element */ 
+ 
+    /* outR = (pIn[2 * i] * pATable[2 * i] */ 
+    outR = ((int32_t) (((q63_t) * pIn1 * CoefA1) >> 32)); 
+ 
+    /* - pIn[2 * i] * pATable[2 * i + 1]; pIn1 then moves to the imaginary part */ 
+    outI = -((int32_t) (((q63_t) * pIn1++ * CoefA2) >> 32)); 
+ 
+    /* pIn[2 * i + 1] * pATable[2 * i + 1] */ 
+    outR = 
+      (q31_t) ((((q63_t) outR << 32) + ((q63_t) * pIn1 * (CoefA2))) >> 32); 
+ 
+    /* pIn[2 * i + 1] * pATable[2 * i]; pIn1 then moves to the next sample */ 
+    outI = 
+      (q31_t) ((((q63_t) outI << 32) + ((q63_t) * pIn1++ * (CoefA1))) >> 32); 
+ 
+    /* pIn2 addresses the imaginary part pIn[2 * n - 2 * i + 1] here:  
+       adds pIn[2 * n - 2 * i + 1] * pATable[2 * i + 1], standing in for the  
+       formula term - pIn[2 * n - 2 * i + 1] * pBTable[2 * i + 1] */ 
+    outR = 
+      (q31_t) ((((q63_t) outR << 32) + ((q63_t) * pIn2 * (CoefA2))) >> 32); 
+ 
+    CoefB1 = *pCoefB;                            /* pBTable[2 * i] */ 
+ 
+    /* - pIn[2 * n - 2 * i + 1] * pBTable[2 * i]; pIn2 then steps back to the real part */ 
+    outI = 
+      (q31_t) ((((q63_t) outI << 32) - ((q63_t) * pIn2-- * (CoefB1))) >> 32); 
+ 
+    /* pIn[2 * n - 2 * i] * pBTable[2 * i] */ 
+    outR = 
+      (q31_t) ((((q63_t) outR << 32) + ((q63_t) * pIn2 * (CoefB1))) >> 32); 
+ 
+    /* adds pIn[2 * n - 2 * i] * pATable[2 * i + 1], standing in for the  
+       formula term - pIn[2 * n - 2 * i] * pBTable[2 * i + 1];  
+       pIn2 then steps back to the imaginary part of the previous sample */ 
+    outI = 
+      (q31_t) ((((q63_t) outI << 32) + ((q63_t) * pIn2-- * (CoefA2))) >> 32); 
+ 
+    /* write output (doubled, since the sums were formed with a 32-bit shift) */ 
+    *pDst++ = (outR << 1u); 
+    *pDst++ = (outI << 1u); 
+ 
+    /* update coefficient pointer: net step is 2 * modifier for both tables  
+       (pCoefA was already advanced by one element above) */ 
+    pCoefB = pCoefB + (modifier * 2u); 
+    pCoefA = pCoefA + ((modifier * 2u) - 1u); 
+ 
+    /* Decrement loop count */ 
+    fftLen--; 
+ 
+  } 
+ 
+ 
+}

Repository toolbox

Export to desktop IDE

Repository details

Type:	Library
Created:	10 Mar 2011
Imports:	907
Forks:	1
Commits:	3
Dependents:	5
Dependencies:	0
Followers:	35

Revision 0:1014af42efd9, committed 2011-03-10

Changed in this revision

Repository toolbox

Repository details

Important Information for this Arm website

Access Warning