dsp - CMSIS DSP Library from CMSIS 2.0. See http://www.…

Users » simon » Code » dsp

CMSIS DSP Library from CMSIS 2.0. See http://www.onarm.com/cmsis/ for full details

Dependents: K22F_DSP_Matrix_least_square BNO055-ELEC3810 1BNO055 ECE4180Project--Slave2 ... more

src/Cortex-M4-M3/MatrixFunctions/arm_mat_mult_q31.c@0:1014af42efd9, 2011-03-10 (annotated)

Committer: simon
Date: Thu Mar 10 15:07:50 2011 +0000
Revision: 0:1014af42efd9

Who changed what in which revision?

User	Revision	Line number	New contents of line
simon	0:1014af42efd9	1	/* ----------------------------------------------------------------------
simon	0:1014af42efd9	2	* Copyright (C) 2010 ARM Limited. All rights reserved.
simon	0:1014af42efd9	3	*
simon	0:1014af42efd9	4	* $Date: 29. November 2010
simon	0:1014af42efd9	5	* $Revision: V1.0.3
simon	0:1014af42efd9	6	*
simon	0:1014af42efd9	7	* Project: CMSIS DSP Library
simon	0:1014af42efd9	8	* Title: arm_mat_mult_q31.c
simon	0:1014af42efd9	9	*
simon	0:1014af42efd9	10	* Description: Q31 matrix multiplication.
simon	0:1014af42efd9	11	*
simon	0:1014af42efd9	12	* Target Processor: Cortex-M4/Cortex-M3
simon	0:1014af42efd9	13	*
simon	0:1014af42efd9	14	* Version 1.0.3 2010/11/29
simon	0:1014af42efd9	15	* Re-organized the CMSIS folders and updated documentation.
simon	0:1014af42efd9	16	*
simon	0:1014af42efd9	17	* Version 1.0.2 2010/11/11
simon	0:1014af42efd9	18	* Documentation updated.
simon	0:1014af42efd9	19	*
simon	0:1014af42efd9	20	* Version 1.0.1 2010/10/05
simon	0:1014af42efd9	21	* Production release and review comments incorporated.
simon	0:1014af42efd9	22	*
simon	0:1014af42efd9	23	* Version 1.0.0 2010/09/20
simon	0:1014af42efd9	24	* Production release and review comments incorporated.
simon	0:1014af42efd9	25	*
simon	0:1014af42efd9	26	* Version 0.0.5 2010/04/26
simon	0:1014af42efd9	27	* incorporated review comments and updated with latest CMSIS layer
simon	0:1014af42efd9	28	*
simon	0:1014af42efd9	29	* Version 0.0.3 2010/03/10
simon	0:1014af42efd9	30	* Initial version
simon	0:1014af42efd9	31	* -------------------------------------------------------------------- */
simon	0:1014af42efd9	32
simon	0:1014af42efd9	33	#include "arm_math.h"
simon	0:1014af42efd9	34
simon	0:1014af42efd9	35	/**
simon	0:1014af42efd9	36	* @ingroup groupMatrix
simon	0:1014af42efd9	37	*/
simon	0:1014af42efd9	38
simon	0:1014af42efd9	39	/**
simon	0:1014af42efd9	40	* @addtogroup MatrixMult
simon	0:1014af42efd9	41	* @{
simon	0:1014af42efd9	42	*/
simon	0:1014af42efd9	43
simon	0:1014af42efd9	44	/**
simon	0:1014af42efd9	45	* @brief Q31 matrix multiplication
simon	0:1014af42efd9	46	* @param[in] *pSrcA points to the first input matrix structure
simon	0:1014af42efd9	47	* @param[in] *pSrcB points to the second input matrix structure
simon	0:1014af42efd9	48	* @param[out] *pDst points to output matrix structure
simon	0:1014af42efd9	49	* @return The function returns either
simon	0:1014af42efd9	50	* <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
simon	0:1014af42efd9	51	*
simon	0:1014af42efd9	52	* @details
simon	0:1014af42efd9	53	* <b>Scaling and Overflow Behavior:</b>
simon	0:1014af42efd9	54	*
simon	0:1014af42efd9	55	* \par
simon	0:1014af42efd9	56	* The function is implemented using an internal 64-bit accumulator.
simon	0:1014af42efd9	57	* The accumulator has a 2.62 format and maintains full precision of the intermediate
simon	0:1014af42efd9	58	* multiplication results but provides only a single guard bit. There is no saturation
simon	0:1014af42efd9	59	* on intermediate additions. Thus, if the accumulator overflows it wraps around and
simon	0:1014af42efd9	60	* distorts the result. The input signals should be scaled down to avoid intermediate
simon	0:1014af42efd9	61	* overflows. The input is thus scaled down by log2(numColsA) bits
simon	0:1014af42efd9	62	* to avoid overflows, as a total of numColsA additions are performed internally.
simon	0:1014af42efd9	63	* The 2.62 accumulator is right shifted by 31 bits and saturated to 1.31 format to yield the final result.
simon	0:1014af42efd9	64	*
simon	0:1014af42efd9	65	* \par
simon	0:1014af42efd9	66	* See <code>arm_mat_mult_fast_q31()</code> for a faster but less precise implementation of this function.
simon	0:1014af42efd9	67	*
simon	0:1014af42efd9	68	*/
simon	0:1014af42efd9	69
simon	0:1014af42efd9	70	arm_status arm_mat_mult_q31(
simon	0:1014af42efd9	71	const arm_matrix_instance_q31 * pSrcA,
simon	0:1014af42efd9	72	const arm_matrix_instance_q31 * pSrcB,
simon	0:1014af42efd9	73	arm_matrix_instance_q31 * pDst)
simon	0:1014af42efd9	74	{
simon	0:1014af42efd9	75	q31_t *pIn1 = pSrcA->pData; /* input data matrix pointer A */
simon	0:1014af42efd9	76	q31_t *pIn2 = pSrcB->pData; /* input data matrix pointer B */
simon	0:1014af42efd9	77	q31_t *pInA = pSrcA->pData; /* input data matrix pointer A */
simon	0:1014af42efd9	78	// q31_t *pSrcB = pSrcB->pData; /* input data matrix pointer B */
simon	0:1014af42efd9	79	q31_t *pOut = pDst->pData; /* output data matrix pointer */
simon	0:1014af42efd9	80	q31_t *px; /* Temporary output data matrix pointer */
simon	0:1014af42efd9	81	q63_t sum; /* Accumulator */
simon	0:1014af42efd9	82	uint16_t numRowsA = pSrcA->numRows; /* number of rows of input matrix A */
simon	0:1014af42efd9	83	uint16_t numColsB = pSrcB->numCols; /* number of columns of input matrix B */
simon	0:1014af42efd9	84	uint16_t numColsA = pSrcA->numCols; /* number of columns of input matrix A */
simon	0:1014af42efd9	85	uint16_t col, i = 0u, j, row = numRowsA, colCnt; /* loop counters */
simon	0:1014af42efd9	86	arm_status status; /* status of matrix multiplication */
simon	0:1014af42efd9	87
simon	0:1014af42efd9	88
simon	0:1014af42efd9	89	#ifdef ARM_MATH_MATRIX_CHECK
simon	0:1014af42efd9	90	/* Check for matrix mismatch condition */
simon	0:1014af42efd9	91	if((pSrcA->numCols != pSrcB->numRows) ||
simon	0:1014af42efd9	92	(pSrcA->numRows != pDst->numRows) || (pSrcB->numCols != pDst->numCols))
simon	0:1014af42efd9	93	{
simon	0:1014af42efd9	94	/* Set status as ARM_MATH_SIZE_MISMATCH */
simon	0:1014af42efd9	95	status = ARM_MATH_SIZE_MISMATCH;
simon	0:1014af42efd9	96	}
simon	0:1014af42efd9	97	else
simon	0:1014af42efd9	98	#endif
simon	0:1014af42efd9	99	{
simon	0:1014af42efd9	100	/* The following loop performs the dot-product of each row in pSrcA with each column in pSrcB */
simon	0:1014af42efd9	101	/* row loop */
simon	0:1014af42efd9	102	do
simon	0:1014af42efd9	103	{
simon	0:1014af42efd9	104	/* Output pointer is set to starting address of the row being processed */
simon	0:1014af42efd9	105	px = pOut + i;
simon	0:1014af42efd9	106
simon	0:1014af42efd9	107	/* For every row wise process, the column loop counter is to be initiated */
simon	0:1014af42efd9	108	col = numColsB;
simon	0:1014af42efd9	109
simon	0:1014af42efd9	110	/* For every row wise process, the pIn2 pointer is set
simon	0:1014af42efd9	111	** to the starting address of the pSrcB data */
simon	0:1014af42efd9	112	pIn2 = pSrcB->pData;
simon	0:1014af42efd9	113
simon	0:1014af42efd9	114	j = 0u;
simon	0:1014af42efd9	115
simon	0:1014af42efd9	116	/* column loop */
simon	0:1014af42efd9	117	do
simon	0:1014af42efd9	118	{
simon	0:1014af42efd9	119	/* Set the variable sum, that acts as accumulator, to zero */
simon	0:1014af42efd9	120	sum = 0;
simon	0:1014af42efd9	121
simon	0:1014af42efd9	122	/* Initiate the pointer pIn1 to point to the starting address of pInA */
simon	0:1014af42efd9	123	pIn1 = pInA;
simon	0:1014af42efd9	124
simon	0:1014af42efd9	125	/* Apply loop unrolling and compute 4 MACs simultaneously. */
simon	0:1014af42efd9	126	colCnt = numColsA >> 2;
simon	0:1014af42efd9	127
simon	0:1014af42efd9	128
simon	0:1014af42efd9	129	/* matrix multiplication */
simon	0:1014af42efd9	130	while(colCnt > 0u)
simon	0:1014af42efd9	131	{
simon	0:1014af42efd9	132	/* c(m,n) = a(1,1)*b(1,1) + a(1,2) * b(2,1) + .... + a(m,p)*b(p,n) */
simon	0:1014af42efd9	133	/* Perform the multiply-accumulates */
simon	0:1014af42efd9	134	sum += (q63_t) * pIn1++ * *pIn2;
simon	0:1014af42efd9	135	pIn2 += numColsB;
simon	0:1014af42efd9	136
simon	0:1014af42efd9	137	sum += (q63_t) * pIn1++ * *pIn2;
simon	0:1014af42efd9	138	pIn2 += numColsB;
simon	0:1014af42efd9	139
simon	0:1014af42efd9	140	sum += (q63_t) * pIn1++ * *pIn2;
simon	0:1014af42efd9	141	pIn2 += numColsB;
simon	0:1014af42efd9	142
simon	0:1014af42efd9	143	sum += (q63_t) * pIn1++ * *pIn2;
simon	0:1014af42efd9	144	pIn2 += numColsB;
simon	0:1014af42efd9	145
simon	0:1014af42efd9	146	/* Decrement the loop counter */
simon	0:1014af42efd9	147	colCnt--;
simon	0:1014af42efd9	148	}
simon	0:1014af42efd9	149
simon	0:1014af42efd9	150	/* If the columns of pSrcA is not a multiple of 4, compute any remaining output samples here.
simon	0:1014af42efd9	151	** No loop unrolling is used. */
simon	0:1014af42efd9	152	colCnt = numColsA % 0x4u;
simon	0:1014af42efd9	153
simon	0:1014af42efd9	154	while(colCnt > 0u)
simon	0:1014af42efd9	155	{
simon	0:1014af42efd9	156	/* c(m,n) = a(1,1)*b(1,1) + a(1,2) * b(2,1) + .... + a(m,p)*b(p,n) */
simon	0:1014af42efd9	157	/* Perform the multiply-accumulates */
simon	0:1014af42efd9	158	sum += (q63_t) * pIn1++ * *pIn2;
simon	0:1014af42efd9	159	pIn2 += numColsB;
simon	0:1014af42efd9	160
simon	0:1014af42efd9	161	/* Decrement the loop counter */
simon	0:1014af42efd9	162	colCnt--;
simon	0:1014af42efd9	163	}
simon	0:1014af42efd9	164
simon	0:1014af42efd9	165	/* Convert the result from 2.30 to 1.31 format and store in destination buffer */
simon	0:1014af42efd9	166	*px++ = (q31_t) (sum >> 31);
simon	0:1014af42efd9	167
simon	0:1014af42efd9	168	/* Update the pointer pIn2 to point to the starting address of the next column */
simon	0:1014af42efd9	169	j++;
simon	0:1014af42efd9	170	pIn2 = (pSrcB->pData) + j;
simon	0:1014af42efd9	171
simon	0:1014af42efd9	172	/* Decrement the column loop counter */
simon	0:1014af42efd9	173	col--;
simon	0:1014af42efd9	174
simon	0:1014af42efd9	175	} while(col > 0u);
simon	0:1014af42efd9	176
simon	0:1014af42efd9	177	/* Update the pointer pInA to point to the starting address of the next row */
simon	0:1014af42efd9	178	i = i + numColsB;
simon	0:1014af42efd9	179	pInA = pInA + numColsA;
simon	0:1014af42efd9	180
simon	0:1014af42efd9	181	/* Decrement the row loop counter */
simon	0:1014af42efd9	182	row--;
simon	0:1014af42efd9	183
simon	0:1014af42efd9	184	} while(row > 0u);
simon	0:1014af42efd9	185
simon	0:1014af42efd9	186	/* set status as ARM_MATH_SUCCESS */
simon	0:1014af42efd9	187	status = ARM_MATH_SUCCESS;
simon	0:1014af42efd9	188	}
simon	0:1014af42efd9	189	/* Return to application */
simon	0:1014af42efd9	190	return (status);
simon	0:1014af42efd9	191	}
simon	0:1014af42efd9	192
simon	0:1014af42efd9	193	/**
simon	0:1014af42efd9	194	* @} end of MatrixMult group
simon	0:1014af42efd9	195	*/

Repository toolbox

Export to desktop IDE

Repository details

Type:	Library
Created:	10 Mar 2011
Imports:	907
Forks:	1
Commits:	3
Dependents:	5
Dependencies:	0
Followers:	35

src/Cortex-M4-M3/MatrixFunctions/arm_mat_mult_q31.c@0:1014af42efd9, 2011-03-10 (annotated)

Who changed what in which revision?

Repository toolbox

Repository details

Important Information for this Arm website

Access Warning