Hi,
I am looking for some suggestions on how to use the MAC or EMAC.
I am using CodeWarrior 7.1 and the MCF52235.
I am not doing any special DSP code or algorithm; I just need to do a 16x16 multiply and accumulate as quickly as possible.
The 16 x 16 multiply and accumulate must be done 8 times spread through a function and other code.
I am working in C, so for example I have:
register int Input1; // defined as int, but value limited to 12 bits so multiply and accumulate won't overflow 32-bits.
register int Sum;
....
Input1 = read A2D
Sum += Input1 x Input1;
... other code
Input 1 = read A2D
Sum += Input1 x Input1
... other code, etc
... after summing 8 times save the Sum
Defining Input1 and Sum as registers made the assembly faster.
Is it possible to use the MAC or EMAC staying in C? Are there compiler extensions for these registers?
If I use inline assembly I don't see how to know what register the compiler chose for these values.
This is my first experience with ColdFire and hardware multipliers.
Thanks for the help.
Hi Ron,
I'm a bit rusty on this, but here is some code that should do what you want with some very minor modification. I used it in a DSP application some years ago to do a multiply and accumulate on an MCF5213, which only has the MAC, not the eMAC. I've not used the eMAC, but I think that the main difference is that it has four accumulators rather than just the one on the MAC. You might want to save the extra accumulators on the stack, and you may need to rename "ACC" to refer to the accumulator that you want to use. This code definitely worked using the CW7.1 compiler, as we never migrated to CW 7.2 for the MCF5213. This code is significantly faster than using straight C. I hope this helps you out, Shaun.
/************************************************************************/
// Hardware MAC (Multiply Accumulate) - v0.00
// Takes 2 arrays of signed shorts, performs a multiply accumulate and returns
// a signed long result.
// 1) Speed increase of 3x over conventional code
// 2) Length max = 8192
// 3) Auto supports all 3 parameter passing settings (Std,Register,Compact)
// 4) Function is re-entrant, but exercise caution when doing so
// 5) Function has been tested under the conditions :
// - Project optimization at min & max.
// - Calling conventions - Std/Reg/Compact
// [160us for length of 1000, at 73.7328MHz sysclk]
/************************************************************************/
// All Optimization OFF for the MAC routine, to avoid unexpected conflicts
#pragma push //
#pragma auto_inline off //
#pragma optimization_level 0 //
#pragma optimize_for_size off //
signed long Micro_MulAcc(signed short *in1,signed short *in2, unsigned short length)
{
// By default, these registers are d0-d2/a0-a1 push/popped, for an ISR.
// These are defined as the general scratch reg's (data/ptrs respectively)
signed long result=0;
signed long mac_save;
unsigned char macsr_save;
if((length==0)||(length > 8192)) // Failsafe (also checked in asm)
length = 1; //
asm
{
move.l ACC,d1 // Push MAC registers
move.l d1,(mac_save) //
move.l MACSR,d1 //
move.l d1,(macsr_save) //
// Retrieve parms from stack
// ----------------------------
#if (__STDABI__==1) // If all parms are 32bit wide
movea.l (in1),a0 // Get pointers to start of both data arrays
movea.l (in2),a1 //
move.w (length),d0 // Get length from stack as a short
#endif
#if (__REGABI__==1) // If all parms are passed in registers
#pragma unused(in1,in2,length)
// Pointers already in A0 and A1
// Length already in D0
#endif
#if(__STDABI__==0)&&(__REGABI__==0) // If Parms Compact mode
movea.l (in1),a0 // Get pointers to start of both data arrays
movea.l (in2),a1 //
move.w (length),d0 // Get length from upper short of next stack item
#endif
andi.l #0x00001FFF,d0 // Ensure length is not more than 8192, and that upper short
// of d0 is clear
// Setup MAC
// ----------------------------
move.l #0x00000080,MACSR // load MACSR for Sat|signed|Integer
move.l #0,ACC // Clear MAC accumulator
// Loop for length
// ----------------------------
loop:
move.w (a0)+,d1 // Retrieve a word from both arrays
move.w (a1)+,d2 //
mac.w d1.l,d2.l // MAC (.l)LSbyte of both registers
subq.l #1,d0 // Decrement counter
bne.s loop // Branch if not zero
//-------------------------------
move.l ACC,d0 // Transfer mac accumulator to temp location
move.l d0,result // Place on stack for return value
move.l (mac_save),d1 //
move.l d1,ACC // Pop MAC registers
move.l (macsr_save),d1//
move.l d1,MACSR //
}
}
#pragma pop