DSC inline assembler mismanages access to upper 4bits of 36bit accumulator registers

cancel
Showing results for 
Show  only  | Search instead for 
Did you mean: 

DSC inline assembler mismanages access to upper 4bits of 36bit accumulator registers

Jump to solution
1,215 Views
Lorenzo_Mch_IT
Contributor IV

I've found an issue in the inline assembler.

When compiling the following code:

/* Unsigned integer aliases named after their intended bit width.
 * NOTE(review): the names assume 8-bit char, 16-bit int and 32-bit long on
 * the target toolchain -- confirm against the compiler's data model. */
typedef unsigned char       UINT8;
typedef unsigned int        UINT16;
typedef unsigned long       UINT32;

/* Unsigned 64-bit quantity emulated as two 32-bit halves.
 * low32 is the first member; the inline-assembly helpers that take a
 * UINT64 * load the first 32-bit word as the low half and the next one as
 * the high half, so the member order must not be changed. */
typedef struct UINT64_T {
	UINT32 low32;   /* least significant 32 bits */
	UINT32 high32;  /* most significant 32 bits */
}  UINT64;

/* Signed integer aliases named after their intended bit width. */
/* compile with -signed-char */
typedef signed char         SINT8;
typedef signed int          SINT16;
typedef signed long         SINT32;

/* Signed 64-bit quantity emulated as two 32-bit halves; only the upper half
 * is declared signed. The native declaration below is kept disabled
 * (presumably the toolchain has no usable long long -- confirm). */
// typedef signed long long    SINT64;
typedef struct SINT64_T {
	UINT32 low32;   /* least significant 32 bits (unsigned) */
	SINT32 high32;  /* most significant 32 bits (signed) */
}  SINT64;
 
/* 64-bit tick counter of the 8 kHz timer. */
typedef UINT64 RRTCTime; // 8 kHz timer tick counter

/* Timestamp record: a 64-bit time value, one flags byte and three
 * explicit padding bytes.
 * NOTE(review): BYTE is not declared in the visible part of this file;
 * it is presumably an 8-bit type from a project header -- confirm. */
typedef struct ITEMS_TIME_STRUCT
{
	UINT64 mSecEpoch; // milliseconds since 1/1/1970
	UINT8  flags;
	BYTE dummy_alignment_byte1; // padding only
	BYTE dummy_alignment_byte2; // padding only
	BYTE dummy_alignment_byte3; // padding only
} iTEMSTime;

/*
 * Shift the 64-bit value at *RReg right by three bit positions, in place.
 * The accumulator extension of the high half is cleared first so that
 * zeros are intended to enter at the top (an unsigned divide by 8).
 *
 * RReg is post-incremented while loading and post-decremented while
 * storing, so it points at the same word on exit as on entry.
 *
 * NOTE(review): the toolchain has been observed to assemble
 * "move.w #0,BB.2" as a move to the WHOLE accumulator (e.g. "move.w #0,A")
 * instead of to its extension register only, which wipes the high half
 * that was just loaded. Check the disassembly of every expansion of this
 * function before trusting its result.
 */
inline void u64_SHR3(register UINT64 *RReg )
{
	register UINT32 AA;	// holds the low 32 bits
	register UINT32 BB;	// holds the high 32 bits
	__asm{
	move.l X:(RReg)+,AA	// AA = first word (low32), advance to high32
	move.l X:(RReg),BB	// BB = second word (high32)
	move.w #0,BB.2 // clear extension, so "asr BB" will be like "lsr BB10"
	asr BB	// bit 1 of 3: shift high half right ...
	ror.l AA	// ... then rotate low half right (presumably taking BB's shifted-out bit via carry -- confirm in the ISA manual)
	asr BB	// bit 2 of 3
	ror.l AA
	asr BB	// bit 3 of 3
	ror.l AA
	move.l BB,X:(RReg)-	// store high32, step back to low32
	move.l AA,X:(RReg)	// store low32
	}
}
/*
 * Add the signed 16-bit value D16 to the 64-bit value at *R2Reg, in place.
 *
 * D16 is placed in Y0 with Y1 zeroed. A non-negative D16 is added to the
 * low 32 bits and the high 32 bits are then updated with adc against zero;
 * a negative D16 is negated and subtracted from the low 32 bits, and the
 * high 32 bits are then updated with sbc against zero.
 *
 * Y and A10 are saved on the stack on entry and reloaded on exit. R2Reg is
 * post-incremented once and post-decremented once, so it is unchanged on
 * exit.
 *
 * NOTE(review): only A10 (32 bits) of accumulator A is saved; the
 * extension A2 is not stored explicitly -- confirm this is acceptable for
 * every caller.
 * NOTE(review): for D16 == -32768 the result of "neg Y0" depends on the
 * core's overflow/saturation behavior -- confirm.
 */
inline void u64_add_S16(register UINT64 *R2Reg, register SINT16 D16)
{
	// N.B. by declaring "register UINT32 XReg;"
	// in assembly instructions you can use
	// Xreg   --> 32bit register   (either dst A,B,C,D or src A10,B10,C10,D10)
	// Xreg.0 --> lower 16bit word (either A0,B0,C0,D0)
	// Xreg.1 --> upper 16bit word (either A1,B1,C1,D1)

	// *R2Reg = *R2Reg + D16
	__asm{
		adda #2,SP	// make room on the stack
		move.l Y,X:(SP)+	// save Y
		move.l A10,X:(SP)	// save A10
		move.w D16,Y0	// Y0 = addend
		move.w #0,Y1 // this is necessary to tell the compiler that register Y is in use
	    move.l X:(R2Reg),A	// A = low 32 bits
		tst.w  Y0
		blt    do_sub_s16	// negative addend: take the subtract path
		add    Y,A	// low32 += D16
		move.l A10,X:(R2Reg)+	// store low32, advance to high32
		move.l #0,Y
		move.l X:(R2Reg),A	// A = high 32 bits
		adc    Y,A	// high32 += 0 with carry
		bra    end_add_s16
	do_sub_s16:
		neg    Y0	// Y0 = -D16 (magnitude of the negative addend)
		sub    Y,A	// low32 -= magnitude
		move.l A10,X:(R2Reg)+	// store low32, advance to high32
		move.l #0,Y
		move.l X:(R2Reg),A	// A = high 32 bits
		sbc    Y,A	// high32 -= 0 with borrow
	end_add_s16:
		move.l A10,X:(R2Reg)-	// store high32, step back to low32
		move.l X:(SP)-,A	// reload A from the saved A10
		move.l X:(SP)-,Y	// reload Y and release the stack space
	};
}

/*
 * Convert a 64-bit counter of 8 kHz ticks to milliseconds, in place
 * (tick count / 8, done as a right shift by 3). `tmp` is a pointer to the
 * counter and is handed unchanged to u64_SHR3().
 *
 * The body is wrapped in do { } while (0) so that `tick2mSec(x);` expands
 * to exactly one statement; a bare `{ ... }` followed by `;` would leave a
 * stray empty statement and break an unbraced if/else.
 */
#define tick2mSec(tmp) do { u64_SHR3(tmp); } while (0)

/*
 * Turn the 8 kHz tick count held in p->mSecEpoch into a millisecond value,
 * in place, and reset p->flags.
 *
 * p     - record to update; its mSecEpoch field is read and overwritten.
 * fixup - signed offset, in 8 kHz ticks, added before the conversion to
 *         compensate for transmission/reception delays.
 */
void iTEMSTimeFromRRTC(iTEMSTime *p, int fixup)
{
	/* Both helpers work directly on the 64-bit field through this pointer. */
	register RRTCTime *ticks = &p->mSecEpoch;

	/* Apply the delay compensation while the value is still in ticks. */
	u64_add_S16(ticks, fixup);

	/* 8 kHz ticks -> milliseconds. */
	tick2mSec(ticks);

	p->flags = 0;
}

 

The compiler's inline assembler mishandles BB.2 references, as shown below (disassembly of the inline code generated by expanding u64_SHR3() inside iTEMSTimeFromRRTC()):

0x0000002C  0xF120                 move.l      X:(R0)+,B
0x0000002D  0xF034                 move.l      X:(R0),A
0x0000002E  0xE080                 move.w      #0,A
0x0000002F  0x706B                 asr         A
0x00000030  0x76C7                 ror.l       B
0x00000031  0x706B                 asr         A
0x00000032  0x76C7                 ror.l       B
0x00000033  0x706B                 asr         A
0x00000034  0x76C7                 ror.l       B
0x00000035  0xD030                 move.l      A10,X:(R0)-
0x00000036  0xD134                 move.l      B10,X:(R0)

 

That is, the following sequence of inline assembly instructions:

	register UINT32 AA;
	register UINT32 BB;
	__asm{
	      move.l X:(RReg)+,AA
	      move.l X:(RReg),BB
	      move.w #0,BB.2 // clear extension, so "asr BB" will be like "lsr BB10"
              ...
        }

Gets assembled as:

0x0000002C  0xF120                 move.l      X:(R0)+,B
0x0000002D  0xF034                 move.l      X:(R0),A
0x0000002E  0xE080                 move.w      #0,A

The register variable AA is mapped to accumulator B, and BB is mapped to accumulator A
(so far, so good),
but the inline assembly instruction

move.w #0,BB.2

instead of being assembled as

move.w #0,A2

gets "wrongtimized" (optimized in a wrong way) as:

move.w #0,A

This clears A0, A1 and A2 instead of clearing only A2.

I worked around the issue by changing the inline assembly source, but bugs like this at the assembler level should not exist in a mature toolchain.

0 Kudos
1 Solution
1,205 Views
xiangjun_rong
NXP TechSupport
NXP TechSupport

Hi,

Thank you for pointing out the bug.

I will report the bug to the corresponding team.

BR

XiangJun Rong

View solution in original post

0 Kudos
1 Reply
1,206 Views
xiangjun_rong
NXP TechSupport
NXP TechSupport

Hi,

Thank you for pointing out the bug.

I will report the bug to the corresponding team.

BR

XiangJun Rong

0 Kudos