I've found an issue in the inline assembler.
When compiling the following code:
/*
 * Fixed-width integer aliases used by the 64-bit helpers.
 * NOTE(review): the names imply a 16-bit int and a 32-bit long on the
 * target compiler -- confirm against the toolchain documentation.
 */
typedef unsigned char UINT8;
typedef unsigned int UINT16;
typedef unsigned long UINT32;

/* Unsigned 64-bit value stored as two 32-bit halves, low half first. */
typedef struct UINT64_T {
    UINT32 low32;
    UINT32 high32;
} UINT64;

/* compile with -signed-char */
typedef signed char SINT8;
typedef signed int SINT16;
typedef signed long SINT32;

/*
 * Signed 64-bit value stored as two 32-bit halves, low half first; only the
 * high half is signed. (A native `signed long long` typedef is left disabled
 * in favour of this struct.)
 */
typedef struct SINT64_T {
    UINT32 low32;
    SINT32 high32;
} SINT64;

typedef UINT64 RRTCTime; /* 8 kHz timer tick counter */

/* Timestamp record: a 64-bit time value plus a flags byte and explicit padding. */
typedef struct ITEMS_TIME_STRUCT
{
    UINT64 mSecEpoch; /* milliseconds since 1/1/1970 */
    UINT8 flags;
    /*
     * Explicit padding bytes. These were declared with `BYTE`, a type that is
     * not defined anywhere in this listing; UINT8 is used instead so the
     * listing is self-contained.
     * NOTE(review): assumes BYTE was an 8-bit type -- confirm if a project
     * header defines it differently.
     */
    UINT8 dummy_alignment_byte1;
    UINT8 dummy_alignment_byte2;
    UINT8 dummy_alignment_byte3;
} iTEMSTime;
/*
 * u64_SHR3 - shift the 64-bit value at *RReg right by three bit positions,
 * in place, using inline assembly.
 *
 * RReg: register-held pointer to the UINT64 to shift. The first 32-bit word
 *       (low32) is loaded into AA and the second (high32) into BB.
 *
 * The shift is done as three identical steps: "asr BB" on the high half
 * followed by "ror.l AA" on the low half.
 * NOTE(review): this presumably relies on ror.l rotating the bit shifted out
 * of BB (via carry) into the top of AA -- confirm against the core's
 * instruction-set reference.
 *
 * NOTE: the toolchain discussed in this report was observed to assemble
 * "move.w #0,BB.2" as a move to the whole accumulator, which clears the
 * loaded value instead of only its extension part.
 */
inline void u64_SHR3(register UINT64 *RReg )
{
register UINT32 AA;
register UINT32 BB;
__asm{
move.l X:(RReg)+,AA // AA = low32; RReg is post-incremented to high32
move.l X:(RReg),BB // BB = high32
move.w #0,BB.2 // clear extension, so "asr BB" will be like "lsr BB10"
asr BB // step 1 of 3
ror.l AA
asr BB // step 2 of 3
ror.l AA
asr BB // step 3 of 3
ror.l AA
move.l BB,X:(RReg)- // store high32; RReg is post-decremented back to low32
move.l AA,X:(RReg) // store low32
}
}
/*
 * u64_add_S16 - add a signed 16-bit value to the 64-bit value at *R2Reg,
 * in place, using inline assembly.
 *
 * R2Reg: register-held pointer to the UINT64 to update. low32 is processed
 *        first, then high32; the pointer is stepped back afterwards.
 * D16:   signed 16-bit addend.
 *
 * Registers Y and A are saved on the stack on entry and restored on exit.
 * A non-negative D16 is added with add/adc; a negative D16 is negated and
 * subtracted with sub/sbc, so that the carry/borrow out of the low half is
 * applied to the high half.
 *
 * NOTE(review): for D16 == -32768 the result depends on how "neg Y0" treats
 * the most negative 16-bit value -- confirm against the core's
 * instruction-set reference.
 */
inline void u64_add_S16(register UINT64 *R2Reg, register SINT16 D16)
{
// N.B. by declaring "register UINT32 XReg;"
// inside assembly instructions you can use
// Xreg --> 32bit register (either dst A,B,C,D or src A10,B10,C10,D10)
// Xreg.0 --> lower 16bit word (either A0,B0,C0,D0)
// Xreg.1 --> upper 16bit word (either A1,B1,C1,D1)
// *R2Reg = *R2Reg + D16
__asm{
adda #2,SP // make room on the stack, then save Y and A
move.l Y,X:(SP)+
move.l A10,X:(SP)
move.w D16,Y0 // Y0 = addend
move.w #0,Y1 // this is necessary to tell the compiler that register Y is in use
move.l X:(R2Reg),A // A = low32
tst.w Y0
blt do_sub_s16 // negative addend: take the subtract path
add Y,A // low32 += Y
move.l A10,X:(R2Reg)+ // store low32, advance to high32
move.l #0,Y
move.l X:(R2Reg),A // A = high32
adc Y,A // high32 += 0 + carry
bra end_add_s16
do_sub_s16:
neg Y0 // Y0 = -D16
sub Y,A // low32 -= Y
move.l A10,X:(R2Reg)+ // store low32, advance to high32
move.l #0,Y
move.l X:(R2Reg),A // A = high32
sbc Y,A // high32 -= 0 + borrow
end_add_s16:
move.l A10,X:(R2Reg)- // store high32, step R2Reg back to low32
move.l X:(SP)-,A // restore A, then Y, in reverse order of the saves
move.l X:(SP)-,Y
};
}
/*
 * tick2mSec - convert the tick count at `tmp` to milliseconds in place
 * (myRTC/8 --> time in mSec) by calling u64_SHR3 on it.
 *
 * Wrapped in do { } while (0) so that `tick2mSec(x);` is a single statement
 * and stays valid inside an unbraced if/else.
 */
#define tick2mSec(tmp) do { u64_SHR3(tmp); } while (0)
/*
 * iTEMSTimeFromRRTC - turn the tick count stored in p->mSecEpoch into a
 * millisecond value, in place, and clear p->flags.
 *
 * p:     record whose mSecEpoch field holds the tick count on entry.
 * fixup: time offset in 8KHz ticks, added before the conversion to
 *        compensate for transmission/reception delays.
 */
void iTEMSTimeFromRRTC(iTEMSTime *p, int fixup)
{
    /* Declared `register`, matching the helpers' pointer parameters. */
    register RRTCTime *ticks = &p->mSecEpoch;

    /* Apply the delay compensation while the value is still in ticks. */
    u64_add_S16(ticks, fixup);

    /* 8kHz ticks --> milliseconds. */
    tick2mSec(ticks);

    p->flags = 0;
}
The compiler's inline assembler mishandles BB.2 references, as shown below (an excerpt from the disassembly of the inline code generated by expanding u64_SHR3() inside iTEMSTimeFromRRTC()):
0x0000002C 0xF120 move.l X:(R0)+,B
0x0000002D 0xF034 move.l X:(R0),A
0x0000002E 0xE080 move.w #0,A
0x0000002F 0x706B asr A
0x00000030 0x76C7 ror.l B
0x00000031 0x706B asr A
0x00000032 0x76C7 ror.l B
0x00000033 0x706B asr A
0x00000034 0x76C7 ror.l B
0x00000035 0xD030 move.l A10,X:(R0)-
0x00000036 0xD134 move.l B10,X:(R0)
That is, the following sequence of assembly instructions:
register UINT32 AA;
register UINT32 BB;
__asm{
move.l X:(RReg)+,AA
move.l X:(RReg),BB
move.w #0,BB.2 // clear extension, so "asr BB" will be like "lsr BB10"
...
}
Gets assembled as:
0x0000002C 0xF120 move.l X:(R0)+,B
0x0000002D 0xF034 move.l X:(R0),A
0x0000002E 0xE080 move.w #0,A
The register variable AA gets mapped to the B accumulator, and BB gets mapped to the A accumulator
(so far, so good),
but the inline assembly instruction
move.w #0,BB.2
instead of being assembled as
move.w #0,A2
gets "wrongtimized" (optimized in a wrong way) as:
move.w #0,A
This clears A0, A1 and A2 instead of clearing only A2.
I fixed the issue on my own by changing the inline source assembly code, but such bugs at assembler level should not exist in a mature toolchain like this.
Solved! Go to Solution.
Hi,
Thank you for pointing out the bug.
I will report the bug to the corresponding team.
BR
XiangJun Rong
Hi,
Thank you for pointing out the bug.
I will report the bug to the corresponding team.
BR
XiangJun Rong