Right!
The quick fix is to do the ALIGN(8) that I show at the top.
The better fix is to put a BIC instruction in front of every assignment to SP in that startup.S code, so the low bits of the address are cleared before it becomes a stack pointer:
@ Assign a stack pointer in each processor mode, one mode after another.
@
@ r0 supplies the address for the first stack pointer and r1 the amount
@ subtracted from r0 after each assignment.
@
@ init_mode_stack <mode>:
@   1. switch to <mode> with I_BIT and F_BIT set in the CPSR control field
@   2. clear the low four bits of r0 (address becomes a multiple of 16)
@   3. copy r0 into sp while in that mode
@   4. lower r0 by r1, ready for the next mode
@ The macro modifies r0 and sp; r1 is only read.
.macro  init_mode_stack mode
        msr     CPSR_c, #\mode | I_BIT | F_BIT
        bic     r0, r0, #0xF
        mov     sp, r0
        sub     r0, r0, r1
.endm

        @ exception modes first, then SYS
        init_mode_stack MODE_FIQ
        init_mode_stack MODE_IRQ
        init_mode_stack MODE_ABT
        init_mode_stack MODE_UND
        init_mode_stack MODE_SYS

        @ SVC goes last, so execution carries on in SVC mode with
        @ I_BIT and F_BIT still set (interrupts disabled)
        init_mode_stack MODE_SVC