I seem to be facing the exact same issue on a Teensy MicroMod, an Arduino-friendly board by PJRC/SparkFun built around the RT1062.
Although I am not using the official NXP demo code, I have configured the FlexIO module, shifter and timer identically to the NXP HX8357 example for the KL28/RT1050 (AN5313/AN12822), and I am also getting an early pulse on the WR pin as soon as the shifter is loaded during a single-beat transmit.

My code looks like so:
Shifter/timer setup:
/*
 * Transmit `length` bytes from `buffer` through FlexIO shifter 0, one byte
 * per beat, using timer 0 as the shift clock and driving the WR pin.
 *
 * FlexIO is disabled, shifter 0 and timer 0 are reconfigured, and FlexIO is
 * re-enabled on every call. The transfer is done by polling: each byte is
 * written to SHIFTBUF[0] once the shifter 0 status flag is set, and the
 * function returns after the timer 0 status flag is set following the last
 * byte. If `length` is 0 the peripheral is still reconfigured and enabled,
 * but nothing is written.
 *
 * buffer: bytes to send; must hold at least `length` bytes.
 * length: number of bytes to send.
 */
void singleBeatWrite(uint8_t const * buffer, uint32_t const length){
    /* Disable FlexIO while the shifter and timer are reconfigured */
    p->CTRL &= ~FLEXIO_CTRL_FLEXEN;

    /* Configure the shifter */
    p->SHIFTCFG[0] =
        FLEXIO_SHIFTCFG_INSRC                /* Shifter input */
        | FLEXIO_SHIFTCFG_SSTOP(0U)          /* Shifter stop bit disabled */
        | FLEXIO_SHIFTCFG_SSTART(0U)         /* Shifter start bit disabled and loading data on enabled */
        | FLEXIO_SHIFTCFG_PWIDTH(8U - 1U);   /* Bus width */

    p->SHIFTCTL[0] =
        FLEXIO_SHIFTCTL_TIMSEL(0U)           /* Shifter's assigned timer index */
        | (0U << 23)                         /* TIMPOL = 0: shift on posedge of shift clock */
        | FLEXIO_SHIFTCTL_PINCFG(3U)         /* Shifter's pin configured as output */
        | FLEXIO_SHIFTCTL_PINSEL(4U)         /* Shifter's pin start index */
        | (0U << 7)                          /* PINPOL = 0: shifter's pin active high */
        | FLEXIO_SHIFTCTL_SMOD(2U);          /* Shifter mode as transmit */

    /* Configure the timer for the shift clock */
    p->TIMCMP[0] =
        ((1U * 2U - 1U) << 8U)               /* TIMCMP[15:8] = number of beats x 2 - 1 */
        | (40U / 2U - 1U);                   /* TIMCMP[7:0] = baud rate divider / 2 - 1 */

    p->TIMCFG[0] =
        FLEXIO_TIMCFG_TIMOUT(0U)             /* Timer output logic one when enabled and not affected by reset */
        | FLEXIO_TIMCFG_TIMDEC(0U)           /* Timer decrement on FlexIO clock, shift clock equals timer output */
        | FLEXIO_TIMCFG_TIMRST(0U)           /* Timer never reset */
        | FLEXIO_TIMCFG_TIMDIS(2U)           /* Timer disabled on timer compare */
        | FLEXIO_TIMCFG_TIMENA(2U)           /* Timer enabled on trigger high */
        | FLEXIO_TIMCFG_TSTOP(0U)            /* Timer stop bit disabled */
        | (0U << 1);                         /* TSTART = 0: timer start bit disabled */

    p->TIMCTL[0] =
        FLEXIO_TIMCTL_TRGSEL((((0U) << 2) | 1U)) /* Timer trigger selected as shifter's status flag */
        | (1U << 23)                         /* TRGPOL = 1: timer trigger polarity as active low */
        | (1U << 22)                         /* TRGSRC = 1: timer trigger source as internal */
        | FLEXIO_TIMCTL_PINCFG(3U)           /* Timer's pin configured as output */
        | FLEXIO_TIMCTL_PINSEL(0)            /* Timer's pin index: WR pin */
        | (1U << 7)                          /* PINPOL = 1: timer's pin active low */
        | FLEXIO_TIMCTL_TIMOD(1U);           /* Timer mode as dual 8-bit counters baud/bit */

    /* Enable FlexIO */
    p->CTRL |= FLEXIO_CTRL_FLEXEN;

    if (length == 0U) {
        return;
    }

    /* Use polling method for data transfer: all bytes except the last */
    for (uint32_t i = 0; i < length - 1U; i++) {
        /* Wait until the shifter 0 status flag is set before loading */
        while (0 == (p->SHIFTSTAT & (1U << 0U))) {
        }
        p->SHIFTBUF[0U] = *buffer++;
    }

    /* Write the last byte */
    while (0 == (p->SHIFTSTAT & (1U << 0U))) {
    }
    /*
     * Clear the timer 0 status flag before the final load so the wait below
     * observes this beat. The flag is write-1-to-clear, so write only the
     * bit for timer 0; a read-modify-write (|=) would also write back, and
     * thereby clear, any other timer flag that happens to be set.
     */
    p->TIMSTAT = (1U << 0U);
    p->SHIFTBUF[0U] = *buffer++;

    /* Wait for transfer to be completed */
    while (0 == (p->TIMSTAT & (1U << 0U))) {
    }
}
I then call this function from my main loop every 500 ms, passing it the data to send:
/* Four data bytes handed to singleBeatWrite below */
uint8_t bufferData[4] = {0x17, 0x07, 0x19, 0x90};
...
/* Send all 4 bytes of bufferData */
singleBeatWrite(bufferData,4);