I'm trying to drive SPI+DMA directly from the registers. I have the code working through NXP's API for the LPC55S69, but it takes 24.7us to execute the SPI_MasterTransferDMA() function. This means that I end up missing interrupts (which arrive every 62.5us), and this overhead leaves little time for any other processing. To put this in context, the part is running at 150MHz, so we are spending about 3705 cycles (24.7us x 150MHz) executing a single API function.
So I have extracted what I believe is most of the relevant code so that I can talk to the registers directly, but I seem to be missing something.
// Register-level setup of one full-duplex SPI transfer of num_bytes bytes on
// FLEXCOMM4 using two DMA channels (RX: FIFORD -> recv_buffer, TX: send_buffer
// -> FIFOWR). This fragment only programs the SPI FIFO, the DMA descriptors and
// the channel XFERCFG registers; it does not start the channels itself.
//
// NOTE(review): num_bytes == 0 is not guarded here; both XFERCOUNT fields are
// built from (num_bytes - 1).

// Mark the driver handle as busy and record the transfer length.
FLEXCOMM4_DMA_Handle.state = (uint8_t)1; // kSPI_Busy
FLEXCOMM4_DMA_Handle.transferSize = num_bytes;
// Clear FIFOs.
FLEXCOMM4_PERIPHERAL->FIFOCFG |= SPI_FIFOCFG_EMPTYTX_MASK | SPI_FIFOCFG_EMPTYRX_MASK;
// Write the TX/RX error bits back to FIFOSTAT (presumably write-1-to-clear --
// TODO confirm against the user manual).
FLEXCOMM4_PERIPHERAL->FIFOSTAT |= SPI_FIFOSTAT_TXERR_MASK | SPI_FIFOSTAT_RXERR_MASK;
// Enable rx&tx dma
FLEXCOMM4_PERIPHERAL->FIFOCFG |= SPI_FIFOCFG_DMARX_MASK | SPI_FIFOCFG_DMATX_MASK;
// base address is the same for TX&RX, so only need to set it once.
// NOTE(review): this assumes the RX and TX channels live on the same DMA
// controller instance. It also redirects the controller away from whatever
// descriptor table the SDK DMA driver installed, so every other active channel
// on this controller would need a valid entry in desc_table -- verify.
FLEXCOMM4_TX_DMA_BASEADDR->SRAMBASE = (uint32_t) &desc_table[0];
// receive
// enable peripheral request
FLEXCOMM4_RX_DMA_BASEADDR->CHANNEL[FLEXCOMM4_RX_DMA_CHANNEL].CFG |= DMA_CHANNEL_CFG_PERIPHREQEN_MASK;
// RX transfer configuration: single descriptor (no reload), no software
// trigger, interrupt A requested, fixed source, incrementing destination,
// num_bytes transfers. WIDTH(0) is presumably 8-bit -- TODO confirm.
const uint32_t rxfercfg = DMA_CHANNEL_XFERCFG_CFGVALID(1)
| DMA_CHANNEL_XFERCFG_RELOAD(0)
| DMA_CHANNEL_XFERCFG_SWTRIG(0)
| DMA_CHANNEL_XFERCFG_CLRTRIG(1)
| DMA_CHANNEL_XFERCFG_SETINTA(1)
| DMA_CHANNEL_XFERCFG_SETINTB(0)
| DMA_CHANNEL_XFERCFG_WIDTH(0)
| DMA_CHANNEL_XFERCFG_SRCINC(0)
| DMA_CHANNEL_XFERCFG_DSTINC(1)
| DMA_CHANNEL_XFERCFG_XFERCOUNT(num_bytes - 1)
;
// Fill the RX channel's descriptor slot (the table is indexed by channel
// number): source is the SPI read FIFO, destination is recv_buffer, no chained
// descriptor.
desc_table[FLEXCOMM4_RX_DMA_CHANNEL].xfercfg = rxfercfg;
desc_table[FLEXCOMM4_RX_DMA_CHANNEL].srcEndAddr = DMA_DESCRIPTOR_END_ADDRESS((uint32_t *)&(FLEXCOMM4_PERIPHERAL->FIFORD), 0, 0, 0);
desc_table[FLEXCOMM4_RX_DMA_CHANNEL].dstEndAddr = DMA_DESCRIPTOR_END_ADDRESS((uint32_t *)g_afe_data.recv_buffer, 1, num_bytes, 1);
desc_table[FLEXCOMM4_RX_DMA_CHANNEL].linkToNextDesc = NULL;
// Load the same configuration into the channel's XFERCFG register.
// NOTE(review): nothing here orders the descriptor stores above before this
// register write; if desc_table is not volatile a barrier may be needed --
// verify.
FLEXCOMM4_RX_DMA_BASEADDR->CHANNEL[FLEXCOMM4_RX_DMA_CHANNEL].XFERCFG = rxfercfg;
// transmit
// enable peripheral request
FLEXCOMM4_TX_DMA_BASEADDR->CHANNEL[FLEXCOMM4_TX_DMA_CHANNEL].CFG |= DMA_CHANNEL_CFG_PERIPHREQEN_MASK;
// TX transfer configuration: mirror of the RX one, but with incrementing
// source and fixed destination.
const uint32_t txfercfg = DMA_CHANNEL_XFERCFG_CFGVALID(1)
| DMA_CHANNEL_XFERCFG_RELOAD(0)
| DMA_CHANNEL_XFERCFG_SWTRIG(0)
| DMA_CHANNEL_XFERCFG_CLRTRIG(1)
| DMA_CHANNEL_XFERCFG_SETINTA(1)
| DMA_CHANNEL_XFERCFG_SETINTB(0)
| DMA_CHANNEL_XFERCFG_WIDTH(0)
| DMA_CHANNEL_XFERCFG_SRCINC(1)
| DMA_CHANNEL_XFERCFG_DSTINC(0)
| DMA_CHANNEL_XFERCFG_XFERCOUNT(num_bytes - 1)
;
// Fill the TX channel's descriptor slot: source is send_buffer, destination is
// the SPI write FIFO, no chained descriptor.
desc_table[FLEXCOMM4_TX_DMA_CHANNEL].xfercfg = txfercfg;
desc_table[FLEXCOMM4_TX_DMA_CHANNEL].srcEndAddr = DMA_DESCRIPTOR_END_ADDRESS((uint32_t *)g_afe_data.send_buffer, 1, num_bytes, 1);
desc_table[FLEXCOMM4_TX_DMA_CHANNEL].dstEndAddr = DMA_DESCRIPTOR_END_ADDRESS((uint32_t *)&(FLEXCOMM4_PERIPHERAL->FIFOWR), 0, 0, 0);
desc_table[FLEXCOMM4_TX_DMA_CHANNEL].linkToNextDesc = NULL;
FLEXCOMM4_TX_DMA_BASEADDR->CHANNEL[FLEXCOMM4_TX_DMA_CHANNEL].XFERCFG = txfercfg;
// Record that both directions are now pending.
FLEXCOMM4_DMA_Handle.rxInProgress = true;
FLEXCOMM4_DMA_Handle.txInProgress = true;
// Build the FIFOWR control word (upper 16 bits) that will accompany the data
// written by the TX DMA channel.
uint32_t tmpData = 0U;
// kSPI_FrameDelay is deliberately left out; the line below is the disabled
// original (it would not compile as written, since txfercfg is a uint32_t).
//tmpData |= ((txfercfg.configFlags & (uint32_t)kSPI_FrameDelay) != 0U) ? (uint32_t)kSPI_FrameDelay : 0U;
// NOTE(review): kSPI_FrameAssert is set unconditionally. If these control bits
// apply to every frame the DMA pushes, the frame would end (SSEL deassert)
// after each byte rather than after the last one -- confirm against the SDK's
// SPI_MasterTransferDMA handling of multi-frame transfers.
tmpData |= (uint32_t)kSPI_FrameAssert;
// NOTE(review): the pointer returned by SPI_GetConfig is dereferenced without
// a NULL check.
spi_config_t *spi_config_p = (spi_config_t *)SPI_GetConfig(FLEXCOMM4_PERIPHERAL);
// Assert only the configured slave select; keep all others deasserted.
tmpData |= ((uint32_t)SPI_DEASSERT_ALL & (~(uint32_t)SPI_DEASSERTNUM_SSEL((uint32_t)spi_config_p->sselNum)));
// set width of data - range asserted at entry
tmpData |= SPI_FIFOWR_LEN(kSPI_Data8Bits);
// 16-bit store of the control bits to the upper half-word of FIFOWR (address
// of FIFOWR + 2 bytes). Presumably a half-word write here updates the control
// bits without pushing data into the FIFO -- TODO confirm.
*((uint16_t *)((uint32_t)&FLEXCOMM4_PERIPHERAL->FIFOWR) + 1) = (uint16_t)(tmpData >> 16U);
Any help with why this isn't working would be appreciated. The desc_table variable is aligned to a 512-byte boundary, as per the alignment requirement. num_bytes is passed in as a parameter, and the calls to
DMA_StartTransfer(FLEXCOMM4_DMA_Handle.rxHandle);
DMA_StartTransfer(FLEXCOMM4_DMA_Handle.txHandle);
are done in a separate function.