DMA with SPI to read SD Card?

cancel
Showing results for 
Show  only  | Search instead for 
Did you mean: 

DMA with SPI to read SD Card?

19,331 Views
ids
Contributor II

I am struggling with getting DMA to read data from an SPI channel, in particular, for an SD card.

Do I need to link two channels to get the SPI write before the SPI read?

Any code or suggestions?

Labels (1)
Tags (3)
9 Replies

1,987 Views
egoodii
Senior Contributor III

I don't have any particular experience using SPI for an SD card, but I will make two particular comments:

All SPI transactions are simultaneously a 'read' and a 'write'.  The only difference is what you 'keep'.  I think you will find that the DMA to 'empty' the RX-FIFO will 'naturally' come after the requests to fill the TX-FIFO since you can't have an RX-request until a TX-send has completed.

Freescale DSPI 'DMA' is a 'little bit inconvenient'.  The loading of SPIx_PUSHR FIFO registers requires 32-bit writes, the top-half of which are SPI-controls.  Thus, if you want to DMA-out a 'block', you have to intersperse your data as the 'bottom byte or word' in these 32-bit words, meaning your data-block is 'non contiguous'.  This puts an 'extra step' in your data-block handling to interleave data & controls that MAY preclude any advantage you were hoping to gain from DMA, especially since I assume you would run SPI to SD at 'full hardware rate' (1/2 busclk), meaning each byte-out takes 16 bus-clocks, or probably only 32 CPU cycles.

I presently use SPI DMA to continually refresh a monochrome bitmap display.  It is write-only, so I ignore RX requests (and overruns therefrom).  And since the internal memory buffer is in a fixed location (specifically addressed at the top of RAM so I can use bit-banding access for individual pixels!), it is 'very little trouble' to work with using only the least-byte of the 32-bit words, with the rest pre-set for the proper SPI controls for each write.

Memory structure:

/* One 32-bit word that can also be addressed as four bytes (u8) or as two
 * half-words (u16).  Field names run in ascending address order (lo first),
 * so "lo" is the least-significant part only on a little-endian target.
 * NOTE(review): the original comment labelled this layout "big-endian", which
 * does not match lo being declared first -- confirm the target byte order. */
typedef union {
    struct { uint8_t lo, mlo, mhi, hi; } u8;   /* byte view, offsets 0..3 */
    struct { uint16_t lo, hi; } u16;           /* half-word view, offsets 0 and 2 */
    uint32_t u32;                              /* whole-word view */
} u32_8_t;

//Display_RAM holds Y_PITCH*8 words (132*8); only 128*8 of them are displayed
#define Y_PITCH 132

//Transmit image for the display: a command area immediately followed by the
//display area, so both can be sent as one contiguous block.  Each element is
//a full 32-bit word (u32_8_t).
typedef struct {

  u32_8_t   OLED_CMDS[32];          //Precede the actual display RAM with room for commands to prefix the data

                                    //  In a contiguous block-write operation

  u32_8_t   Display_RAM[Y_PITCH*8]; //Chip RAM is 132*8, only 128*8 is displayed

} OLED_RAM_Obj;

//OLED.Display_RAM[ ].u8.lo are the bytes for screen data

with this pre-set:

    uint16_t foo;

//Preset SPI-port-required upper bits of Command/Display RAM

    for(foo=32;foo>0;foo--)       //Commands assert two CS, one of which is D/!C

        OLED.OLED_CMDS[foo-1].u32 = SPI_PUSHR_PCS(3) | SPI_PUSHR_CTAS(0);

    for(foo=Y_PITCH*8;foo>0;foo--)    //Data asserts just CS0, leaving D/!C high

        OLED.Display_RAM[foo-1].u32 = SPI_PUSHR_PCS(1) | SPI_PUSHR_CTAS(0);

//Pin the OLED image to a fixed address and leave it uninitialised at start-up.
//With 4-byte u32_8_t elements the object is (32 + 132*8) * 4 = 0x1100 bytes,
//i.e. it occupies 0x2000EF00..0x2000FFFF.
//NOTE(review): #pragma location / __no_init look like IAR extensions -- confirm toolchain.
#pragma location=0x2000EF00

__no_init OLED_RAM_Obj OLED;    //Pre-allocated fixed-space for Display RAM

                                //Necessary, in SRAM-U, to use bit-banding!

TX channel initialization:

void DMA_Init_Tx(void)

{

   // use dma to blast-out display contents!

    SIM_SCGC6 |= SIM_SCGC6_DMAMUX_MASK;

    SIM_SCGC7 |= SIM_SCGC7_DMA_MASK;

    DMA_ERQ = DMA_ERQ_ERQ2_MASK; //channel 2

    DMAMUX_CHCFG2 = DMAMUX_CHCFG_ENBL_MASK | DMAMUX_CHCFG_SOURCE(DMA_SPI0_XMIT_CHAN) ; //Source 17 for SPI transmit

    DMA_TCD2_SADDR = (uint32_t)&OLED.OLED_CMDS[32-OLED_cmd_cnt].u32; /* Set the Source Address */

      /* Destination address */

    DMA_TCD2_DADDR = (uint32_t)&SPI0_PUSHR;

      /* Source offset Dwords */

    DMA_TCD2_SOFF = 0x04;

      /* Source and Destination Modulo off, source and destination size 2 = 32 bits */

    DMA_TCD2_ATTR = DMA_ATTR_SSIZE(2) | DMA_ATTR_DSIZE(2);

      /* Transfer 4 bytes (one aligned Dword) per transaction */

    DMA_TCD2_NBYTES_MLNO = 0x04;

      /* Adjust back to start needed */

    DMA_TCD2_SLAST = -(4*(Y_PITCH*8 + OLED_cmd_cnt));

      /* Destination offset disabled */

    DMA_TCD2_DOFF = 0x00;

      /* No link channel to channel, 1 transaction */

    DMA_TCD2_CITER_ELINKNO = DMA_CITER_ELINKNO_CITER(((Y_PITCH*8 + OLED_cmd_cnt)));

      /* No adjustment to destination address */

    DMA_TCD2_DLASTSGA = 0x00;

    DMA_TCD2_BITER_ELINKNO = DMA_BITER_ELINKNO_BITER(((Y_PITCH*8 + OLED_cmd_cnt)));

}

Then this is called regularly to start the DMA-block to refresh:

//Start one DMA block transfer of the OLED image on channel 2.
//Writes SPI0_RSER, then DMA_ERQ, then DMA_TCD2_CSR; takes and returns nothing.
//Assumes the channel 2 TCD was already programmed by DMA_Init_Tx() -- TODO confirm call order.
void OLED_Refresh(void)        //See that RAM contents get sent to the OLED display device

{

//   SPI0_MCR |= SPI_MCR_CLR_RXF_MASK;  //Make sure RX Fifo is empty for us!

    //^^leave it full of garbage, there is just more to come (which we don't even offload)

      SPI0_RSER = SPI_RSER_TFFF_RE_MASK | SPI_RSER_TFFF_DIRS_MASK; //Set SPI TX to make DMA requests

      //Plain assignment: every other bit of DMA_ERQ is written as 0 here.
      //NOTE(review): presumably repeated on each call because DREQ drops the
      //enable once the block completes -- confirm against the eDMA chapter.
      DMA_ERQ = DMA_ERQ_ERQ2_MASK; //channel 2

      //DREQ and START are written together as the last step of the sequence.
      DMA_TCD2_CSR = DMA_CSR_DREQ_MASK | DMA_CSR_START_MASK;  //One transfer at a time.

}

0 Kudos

1,987 Views
ids
Contributor II

Thank you for the detailed response.

I am confused about the PUSHR comment you make.  According to the "KL25 Sub-Family Reference Manual", rev 3, Sep 2012, section 37.3, "the SPI has 8-bit registers", and section 37.3.5 defines the SPI Data Register - is this not an 8-bit field that you write your output to and later read the input back from?

The SPI impl in mbed has, for example, the following struct, which is all 8-bit as well:

/** SPI - Register Layout Typedef
 *
 *  Eight consecutive byte-wide fields; the two RESERVED entries pad offsets
 *  0x4 and 0x6 so that D and M land on the offsets noted below.
 */

typedef struct {

  __IO uint8_t C1;                                 /**< SPI control register 1, offset: 0x0 */

  __IO uint8_t C2;                                 /**< SPI control register 2, offset: 0x1 */

  __IO uint8_t BR;                                 /**< SPI baud rate register, offset: 0x2 */

  __I  uint8_t S;                                  /**< SPI status register, offset: 0x3 */

       uint8_t RESERVED_0[1];                      /**< padding, offset: 0x4 */

  __IO uint8_t D;                                  /**< SPI data register, offset: 0x5 */

       uint8_t RESERVED_1[1];                      /**< padding, offset: 0x6 */

  __IO uint8_t M;                                  /**< SPI match register, offset: 0x7 */

} SPI_Type;

Their implementation for a write/read cycle is also all 8-bit:

/*
 * Exchange one byte on the SPI bus as master.
 *
 * Blocks until spi_writeable() reports the transmit buffer free, writes the
 * low 8 bits of value to the data register, then blocks until spi_readable()
 * reports a received byte and returns it (0..255).  There is no timeout.
 */
int spi_master_write(spi_t *obj, int value) {
    uint8_t received;

    /* Hold off until the transmitter can accept a byte */
    for (;;) {
        if (spi_writeable(obj)) {
            break;
        }
    }
    obj->spi->D = (uint8_t)(value & 0xff);

    /* Hold off until the byte clocked in by that transfer has arrived */
    for (;;) {
        if (spi_readable(obj)) {
            break;
        }
    }
    received = obj->spi->D;

    return received & 0xff;
}

I have therefore setup the DMA to do 8-bit reads/writes.  My primary purpose is to read from the SPI and fill a buffer.  I prefix any DMA activity with appropriate commands to the SD card to prepare it for a read of a 512-byte block.  DMA wise I have tried a lot of things.  For example, a single DMA channel set to read, with Cycle Steal = 0, filled my buffer (512 bytes) very quickly but with all the same value.  I do have the SPI set up properly (as far as I can tell) - RXDMAE on, etc.  I tried linking two DMA channels, the other set to do writes to the SPI data register, Cycle Steal = 1 (single read/write at a time), ERQ enabled, etc, hoping writes would trigger reads and they would ping-pong back and forth with the SPRF/SPTEF flags signalling the appropriate DMA operation.  I've tried just about all permutations and combinations but cannot get it to work.  I've also tried adding the extra SPI write, as per ref.man 37.4.4.1.

I'd post code, but have done it in so many ways, no snippet could reflect this.

fwiw, using default mbed read/write functions is working, so I have a solid baseline from which to work.  Swapping the 512-byte block read to DMA is not working.

I see DMAMUX in your code snippet - maybe setting up the DMAMUX is what I am missing?

0 Kudos

1,987 Views
egoodii
Senior Contributor III

My bad -- I didn't notice the 'L series' tag.  My stuff is all for the 'bigger parts', in particular this is K20 Rev 2 silicon, with the FIFO-enabled DSPI peripheral.  Sounds like the KL peripheral is 'much simpler', which isn't all bad!

0 Kudos

1,987 Views
ids
Contributor II

Sorry, I should have spelled that out in my question, rather than just tagging it.  I had a suspicion you may be referring to other hardware.

Simple is good, but working would be better. :smileysad:  I'm really stumped on this, having tried just about everything.  I'll dig more into the DMAMUX stuff and see if the answer is hidden therein.  There are sections of the ref. man. that refer specifically to SPI via DMA, so I'm not just imagining it should work....I think.

Thanks again!

0 Kudos

1,987 Views
egoodii
Senior Contributor III

Sorry I can't be of 'more direct' help.  Only thing I will mention -- as SPI master, you certainly only 'receive' as a result of a 'transmit', so you will certainly need something filling your 512 TXs, so presumably two DMA channels are indeed required, but I expect your SPI peripheral has individual RX and TX 'requests for service' so they should pace themselves once enabled!

0 Kudos

1,987 Views
ids
Contributor II

Thanks again.

I actually had a long winded response ready to go, and as I was finishing up a detailed pseudo-code ver'n of what I've done, I read a bit more in the ref. manual that got me wondering about my impl, so I'll delay until I try a few more things.  I do understand the bit about an SPI Master having to write in order to read.  I'm just not sure how much of this a DMA transfer might automate.  It's probably safest not to assume too much, and run two channels, one to write (essentially garbage), the other to read.  I have tried a few things with the DMAMUX, scanned mbed source to see if I may conflict with any internals, etc.  Still no luck, however. :smileysad:  I much prefer a world in which someone else has done the hard part, and where I can just re-use the code.  Guess I'm stuck doing the legwork on this one.

Thanks

0 Kudos

1,987 Views
ids
Contributor II

After much too much time on this, I have finally succeeded.  If anyone wants code, just ask.

0 Kudos

1,987 Views
colin
Contributor III

Hi Steve,

I would be interested to see the code.

How is the performance of DMA SPI?  Did you consider, or try, interrupt-driven SPI first?

I'm curious if it is a significant improvement.  I understand there is an erratum on the Kinetis SPI that means you can't run it at full speed, if I recall correctly, but I imagine that DMA reduces CPU consumption quite a bit over interrupt-driven SPI, at least for medium and large block sizes.  (For small transfers of 1 to 20 bytes or so, perhaps the overhead of the DMA setup would actually reduce performance over interrupt-driven mode, but it seems fairly low-overhead to me.)

0 Kudos

1,987 Views
ids
Contributor II

Code below.  It should be noted that while I do have DMA and SPI working together, I have not been able to apply this to an SD card.  It's a great mystery to me at this point, and I'm throwing in the towel.  I have no sense of performance difference at this time, sorry.  I would assume that it would be faster than a busy loop checking the SPI Read Buffer Full and Transmit Buffer Empty flags and acting appropriately.  And it should reduce the CPU burden as well, allowing your code to carry on while this happens in the background.  My intent was to target the 512 byte block transfers from the SD card, filling a buffer with audio samples, in which case my code really has no need to wait for, or be notified of, completion.

/* Table of SPI peripheral base pointers, indexed by SPI channel number. */
SPI_Type* spi_peripherals[] = SPI_BASES;

/**
 * Run a DMA test using SPI.  Attempts to read and write a fixed number of
 * bytes, to/from the provided buffers, using one DMA channel per direction.
 *
 * Sequence: disable the SPI, enable the DMAMUX/DMA clocks, then (with
 * interrupts masked) reset and configure both DMAMUX slots and both DMA
 * channels, and finally re-enable the SPI with its TX and RX DMA requests.
 * The function does not wait for the transfer to finish.
 *
 * @param dmaReadCh  The DMA channel to configure for the SPI READ operation.
 * @param dmaWriteCh The DMA channel to configure for the SPI WRITE operation.
 * @param srcBuffer  Data buffer of values to write out to SPI
 * @param destBuffer Data buffer in which to store values read from the SPI
 * @param length     The length of both buffers; it is masked with
 *                   DMA_DSR_BCR_BCR_MASK, so larger values are silently truncated.
 * @param spiCh      The SPI channel to use, 0 or 1.  Used unchecked as an index
 *                   into spi_peripherals.
 *
 * NOTE(review): both DMAMUX slots are enabled before the channels' SAR/DAR/BCR/DCR
 * are written -- confirm this order against the DMAMUX chapter of the reference manual.
 */

void dma_test(const int dmaReadCh, const int dmaWriteCh, uint8_t *srcBuffer, uint8_t *destBuffer, int length, int spiCh) {

    SPI_Type* SPIn = spi_peripherals[spiCh];

    // Keep the SPI disabled while DMA is being set up; re-enabled at the end

    SPIn->C1 &= ~SPI_C1_SPE_MASK;

    //Enable DMA clocking

    SIM->SCGC6 |= SIM_SCGC6_DMAMUX_MASK;    // Enable clock to DMA mux

    SIM->SCGC7 |= SIM_SCGC7_DMA_MASK;      // Enable clock to DMA

    __disable_irq();    // Disable Interrupts - this needs to be an atomic operation

    // reset DMAMUX0

    DMAMUX0->CHCFG[dmaReadCh] = 0;

    DMAMUX0->CHCFG[dmaWriteCh] = 0;

    DMA0->DMA[dmaReadCh].DSR_BCR = DMA_DSR_BCR_DONE_MASK; // clear/reset DMA status

    DMA0->DMA[dmaWriteCh].DSR_BCR = DMA_DSR_BCR_DONE_MASK; // clear/reset DMA status

    // Configure DMAMUX: route the selected SPI's receive/transmit request to each channel

    DMAMUX0->CHCFG[dmaReadCh] = DMAMUX_CHCFG_ENBL_MASK | /*DMAMUX_CHCFG_TRIG_MASK | */(spiCh ? DMA_MUX_SRC_SPI1_Receive : DMA_MUX_SRC_SPI0_Receive);

    DMAMUX0->CHCFG[dmaWriteCh] = DMAMUX_CHCFG_ENBL_MASK | /*DMAMUX_CHCFG_TRIG_MASK | */(spiCh ? DMA_MUX_SRC_SPI1_Transmit : DMA_MUX_SRC_SPI0_Transmit);

    // Set up DMA channel to read from SPI

    DMA0->DMA[dmaReadCh].SAR = (uint32_t)&(SPIn->D);// set source address: SPI Data register

    DMA0->DMA[dmaReadCh].DAR = (unsigned int)destBuffer; // set dest address: memory buffer

    // ORs the count into the register; relies on the count field being 0 after the DONE write above -- TODO confirm

    DMA0->DMA[dmaReadCh].DSR_BCR |= DMA_DSR_BCR_BCR_MASK & length; // length of transfer

    // Peripheral request on, cycle steal, destination increments; SSIZE/DSIZE(0x01) presumably select 8-bit accesses -- confirm

    DMA0->DMA[dmaReadCh].DCR = DMA_DCR_ERQ_MASK | DMA_DCR_CS_MASK | DMA_DCR_SSIZE(0x01) | DMA_DCR_DINC_MASK | DMA_DCR_DSIZE(0x01) | DMA_DCR_D_REQ_MASK;

    // Set up another DMA channel to write to the SPI, in order to force Reads

    DMA0->DMA[dmaWriteCh].SAR = (unsigned int)srcBuffer;// set source address

    DMA0->DMA[dmaWriteCh].DAR = (uint32_t)&(SPIn->D); // set dest address: SPI Data register

    DMA0->DMA[dmaWriteCh].DSR_BCR |= DMA_DSR_BCR_BCR_MASK & length; // length of transfer

    // Same flags as the read channel, except that the source increments instead of the destination

    DMA0->DMA[dmaWriteCh].DCR = DMA_DCR_ERQ_MASK | DMA_DCR_CS_MASK | DMA_DCR_SINC_MASK | DMA_DCR_SSIZE(0x01) | DMA_DCR_DSIZE(0x01) | DMA_DCR_D_REQ_MASK;

    __enable_irq();

    // Enable SPI and the DMA features within

    SPIn->C1 |= SPI_C1_SPE_MASK;

    SPIn->C2 |= SPI_C2_TXDMAE_MASK | SPI_C2_RXDMAE_MASK;

I followed this up with a loop to check various status registers and so forth.

0 Kudos