GPDMA for Frame buffer memory copy (Multi buffer/emWin)

cancel
Showing results for 
Show  only  | Search instead for 
Did you mean: 

GPDMA for Frame buffer memory copy (Multi buffer/emWin)

1,002 Views
lpcware
NXP Employee
NXP Employee
Content originally posted in LPCWare by chetansd83 on Wed Dec 30 23:00:57 MST 2015
Dear friends,

I am using Embedded Artist board of LPC4357 (EA4357) with 1024x768 TFT.

I have configured lcd_conf.c for multiple buffer (3) and added Interrupt service routine and Copy buffer callback function.

After implementing multiple frame buffers I can see that the display flickers less, but execution has become much slower.

I suppose this is due to the heavy memcpy() call.

So I want to use GPDMA for memory copy (SDRAM to SDRAM).

Unfortunately my code works only when the DMA transfer size is below 1080. If I increase the size, it doesn't work.

Any suggestions, please?

I have attached my LCDconf.c.

Please let me know if any clarification needed from my side.

Chetan

Original Attachment has been moved to: LCDConf_4.c.zip

Labels (1)
0 Kudos
5 Replies

623 Views
lpcware
NXP Employee
NXP Employee
Content originally posted in LPCWare by michele sponchiado on Mon Jan 18 07:55:40 MST 2016
Hi, please check whether this works for you; it works for me:
// Element width selector for the DMA fast memcpy.
// The numeric values are kept explicit: the rest of the code forwards them
// unchanged to the DMA width fields and uses them as a table index.
typedef enum
{
    enum_dma_fast_memcpy_width_byte     = 0, // one element is a byte
    enum_dma_fast_memcpy_width_halfword = 1, // one element is a halfword
    enum_dma_fast_memcpy_width_word     = 2, // one element is a word
    enum_dma_fast_memcpy_width_numof         // number of valid widths (not a width itself)
} enum_dma_fast_memcpy_width;


// Parameter block describing one DMA fast memcpy request.
typedef struct _type_dma_fast_memcpy_params
{
    // Start address of the (always contiguous) source data.
    uint32_t src_address;
    // Start address of the destination.
    uint32_t dst_address;
    // Size of one element; also selects the DMA transfer width.
    enum_dma_fast_memcpy_width width;
    // Total number of elements (not bytes) to copy.
    uint32_t num_of_elements;
    // Routine called once per millisecond of timeout budget while waiting for
    // the transfer to end. When NULL, the copy routine falls back to
    // TIM_Waitus(1000) instead of calling a user routine.
    type_wait_1ms_delay p_delay;
    // Number of delay calls allowed before the copy routine reports a timeout.
    unsigned int ui_max_ms_to_wait;
    // 0: plain contiguous copy.
    // Otherwise: the source is read contiguously, while the destination
    // advances by ui_dst_num_bytes_per_line after every
    // ui_num_elements_per_line elements.
    unsigned int ui_num_elements_per_line;
    // Distance in bytes between the starts of two consecutive lines in the
    // destination buffer (only used when ui_num_elements_per_line != 0).
    unsigned int ui_dst_num_bytes_per_line;
} type_dma_fast_memcpy_params;


// Copy from memory to memory using the GPDMA, driven by a chain of linked-list
// items (LLI) so that one request can exceed the size of a single DMA transfer.
// The original author reports this to be about 5 times faster than memcpy()
// thanks to DMA burst transfers.
//
// The request is split into N "full" chunks plus one final chunk:
//  - continuous copy (p->ui_num_elements_per_line==0): chunks are
//    def_linked_list_chunk_size elements long, source and destination both
//    advance by one chunk;
//  - line copy (p->ui_num_elements_per_line!=0): every chunk is one line, the
//    source advances by one line, the destination by
//    p->ui_dst_num_bytes_per_line.
// The final chunk carries the remaining elements and raises the interrupt.
//
// Parameters:
//  p  request description, see type_dma_fast_memcpy_params.
//
// Returns:
//  enum_dma_fast_memcpy_retcode_OK                    transfer completed (or nothing to copy)
//  enum_dma_fast_memcpy_retcode_invalid_width         p->width is not a valid width
//  enum_dma_fast_memcpy_retcode_too_long_line         line longer than def_linked_list_chunk_size
//  enum_dma_fast_memcpy_retcode_too_big_transfer_regular_memcpy_done
//                                                     not enough linked-list items; see note below
//  enum_dma_fast_memcpy_retcode_unable_to_start       GPDMA_Setup() failed
//  enum_dma_fast_memcpy_retcode_timeout               transfer did not end within the timeout
enum_dma_fast_memcpy_retcode v_do_dma_fast_memcpy(type_dma_fast_memcpy_params *p)
{
    enum_dma_fast_memcpy_retcode retcode;
    GPDMA_Channel_CFG_Type cfg;
    unsigned int ui_num_of_full_linked_list_required;
    unsigned int ui_spare_linked_list_size;
    uint32_t src;
    uint32_t dst;
    uint32_t chunk_width;
    uint32_t control_word;
    uint32_t control_word_spare;
    enum_dma_fast_memcpy_width width;
    GPDMA_LLI_Type *p_ll;
    uint32_t ui_num_elements_per_line;
    uint32_t ui_timeout_ms;
    unsigned char uc_continuous_copy;

    retcode=enum_dma_fast_memcpy_retcode_OK;

    // accept only the known element widths
    switch(p->width)
    {
        case enum_dma_fast_memcpy_width_byte:
        case enum_dma_fast_memcpy_width_halfword:
        case enum_dma_fast_memcpy_width_word:
        {
            width=p->width;
            break;
        }
        default:
        {
            return enum_dma_fast_memcpy_retcode_invalid_width;
        }
    }

    dst=p->dst_address;
    src=p->src_address;
    if (p->ui_num_elements_per_line==0)
    {
        // continuous copy: cut the request into fixed-size chunks
        uc_continuous_copy=1;
        ui_num_of_full_linked_list_required=p->num_of_elements/def_linked_list_chunk_size;
        ui_num_elements_per_line=def_linked_list_chunk_size;
    }
    else
    {
        // line copy: one linked-list item per line, so a line must fit in one chunk
        if (p->ui_num_elements_per_line>def_linked_list_chunk_size)
        {
            return enum_dma_fast_memcpy_retcode_too_long_line;
        }
        uc_continuous_copy=0;
        ui_num_of_full_linked_list_required=p->num_of_elements/p->ui_num_elements_per_line;
        ui_num_elements_per_line=p->ui_num_elements_per_line;
    }

    // Nothing to copy: without this guard the "no remainder" handling below
    // would promote the empty remainder to a whole chunk and the DMA would
    // copy a full chunk although zero elements were requested.
    if (p->num_of_elements==0)
    {
        return enum_dma_fast_memcpy_retcode_OK;
    }

    // elements left over after the full chunks
    ui_spare_linked_list_size=p->num_of_elements-(ui_num_of_full_linked_list_required*ui_num_elements_per_line);

    // the last item of the chain always carries data: when the request is an
    // exact multiple of the chunk size, the last full chunk becomes the final item
    if (!ui_spare_linked_list_size)
    {
        if (ui_num_of_full_linked_list_required)
        {
            ui_num_of_full_linked_list_required--;
        }
        ui_spare_linked_list_size=ui_num_elements_per_line;
    }

    // not enough linked-list items for this request
    if (ui_num_of_full_linked_list_required+1>=def_max_linked_list)
    {
        // NOTE(review): only the continuous case is actually copied here; a
        // line copy returns the same "regular memcpy done" code without
        // copying anything -- callers using line mode should be aware of it.
        if(uc_continuous_copy)
        {
            memcpy((char*)dst,(char*)src,uc_num_bytes_for_width[width]*p->num_of_elements);
        }
        return enum_dma_fast_memcpy_retcode_too_big_transfer_regular_memcpy_done;
    }

    memset(&cfg,0,sizeof(cfg));
    ui_timeout_ms=p->ui_max_ms_to_wait;

    // control word shared by all the items; the transfer size is or-ed in below
    control_word=  0 // by now, let the transfer size to zero
                 |(def_dma_burst_src(def_dma_burst_size_32))
                 |(def_dma_burst_dst(def_dma_burst_size_32))
                 |(def_dma_width_src(width))
                 |(def_dma_width_dst(width))
                 | def_dma_increment_src
                 | def_dma_increment_dst
                 ;
    // final item: remaining elements, and it is the one generating the interrupt
    control_word_spare=control_word;
    control_word_spare|=  ui_spare_linked_list_size | def_dma_generate_interrupt;
    // full items: one whole chunk each
    control_word |=  ui_num_elements_per_line;

    // bytes consumed from the source by one full chunk
    chunk_width=uc_num_bytes_for_width[width]*ui_num_elements_per_line;

    // build the chain of full chunks
    {
        unsigned int i;
        for (i=0;i<ui_num_of_full_linked_list_required;i++)
        {
            // item i describes chunk i (the index was missing in the original,
            // so every chunk overwrote the same item and the chain was never filled)
            p_ll=&linked_list[i];
            p_ll->DstAddr=dst;
            p_ll->SrcAddr=src;
            src+=chunk_width;
            if (uc_continuous_copy)
            {
                dst+=chunk_width;
            }
            else
            {
                dst+=p->ui_dst_num_bytes_per_line;
            }
            p_ll->NextLLI=(uint32_t)&linked_list[i+1]; // point to the next lli
            p_ll->Control=control_word;
        }
    }
    // set the final element of the list
    {
        p_ll=&linked_list[ui_num_of_full_linked_list_required];
        p_ll->DstAddr=dst;
        p_ll->SrcAddr=src;
        p_ll->NextLLI=0; // 0 means this is the last element...
        p_ll->Control=control_word_spare;
    }

    cfg.ChannelNum=enum_dma_channel_reserved_fast_memcpy;
    // NOTE(review): if GPDMA_Setup() programs the channel for an initial
    // transfer of this size before the chain is fetched, a request shorter
    // than one chunk would copy more than requested -- confirm against the driver.
    cfg.TransferSize=ui_num_elements_per_line;
    cfg.TransferWidth=width;
    cfg.SrcMemAddr=p->src_address;
    cfg.DstMemAddr=p->dst_address;
    cfg.TransferType=GPDMA_TRANSFERTYPE_M2M_CONTROLLER_DMA;
    cfg.DMALLI=(uint32_t)&linked_list[0];
    // setup the transfer
    if (GPDMA_Setup(&cfg)!=SUCCESS)
    {
        return enum_dma_fast_memcpy_retcode_unable_to_start;
    }
    {
        uint32_t ui_expected_dma_count;
        uint32_t ui_loop_before_delay;
        // starting from now, we expect two more interrupts to come from the dma: the start of the transfer, and the end of the transfer
        // (the counter is presumably incremented by the DMA interrupt handler, which is not part of this routine)
        ui_expected_dma_count=ui_count_tc_dma_channel_reserved_fast_memcpy+2;
        // start the transfer
        GPDMA_ChannelCmd(enum_dma_channel_reserved_fast_memcpy,ENABLE);
        ui_loop_before_delay=0;
        // wait until the end of the transfer
        while (ui_expected_dma_count!=ui_count_tc_dma_channel_reserved_fast_memcpy)
        {
            // busy-poll for the first 30000 iterations, then spend the
            // timeout budget one delay call at a time
            if (ui_loop_before_delay<30000)
            {
                ui_loop_before_delay++;
            }
            else
            {
                if (ui_timeout_ms)
                {
                    if (p->p_delay)
                    {
                        p->p_delay();
                    }
                    else
                    {
                        // no user delay routine: use the default wait
                        TIM_Waitus(1000);
                    }
                    ui_timeout_ms--;
                }
                // timeout?
                else
                {
                    retcode=enum_dma_fast_memcpy_retcode_timeout;
                    break;
                }
            }
        }
        // clear the channel
        GPDMA_ChannelCmd(enum_dma_channel_reserved_fast_memcpy,DISABLE);
    }
    return retcode;
}
void _custom_copy_framebuffer(int LayerIndex, int IndexSrc, int IndexDst)
{
unsigned long BufferSize, AddrSrc, AddrDst;
// int i;
// int BITSPERPIXEL=LCD_GetBitsPerPixelEx(i);
//
// Calculate the size of one frame buffer
//
BufferSize = (XSIZE_PHYS * YSIZE_PHYS* PIXEL_WIDTH);

//
// Calculate source- and destination address
//
Addrsrc=VRAM_ADDR_PHYS + BufferSize  * IndexSrc;
AddrDst = VRAM_ADDR_PHYS + BufferSize  * IndexDst;
// set the current frame buffer index
ui_framebuffer_idx=IndexDst;

{
type_dma_fast_memcpy_params dma_fast;

dma_fast.src_address=(uint32_t)(AddrSrc);
dma_fast.dst_address=(uint32_t)(AddrDst);
dma_fast.width=enum_dma_fast_memcpy_width_word;
dma_fast.num_of_elements=BufferSize/4;
dma_fast.ui_num_elements_per_line=0; // if 0, the copy is continuous, else the src is continuous while the dst every ui_num_elements_per_line is increased by ui_dst_num_bytes_per_line
dma_fast.ui_dst_num_bytes_per_line=XSIZE_PHYS*4;// num bytes between lines in the destination buffer
dma_fast.ui_max_ms_to_wait=100;
dma_fast.p_delay=v_execute_1ms_delay;

v_do_dma_fast_memcpy(&dma_fast);
}


}

0 Kudos

623 Views
lpcware
NXP Employee
NXP Employee
Content originally posted in LPCWare by chetansd83 on Fri Jan 15 21:26:14 MST 2016
Oh thanks michele....

I have just tried the DMA linked list, but it is not working. There might be an error in my code.

Can you please share the related code...

Thanks
Chetan
0 Kudos

623 Views
lpcware
NXP Employee
NXP Employee
Content originally posted in LPCWare by michele sponchiado on Fri Jan 15 03:39:41 MST 2016
Maybe you should use linked lists for the DMA transfer (please check the documentation about the DMALLI field in the DMA configuration structure): I actually use them and they work very well. This way, the maximum DMA size you can transfer is extended.
Please note that when using linked lists, you'll receive two interrupts from the DMA controller: the first at the end of the very first chunk transferred, and the second at the end of the whole linked-list transfer.
0 Kudos

623 Views
lpcware
NXP Employee
NXP Employee
Content originally posted in LPCWare by chetansd83 on Sun Jan 03 23:04:59 MST 2016
Thanks for your reply..

Actually I am using emwin as graphic library. And also using multiple framebuffer in order to reduce/remove the tearing/flickering effects.

About Multiple frame buffer:
While the LCD controller is using frame buffer 1 to refresh the screen, my library uses frame buffer 2 to update the data. After the whole buffer has been updated by the library, the starting address of the frame buffer is changed during the VSYNC interrupt, and frame buffer 2 is then used by the LCD controller to update the screen.

https://www.segger.com/emwin-multiple-buffering.html

Please refer above link for more detail.

Thanks

Chetan
0 Kudos

623 Views
lpcware
NXP Employee
NXP Employee
Content originally posted in LPCWare by wmues on Fri Jan 01 06:01:43 MST 2016
I do not understand why you copy your frame buffer.

The idea of a frame buffer is to draw the content into the frame buffer, and then make this content visible by programming the LCD DMA start address.
0 Kudos