<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>i.MX ProcessorsのトピックRe: imx6 memory bandwidth problem</title>
    <link>https://community.nxp.com/t5/i-MX-Processors/imx6-memory-bandwidth-problem/m-p/249802#M22533</link>
    <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;You may try the (GNU asm) example below for copying (it uses NEON instructions) :&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;/* void transfer_eight_words_vld(int* dst, const int *src) */ &lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;transfer_eight_words_vld:&lt;/P&gt;&lt;P&gt;&amp;nbsp; vld1.64 {d0,d1,d2,d3}, [r1]&lt;/P&gt;&lt;P&gt;&amp;nbsp; vst1.64 {d0,d1,d2,d3}, [r0]&lt;/P&gt;&lt;P&gt;&amp;nbsp; mov pc, lr&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; /* return */&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Note, there is no need for storing / saving general registers (they are not used) &lt;BR /&gt;as it is usually required for C functions. &lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Also, before using it, NEON must be enabled.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;/* Enable NEON&amp;nbsp; */&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;/* --- enable NEON VFP */&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp; ldr&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; r0, =ARM_CACR_CONFIG&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; /* ; r0 = CACR configuration */&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp; mcr&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; p15, 0, r0, c1, c0, 2&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; /* ; update CACR */ &lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp; isb&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; &lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;/* --- Enable VFP */&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp; fmrx&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; r0, 
fpexc&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp; orr&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; r0, r0, #VFP_NEON_ENABLE&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp; fmxr&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; fpexc, r0&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;/* --- Set VFP to runfast mode */&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp; fmrx&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; r0, fpscr&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp; orr&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; r0, r0, #VFP_RUN_FAST&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp; fmxr&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; fpscr, r0&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
    <pubDate>Wed, 24 Jul 2013 02:20:47 GMT</pubDate>
    <dc:creator>Yuri</dc:creator>
    <dc:date>2013-07-24T02:20:47Z</dc:date>
    <item>
      <title>imx6 memory bandwidth problem</title>
      <link>https://community.nxp.com/t5/i-MX-Processors/imx6-memory-bandwidth-problem/m-p/249799#M22530</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;Dear all,&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;The performances of my video processing project by imx6 seem to be limited by the memory bandwidth.&lt;/P&gt;&lt;P&gt;I have benchmarked the memory bandwidth and the result is just 10% of the theoretical value.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Configuration 1:&lt;/P&gt;&lt;P&gt;Freescale SABRE board (MCIMX6Q-SDB)&lt;/P&gt;&lt;P&gt;Memory. 1 GB DDR3 SDRAM up to 533 MHz (1066 MTPS)&lt;/P&gt;&lt;P&gt;Ubuntu L3.0.35_1.1.0_121218&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Configuration 2:&lt;/P&gt;&lt;P&gt;BoundaryDevices Nitrogen6X&lt;/P&gt;&lt;P&gt;Memory 1GBytes of 64-bit wide DDR3 @ 532MHz&lt;/P&gt;&lt;P&gt;Ubuntu L3.0.35_4.0.0_UBUNTU_RFS / or LTIB 4.0.0 / or Original Demo SD&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;STRONG&gt;Theoretical memory bandwidth:&lt;/STRONG&gt;&lt;/P&gt;&lt;P&gt;DDR3 1066 x 64 bits = &amp;gt; 1066*8 = &lt;STRONG&gt;8528 MB/s&lt;/STRONG&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;STRONG&gt;Measured memory bandwidth:&lt;/STRONG&gt;&lt;/P&gt;&lt;P&gt;I use the &lt;STRONG&gt;mbw&lt;/STRONG&gt; tool.&amp;nbsp; I have checked the source, and the memcpy method is the 1 (0 is loop 'for', and 2 is a small mem block cached).&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;# mbw -t1 100&lt;/P&gt;&lt;P&gt;AVG&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; Method: DUMB&amp;nbsp;&amp;nbsp;&amp;nbsp; Elapsed: 0.24973&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; MiB: 100.00000&amp;nbsp; Copy: 400.426 MiB/s&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;This 'memcpy' reads and writes the memory, and the &lt;STRONG&gt;real memory bandwidth is 2x400 = 800 MB/s&lt;/STRONG&gt;, just 10% of the theoretical value.&lt;/P&gt;&lt;P&gt;I made my own memory test project, and the results are equivalent.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;I guess the display used part of memory bandwidth, but where is the other 90% memory bandwidth 
?&lt;/P&gt;&lt;P&gt;If this result is normal, this limitation stops my project.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Are there any ideas to help me go on?&lt;/P&gt;&lt;P&gt;Thank you very much.&lt;/P&gt;&lt;P&gt;&lt;SPAN class="mce_paste_marker"&gt;&lt;BR /&gt;&lt;/SPAN&gt;&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Tue, 02 Jul 2013 12:13:04 GMT</pubDate>
      <guid>https://community.nxp.com/t5/i-MX-Processors/imx6-memory-bandwidth-problem/m-p/249799#M22530</guid>
      <dc:creator>ericb</dc:creator>
      <dc:date>2013-07-02T12:13:04Z</dc:date>
    </item>
    <item>
      <title>Re: imx6 memory bandwidth problem</title>
      <link>https://community.nxp.com/t5/i-MX-Processors/imx6-memory-bandwidth-problem/m-p/249800#M22531</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;1.&lt;BR /&gt;As for "&lt;SPAN style="font-style: inherit; font-family: inherit;"&gt;&lt;STRONG&gt;Theoretical memory bandwidth:&lt;/STRONG&gt;&lt;/SPAN&gt;&lt;/P&gt;&lt;P style="font-family: 'Helvetica Neue', Helvetica, Arial, 'Lucida Grande', sans-serif; background-color: #ffffff;"&gt;DDR3 1066 x 64 bits = &amp;gt; 1066*8 = &lt;SPAN style="font-style: inherit; font-family: inherit;"&gt;&lt;STRONG&gt;8528 MB/s".&lt;BR /&gt;&lt;/STRONG&gt;&lt;/SPAN&gt;&lt;BR /&gt;Such calculations, assuming that data are provided at every clock edge, are very "theoretical" :-).&lt;BR /&gt;Real DDR data access burst needs some preparation stage : bus arbitration, RAS phase, CAS phase,&lt;BR /&gt;CAS Latency and only after that we can get &lt;SPAN style="font-family: 'Helvetica Neue', Helvetica, Arial, 'Lucida Grande', sans-serif; background-color: #ffffff;"&gt;data&amp;nbsp; at every clock edge. So, let's divide &lt;/SPAN&gt; 8528 MB/s by 2.&lt;/P&gt;&lt;P style="font-family: 'Helvetica Neue', Helvetica, Arial, 'Lucida Grande', sans-serif; background-color: #ffffff;"&gt;&lt;BR /&gt;2.&amp;nbsp; &lt;/P&gt;&lt;P&gt;Next, screen refresh for high resolution modes may require significant bus throughput.&lt;BR /&gt;Say, for resolution 1600 x 1200, 32-bit per pixel, 60 Hz refresh :&amp;nbsp; ~ 460 MB / sec = 1600 x 1200 x 4bytes x 60&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;3.&lt;BR /&gt;If VPU codecs are applied it is needed at least to read frame buffer, encode / decode it and write back,&lt;BR /&gt;so for 1920x1088@30fps : ~ 500 MB/sec = 1920 x 1080 x 4bytes x 30 x 2&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;4.&lt;/P&gt;&lt;P&gt;You wrote about memcpy tests : this is a question if memcpy is maximally optimized for ARMv7. &lt;BR /&gt;Say - if it&amp;nbsp; uses NEON instructions. &lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Wed, 03 Jul 2013 02:20:48 GMT</pubDate>
      <guid>https://community.nxp.com/t5/i-MX-Processors/imx6-memory-bandwidth-problem/m-p/249800#M22531</guid>
      <dc:creator>Yuri</dc:creator>
      <dc:date>2013-07-03T02:20:48Z</dc:date>
    </item>
    <item>
      <title>Re: imx6 memory bandwidth problem</title>
      <link>https://community.nxp.com/t5/i-MX-Processors/imx6-memory-bandwidth-problem/m-p/249801#M22532</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;Thank you for your answers.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;3.&lt;/P&gt;&lt;P&gt;I do nothing during the test. The VPU is not used.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;4.&lt;/P&gt;&lt;P&gt;I use standard memcpy provided by gcc.&lt;/P&gt;&lt;P&gt;The project is compiled with : -mfloat-abi=softfp -mfpu=neon -mcpu=cortex-a9 -march=armv7-a -fprefetch-loop-arrays&lt;/P&gt;&lt;P&gt;I have attached the source code of my MemTest project (mbw code based).&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;I have added multi-thread for the test, and it improves the bandwidth (R/W):&lt;/P&gt;&lt;P&gt;1 thread&amp;nbsp; : 407.559 MB/s&lt;/P&gt;&lt;P&gt;2 threads : 605.705 MB/s&lt;/P&gt;&lt;P&gt;3 threads : 642.874 MB/s&lt;/P&gt;&lt;P&gt;4 threads : 636.358 MB/s&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;The limit seems to be (with the display) 640*2 + 460 = 1740 MB/s&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;a.&lt;/P&gt;&lt;P&gt;How can I check the DDR frequency ?&lt;/P&gt;&lt;P&gt;- dmidecode is not implemented on arm&lt;/P&gt;&lt;P&gt;- lshw has an error&lt;/P&gt;&lt;P&gt;# lshw -short -C memory&lt;/P&gt;&lt;P&gt;&amp;gt; Unhandled fault: external abort on non-linefetch (0x018) at 0x2b3c0000&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;b.&lt;/P&gt;&lt;P&gt;Is it possible, for an application, to do a memory copy with a DMA transfer ?&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN class="mce_paste_marker"&gt;&lt;BR /&gt;&lt;/SPAN&gt;&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Thu, 04 Jul 2013 12:20:59 GMT</pubDate>
      <guid>https://community.nxp.com/t5/i-MX-Processors/imx6-memory-bandwidth-problem/m-p/249801#M22532</guid>
      <dc:creator>ericb</dc:creator>
      <dc:date>2013-07-04T12:20:59Z</dc:date>
    </item>
    <item>
      <title>Re: imx6 memory bandwidth problem</title>
      <link>https://community.nxp.com/t5/i-MX-Processors/imx6-memory-bandwidth-problem/m-p/249802#M22533</link>
      <description>&lt;HTML&gt;&lt;HEAD&gt;&lt;/HEAD&gt;&lt;BODY&gt;&lt;P&gt;You may try the (GNU asm) example below for copying (it uses NEON instructions) :&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;/* void transfer_eight_words_vld(int* dst, const int *src) */ &lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;transfer_eight_words_vld:&lt;/P&gt;&lt;P&gt;&amp;nbsp; vld1.64 {d0,d1,d2,d3}, [r1]&lt;/P&gt;&lt;P&gt;&amp;nbsp; vst1.64 {d0,d1,d2,d3}, [r0]&lt;/P&gt;&lt;P&gt;&amp;nbsp; mov pc, lr&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; /* return */&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Note, there is no need for storing / saving general registers (they are not used) &lt;BR /&gt;as it is usually required for C functions. &lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Also, before using it, NEON must be enabled.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;/* Enable NEON&amp;nbsp; */&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;/* --- enable NEON VFP */&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp; ldr&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; r0, =ARM_CACR_CONFIG&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; /* ; r0 = CACR configuration */&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp; mcr&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; p15, 0, r0, c1, c0, 2&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; /* ; update CACR */ &lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp; isb&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; &lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;/* --- Enable VFP */&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp; fmrx&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; r0, 
fpexc&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp; orr&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; r0, r0, #VFP_NEON_ENABLE&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp; fmxr&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; fpexc, r0&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;/* --- Set VFP to runfast mode */&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp; fmrx&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; r0, fpscr&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp; orr&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; r0, r0, #VFP_RUN_FAST&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp; fmxr&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; fpscr, r0&lt;/P&gt;&lt;/BODY&gt;&lt;/HTML&gt;</description>
      <pubDate>Wed, 24 Jul 2013 02:20:47 GMT</pubDate>
      <guid>https://community.nxp.com/t5/i-MX-Processors/imx6-memory-bandwidth-problem/m-p/249802#M22533</guid>
      <dc:creator>Yuri</dc:creator>
      <dc:date>2013-07-24T02:20:47Z</dc:date>
    </item>
  </channel>
</rss>

