SDRAM performance at 120MHz vs 60MHz

キャンセル
次の結果を表示 
表示  限定  | 次の代わりに検索 
もしかして: 

SDRAM performance at 120MHz vs 60MHz

1,257件の閲覧回数
lpcware
NXP Employee
NXP Employee
Content originally posted in LPCWare by wella-tabor on Fri Jul 24 04:03:07 MST 2015
Hello,

I just ran some "not so perfect" tests to compare SDRAM performance when clocked at 120 MHz and at 60 MHz. The CPU (LPC1788) ran at 120 MHz; the SDRAM is an MT48LC4M32B2B5 and the NOR flash is an S29GL256P.

The numbers are in ms.
SDRAM, NOR @120MHz
Overall 13782
Burst read16 1957
Nor burst read 10066


SDRAM, NOR @60MHz
Overall 15576
Burst read16 2236
Nor burst read 11744


My interest was to find out the speed impact of running the memory at 60 MHz. I could leave it at 120 MHz, but that is not a valid frequency (according to the datasheet, the maximum is ~80 MHz). The slowdown ratio is only about 1.13–1.17 (e.g. 15576 / 13782 ≈ 1.13), so the memory is far from two times faster at 120 MHz. I could speed the memory up to 80 MHz, but that would require lowering the CPU clock to 80 MHz as well.

Why I am posting this?
Because nobody has done it yet — or if somebody has, I did not find it. Until this test I was a clock hunter :).

BR
Martin




#include <stdint.h>
// Compile without optimisation
/*
 * Crude throughput benchmark for an external SDRAM and an external NOR flash.
 *
 * Three phases are timed with the StopWatch_* helpers and each duration is
 * printed in milliseconds through the debug_print_* helpers:
 *   "Overall "        - SDRAM fill, verify, clear, strided and mixed accesses
 *   "Burst read16 "   - sequential 16-bit reads over the whole SDRAM
 *   "Nor burst read " - sequential 8-, 16- and 32-bit reads over the NOR flash
 *
 * Parameters:
 *   sdram_base_address  start address of the SDRAM region (it is overwritten)
 *   sdram_size          size of the SDRAM region in bytes
 *   nor_base_address    start address of the NOR region (it is only read)
 *   nor_size            size of the NOR region in bytes
 *
 * Returns 0 when every phase completed, or -1 as soon as an SDRAM read-back
 * does not match the pattern that was written.
 *
 * All memory accesses go through volatile pointers so that the loops are
 * still executed when the file is built with optimisation enabled.
 */
int memperf(uint32_t sdram_base_address, uint32_t sdram_size,
            uint32_t nor_base_address, uint32_t nor_size)
{
    StopWatch_Init();

    /* Views of the SDRAM at the three access widths. */
    volatile uint8_t *const ram8 = (volatile uint8_t *)(uintptr_t)sdram_base_address;
    volatile uint16_t *const ram16 = (volatile uint16_t *)(uintptr_t)sdram_base_address;
    volatile uint32_t *const ram32 = (volatile uint32_t *)(uintptr_t)sdram_base_address;

    /* Read-only views of the NOR flash at the three access widths. */
    const volatile uint8_t *const rom8 = (const volatile uint8_t *)(uintptr_t)nor_base_address;
    const volatile uint16_t *const rom16 = (const volatile uint16_t *)(uintptr_t)nor_base_address;
    const volatile uint32_t *const rom32 = (const volatile uint32_t *)(uintptr_t)nor_base_address;

    /* Element counts, computed once instead of in every loop condition. */
    const uint32_t ram_halfwords = (uint32_t)(sdram_size / sizeof(uint16_t));
    const uint32_t ram_words = (uint32_t)(sdram_size / sizeof(uint32_t));
    const uint32_t rom_halfwords = (uint32_t)(nor_size / sizeof(uint16_t));
    const uint32_t rom_words = (uint32_t)(nor_size / sizeof(uint32_t));

    uint32_t start;
    uint32_t elapsed;
    uint32_t elapsed_ms;
    uint32_t count;

    /* ------------------------------------------------------------------ */
    /* Phase 1: SDRAM write / read mix ("Overall")                        */
    /* ------------------------------------------------------------------ */
    start = StopWatch_Start();

    /* Sequential fill, byte wide. */
    for (count = 0; count < sdram_size; count++)
    {
        ram8[count] = 0xAA;
    }

    /* Sequential fill, halfword wide (overwritten by the word fill below). */
    for (count = 0; count < ram_halfwords; count++)
    {
        ram16[count] = 0x5555;
    }

    /* Sequential fill, word wide; this is the pattern the reads verify. */
    for (count = 0; count < ram_words; count++)
    {
        ram32[count] = 0xAAAAAAAAUL;
    }

    /* Sequential verify at each width; any mismatch is reported as -1. */
    for (count = 0; count < sdram_size; count++)
    {
        if (ram8[count] != 0xAA)
        {
            return -1;
        }
    }

    for (count = 0; count < ram_halfwords; count++)
    {
        if (ram16[count] != 0xAAAA)
        {
            return -1;
        }
    }

    for (count = 0; count < ram_words; count++)
    {
        if (ram32[count] != 0xAAAAAAAAUL)
        {
            return -1;
        }
    }

    /* Clear the whole region. */
    for (count = 0; count < ram_words; count++)
    {
        ram32[count] = 0;
    }

    /* Strided accesses: only every STRIDE-th element of each width is touched. */
#define STRIDE 16

    for (count = 0; count < sdram_size; count += STRIDE)
    {
        ram8[count] = 0xAA;
    }

    for (count = 0; count < ram_halfwords; count += STRIDE)
    {
        ram16[count] = 0xAAAA;
    }

    for (count = 0; count < ram_words; count += STRIDE)
    {
        ram32[count] = 0xAAAAAAAAUL;
    }

    /* Strided verify of the elements written above. */
    for (count = 0; count < sdram_size; count += STRIDE)
    {
        if (ram8[count] != 0xAA)
        {
            return -1;
        }
    }

    for (count = 0; count < ram_halfwords; count += STRIDE)
    {
        if (ram16[count] != 0xAAAA)
        {
            return -1;
        }
    }

    for (count = 0; count < ram_words; count += STRIDE)
    {
        if (ram32[count] != 0xAAAAAAAAUL)
        {
            return -1;
        }
    }

    /* Mixed accesses: write at the front, then copy from the mirrored
     * position at the end of the region back to the front. */
    for (count = 0; count < ram_words / 2; count++)
    {
        ram32[count] = 0xAAAAAAAAUL;
        ram32[count] = ram32[ram_words - count - 1];
    }

    elapsed = StopWatch_Elapsed(start);
    elapsed_ms = StopWatch_TicksToMs(elapsed);
    debug_print_text("Overall ");
    debug_print_number(elapsed_ms);
    debug_print_newline(1);

    /* ------------------------------------------------------------------ */
    /* Phase 2: sequential 16-bit SDRAM reads ("Burst read16")            */
    /* ------------------------------------------------------------------ */
    start = StopWatch_Start();
    for (count = 0; count < ram_halfwords; count++)
    {
        volatile uint16_t a;
        a = ram16[count];
    }
    elapsed = StopWatch_Elapsed(start);
    elapsed_ms = StopWatch_TicksToMs(elapsed);
    debug_print_text("Burst read16 ");
    debug_print_number(elapsed_ms);
    debug_print_newline(1);

    /* ------------------------------------------------------------------ */
    /* Phase 3: sequential NOR reads at all widths ("Nor burst read")     */
    /* Every loop is bounded by the NOR size, not by the SDRAM size.      */
    /* ------------------------------------------------------------------ */
    start = StopWatch_Start();
    for (count = 0; count < nor_size; count++)
    {
        volatile uint32_t a;
        a = rom8[count];
    }

    for (count = 0; count < rom_halfwords; count++)
    {
        volatile uint32_t a;
        a = rom16[count];
    }

    for (count = 0; count < rom_words; count++)
    {
        volatile uint32_t a;
        a = rom32[count];
    }
    elapsed = StopWatch_Elapsed(start);
    elapsed_ms = StopWatch_TicksToMs(elapsed);
    debug_print_text("Nor burst read ");
    debug_print_number(elapsed_ms);
    debug_print_newline(1);

    return 0;
}
ラベル(1)
0 件の賞賛
返信
2 返答(返信)

1,075件の閲覧回数
lpcware
NXP Employee
NXP Employee
Content originally posted in LPCWare by wella-tabor on Tue Jul 28 22:47:18 MST 2015
Hi,

sorry for late reply.

The values from the structs are written directly to the EMC registers.
Here you are:

/*
 * Pin multiplexing table for the external memory controller (EMC) signals.
 * Every entry selects IOCON_FUNC1 with fast slew enabled.
 * NOTE(review): entries look like { port, pin, IOCON mode word } - confirm
 * against the PINMUX_GRP_T definition.
 */
const PINMUX_GRP_T bsp_pin_configuration[] =
{
    /* SDRAM control signals */
    { 2, 16, (IOCON_FUNC1 | IOCON_FASTSLEW_EN) }, /* EMC_CAS   */
    { 2, 17, (IOCON_FUNC1 | IOCON_FASTSLEW_EN) }, /* EMC_RAS   */
    { 2, 18, (IOCON_FUNC1 | IOCON_FASTSLEW_EN) }, /* EMC_CLK0  */
    { 2, 20, (IOCON_FUNC1 | IOCON_FASTSLEW_EN) }, /* EMC_DYCS0 */
    { 2, 24, (IOCON_FUNC1 | IOCON_FASTSLEW_EN) }, /* EMC_CKE0  */
    { 2, 28, (IOCON_FUNC1 | IOCON_FASTSLEW_EN) }, /* DQM0      */
    { 2, 29, (IOCON_FUNC1 | IOCON_FASTSLEW_EN) }, /* DQM1      */
    { 2, 30, (IOCON_FUNC1 | IOCON_FASTSLEW_EN) }, /* DQM2      */
    { 2, 31, (IOCON_FUNC1 | IOCON_FASTSLEW_EN) }, /* DQM3      */

    /* EMC data lines 0 - 31 */
    { 3, 0, (IOCON_FUNC1 | IOCON_FASTSLEW_EN) },
    { 3, 1, (IOCON_FUNC1 | IOCON_FASTSLEW_EN) },
    { 3, 2, (IOCON_FUNC1 | IOCON_FASTSLEW_EN) },
    { 3, 3, (IOCON_FUNC1 | IOCON_FASTSLEW_EN) },
    { 3, 4, (IOCON_FUNC1 | IOCON_FASTSLEW_EN) },
    { 3, 5, (IOCON_FUNC1 | IOCON_FASTSLEW_EN) },
    { 3, 6, (IOCON_FUNC1 | IOCON_FASTSLEW_EN) },
    { 3, 7, (IOCON_FUNC1 | IOCON_FASTSLEW_EN) },
    { 3, 8, (IOCON_FUNC1 | IOCON_FASTSLEW_EN) },
    { 3, 9, (IOCON_FUNC1 | IOCON_FASTSLEW_EN) },
    { 3, 10, (IOCON_FUNC1 | IOCON_FASTSLEW_EN) },
    { 3, 11, (IOCON_FUNC1 | IOCON_FASTSLEW_EN) },
    { 3, 12, (IOCON_FUNC1 | IOCON_FASTSLEW_EN) },
    { 3, 13, (IOCON_FUNC1 | IOCON_FASTSLEW_EN) },
    { 3, 14, (IOCON_FUNC1 | IOCON_FASTSLEW_EN) },
    { 3, 15, (IOCON_FUNC1 | IOCON_FASTSLEW_EN) },
    { 3, 16, (IOCON_FUNC1 | IOCON_FASTSLEW_EN) },
    { 3, 17, (IOCON_FUNC1 | IOCON_FASTSLEW_EN) },
    { 3, 18, (IOCON_FUNC1 | IOCON_FASTSLEW_EN) },
    { 3, 19, (IOCON_FUNC1 | IOCON_FASTSLEW_EN) },
    { 3, 20, (IOCON_FUNC1 | IOCON_FASTSLEW_EN) },
    { 3, 21, (IOCON_FUNC1 | IOCON_FASTSLEW_EN) },
    { 3, 22, (IOCON_FUNC1 | IOCON_FASTSLEW_EN) },
    { 3, 23, (IOCON_FUNC1 | IOCON_FASTSLEW_EN) },
    { 3, 24, (IOCON_FUNC1 | IOCON_FASTSLEW_EN) },
    { 3, 25, (IOCON_FUNC1 | IOCON_FASTSLEW_EN) },
    { 3, 26, (IOCON_FUNC1 | IOCON_FASTSLEW_EN) },
    { 3, 27, (IOCON_FUNC1 | IOCON_FASTSLEW_EN) },
    { 3, 28, (IOCON_FUNC1 | IOCON_FASTSLEW_EN) },
    { 3, 29, (IOCON_FUNC1 | IOCON_FASTSLEW_EN) },
    { 3, 30, (IOCON_FUNC1 | IOCON_FASTSLEW_EN) },
    { 3, 31, (IOCON_FUNC1 | IOCON_FASTSLEW_EN) },

    /* EMC address lines 0 - 23, plus { 5, 0 }, noted by the original
     * author as an extra LPC1788 address pin */
    { 4, 0, (IOCON_FUNC1 | IOCON_FASTSLEW_EN) },
    { 4, 1, (IOCON_FUNC1 | IOCON_FASTSLEW_EN) },
    { 4, 2, (IOCON_FUNC1 | IOCON_FASTSLEW_EN) },
    { 4, 3, (IOCON_FUNC1 | IOCON_FASTSLEW_EN) },
    { 4, 4, (IOCON_FUNC1 | IOCON_FASTSLEW_EN) },
    { 4, 5, (IOCON_FUNC1 | IOCON_FASTSLEW_EN) },
    { 4, 6, (IOCON_FUNC1 | IOCON_FASTSLEW_EN) },
    { 4, 7, (IOCON_FUNC1 | IOCON_FASTSLEW_EN) },
    { 4, 8, (IOCON_FUNC1 | IOCON_FASTSLEW_EN) },
    { 4, 9, (IOCON_FUNC1 | IOCON_FASTSLEW_EN) },
    { 4, 10, (IOCON_FUNC1 | IOCON_FASTSLEW_EN) },
    { 4, 11, (IOCON_FUNC1 | IOCON_FASTSLEW_EN) },
    { 4, 12, (IOCON_FUNC1 | IOCON_FASTSLEW_EN) },
    { 4, 13, (IOCON_FUNC1 | IOCON_FASTSLEW_EN) },
    { 4, 14, (IOCON_FUNC1 | IOCON_FASTSLEW_EN) },
    { 4, 15, (IOCON_FUNC1 | IOCON_FASTSLEW_EN) },
    { 4, 16, (IOCON_FUNC1 | IOCON_FASTSLEW_EN) },
    { 4, 17, (IOCON_FUNC1 | IOCON_FASTSLEW_EN) },
    { 4, 18, (IOCON_FUNC1 | IOCON_FASTSLEW_EN) },
    { 4, 19, (IOCON_FUNC1 | IOCON_FASTSLEW_EN) },
    { 4, 20, (IOCON_FUNC1 | IOCON_FASTSLEW_EN) },
    { 4, 21, (IOCON_FUNC1 | IOCON_FASTSLEW_EN) },
    { 4, 22, (IOCON_FUNC1 | IOCON_FASTSLEW_EN) },
    { 4, 23, (IOCON_FUNC1 | IOCON_FASTSLEW_EN) },
    { 5, 0, (IOCON_FUNC1 | IOCON_FASTSLEW_EN) },

    /* Static memory control signals */
    { 4, 24, (IOCON_FUNC1 | IOCON_FASTSLEW_EN) }, /* EMC_OE  */
    { 4, 25, (IOCON_FUNC1 | IOCON_FASTSLEW_EN) }, /* EMC_WE  */
    { 4, 30, (IOCON_FUNC1 | IOCON_FASTSLEW_EN) }, /* EMC_CS0 */

};

/*
 * Some delays taken from
 * https://www.lpcware.com/content/forum/lpc177x-emc-and-sdram-conundrum
 */
/*
 * EMC dynamic-memory settings for the MT48LC4M32B2B5 SDRAM with a 120 MHz
 * EMC clock. One EMC cycle is 8.33 ns at this frequency; the trailing note
 * on each timing field gives the datasheet requirement and how the register
 * value maps to EMCCLK cycles.
 */
const SDRAM_CONFIG SDRAM_MT48LC4M32B2B5_at_120MHz =
{
    .baseAddress = EXTERNAL_SDRAM_ADDRESS,
    .size = EXTERNAL_SDRAM_SIZE,
    .CAS = 3,
    .RAS = 3, /* 20 ns ACTIVE-to-READ or WRITE delay (tRCD), in EMCCLK cycles */

    /* Note: for 32 bit wide chip selects data is transferred to and from
     * dynamic memory in SDRAM bursts of four. For 16 bit wide chip selects
     * SDRAM bursts of eight are used. */
    .SDRAMMode = 0x32 << 12, /* SDRAMWriteBurstMode */
    .SDRAMArchitecture = 0x00004500, /* SDRAMArchitecture */

    /* Timings in EMC cycles, 1 cycle = 8.33 ns */
    .tRP = 2,   /* 20 ns tRP,                          (regval + 1) * EMCCLK */
    .tRAS = 5,  /* 42 ns tRAS,                         (regval + 1) * EMCCLK */
    .tSREX = 8, /* 70 ns tSREX; devices without this parameter use the same
                 * value as tXSR,                      (regval + 1) * EMCCLK */
    .tAPR = 0,  /* 20 ns tAPR; not in the datasheet, if it fails use the
                 * tRCD value,                         (regval + 1) * EMCCLK */
    .tDAL = 5,  /* 5 tclk while CL (CAS) = 3, tDAL,    (regval + 0) * EMCCLK */
    .tWR = 1,   /* 1 tclk + 7 ns tWR,                  (regval + 1) * EMCCLK */
    .tRC = 8,   /* 70 ns tRC,                          (regval + 1) * EMCCLK */
    .tRFC = 8,  /* 70 ns tRFC,                         (regval + 1) * EMCCLK */
    .tXSR = 8,  /* 70 ns tXSR,                         (regval + 1) * EMCCLK */
    .tRRD = 1,  /* 15 ns tRRD,                         (regval + 1) * EMCCLK */
    .tMRD = 1,  /* 2 tclk tMRD,                        (regval + 1) * EMCCLK */

    /* Refresh: 4096 rows in 64 ms means one refresh every 15.625 us.
     * The register counts in units of 16 EMCCLK cycles, so
     * 117 * 16 * 8.33 ns = 15.6 us. The previous value of 119 gave
     * 15.87 us, which is longer than the 15.625 us limit. */
    .tREF = 117,
    .buffer = true,
    .write_protect = false,
    .delays = (8 << 8) | (8 << 0),
    .read_strategy = 1,
};

/*
 * EMC dynamic-memory settings for the MT48LC4M32B2B5 SDRAM with a 60 MHz
 * EMC clock. One EMC cycle is 16.67 ns at this frequency; the trailing note
 * on each timing field gives the datasheet requirement and how the register
 * value maps to EMCCLK cycles.
 */
const SDRAM_CONFIG SDRAM_MT48LC4M32B2B5_at_60MHz =
{
    .baseAddress = EXTERNAL_SDRAM_ADDRESS,
    .size = EXTERNAL_SDRAM_SIZE,
    .CAS = 2,
    .RAS = 2, /* 20 ns ACTIVE-to-READ or WRITE delay (tRCD), in EMCCLK cycles */

    /* Note: for 32 bit wide chip selects data is transferred to and from
     * dynamic memory in SDRAM bursts of four. For 16 bit wide chip selects
     * SDRAM bursts of eight are used. */
    .SDRAMMode = 0x22 << 12, /* SDRAMWriteBurstMode */
    .SDRAMArchitecture = 0x00004500, /* SDRAMArchitecture */

    /* Timings in EMC cycles, 1 cycle = 16.67 ns */
    .tRP = 1,   /* 20 ns tRP,                          (regval + 1) * EMCCLK */
    .tRAS = 2,  /* 42 ns tRAS,                         (regval + 1) * EMCCLK */
    .tSREX = 4, /* 70 ns tSREX; devices without this parameter use the same
                 * value as tXSR,                      (regval + 1) * EMCCLK */
    .tAPR = 0,  /* 20 ns tAPR; not in the datasheet, if it fails use the
                 * tRCD value,                         (regval + 1) * EMCCLK */
    .tDAL = 4,  /* 4 tclk while CL (CAS) = 2, tDAL,    (regval + 0) * EMCCLK */
    .tWR = 1,   /* 1 tclk + 7 ns tWR,                  (regval + 1) * EMCCLK */
    .tRC = 4,   /* 70 ns tRC,                          (regval + 1) * EMCCLK */
    .tRFC = 4,  /* 70 ns tRFC,                         (regval + 1) * EMCCLK */
    .tXSR = 4,  /* 70 ns tXSR,                         (regval + 1) * EMCCLK */
    .tRRD = 0,  /* 15 ns tRRD,                         (regval + 1) * EMCCLK */
    .tMRD = 1,  /* 2 tclk tMRD,                        (regval + 1) * EMCCLK */

    /* Refresh: 4096 rows in 64 ms means one refresh every 15.625 us.
     * The register value n gives 16n EMCCLK cycles between SDRAM refresh
     * cycles. */
    .tREF = 58,
    .buffer = true,
    .write_protect = false,
    .delays = (31 << 16) | (16 << 8) | (0 << 0),
    .read_strategy = 0,
};

/*
 * EMC static-memory settings for the S29GL256P NOR flash with a 120 MHz
 * EMC clock. One EMC cycle is 8.33 ns at this frequency; the trailing note
 * on each wait field gives the datasheet requirement and how the register
 * value maps to EMCCLK cycles.
 */
const NOR_FLASH_CONFIG NOR_FLASH_S29GL256P_at_120MHz =
{
    .baseAddress = EXTERNAL_FLASH_ADDRESS,
    .size = EXTERNAL_FLASH_SIZE,
    .CS_active_high = false,
    .cfi_id = (0x7E << 16) | (0x22 << 8) | (0x01 << 0),

    /* Wait states in EMC cycles, 1 cycle = 8.33 ns */
    .WaitOen = 3,  /* 25 ns tOE,                 (regval + 0) * EMCCLK */
    .WaitRd = 13,  /* 110 ns tCE,                (regval + 1) * EMCCLK */
    .WaitPage = 2, /* 25 ns tPACC,               (regval + 1) * EMCCLK */
    .WaitWen = 0,  /* 0 ns tCS,                  (regval + 1) * EMCCLK */
    .WaitWr = 12,  /* 110 ns tWC,                (regval + 2) * EMCCLK */
    .WaitTurn = 7, /* value from other projects, (regval + 1) * EMCCLK */

    /* Data bus width in bits */
    .bit_width = 16,
    .page_mode = true,

    /* From the PL172 datasheet: the PB bit influences the WE signal. When
     * the PB bit is cleared, the WE signal is never active. When the PB bit
     * is set, the WE signal is generated. */
    .byte_lane_state_low = true, /* MUST be true for 16 bit devices */
    .extended_wait = false,
    .buffer = true,
    .write_protect = true,
    .shift_mode = false,
};

/*
 * EMC static-memory settings for the S29GL256P NOR flash with a 60 MHz
 * EMC clock. One EMC cycle is 16.67 ns at this frequency; the trailing note
 * on each wait field gives the datasheet requirement in ns and how the
 * register value maps to EMCCLK cycles.
 */
const NOR_FLASH_CONFIG NOR_FLASH_S29GL256P_at_60MHz =
{
    .baseAddress = EXTERNAL_FLASH_ADDRESS,
    .size = EXTERNAL_FLASH_SIZE,
    .CS_active_high = false,
    .cfi_id = (0x7E << 16) | (0x22 << 8) | (0x01 << 0),

    /* Wait states in EMC cycles, 1 cycle = 16.67 ns */
    .WaitOen = 2,  /* 25 ns tOE,                 (regval + 0) * EMCCLK */
    .WaitRd = 6,   /* 110 ns tCE,                (regval + 1) * EMCCLK */
    .WaitPage = 1, /* 25 ns tPACC,               (regval + 1) * EMCCLK */
    .WaitWen = 0,  /* 0 ns tCS,                  (regval + 1) * EMCCLK */
    .WaitWr = 5,   /* 110 ns tWC,                (regval + 2) * EMCCLK */
    .WaitTurn = 7, /* value from other projects, (regval + 1) * EMCCLK */

    /* Data bus width in bits */
    .bit_width = 16,
    .page_mode = true,

    /* From the PL172 datasheet: the PB bit influences the WE signal. When
     * the PB bit is cleared, the WE signal is never active. When the PB bit
     * is set, the WE signal is generated. */
    .byte_lane_state_low = true, /* MUST be true for 16 bit devices */
    .extended_wait = false,
    .buffer = true,
    .write_protect = true,
    .shift_mode = false,
};
0 件の賞賛
返信

1,075件の閲覧回数
lpcware
NXP Employee
NXP Employee
Content originally posted in LPCWare by mc on Fri Jul 24 07:51:19 MST 2015
Hi Martin,
Yes, a 120 MHz EMC clock is out of specification. Just checking: in your experience, when you ran the EMC at 120 MHz, what was the value of CAS?
0 件の賞賛
返信