Content originally posted in LPCWare by wmues on Fri Jul 06 06:51:39 MST 2012
sebgonzo,
there is more than ONE delay line!
- There is one delay line to adjust the delay between the SDRAM clock and all outgoing signals.
- And there is another delay line to adjust the delay between the SDRAM clock and the SDRAM read data latch.
You have to adjust both lines.
You may use the following code fragments:
<code>
/* Convert a duration in nanoseconds into a number of clock cycles.
 * Only 32-bit shifts and multiplications are used, and the expression is
 * evaluated at run time because the clock frequencies may change while
 * the program is running.
 *
 * Valid input range: clock < 995 MHz, ns = 0..1000000 (1 ms).
 *
 * The computation is carried out in steps that each stay within 32 bits:
 *
 *   1. clock >> 4   Divide by 16 to leave headroom for step 2.
 *   2. * 69         Compensates for dividing by powers of two instead of
 *                   a decimal divisor. Overflows for clocks above 995 MHz
 *                   and makes the result up to about 0.5% too large.
 *   3. >> 20        Divide by 2^20 to leave headroom for step 4.
 *   4. * ns         Does not overflow for ns = 0..1000000.
 *   5. >> 12        Divide by 2^12; the value is now in clock cycles.
 *   6. + 1          Compensates for the truncation of the shifts.
 */
#define NS_2_CLKS(clk, ns) \
   (((((((clk) >> 4) * 69) >> 20) * (ns)) >> 12) + 1)

/* Nanoseconds -> EMC clock cycles (uses the current value of EMCClock). */
#define NS_2_EMC_CLKS(ns) NS_2_CLKS(EMCClock, ns)
/* Nanoseconds -> CPU clock cycles (uses the current value of SystemCoreClock). */
#define NS_2_CPU_CLKS(ns) NS_2_CLKS(SystemCoreClock, ns)
/**
 * Delay loop for short busy waits.
 *
 * The loop is counted as two clock cycles per iteration, so the requested
 * number of clocks is halved before looping. A request of 0 or 1 clocks
 * therefore executes no iteration at all.
 *
 * @param clocks  approximate number of clock cycles to wait
 */
__attribute__( ( always_inline ) ) static inline void wait_clocks( unsigned int clocks)
{
   // two clocks per loop iteration
   for (clocks >>= 1; clocks != 0; clocks--) {
      // Empty statement with a memory clobber: tells gcc not to remove this
      // loop. Spelled __asm__ (not asm) so that it also builds with strict
      // -std=c99 / -std=c11, where the plain asm keyword is not available.
      __asm__ __volatile__("" ::: "memory");
   }
}
/*----------------------------------------------------------------------------
  MPU Defines
 *----------------------------------------------------------------------------*/
// Region size codes, passed as the _SIZE argument of _RASR().
// As the table shows, code N selects a region of 2^(N+1) bytes
// (0x04 -> 32 bytes ... 0x1F -> 4 GBytes).
#define SIZE_32B    0x04
#define SIZE_64B    0x05
#define SIZE_128B   0x06
#define SIZE_256B   0x07
#define SIZE_512B   0x08
#define SIZE_1K     0x09
#define SIZE_2K     0x0A
#define SIZE_4K     0x0B
#define SIZE_8K     0x0C
#define SIZE_16K    0x0D
#define SIZE_32K    0x0E
#define SIZE_64K    0x0F
#define SIZE_128K   0x10
#define SIZE_256K   0x11
#define SIZE_512K   0x12
#define SIZE_1M     0x13
#define SIZE_2M     0x14
#define SIZE_4M     0x15
#define SIZE_8M     0x16
#define SIZE_16M    0x17
#define SIZE_32M    0x18
#define SIZE_64M    0x19
#define SIZE_128M   0x1A
#define SIZE_256M   0x1B
#define SIZE_512M   0x1C
#define SIZE_1G     0x1D
#define SIZE_2G     0x1E
#define SIZE_4G     0x1F
// Access permission codes, passed as the _AP argument of _RASR().
#define AP_NONE     0x00                // no access
#define AP_RW       0x03                // read/write access
// Memory type codes, passed as the _TYPE argument of _RASR()
// (placed at bit 16 of the register value by that macro).
#define MEM_TYPE_STRONGLY 0x04          // Strongly ordered, Sharable, no Cache, no Buffer, no allocate
#define MEM_TYPE_FLASH    0x02          // Normal, Non-Sharable, Cached, no Buffer, write-through
#define MEM_TYPE_IRAM     0x06          // Normal, Sharable, Cached, no Buffer, write-through
// NOTE(review): per the ARMv7-M attribute table, this encoding (TEX = 0) looks like
// write-back WITHOUT write allocate; "write allocate" would need TEX = 1 - confirm.
#define MEM_TYPE_ERAM     0x07          // Normal, Sharable, Cached, Buffered, write-back & write allocate
#define MEM_TYPE_DEVICE   0x05          // Device, Sharable, no Cache, Buffered, no allocate
/* Build a value for the MPU region base address register (MPU->RBAR):
 * the base address, the _VALID flag at bit 4 and the region number in the
 * low bits are OR-ed together. */
#define _RBAR(_ADDR, _VALID, _REGION)  \
   ( (_ADDR)                           \
   | ((_VALID) << 4)                   \
   | (_REGION) )

/* Build a value for the MPU region attribute and size register (MPU->RASR)
 * by shifting each field to its bit position and OR-ing them together. */
#define _RASR(_XN, _AP, _TYPE, _SRD, _SIZE, _ENABLE)    \
   ( ((_XN)   << 28)    /* _XN   at bit 28           */ \
   | ((_AP)   << 24)    /* _AP   starting at bit 24  */ \
   | ((_TYPE) << 16)    /* _TYPE starting at bit 16  */ \
   | ((_SRD)  <<  8)    /* _SRD  starting at bit 8   */ \
   | ((_SIZE) <<  1)    /* _SIZE starting at bit 1   */ \
   | (_ENABLE) )        /* _ENABLE at bit 0          */
/*----------------------------------------------------------------------------
  SDRAM calculation values
 *----------------------------------------------------------------------------*/
// Timing for a 133 MHz SDRAM (-75), calculated in clocks.
// Entries built with NS_2_EMC_CLKS() take a time in nanoseconds and are
// converted to EMC clock cycles wherever the macro is expanded; the plain
// numbers are already clock counts.
#define RAS_LATENCY NS_2_EMC_CLKS(20)
#define CAS_LATENCY 2
#define T_RP        NS_2_EMC_CLKS(20)
#define T_RAS       NS_2_EMC_CLKS(45)
#define T_SREX      NS_2_EMC_CLKS(72)
#define T_APR       NS_2_EMC_CLKS(20)
#define T_DAL       (CAS_LATENCY+2)
#define T_WR        2
#define T_RC        NS_2_EMC_CLKS(65)
#define T_RFC       NS_2_EMC_CLKS(75)
#define T_XSR       NS_2_EMC_CLKS(72)
#define T_RRD       NS_2_EMC_CLKS(15)
#define T_MRD       2
#define T_REFRESH   NS_2_EMC_CLKS(15625)
// Default delays. Source: practical experiments
// with the Embedded Artist board. EMC clock <= 80 MHz, command delayed strategy +0.
#define DEFAULT_CLKOUTDLY 0
#define DEFAULT_FBCLKDLY  20
#define DEFAULT_CMDDLY    16
/**
 * SDRAM delay line calibration.
 *
 * Runs the calibration 16 times and accumulates the low 8 bits of each
 * result, which smooths the reading without needing a division. The result
 * gives a hint for correcting the SDRAM timings.
 *
 * @param  none
 * @return sum of the 16 readings, i.e. a value in 0..4080 (16 x 0xFF)
 *         corresponding to the speed of the logic.
 *         Faster logic (less delay) gives larger output values.
 */
static unsigned int Calibrate (void)
{
   unsigned int sum = 0;
   unsigned int runs_left = 16;

   while (runs_left-- != 0) {
      unsigned int status;

      LPC_SC->EMCCAL = 0x4000;           // start one calibration run
      for (;;) {
         status = LPC_SC->EMCCAL;
         if (status & 0x8000)            // DONE flag is set
            break;
      }
      sum += status & 0xFF;              // keep only the calibration value bits
   }
   return sum;
}
/**
 * SDRAM test.
 *
 * Used at startup (before the SDRAM is used for anything else) to find the
 * optimum delay time values. The memory is not tested completely: at the
 * start of every SDRAMTEST_JUMPSIZE words, a block of SDRAMTEST_BLOCKSIZE
 * words is filled with a running pattern; afterwards the same locations are
 * read back and compared.
 *
 * ATTENTION: this test overwrites the previous SDRAM contents!
 * It is NOT POSSIBLE to preserve these contents, because garbage address
 * commands may be output during the test and it is then unknown which
 * locations were actually addressed.
 * DO NOT USE this test while executing code from SDRAM.
 *
 * @param offset  constant added to the start value to modify the test pattern
 * @return        1 if the test passed, 0 on the first mismatch
 */
static int sdram_check(uint32_t offset)
{
#define SDRAMTEST_BLOCKSIZE 0x100
#define SDRAMTEST_JUMPSIZE  0x10000
  volatile uint32_t * const base = (uint32_t *)SDRAM_START;
  const uint32_t block_count = SDRAM_SIZE/(sizeof(uint32_t) * SDRAMTEST_JUMPSIZE);
  uint32_t pattern;
  uint32_t blk;
  uint32_t word;

  /* Pass 1: fill the first words of every block with the running pattern */
  pattern = 0x10000 + offset;
  for (blk = 0; blk < block_count; blk++) {
    volatile uint32_t *cell = base + (blk * SDRAMTEST_JUMPSIZE);

    for (word = 0; word < SDRAMTEST_BLOCKSIZE; word++) {
      cell[word] = pattern;
      pattern += word + (word << 4);
    }
    pattern += blk + (blk << 4);
  }

  /* Pass 2: regenerate the same pattern and compare it with the memory */
  pattern = 0x10000 + offset;
  for (blk = 0; blk < block_count; blk++) {
    volatile uint32_t *cell = base + (blk * SDRAMTEST_JUMPSIZE);

    for (word = 0; word < SDRAMTEST_BLOCKSIZE; word++) {
      if (cell[word] != pattern) {
        return 0;
      }
      pattern += word + (word << 4);
    }
    pattern += blk + (blk << 4);
  }

  return 1;
}
/**
 * Optimize one SDRAM delay line.
 *
 * Every setting of the delay (0..31) is programmed into EMCDLYCTL and tried
 * with sdram_check(). The first and the last passing setting are taken as
 * the limits of the functional range, and the middle of that range is
 * programmed and returned.
 *
 * If no setting passes, or the functional range spans fewer than three
 * steps, the SDRAM is considered unusable and this function never returns
 * (it halts in an endless loop).
 *
 * @param shift      Bit shift of the delay field within EMCDLYCTL.
 * @param def_value  Not used: the former fallback to this value could never
 *                   be reached, because the halt above already covers the
 *                   case that no functional range is found. The parameter is
 *                   kept so that existing callers do not need to change.
 * @return           Optimum value, which has been set in EMCDLYCTL.
 */
static unsigned int Optimize_Delay( unsigned int shift, unsigned int def_value)
{
#define DELAY_RANGE 0x1F
   const uint32_t field_mask = (uint32_t)DELAY_RANGE << shift;
   int first_ok = -1;
   int last_ok = -1;
   int tap;

   (void)def_value;   // see the function documentation

   for (tap = 0; tap <= DELAY_RANGE; tap++) {
      // program the delay value under test
      LPC_SC->EMCDLYCTL = ( LPC_SC->EMCDLYCTL & ~field_mask ) | ((uint32_t)tap << shift);
      // vary the test pattern with every setting and every delay line
      if (sdram_check(tap + 10 + shift)) {
         if (first_ok < 0)
            first_ok = tap;
         last_ok = tap;
      }
   }

   // At this point we know whether the SDRAM works reasonably at all.
   if (first_ok < 0 || (last_ok - first_ok) < 3) {
      // No usable window: the device cannot be operated like this, so stop here.
      while (1)
         {};
   }

   // use the middle of the functional range
   tap = (first_ok + last_ok) / 2;
   LPC_SC->EMCDLYCTL = ( LPC_SC->EMCDLYCTL & ~field_mask ) | ((uint32_t)tap << shift);
   return (unsigned int)tap;
}
  // NOTE(review): the following lines are an excerpt from a larger
  // initialization routine; the enclosing function header and the block that
  // the closing brace further down belongs to are not part of this fragment.
  // configure the dynamic memory read strategy.
  // ------------------------------------------------------------------------------------------
  LPC_EMC->DynamicReadConfig = 0x00000001;  // Command delayed strategy + 0, using EMCCLKDELAY
                                            // (command delayed, clock out not delayed)
  /* Note:
   * with the clock-out delayed strategy (00) the data hold time
   * (NXP min 0.2 ns, SDRAM min 1.0 ns) can no longer be met reliably,
   * so the command delayed strategy has to be used.
   */
   // Set the default values for the delay lines
   LPC_SC->EMCDLYCTL = (DEFAULT_CLKOUTDLY << 24) | (DEFAULT_CLKOUTDLY << 16) | (DEFAULT_FBCLKDLY << 8) | DEFAULT_CMDDLY;
  // do the reference calibration
  perm.initial_calibration_value = Calibrate();
     // Measure the initial delay values, set them and keep them for later
     perm.original_cmdclkdelay = Optimize_Delay( 0, DEFAULT_CMDDLY);
     perm.original_fbclkdelay  = Optimize_Delay( 8, DEFAULT_FBCLKDLY);
     // we are in the bootloader
     MPU->CTRL = 0x00;             // disable the MPU
     // Flush into MPU
     __ISB();                      // instruction memory barrier
     // The regions have different priorities. Region 0 is the lowest one,
     // so it serves well as a default. If it is programmed so that all
     // accesses are forbidden, invalid memory accesses can be trapped and
     // the software restarted.
     // Region 0: default without access rights, entire address range
     MPU->RBAR = _RBAR(0x00000000, 1, 0);
     MPU->RASR = _RASR(1, AP_NONE, MEM_TYPE_STRONGLY, 0, SIZE_4G, 1);
     // Region 1: internal flash, internal SRAM, boot ROM,
     //           peripheral SRAM, AHB peripherals
     // 0x00000000 - 0x3FFFFFFF
     // Subregions: 0 = Flash, 2 = SRAM, 3 = Boot ROM,
     //             4 = Peripheral SRAM + AHB Peripherals
     // (the subregion mask 0xE2 has bits 1, 5, 6 and 7 set, i.e. the
     //  subregions not listed above)
     MPU->RBAR = _RBAR(0x00000000, 1, 1);
     MPU->RASR = _RASR(0, AP_RW, MEM_TYPE_IRAM, 0xE2, SIZE_1G, 1);
     // Region 2: Peripheral Devices
     // 0x40000000 - 0x400FFFFF
     // 0x42000000 - 0x43FFFFFF bit-band alias
     MPU->RBAR = _RBAR(0x40000000, 1, 2);
     MPU->RASR = _RASR(1, AP_RW, MEM_TYPE_DEVICE, 0, SIZE_64M, 1);
     // Region 3: ext. Chip Selects/Devices (XHFC)
     // 0x80000000 - 0x9000FFFF
     // NOTE(review): SIZE_64K only spans 0x80000000 - 0x8000FFFF; the upper
     // bound stated above does not match the programmed size - confirm.
     MPU->RBAR = _RBAR(0x80000000, 1, 3);
     MPU->RASR = _RASR(1, AP_RW, MEM_TYPE_DEVICE, 0, SIZE_64K, 1);
     // Region 4: SDRAM
     // 0xA0000000 - 0xA0FFFFFF
     MPU->RBAR = _RBAR(0xA0000000, 1, 4);
     MPU->RASR = _RASR(0, AP_RW, MEM_TYPE_ERAM, 0, SIZE_16M, 1);
     // Region 5: Cortex-M3 Private Peripheral Bus
     // 0xE0000000 - 0xE00FFFFF
     MPU->RBAR = _RBAR(0xE0000000, 1, 5);
     MPU->RASR = _RASR(1, AP_RW, MEM_TYPE_STRONGLY, 0, SIZE_1M, 1);
     // Region 6: not used (always good to have a spare one)
     MPU->RBAR = _RBAR(0x00000000, 1, 6);
     MPU->RASR = _RASR(0, 0, 0, 0, 0, 0);
     // Region 7: stack check (32-byte region without access rights at STACK_START)
     MPU->RBAR = _RBAR(STACK_START, 1, 7);
     MPU->RASR = _RASR(1, AP_NONE, MEM_TYPE_IRAM, 0, SIZE_32B, 1);
     // (Re-)enable the MPU
     MPU->CTRL = 0x01;       // MPU enabled
  }
  // Flush into MPU
  __DSB();                 // Data Sync
  __ISB();                // instruction memory barrier
/**
 * Re-tune the SDRAM delay lines.
 *
 * This routine should be called at regular intervals in order to adapt the
 * timing to changing environmental conditions. It runs a new calibration and
 * scales the delay values stored in perm.original_fbclkdelay and
 * perm.original_cmdclkdelay by the ratio of the new calibration value to the
 * reference value in perm.initial_calibration_value. The results are limited
 * to 0x1F and written to EMCDLYCTL together with the default clock-out
 * delays.
 *
 * If no reference calibration value has been stored (the value is 0), the
 * ratio cannot be computed and the current delay settings are left untouched.
 */
void adjust_timing (void)
{
   unsigned int calibration_value;
   unsigned int fbclkdelay;
   unsigned int cmdclkdelay;

   // Without a reference value the scaling below would divide by zero.
   if (perm.initial_calibration_value == 0)
      return;

   // how fast is the logic right now?
   calibration_value = Calibrate();
   // compute the new delay values
   fbclkdelay  = ((perm.original_fbclkdelay  * calibration_value) / perm.initial_calibration_value);
   cmdclkdelay = ((perm.original_cmdclkdelay * calibration_value) / perm.initial_calibration_value);
   // limit both values to the range of the delay fields
   if (fbclkdelay > 0x1F)
      fbclkdelay = 0x1F;
   if (cmdclkdelay > 0x1F)
      cmdclkdelay = 0x1F;
   // set the new values
   LPC_SC->EMCDLYCTL = (DEFAULT_CLKOUTDLY << 24) | (DEFAULT_CLKOUTDLY << 16)
                     | (fbclkdelay << 8) | cmdclkdelay;
}