| while ((LPC_UART->LSR & LSR_THRE) == 0); LPC_UART->THR = ch; | 
This is my implementation:
/* Transmit a byte array through the UART peripheral (blocking).
 *
 * Busy-waits until the line status register reports THRE, then writes the
 * data one byte at a time, polling for THRE again after every
 * TX_BURST_LEN bytes written.
 *
 * pUART    : UART peripheral to transmit on
 * data     : bytes to send; only read, never written
 * numBytes : number of bytes to send; a value <= 0 sends nothing
 * returns  : number of bytes written (numBytes, or 0 when numBytes <= 0)
 */
int Chip_UART_Send(LPC_USART_T *pUART, const void *data, int numBytes)
{
    /* Bytes written between two THRE polls.
     * NOTE(review): presumably the depth of the hardware TX FIFO - confirm
     * against the device manual and that the FIFO is actually enabled. */
    enum { TX_BURST_LEN = 16 };

    const uint8_t *p8 = (const uint8_t *) data;
    int sent = 0;

    /* Wait for THRE before writing the first burst. */
    while ((Chip_UART_ReadLineStatus(pUART) & UART_LSR_THRE) == 0) {
    }

    while (sent < numBytes) {
        Chip_UART_SendByte(pUART, *p8);
        p8++;
        sent++;

        /* After each full burst, wait for THRE before writing more. */
        if ((sent % TX_BURST_LEN) == 0) {
            while ((Chip_UART_ReadLineStatus(pUART) & UART_LSR_THRE) == 0) {
            }
        }
    }

    return sent;
}
| 
/* Number of bytes written to THR since the last time THRE was observed set. */
static char FifoContains = 0;

/* Write one byte to UART0.
 *
 * Up to 16 bytes are written back-to-back without polling the status
 * register. Once 16 have been written, the next call busy-waits for
 * LSR_THRE before writing and restarts the count.
 */
void Uart0WriteByte(const char ch)
{
    if (FifoContains >= 16) {
        /* Counter is full: block until THRE is set, then start over. */
        while (!(LPC_UART0->LSR & LSR_THRE)) {
        }
        FifoContains = 0;
    }

    LPC_UART0->THR = ch;
    FifoContains++;
}
 | 
| 
 /* Send Length bytes starting at BufferPtr through LPC_UART (blocking).
  *
  * Before the first byte and before every 16th byte after it, busy-waits
  * until LSR_THRE is set in the line status register; the bytes in between
  * are written to THR without polling.
  * NOTE(review): the burst size of 16 presumably matches the hardware TX
  * FIFO depth - confirm against the device manual.
  *
  * BufferPtr : first byte to send
  * Length    : number of bytes to send; a value <= 0 sends nothing
  */
 void UART_Tx(char *BufferPtr, int Length)
 {
     unsigned char tx_data = 0;

     /* "> 0" rather than "!= 0": a negative Length must not run the loop,
      * otherwise it would read far past the end of the buffer. */
     while (Length > 0) {
         /* tx_data wraps at 256, which is a multiple of 16, so the
          * every-16-bytes cadence survives the wrap. */
         if ((tx_data % 16) == 0) {
             while (!(LPC_UART->LSR & LSR_THRE)) {
             }
         }
         LPC_UART->THR = *BufferPtr;
         BufferPtr++;
         tx_data++;
         Length--;
     }
 }
 | 
