got my code working, apparently the answer is that there is Kinetis specific setup required, but the standard Cortex setup for ITM, CoreDebug, and DWT are required.
void swo_init()
{
uint32_t SWOSpeed = 6000000; //6000kbps, default for JLinkSWOViewer
uint32_t SWOPrescaler = (F_CPU / SWOSpeed) - 1; // SWOSpeed in Hz, note that F_CPU is expected to be 96000000 in this case
CoreDebug->DEMCR = CoreDebug_DEMCR_TRCENA_Msk;
*((volatile unsigned *)(ITM_BASE + 0x400F0)) = 0x00000002; // "Selected PIN Protocol Register": Select which protocol to use for trace output (2: SWO)
*((volatile unsigned *)(ITM_BASE + 0x40010)) = SWOPrescaler; // "Async Clock Prescaler Register". Scale the baud rate of the asynchronous output
*((volatile unsigned *)(ITM_BASE + 0x00FB0)) = 0xC5ACCE55; // ITM Lock Access Register, C5ACCE55 enables more write access to Control Register 0xE00 :: 0xFFC
ITM->TCR = ITM_TCR_TraceBusID_Msk | ITM_TCR_SWOENA_Msk | ITM_TCR_SYNCENA_Msk | ITM_TCR_ITMENA_Msk; // ITM Trace Control Register
ITM->TPR = ITM_TPR_PRIVMASK_Msk; // ITM Trace Privilege Register
ITM->TER = 0x00000001; // ITM Trace Enable Register. Enabled tracing on stimulus ports. One bit per stimulus port.
*((volatile unsigned *)(ITM_BASE + 0x01000)) = 0x400003FE; // DWT_CTRL
*((volatile unsigned *)(ITM_BASE + 0x40304)) = 0x00000100; // Formatter and Flush Control Register
}
// may not be needed
void swo_deinit()
{
volatile uint32_t i = 0xFFF;
while (ITM->PORT[0].u32 == 0 && i--); // wait for any pending transmission
CoreDebug->DEMCR = 0;
*((volatile unsigned *)(ITM_BASE + 0x00FB0)) = 0xC5ACCE55; // ITM Lock Access Register, C5ACCE55 enables more write access to Control Register 0xE00 :: 0xFFC
ITM->TCR = 0;
ITM->TPR = 0;
ITM->TER = 0;
}
// function mostly copied from ITM_SendChar
void swo_sendchar(char x)
{
if (
(CoreDebug->DHCSR & CoreDebug_DHCSR_C_DEBUGEN_Msk) && // if debugger is attached (does it really work?)
(CoreDebug->DEMCR & CoreDebug_DEMCR_TRCENA_Msk) && // Trace enabled
(ITM->TCR & ITM_TCR_ITMENA_Msk) && // ITM enabled
(ITM->TER & (1UL << 0)) // ITM Port #0 enabled
)
{
volatile uint32_t i = 0xFFF; // implement a timeout
while (ITM->PORT[0].u32 == 0 && i--); // wait for next
if (ITM->PORT[0].u32 != 0 && i != 0) {
ITM->PORT[0].u8 = (uint8_t) x; // send
}
}
}