The assembly code is embedded into the function that calculates packet's checksum. The entire function's body is copied below. The target is MCF5307. The embedded __asm assembly statements that push and pop stack contents do not comile with CW for ColdFire compiler. I am looking for equivalent assembly code that is accepted by this compiler.
/*
* tfPacketChecksum() Function Description
* This function performs checksum on (IP, ICMP, IGMP, UDP and TCP)
* packets. Note that we cannot assume that data is contiguous. We follow
* the chain of ttPacket (using the pktNextPtr), and compute the checksum
* on each contiguous data area. We try and unroll the loop as much as we can.
* This version handles odd byte lengths in inner links, and odd linkDataPtr.
*
* Parameter Description
* packetPtr Pointer to a ttPacket. packetPtr->pktLinkDataPtr
* points to the data to be checksummed.
* length Length of data to be checksumed.
* Returns
* Value Meaning
* 16bit one's complement checksum
*/
#include <trtype.h>
tt16Bit tfPacketChecksum (ttPacketPtr packetPtr, ttPktLen totalDataLength)
{
tt16BitPtr link16BitDataPtr;
tt32Bit sum;
ttPktLen linkLength;
tt16Bit sum16;
unsigned short int bigLoop;
unsigned short int switchOffset;
union
{
tt16Bit oddWord;
tt8Bit oddChar[2];
} oddByte16Bit;
tt8Bit sumByteSwapped;
sum = 0UL;
linkLength = (ttPktLen)0;
sumByteSwapped = TM_8BIT_ZERO;
/* packetPtr is assumed to be non null */
do
{
/* Point to data in this link */
link16BitDataPtr = (tt16BitPtr)packetPtr->pktLinkDataPtr;
/* Loop through scattered data */
if (linkLength == (ttPktLen)0)
{
linkLength = packetPtr->pktLinkDataLength;
}
else
{
/* previous link had odd byte length, store 2nd byte to make a word */
oddByte16Bit.oddChar[1] = *((tt8BitPtr)link16BitDataPtr);
/* add to sum */
sum = sum + oddByte16Bit.oddWord;
/* Update pointer, and length. Pointer now is on odd byte boundary */
link16BitDataPtr = (tt16BitPtr)((tt8BitPtr)link16BitDataPtr + 1);
linkLength = packetPtr->pktLinkDataLength - 1;
totalDataLength--;
}
if (linkLength > totalDataLength)
{
/* Do not span more than asked */
linkLength = totalDataLength;
}
/* left over data to be checksumed after this link */
totalDataLength -= linkLength;
/* Move pointer to even byte boundary */
if ((tm_is_odd(link16BitDataPtr)) && (linkLength != 0))
{
/* save odd byte for later addition */
oddByte16Bit.oddChar[0] = *((tt8BitPtr)link16BitDataPtr);
/* Update pointer, and length. Pointer now is on even byte boundary */
link16BitDataPtr =
(tt16BitPtr)((tt8BitPtr)link16BitDataPtr + 1);
linkLength--;
/* Byte swap the sum, to collect odd bytes with odd bytes */
tm_byte_swap_sum(sum);
sumByteSwapped = TM_8BIT_YES;
}
/*
* How many times we are going to have to go through the big loop. Each time
* through the loop, we add in 64 more bytes.
*/
bigLoop = (unsigned short)((linkLength + 63) >> 6);
/* How far to jump into the switch statement */
switchOffset = (unsigned short)(linkLength % 64);
/*
* We do a shift right after this if. The shift is there because we operate
* on 16 bits at a time, so we divide the number of bytes by 2. However,
* a shift right means we'll lose a remainder of 1, so we have to deal with
* it here. We don't actually add the extra byte into the sum right now, but
* we still have to decrement the loop control variable
*/
if (switchOffset == 1)
{
bigLoop--;
}
/* Divide switchOffset by 2 */
switchOffset = switchOffset >> 1;
tm_fold_sum(sum);
/*
* Chances are, the linkLength isn't going to be evenly divisible by 64. Thus,
* we do calculations on the remainder first, and then use an optimized
* assembly loop to handle the majority of the packet. While it would in fact
* be possible, as well as marginally faster, to handle ALL of the
* checksumming in assembly, this part is left in C to make porting to your
* (the customer's) platform easier. If you are a confident assembly
* programmer, this switch can be incorporated into the rest of the loop
* (Duff's Device is one example of how it can be done).
*/
switch (switchOffset)
{
case 31: sum += *(link16BitDataPtr+30);
case 30: sum += *(link16BitDataPtr+29);
case 29: sum += *(link16BitDataPtr+28);
case 28: sum += *(link16BitDataPtr+27);
case 27: sum += *(link16BitDataPtr+26);
case 26: sum += *(link16BitDataPtr+25);
case 25: sum += *(link16BitDataPtr+24);
case 24: sum += *(link16BitDataPtr+23);
case 23: sum += *(link16BitDataPtr+22);
case 22: sum += *(link16BitDataPtr+21);
case 21: sum += *(link16BitDataPtr+20);
case 20: sum += *(link16BitDataPtr+19);
case 19: sum += *(link16BitDataPtr+18);
case 18: sum += *(link16BitDataPtr+17);
case 17: sum += *(link16BitDataPtr+16);
case 16: sum += *(link16BitDataPtr+15);
case 15: sum += *(link16BitDataPtr+14);
case 14: sum += *(link16BitDataPtr+13);
case 13: sum += *(link16BitDataPtr+12);
case 12: sum += *(link16BitDataPtr+11);
case 11: sum += *(link16BitDataPtr+10);
case 10: sum += *(link16BitDataPtr+9);
case 9: sum += *(link16BitDataPtr+8);
case 8: sum += *(link16BitDataPtr+7);
case 7: sum += *(link16BitDataPtr+6);
case 6: sum += *(link16BitDataPtr+5);
case 5: sum += *(link16BitDataPtr+4);
case 4: sum += *(link16BitDataPtr+3);
case 3: sum += *(link16BitDataPtr+2);
case 2: sum += *(link16BitDataPtr+1);
case 1: sum += *(link16BitDataPtr);
/* Fold the sum back into 16 bits */
tm_add_carries_sum(sum);
/* Increment the pointer */
link16BitDataPtr += switchOffset;
/* Decrement the loop control variable */
bigLoop--;
}
/* Put the sum into a 16 bit stack variable */
sum16 = (tt16Bit)(sum&0xffffUL);
/* We have to save all of the registers we are going to modify */
/* ES will contain a modified data segment */
__asm push es ;
/* DI is the offset of the data to sum */
__asm push di;
/* DX is the sum */
__asm push dx;
/*
* CX is first used for temporary storage, and then to hold the loop control
* variable
*/
__asm push cx;
/* Load the pointer into ES:di */
__asm les di, link16BitDataPtr;
/*
* We need to normalize the pointer, because of the segment/offset memory
* scheme x86 real-mode processors use. To do this, we shift offset right
* by 4, and add it to the segment. That total is the new segment, and
* the new offset is the lowest 4 bits of the old offset. Both the new and
* old addresses point to the same location in memory, but with the new,
* normalized pointer, we don't ever have to worry about a packet being
* long enough to necessitate incrementing the segment. This optimization
* saves us significant time in the loop itself.
*/
/* Save the offset portion of the pointer before we modify it */
__asm mov cx, di;
/* Shift the offset right by 4 */
__asm shr di, 4;
/* Copy the segment (es) into dx */
__asm push es;
__asm pop dx;
/* Add in the shifted offset */
__asm add dx, di;
/* Copy the updated segment back into es */
__asm push dx;
__asm pop es;
/* Load the old offset, and mask off everything except the last 4 bits */
__asm mov di, cx;
__asm and di, 000fh;
/* Zero out DX */
__asm xor dx, dx;
/* Clear the carry bit and load sum16 into DX */
__asm add dx, sum16;
/* Load the counter into cx */
__asm mov cx, bigLoop;
TL_CHECKSUM_LOOP_TOP:
/* or the counter with itself. If it's 0, jump out of the loop */
__asm or cx, cx;
__asm jne TL_CHECKSUM_LOOP_CONTINUE;
__asm jmp TL_CHECKSUM_LOOP_END;
/* decrement the counter by 1 */
TL_CHECKSUM_LOOP_CONTINUE:
__asm dec cx;
/* Add the next 64 bytes of the packet into the checksum */
__asm adc dx, es:word ptr [di + 62];
__asm adc dx, es:word ptr [di + 60];
__asm adc dx, es:word ptr [di + 58];
__asm adc dx, es:word ptr [di + 56];
__asm adc dx, es:word ptr [di + 54];
__asm adc dx, es:word ptr [di + 52];
__asm adc dx, es:word ptr [di + 50];
__asm adc dx, es:word ptr [di + 48];
__asm adc dx, es:word ptr [di + 46];
__asm adc dx, es:word ptr [di + 44];
__asm adc dx, es:word ptr [di + 42];
__asm adc dx, es:word ptr [di + 40];
__asm adc dx, es:word ptr [di + 38];
__asm adc dx, es:word ptr [di + 36];
__asm adc dx, es:word ptr [di + 34];
__asm adc dx, es:word ptr [di + 32];
__asm adc dx, es:word ptr [di + 30];
__asm adc dx, es:word ptr [di + 28];
__asm adc dx, es:word ptr [di + 26];
__asm adc dx, es:word ptr [di + 24];
__asm adc dx, es:word ptr [di + 22];
__asm adc dx, es:word ptr [di + 20];
__asm adc dx, es:word ptr [di + 18];
__asm adc dx, es:word ptr [di + 16];
__asm adc dx, es:word ptr [di + 14];
__asm adc dx, es:word ptr [di + 12];
__asm adc dx, es:word ptr [di + 10];
__asm adc dx, es:word ptr [di + 8];
__asm adc dx, es:word ptr [di + 6];
__asm adc dx, es:word ptr [di + 4];
__asm adc dx, es:word ptr [di + 2];
__asm adc dx, es:word ptr [di];
/*
* adc (add with carry) works by setting the carry bit if there's a carry, and
* then picking it up on the next adc instruction. We execute one final adc
* here to pick up the carry bit
*/
__asm adc dx, 0;
/* Add 64 to the offset */
__asm add di, 64;
/* Go back to the top of the loop */
__asm jmp TL_CHECKSUM_LOOP_TOP;
TL_CHECKSUM_LOOP_END:
/* Copy the sum out of dx and back into sum16 */
__asm mov sum16, dx;
/* Restore all the registers that we used */
__asm pop cx;
__asm pop di;
__asm pop dx;
__asm pop es;
/* Increment the pointer to the correct point */
link16BitDataPtr += (linkLength >> 1) - switchOffset;
/*
* We're done with the 16-bit variable. Move the sum back to the standard
* (32-bit) variable for the rest of the function
*/
sum = (tt32Bit)sum16;
/* We need to know if linkLength was odd */
linkLength &= 1;
if (sumByteSwapped != TM_8BIT_ZERO)
{
/*
* We want to add the saved even byte, so byte swap the sum to start
* collecting the even bytes.
*/
tm_byte_swap_sum(sum);
/* toggle byte swapping */
sumByteSwapped = TM_8BIT_ZERO;
if (linkLength != 0)
/* extra byte at the end. We can pair it with our saved even byte */
{
oddByte16Bit.oddChar[1] = *((tt8BitPtr)link16BitDataPtr);
/* add to sum */
sum = sum + oddByte16Bit.oddWord;
linkLength = 0;
}
else
{
/* remember that we have our even byte stored in oddChar[0] */
linkLength = 1;
}
}
else
{
if (linkLength != 0)
{
/* Odd byte length. Save the character to add it later on */
oddByte16Bit.oddChar[0] = *((tt8BitPtr)link16BitDataPtr);
}
}
/* check this before computing next packetPtr */
if (totalDataLength == (ttPktLen)0)
{
/* break out of the loop. Do not check 0 length after the loop */
goto outFold;
}
packetPtr = (ttPacketPtr)packetPtr->pktLinkNextPtr;
} while (packetPtr != TM_PACKET_NULL_PTR);
#ifdef TM_ERROR_CHECKING
/* double check that all data has been checksumed */
if (totalDataLength != (ttPktLen)0)
{
tfKernelError("tfPacketChecksum",
"Data in Packet chain smaller than length to be checksumed");
}
#endif
outFold:
if (linkLength != 0UL)
{
/* Add left-over byte */
oddByte16Bit.oddChar[1] = 0;
sum = sum + oddByte16Bit.oddWord;
}
/* Fold 32-bit sum to 16 bits, adding posssible carries from fold */
tm_add_carries_sum(sum);
return (tt16Bit)~sum;
}
THANK YOU!