Here is MC68xx microprocessor code for a 4-byte / 4-byte divide; I have used it for years.
You can edit it to handle a 2-byte dividend.
You could probably make it faster by using stack-pointer indexed addressing...
*NAME:BIGDIV
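*DESC: 32BIT DIVIDE BY 32BIT (OPERANDS HELD IN 5-BYTE FIELDS, MSB FIRST)
* @DIVBUF bytes
* 0...4 = DIVIDEND (REPLACED BY THE REMAINDER)
* 5...9 = DIVISOR
* 10..14 = RESULT (QUOTIENT)
* CHECKS FOR /0 AND FOR DIVIDEND < DIVISOR, CY SET IF ERROR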
* CLRDIV - zero the whole divide work area at DIVBUF.
* The work area is 15 bytes (offsets 0..14); the last byte, offset 14,
* is the low-order result byte that BIGDIV addresses as 14T,X.
* In:   nothing
* Out:  DIVBUF+0 .. DIVBUF+14 cleared
* Uses: H:X (returns pointing one byte past the work area), CCR
CLRDIV:                         ;call first to clear ram area
        LDHX    #DIVBUF         ; H:X -> first byte of the work area
CLRDIV10
        CLR     ,X              ; zero the byte H:X points at
        AIX     #1              ; step to the next byte
        CPHX    #(DIVBUF+15T)   ; stop one PAST offset 14 so the last byte is cleared too
        BNE     CLRDIV10
        RTS
* DIVERR - shared error exit, reached by branch from the divide routine.
* Expects H:X to hold the base address of the divide work area.
* Zeroes the two low-order result bytes (offsets 13 and 14) and returns
* to the divide routine's caller with the carry flag set.
DIVERR
        CLR     13T,X           ; result byte at offset 13 = 0
        CLR     14T,X           ; result byte at offset 14 (LSB) = 0
        SEC                     ; SET CY IF CANNOT DIVIDE (CLR leaves C alone, so order is free)
        RTS
;
; BIGDIV - unsigned shift-and-subtract (restoring) divide on 5-byte fields.
; Field roles as the code below actually uses them (offsets from H:X,
; most-significant byte at the lowest offset):
;   0..4   dividend on entry; holds the remainder on a successful return
;   5..9   divisor (shifted left during alignment, shifted back right by
;          the same number of bits before the routine returns)
;   10..14 quotient
; Returns with carry clear on success.  Branches to DIVERR (carry set,
; quotient zero) when the divisor is zero OR when dividend < divisor.
; Uses A, H:X, CCR and the byte SRCTMP (count of quotient bits still to do).
BIGDIV:
LDHX #DIVBUF ; H:X -> work area, offset 0 = MSB of the first 5-byte field
; NOTE(review): BIGD10 follows the LDHX, so code from here on depends only
; on H:X - presumably an entry point for a caller-supplied buffer; confirm.
BIGD10:
CLR 10T,X ; RESULT MSD
CLR 11T,X ; ... remaining quotient bytes
CLR 12T,X
CLR 13T,X
CLR 14T,X ; result LSB
; Divide-by-zero test: OR the five divisor bytes (5..9) together.
LDA 8,X ; A accumulates the OR of all divisor bytes
ORA 7,X
ORA 9,X
ORA 6,X
ORA 5,X
BEQ DIVERR ; every divisor byte is zero -> error exit
; Trial compare: dividend (0..4) minus divisor (5..9), LSB first.  Nothing
; is stored; only the final borrow matters.  The borrow chain relies on
; LDA not disturbing the carry flag between the SUB/SBC steps.
LDA 4,X ; dividend LSB - divisor LSB
SUB 9,X
LDA 3,X ; next byte up, with borrow
SBC 8,X ;
LDA 2,X ; next byte up, with borrow
SBC 7,X
LDA 1,X ; next byte up, with borrow
SBC 6,X
LDA 0,X ; MSB, with borrow
SBC 5,X
BCS DIVERR ; borrow out -> dividend < divisor -> error exit
CLR SRCTMP
INC SRCTMP ; SRCTMP = 1: at least one quotient bit is always produced
; Alignment loop: shift the divisor left until bit 7 of its MSB (offset 5)
; is set, counting one extra quotient bit per shift.  Terminates because
; the divisor is known to be non-zero here.
DIV3210 TST 5,X
BMI DIV3212 ; top bit set -> divisor fully shifted up, start subtracting
ASL 9,X ; SHIFT UP DIVSR TO LINE UP WITH DIVDN
ROL 8,X
ROL 7,X
ROL 6,X
ROL 5,X
INC SRCTMP ; one more quotient bit to generate
BRA DIV3210
; Per-bit loop re-entry: move the divisor back down one bit position.
DIVLNE LSR 5,X
ROR 6,X
ROR 7,X
ROR 8,X
ROR 9,X
; Trial subtract, stored in place: (0..4) = (0..4) - (5..9).
DIV3212 LDA 4,X ; DIVDEND-DIVSR
SUB 9,X
STA 4,X
LDA 3,X
SBC 8,X
STA 3,X
LDA 2,X
SBC 7,X
STA 2,X
LDA 1,X
SBC 6,X
STA 1,X
LDA 0,X
SBC 5,X
STA 0,X
BCS DIV3220 ; DIDNT FIT
SEC ;ADD BIT TO RESULT
; Shift the new quotient bit (in carry) into the 5-byte result, LSB first.
DIV3240 ROL 14T,X
ROL 13T,X
ROL 12T,X
ROL 11T,X
ROL 10T,X
DEC SRCTMP ; ALL DONE?
BNE DIVLNE ; DIVIDE NEXT BIT
CLC ; DONE OK
RTS
; Subtract went negative: add the divisor back so (0..4) again holds the
; running remainder, then record a 0 quotient bit.
DIV3220
LDA 4,X ;RESTORE DIVEND-DIVSR
ADD 9,X
STA 4,X
LDA 3,X
ADC 8,X
STA 3,X
LDA 2,X
ADC 7,X
STA 2,X
LDA 1,X
ADC 6,X
STA 1,X
LDA 0,X
ADC 5,X
STA 0,X
CLC ; quotient bit = 0
BRA DIV3240
*******************************