/* disasm.c where all the _work_ gets done in the Netwide Disassembler | |
* | |
* The Netwide Assembler is copyright (C) 1996 Simon Tatham and | |
* Julian Hall. All rights reserved. The software is | |
* redistributable under the licence given in the file "Licence" | |
* distributed in the NASM archive. | |
* | |
* initial version 27/iii/95 by Simon Tatham | |
*/ | |
#include <stdio.h> | |
#include <string.h> | |
#include "nasm.h" | |
#include "insns.h" | |
/* names.c included source file defining instruction and register | |
* names for the Netwide [Dis]Assembler | |
* | |
* The Netwide Assembler is copyright (C) 1996 Simon Tatham and | |
* Julian Hall. All rights reserved. The software is | |
* redistributable under the licence given in the file "Licence" | |
* distributed in the NASM archive. | |
*/ | |
/*
 * Condition-code mnemonic suffixes, in alphabetical order.  whichcond()
 * indexes this table with C_* constants.
 */
static const char *conditions[] = {
    "a",   "ae",  "b",   "be",  "c",   "e",
    "g",   "ge",  "l",   "le",
    "na",  "nae", "nb",  "nbe", "nc",  "ne",
    "ng",  "nge", "nl",  "nle", "no",  "np",  "ns",  "nz",
    "o",   "p",   "pe",  "po",  "s",   "z"
};
/*
 * Register names, generated from regs.dat (do not edit the contents).
 * disasm() indexes this table with (register id - EXPR_REG_START).
 */
static const char *reg_names[] = {
    "ah", "al", "ax",
    "bh", "bl", "bp", "bx",
    "ch", "cl",
    "cr0", "cr1", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7",
    "cs", "cx",
    "dh", "di", "dl",
    "dr0", "dr1", "dr2", "dr3", "dr4", "dr5", "dr6", "dr7",
    "ds", "dx",
    "eax", "ebp", "ebx", "ecx", "edi", "edx",
    "es", "esi", "esp",
    "fs", "gs",
    "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7",
    "segr6", "segr7",
    "si", "sp", "ss",
    "st0", "st1", "st2", "st3", "st4", "st5", "st6", "st7",
    "tr0", "tr1", "tr2", "tr3", "tr4", "tr5", "tr6", "tr7",
    "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
};
/*
 * Instruction mnemonics, generated from insns.dat by insns.pl (do not
 * edit the contents).  disasm() indexes this table with the template's
 * opcode value.  Eight entries per row.
 */
static const char *insn_names[] = {
    "aaa", "aad", "aam", "aas", "adc", "add", "addpd", "addps",
    "addsd", "addss", "addsubpd", "addsubps", "and", "andnpd", "andnps", "andpd",
    "andps", "arpl", "bound", "bsf", "bsr", "bswap", "bt", "btc",
    "btr", "bts", "call", "cbw", "cdq", "clc", "cld", "clflush",
    "cli", "clts", "cmc", "cmp", "cmpeqpd", "cmpeqps", "cmpeqsd", "cmpeqss",
    "cmplepd", "cmpleps", "cmplesd", "cmpless", "cmpltpd", "cmpltps", "cmpltsd", "cmpltss",
    "cmpneqpd", "cmpneqps", "cmpneqsd", "cmpneqss", "cmpnlepd", "cmpnleps", "cmpnlesd", "cmpnless",
    "cmpnltpd", "cmpnltps", "cmpnltsd", "cmpnltss", "cmpordpd", "cmpordps", "cmpordsd", "cmpordss",
    "cmppd", "cmpps", "cmpsb", "cmpsd", "cmpss", "cmpsw", "cmpunordpd", "cmpunordps",
    "cmpunordsd", "cmpunordss", "cmpxchg", "cmpxchg486", "cmpxchg8b", "comisd", "comiss", "cpuid",
    "cvtdq2pd", "cvtdq2ps", "cvtpd2dq", "cvtpd2pi", "cvtpd2ps", "cvtpi2pd", "cvtpi2ps", "cvtps2dq",
    "cvtps2pd", "cvtps2pi", "cvtsd2si", "cvtsd2ss", "cvtsi2sd", "cvtsi2ss", "cvtss2sd", "cvtss2si",
    "cvttpd2dq", "cvttpd2pi", "cvttps2dq", "cvttps2pi", "cvttsd2si", "cvttss2si", "cwd", "cwde",
    "daa", "das", "db", "dd", "dec", "div", "divpd", "divps",
    "divsd", "divss", "dq", "dt", "dw", "emms", "enter", "equ",
    "f2xm1", "fabs", "fadd", "faddp", "fbld", "fbstp", "fchs", "fclex",
    "fcmovb", "fcmovbe", "fcmove", "fcmovnb", "fcmovnbe", "fcmovne", "fcmovnu", "fcmovu",
    "fcom", "fcomi", "fcomip", "fcomp", "fcompp", "fcos", "fdecstp", "fdisi",
    "fdiv", "fdivp", "fdivr", "fdivrp", "femms", "feni", "ffree", "ffreep",
    "fiadd", "ficom", "ficomp", "fidiv", "fidivr", "fild", "fimul", "fincstp",
    "finit", "fist", "fistp", "fisttp", "fisub", "fisubr", "fld", "fld1",
    "fldcw", "fldenv", "fldl2e", "fldl2t", "fldlg2", "fldln2", "fldpi", "fldz",
    "fmul", "fmulp", "fnclex", "fndisi", "fneni", "fninit", "fnop", "fnsave",
    "fnstcw", "fnstenv", "fnstsw", "fpatan", "fprem", "fprem1", "fptan", "frndint",
    "frstor", "fsave", "fscale", "fsetpm", "fsin", "fsincos", "fsqrt", "fst",
    "fstcw", "fstenv", "fstp", "fstsw", "fsub", "fsubp", "fsubr", "fsubrp",
    "ftst", "fucom", "fucomi", "fucomip", "fucomp", "fucompp", "fwait", "fxam",
    "fxch", "fxrstor", "fxsave", "fxtract", "fyl2x", "fyl2xp1", "haddpd", "haddps",
    "hlt", "hsubpd", "hsubps", "ibts", "icebp", "idiv", "imul", "in",
    "inc", "incbin", "insb", "insd", "insw", "int", "int01", "int03",
    "int1", "int3", "into", "invd", "invlpg", "iret", "iretd", "iretw",
    "jcxz", "jecxz", "jmp", "jmpe", "lahf", "lar", "lddqu", "ldmxcsr",
    "lds", "lea", "leave", "les", "lfence", "lfs", "lgdt", "lgs",
    "lidt", "lldt", "lmsw", "loadall", "loadall286", "lodsb", "lodsd", "lodsw",
    "loop", "loope", "loopne", "loopnz", "loopz", "lsl", "lss", "ltr",
    "maskmovdqu", "maskmovq", "maxpd", "maxps", "maxsd", "maxss", "mfence", "minpd",
    "minps", "minsd", "minss", "monitor", "mov", "movapd", "movaps", "movd",
    "movddup", "movdq2q", "movdqa", "movdqu", "movhlps", "movhpd", "movhps", "movlhps",
    "movlpd", "movlps", "movmskpd", "movmskps", "movntdq", "movnti", "movntpd", "movntps",
    "movntq", "movq", "movq2dq", "movsb", "movsd", "movshdup", "movsldup", "movss",
    "movsw", "movsx", "movupd", "movups", "movzx", "mul", "mulpd", "mulps",
    "mulsd", "mulss", "mwait", "neg", "nop", "not", "or", "orpd",
    "orps", "out", "outsb", "outsd", "outsw", "packssdw", "packsswb", "packuswb",
    "paddb", "paddd", "paddq", "paddsb", "paddsiw", "paddsw", "paddusb", "paddusw",
    "paddw", "pand", "pandn", "pause", "paveb", "pavgb", "pavgusb", "pavgw",
    "pcmpeqb", "pcmpeqd", "pcmpeqw", "pcmpgtb", "pcmpgtd", "pcmpgtw", "pdistib", "pextrw",
    "pf2id", "pf2iw", "pfacc", "pfadd", "pfcmpeq", "pfcmpge", "pfcmpgt", "pfmax",
    "pfmin", "pfmul", "pfnacc", "pfpnacc", "pfrcp", "pfrcpit1", "pfrcpit2", "pfrsqit1",
    "pfrsqrt", "pfsub", "pfsubr", "pi2fd", "pi2fw", "pinsrw", "pmachriw", "pmaddwd",
    "pmagw", "pmaxsw", "pmaxub", "pminsw", "pminub", "pmovmskb", "pmulhriw", "pmulhrwa",
    "pmulhrwc", "pmulhuw", "pmulhw", "pmullw", "pmuludq", "pmvgezb", "pmvlzb", "pmvnzb",
    "pmvzb", "pop", "popa", "popad", "popaw", "popf", "popfd", "popfw",
    "por", "prefetch", "prefetchnta", "prefetcht0", "prefetcht1", "prefetcht2", "prefetchw", "psadbw",
    "pshufd", "pshufhw", "pshuflw", "pshufw", "pslld", "pslldq", "psllq", "psllw",
    "psrad", "psraw", "psrld", "psrldq", "psrlq", "psrlw", "psubb", "psubd",
    "psubq", "psubsb", "psubsiw", "psubsw", "psubusb", "psubusw", "psubw", "pswapd",
    "punpckhbw", "punpckhdq", "punpckhqdq", "punpckhwd", "punpcklbw", "punpckldq", "punpcklqdq", "punpcklwd",
    "push", "pusha", "pushad", "pushaw", "pushf", "pushfd", "pushfw", "pxor",
    "rcl", "rcpps", "rcpss", "rcr", "rdmsr", "rdpmc", "rdshr", "rdtsc",
    "resb", "resd", "resq", "rest", "resw", "ret", "retf", "retn",
    "rol", "ror", "rsdc", "rsldt", "rsm", "rsqrtps", "rsqrtss", "rsts",
    "sahf", "sal", "salc", "sar", "sbb", "scasb", "scasd", "scasw",
    "sfence", "sgdt", "shl", "shld", "shr", "shrd", "shufpd", "shufps",
    "sidt", "sldt", "smi", "smint", "smintold", "smsw", "sqrtpd", "sqrtps",
    "sqrtsd", "sqrtss", "stc", "std", "sti", "stmxcsr", "stosb", "stosd",
    "stosw", "str", "sub", "subpd", "subps", "subsd", "subss", "svdc",
    "svldt", "svts", "syscall", "sysenter", "sysexit", "sysret", "test", "ucomisd",
    "ucomiss", "ud0", "ud1", "ud2", "umov", "unpckhpd", "unpckhps", "unpcklpd",
    "unpcklps", "verr", "verw", "wait", "wbinvd", "wrmsr", "wrshr", "xadd",
    "xbts", "xchg", "xlat", "xlatb", "xor", "xorpd", "xorps", "xstore"
};
/*
 * Mnemonic stems of the conditional instructions; disasm() appends the
 * condition-code suffix from whichcond() to these.
 */
static const char *icn[] = { "cmov", "j", "set" };
/* and the corresponding opcodes */ | |
static int ico[] = { | |
I_CMOVcc, | |
I_Jcc, | |
I_SETcc | |
}; | |
#define INSN_MAX 32 /* one instruction can't be longer than this */
/* Disassemble one instruction at `data' into `output'; returns bytes used. */
long disasm (unsigned char *data, char *output, int segsize, long offset);
/*
 * Instruction templates, indexed by the first opcode byte; disasm()
 * walks each entry as a NULL-terminated list.
 */
extern struct itemplate **itable[];
/*
 * Flags that go into the `segment' field of `insn' structures
 * during disassembly.
 */
#define SEG_RELATIVE 1 /* offset is relative to the end of the instruction */
#define SEG_32BIT 2 /* when clear, relative targets are wrapped to 16 bits */
#define SEG_RMREG 4 /* operand is a register; basereg holds its number */
#define SEG_DISP8 8 /* effective address carries a signed 8-bit displacement */
#define SEG_DISP16 16 /* effective address carries a 16-bit displacement */
#define SEG_DISP32 32 /* effective address carries a 32-bit displacement */
#define SEG_NODISP 64 /* effective address has no displacement */
#define SEG_SIGNED 128 /* byte immediate is signed; printed with +/- */
static int whichreg(long regflags, int regval) | |
{ | |
/* automatically generated from ./regs.dat - do not edit */ | |
static const int creg [] = {R_CR0,R_CR1,R_CR2,R_CR3,R_CR4,R_CR5,R_CR6,R_CR7}; | |
static const int dreg [] = {R_DR0,R_DR1,R_DR2,R_DR3,R_DR4,R_DR5,R_DR6,R_DR7}; | |
static const int fpureg [] = {R_ST0,R_ST1,R_ST2,R_ST3,R_ST4,R_ST5,R_ST6,R_ST7}; | |
static const int mmxreg [] = {R_MM0,R_MM1,R_MM2,R_MM3,R_MM4,R_MM5,R_MM6,R_MM7}; | |
static const int reg16 [] = {R_AX,R_CX,R_DX,R_BX,R_SP,R_BP,R_SI,R_DI}; | |
static const int reg32 [] = {R_EAX,R_ECX,R_EDX,R_EBX,R_ESP,R_EBP,R_ESI,R_EDI}; | |
static const int reg8 [] = {R_AL,R_CL,R_DL,R_BL,R_AH,R_CH,R_DH,R_BH}; | |
static const int sreg [] = {R_ES,R_CS,R_SS,R_DS,R_FS,R_GS,R_SEGR6,R_SEGR7}; | |
static const int treg [] = {R_TR0,R_TR1,R_TR2,R_TR3,R_TR4,R_TR5,R_TR6,R_TR7}; | |
static const int xmmreg [] = {R_XMM0,R_XMM1,R_XMM2,R_XMM3,R_XMM4,R_XMM5,R_XMM6,R_XMM7}; | |
if (!(REG_AL & ~regflags)) | |
return R_AL; | |
if (!(REG_AX & ~regflags)) | |
return R_AX; | |
if (!(REG_EAX & ~regflags)) | |
return R_EAX; | |
if (!(REG_DL & ~regflags)) | |
return R_DL; | |
if (!(REG_DX & ~regflags)) | |
return R_DX; | |
if (!(REG_EDX & ~regflags)) | |
return R_EDX; | |
if (!(REG_CL & ~regflags)) | |
return R_CL; | |
if (!(REG_CX & ~regflags)) | |
return R_CX; | |
if (!(REG_ECX & ~regflags)) | |
return R_ECX; | |
if (!(FPU0 & ~regflags)) | |
return R_ST0; | |
if (!(REG_CS & ~regflags)) | |
return (regval == 1) ? R_CS : 0; | |
if (!(REG_DESS & ~regflags)) | |
return (regval == 0 || regval == 2 || regval == 3 ? sreg[regval] : 0); | |
if (!(REG_FSGS & ~regflags)) | |
return (regval == 4 || regval == 5 ? sreg[regval] : 0); | |
if (!(REG_SEG67 & ~regflags)) | |
return (regval == 6 || regval == 7 ? sreg[regval] : 0); | |
/* All the entries below look up regval in an 8-entry array */ | |
if (regval < 0 || regval > 7) | |
return 0; | |
if (!((REGMEM|BITS8) & ~regflags)) | |
return reg8[regval]; | |
if (!((REGMEM|BITS16) & ~regflags)) | |
return reg16[regval]; | |
if (!((REGMEM|BITS32) & ~regflags)) | |
return reg32[regval]; | |
if (!(REG_SREG & ~regflags)) | |
return sreg[regval]; | |
if (!(REG_CREG & ~regflags)) | |
return creg[regval]; | |
if (!(REG_DREG & ~regflags)) | |
return dreg[regval]; | |
if (!(REG_TREG & ~regflags)) | |
return treg[regval]; | |
if (!(FPUREG & ~regflags)) | |
return fpureg[regval]; | |
if (!(MMXREG & ~regflags)) | |
return mmxreg[regval]; | |
if (!(XMMREG & ~regflags)) | |
return xmmreg[regval]; | |
return 0; | |
} | |
static const char *whichcond(int condval) | |
{ | |
static int conds[] = { | |
C_O, C_NO, C_C, C_NC, C_Z, C_NZ, C_NA, C_A, | |
C_S, C_NS, C_PE, C_PO, C_L, C_NL, C_NG, C_G | |
}; | |
return conditions[conds[condval]]; | |
} | |
/* | |
* Process an effective address (ModRM) specification. | |
*/ | |
static unsigned char *do_ea (unsigned char *data, int modrm, int asize, | |
int segsize, operand *op) | |
{ | |
int mod, rm, scale, index, base; | |
mod = (modrm >> 6) & 03; | |
rm = modrm & 07; | |
if (mod == 3) { /* pure register version */ | |
op->basereg = rm; | |
op->segment |= SEG_RMREG; | |
return data; | |
} | |
op->addr_size = 0; | |
if (asize == 16) { | |
/* | |
* <mod> specifies the displacement size (none, byte or | |
* word), and <rm> specifies the register combination. | |
* Exception: mod=0,rm=6 does not specify [BP] as one might | |
* expect, but instead specifies [disp16]. | |
*/ | |
op->indexreg = op->basereg = -1; | |
op->scale = 1; /* always, in 16 bits */ | |
switch (rm) { | |
case 0: op->basereg = R_BX; op->indexreg = R_SI; break; | |
case 1: op->basereg = R_BX; op->indexreg = R_DI; break; | |
case 2: op->basereg = R_BP; op->indexreg = R_SI; break; | |
case 3: op->basereg = R_BP; op->indexreg = R_DI; break; | |
case 4: op->basereg = R_SI; break; | |
case 5: op->basereg = R_DI; break; | |
case 6: op->basereg = R_BP; break; | |
case 7: op->basereg = R_BX; break; | |
} | |
if (rm == 6 && mod == 0) { /* special case */ | |
op->basereg = -1; | |
if (segsize != 16) | |
op->addr_size = 16; | |
mod = 2; /* fake disp16 */ | |
} | |
switch (mod) { | |
case 0: | |
op->segment |= SEG_NODISP; | |
break; | |
case 1: | |
op->segment |= SEG_DISP8; | |
op->offset = (signed char) *data++; | |
break; | |
case 2: | |
op->segment |= SEG_DISP16; | |
op->offset = *data++; | |
op->offset |= ((unsigned) *data++) << 8; | |
break; | |
} | |
return data; | |
} else { | |
/* | |
* Once again, <mod> specifies displacement size (this time | |
* none, byte or *dword*), while <rm> specifies the base | |
* register. Again, [EBP] is missing, replaced by a pure | |
* disp32 (this time that's mod=0,rm=*5*). However, rm=4 | |
* indicates not a single base register, but instead the | |
* presence of a SIB byte... | |
*/ | |
op->indexreg = -1; | |
switch (rm) { | |
case 0: op->basereg = R_EAX; break; | |
case 1: op->basereg = R_ECX; break; | |
case 2: op->basereg = R_EDX; break; | |
case 3: op->basereg = R_EBX; break; | |
case 5: op->basereg = R_EBP; break; | |
case 6: op->basereg = R_ESI; break; | |
case 7: op->basereg = R_EDI; break; | |
} | |
if (rm == 5 && mod == 0) { | |
op->basereg = -1; | |
if (segsize != 32) | |
op->addr_size = 32; | |
mod = 2; /* fake disp32 */ | |
} | |
if (rm == 4) { /* process SIB */ | |
scale = (*data >> 6) & 03; | |
index = (*data >> 3) & 07; | |
base = *data & 07; | |
data++; | |
op->scale = 1 << scale; | |
switch (index) { | |
case 0: op->indexreg = R_EAX; break; | |
case 1: op->indexreg = R_ECX; break; | |
case 2: op->indexreg = R_EDX; break; | |
case 3: op->indexreg = R_EBX; break; | |
case 4: op->indexreg = -1; break; | |
case 5: op->indexreg = R_EBP; break; | |
case 6: op->indexreg = R_ESI; break; | |
case 7: op->indexreg = R_EDI; break; | |
} | |
switch (base) { | |
case 0: op->basereg = R_EAX; break; | |
case 1: op->basereg = R_ECX; break; | |
case 2: op->basereg = R_EDX; break; | |
case 3: op->basereg = R_EBX; break; | |
case 4: op->basereg = R_ESP; break; | |
case 6: op->basereg = R_ESI; break; | |
case 7: op->basereg = R_EDI; break; | |
case 5: | |
if (mod == 0) { | |
mod = 2; | |
op->basereg = -1; | |
} else | |
op->basereg = R_EBP; | |
break; | |
} | |
} | |
switch (mod) { | |
case 0: | |
op->segment |= SEG_NODISP; | |
break; | |
case 1: | |
op->segment |= SEG_DISP8; | |
op->offset = (signed char) *data++; | |
break; | |
case 2: | |
op->segment |= SEG_DISP32; | |
op->offset = *data++; | |
op->offset |= ((unsigned) *data++) << 8; | |
op->offset |= ((long) *data++) << 16; | |
op->offset |= ((long) *data++) << 24; | |
break; | |
} | |
return data; | |
} | |
} | |
/* | |
* Determine whether the instruction template in t corresponds to the data | |
* stream in data. Return the number of bytes matched if so. | |
*/ | |
static int matches (struct itemplate *t, unsigned char *data, int asize, | |
int osize, int segsize, int rep, insn *ins) | |
{ | |
unsigned char * r = (unsigned char *)(t->code); | |
unsigned char * origdata = data; | |
int a_used = FALSE, o_used = FALSE; | |
int drep = 0; | |
if ( rep == 0xF2 ) | |
drep = P_REPNE; | |
else if ( rep == 0xF3 ) | |
drep = P_REP; | |
while (*r) | |
{ | |
int c = *r++; | |
if (c >= 01 && c <= 03) { | |
while (c--) | |
if (*r++ != *data++) | |
return FALSE; | |
} | |
if (c == 04) { | |
switch (*data++) { | |
case 0x07: ins->oprs[0].basereg = 0; break; | |
case 0x17: ins->oprs[0].basereg = 2; break; | |
case 0x1F: ins->oprs[0].basereg = 3; break; | |
default: return FALSE; | |
} | |
} | |
if (c == 05) { | |
switch (*data++) { | |
case 0xA1: ins->oprs[0].basereg = 4; break; | |
case 0xA9: ins->oprs[0].basereg = 5; break; | |
default: return FALSE; | |
} | |
} | |
if (c == 06) { | |
switch (*data++) { | |
case 0x06: ins->oprs[0].basereg = 0; break; | |
case 0x0E: ins->oprs[0].basereg = 1; break; | |
case 0x16: ins->oprs[0].basereg = 2; break; | |
case 0x1E: ins->oprs[0].basereg = 3; break; | |
default: return FALSE; | |
} | |
} | |
if (c == 07) { | |
switch (*data++) { | |
case 0xA0: ins->oprs[0].basereg = 4; break; | |
case 0xA8: ins->oprs[0].basereg = 5; break; | |
default: return FALSE; | |
} | |
} | |
if (c >= 010 && c <= 012) { | |
int t = *r++, d = *data++; | |
if (d < t || d > t+7) | |
return FALSE; | |
else { | |
ins->oprs[c-010].basereg = d-t; | |
ins->oprs[c-010].segment |= SEG_RMREG; | |
} | |
} | |
if (c == 017) | |
if (*data++) | |
return FALSE; | |
if (c >= 014 && c <= 016) { | |
ins->oprs[c-014].offset = (signed char) *data++; | |
ins->oprs[c-014].segment |= SEG_SIGNED; | |
} | |
if (c >= 020 && c <= 022) | |
ins->oprs[c-020].offset = *data++; | |
if (c >= 024 && c <= 026) | |
ins->oprs[c-024].offset = *data++; | |
if (c >= 030 && c <= 032) { | |
ins->oprs[c-030].offset = *data++; | |
ins->oprs[c-030].offset |= (((unsigned) *data++) << 8); | |
} | |
if (c >= 034 && c <= 036) { | |
ins->oprs[c-034].offset = *data++; | |
ins->oprs[c-034].offset |= (((unsigned) *data++) << 8); | |
if (osize == 32) { | |
ins->oprs[c-034].offset |= (((long) *data++) << 16); | |
ins->oprs[c-034].offset |= (((long) *data++) << 24); | |
} | |
if (segsize != asize) | |
ins->oprs[c-034].addr_size = asize; | |
} | |
if (c >= 040 && c <= 042) { | |
ins->oprs[c-040].offset = *data++; | |
ins->oprs[c-040].offset |= (((unsigned) *data++) << 8); | |
ins->oprs[c-040].offset |= (((long) *data++) << 16); | |
ins->oprs[c-040].offset |= (((long) *data++) << 24); | |
} | |
if (c >= 044 && c <= 046) { | |
ins->oprs[c-044].offset = *data++; | |
ins->oprs[c-044].offset |= (((unsigned) *data++) << 8); | |
if (asize == 32) { | |
ins->oprs[c-044].offset |= (((long) *data++) << 16); | |
ins->oprs[c-044].offset |= (((long) *data++) << 24); | |
} | |
if (segsize != asize) | |
ins->oprs[c-044].addr_size = asize; | |
} | |
if (c >= 050 && c <= 052) { | |
ins->oprs[c-050].offset = (signed char) *data++; | |
ins->oprs[c-050].segment |= SEG_RELATIVE; | |
} | |
if (c >= 060 && c <= 062) { | |
ins->oprs[c-060].offset = *data++; | |
ins->oprs[c-060].offset |= (((unsigned) *data++) << 8); | |
ins->oprs[c-060].segment |= SEG_RELATIVE; | |
ins->oprs[c-060].segment &= ~SEG_32BIT; | |
} | |
if (c >= 064 && c <= 066) { | |
ins->oprs[c-064].offset = *data++; | |
ins->oprs[c-064].offset |= (((unsigned) *data++) << 8); | |
if (osize == 32) { | |
ins->oprs[c-064].offset |= (((long) *data++) << 16); | |
ins->oprs[c-064].offset |= (((long) *data++) << 24); | |
ins->oprs[c-064].segment |= SEG_32BIT; | |
} else | |
ins->oprs[c-064].segment &= ~SEG_32BIT; | |
ins->oprs[c-064].segment |= SEG_RELATIVE; | |
if (segsize != osize) { | |
ins->oprs[c-064].type = | |
(ins->oprs[c-064].type & NON_SIZE) | |
| ((osize == 16) ? BITS16 : BITS32); | |
} | |
} | |
if (c >= 070 && c <= 072) { | |
ins->oprs[c-070].offset = *data++; | |
ins->oprs[c-070].offset |= (((unsigned) *data++) << 8); | |
ins->oprs[c-070].offset |= (((long) *data++) << 16); | |
ins->oprs[c-070].offset |= (((long) *data++) << 24); | |
ins->oprs[c-070].segment |= SEG_32BIT | SEG_RELATIVE; | |
} | |
if (c >= 0100 && c < 0130) { | |
int modrm = *data++; | |
ins->oprs[c & 07].basereg = (modrm >> 3) & 07; | |
ins->oprs[c & 07].segment |= SEG_RMREG; | |
data = do_ea (data, modrm, asize, segsize, | |
&ins->oprs[(c >> 3) & 07]); | |
} | |
if (c >= 0130 && c <= 0132) { | |
ins->oprs[c-0130].offset = *data++; | |
ins->oprs[c-0130].offset |= (((unsigned) *data++) << 8); | |
} | |
if (c >= 0140 && c <= 0142) { | |
ins->oprs[c-0140].offset = *data++; | |
ins->oprs[c-0140].offset |= (((unsigned) *data++) << 8); | |
ins->oprs[c-0140].offset |= (((long) *data++) << 16); | |
ins->oprs[c-0140].offset |= (((long) *data++) << 24); | |
} | |
if (c >= 0200 && c <= 0277) { | |
int modrm = *data++; | |
if (((modrm >> 3) & 07) != (c & 07)) | |
return FALSE; /* spare field doesn't match up */ | |
data = do_ea (data, modrm, asize, segsize, | |
&ins->oprs[(c >> 3) & 07]); | |
} | |
if (c >= 0300 && c <= 0302) { | |
if (asize) | |
ins->oprs[c-0300].segment |= SEG_32BIT; | |
else | |
ins->oprs[c-0300].segment &= ~SEG_32BIT; | |
a_used = TRUE; | |
} | |
if (c == 0310) { | |
if (asize == 32) | |
return FALSE; | |
else | |
a_used = TRUE; | |
} | |
if (c == 0311) { | |
if (asize == 16) | |
return FALSE; | |
else | |
a_used = TRUE; | |
} | |
if (c == 0312) { | |
if (asize != segsize) | |
return FALSE; | |
else | |
a_used = TRUE; | |
} | |
if (c == 0320) { | |
if (osize == 32) | |
return FALSE; | |
else | |
o_used = TRUE; | |
} | |
if (c == 0321) { | |
if (osize == 16) | |
return FALSE; | |
else | |
o_used = TRUE; | |
} | |
if (c == 0322) { | |
if (osize != segsize) | |
return FALSE; | |
else | |
o_used = TRUE; | |
} | |
if (c == 0330) { | |
int t = *r++, d = *data++; | |
if (d < t || d > t+15) | |
return FALSE; | |
else | |
ins->condition = d - t; | |
} | |
if (c == 0331) { | |
if ( rep ) | |
return FALSE; | |
} | |
if (c == 0332) { | |
if (drep == P_REP) | |
drep = P_REPE; | |
} | |
if (c == 0333) { | |
if ( rep != 0xF3 ) | |
return FALSE; | |
drep = 0; | |
} | |
} | |
/* | |
* Check for unused rep or a/o prefixes. | |
*/ | |
ins->nprefix = 0; | |
if (drep) | |
ins->prefixes[ins->nprefix++] = drep; | |
if (!a_used && asize != segsize) | |
ins->prefixes[ins->nprefix++] = (asize == 16 ? P_A16 : P_A32); | |
if (!o_used && osize != segsize) | |
ins->prefixes[ins->nprefix++] = (osize == 16 ? P_O16 : P_O32); | |
return data - origdata; | |
} | |
long disasm (unsigned char *data, char *output, int segsize, long offset) | |
{ | |
struct itemplate **p, **best_p; | |
int length, best_length = 0; | |
const char *segover; | |
int rep, lock, asize, osize, i, slen, colon; | |
unsigned char *origdata; | |
int works; | |
insn tmp_ins = { NULL }, ins; | |
unsigned long goodness, best; | |
/* | |
* Scan for prefixes. | |
*/ | |
asize = osize = segsize; | |
segover = NULL; | |
ins.condition = ins.nprefix = rep = lock = 0; | |
origdata = data; | |
for (;;) { | |
if (*data == 0xF3 || *data == 0xF2) | |
rep = *data++; | |
else if (*data == 0xF0) | |
lock = *data++; | |
else if (*data == 0x2E || *data == 0x36 || *data == 0x3E || | |
*data == 0x26 || *data == 0x64 || *data == 0x65) { | |
switch (*data++) { | |
case 0x2E: segover = "cs"; break; | |
case 0x36: segover = "ss"; break; | |
case 0x3E: segover = "ds"; break; | |
case 0x26: segover = "es"; break; | |
case 0x64: segover = "fs"; break; | |
case 0x65: segover = "gs"; break; | |
} | |
} else if (*data == 0x66) | |
osize = 48 - segsize, data++; | |
else if (*data == 0x67) | |
asize = 48 - segsize, data++; | |
else | |
break; | |
} | |
tmp_ins.oprs[0].segment = tmp_ins.oprs[1].segment = | |
tmp_ins.oprs[2].segment = | |
tmp_ins.oprs[0].addr_size = tmp_ins.oprs[1].addr_size = | |
tmp_ins.oprs[2].addr_size = (segsize == 16 ? 0 : SEG_32BIT); | |
tmp_ins.condition = -1; | |
best = ~0UL; /* Worst possible */ | |
best_p = NULL; | |
for (p = itable[*data]; *p; p++) { | |
if ( (length = matches(*p, data, asize, osize, | |
segsize, rep, &tmp_ins)) ) { | |
works = TRUE; | |
/* | |
* Final check to make sure the types of r/m match up. | |
*/ | |
for (i = 0; i < (*p)->operands; i++) { | |
if ( | |
/* If it's a mem-only EA but we have a register, die. */ | |
((tmp_ins.oprs[i].segment & SEG_RMREG) && | |
!(MEMORY & ~(*p)->opd[i])) || | |
/* If it's a reg-only EA but we have a memory ref, die. */ | |
(!(tmp_ins.oprs[i].segment & SEG_RMREG) && | |
!(REGNORM & ~(*p)->opd[i]) && | |
!((*p)->opd[i] & REG_SMASK)) || | |
/* Register type mismatch (eg FS vs REG_DESS): die. */ | |
((((*p)->opd[i] & (REGISTER | FPUREG)) || | |
(tmp_ins.oprs[i].segment & SEG_RMREG)) && | |
!whichreg ((*p)->opd[i], tmp_ins.oprs[i].basereg))) { | |
works = FALSE; | |
break; | |
} | |
} | |
if (works) { | |
goodness = (*p)->flags & IF_PFMASK; | |
if ( goodness < best ) { | |
/* This is the best one found so far */ | |
best = goodness; | |
best_p = p; | |
best_length = length; | |
ins = tmp_ins; | |
} | |
} | |
} | |
} | |
if (!best_p) { /* no instruction was matched */ | |
sprintf(output, "db 0%02xh", data[0]); | |
return 1; | |
} | |
/* Pick the best match */ | |
p = best_p; | |
length = best_length; | |
slen = 0; | |
if (lock) | |
slen += sprintf(output+slen, "lock "); | |
for (i = 0; i < ins.nprefix; i++) | |
switch (ins.prefixes[i]) { | |
case P_REP: slen += sprintf(output+slen, "rep "); break; | |
case P_REPE: slen += sprintf(output+slen, "repe "); break; | |
case P_REPNE: slen += sprintf(output+slen, "repne "); break; | |
case P_A16: slen += sprintf(output+slen, "a16 "); break; | |
case P_A32: slen += sprintf(output+slen, "a32 "); break; | |
case P_O16: slen += sprintf(output+slen, "o16 "); break; | |
case P_O32: slen += sprintf(output+slen, "o32 "); break; | |
} | |
for (i = 0; i < (int)elements(ico); i++) | |
if ((*p)->opcode == ico[i]) { | |
slen += sprintf(output+slen, "%s%s", icn[i], | |
whichcond(ins.condition)); | |
break; | |
} | |
if (i >= (int)elements(ico)) | |
slen += sprintf(output+slen, "%s", insn_names[(*p)->opcode]); | |
colon = FALSE; | |
length += data - origdata; /* fix up for prefixes */ | |
for (i=0; i<(*p)->operands; i++) { | |
output[slen++] = (colon ? ':' : i==0 ? ' ' : ','); | |
if (ins.oprs[i].segment & SEG_RELATIVE) { | |
ins.oprs[i].offset += offset + length; | |
/* | |
* sort out wraparound | |
*/ | |
if (!(ins.oprs[i].segment & SEG_32BIT)) | |
ins.oprs[i].offset &= 0xFFFF; | |
} | |
if ((*p)->opd[i] & COLON) | |
colon = TRUE; | |
else | |
colon = FALSE; | |
if (((*p)->opd[i] & (REGISTER | FPUREG)) || | |
(ins.oprs[i].segment & SEG_RMREG)) | |
{ | |
ins.oprs[i].basereg = whichreg ((*p)->opd[i], | |
ins.oprs[i].basereg); | |
if ( (*p)->opd[i] & TO ) | |
slen += sprintf(output+slen, "to "); | |
slen += sprintf(output+slen, "%s", | |
reg_names[ins.oprs[i].basereg-EXPR_REG_START]); | |
} else if (!(UNITY & ~(*p)->opd[i])) { | |
output[slen++] = '1'; | |
} else if ( (*p)->opd[i] & IMMEDIATE ) { | |
if ( (*p)->opd[i] & BITS8 ) { | |
slen += sprintf(output+slen, "byte "); | |
if (ins.oprs[i].segment & SEG_SIGNED) { | |
if (ins.oprs[i].offset < 0) { | |
ins.oprs[i].offset *= -1; | |
output[slen++] = '-'; | |
} else | |
output[slen++] = '+'; | |
} | |
} else if ( (*p)->opd[i] & BITS16 ) { | |
slen += sprintf(output+slen, "word "); | |
} else if ( (*p)->opd[i] & BITS32 ) { | |
slen += sprintf(output+slen, "dword "); | |
} else if ( (*p)->opd[i] & NEAR ) { | |
slen += sprintf(output+slen, "near "); | |
} else if ( (*p)->opd[i] & SHORT ) { | |
slen += sprintf(output+slen, "short "); | |
} | |
slen += sprintf(output+slen, "0x%lx", ins.oprs[i].offset); | |
} else if ( !(MEM_OFFS & ~(*p)->opd[i]) ) { | |
slen += sprintf(output+slen, "[%s%s%s0x%lx]", | |
(segover ? segover : ""), | |
(segover ? ":" : ""), | |
(ins.oprs[i].addr_size == 32 ? "dword " : | |
ins.oprs[i].addr_size == 16 ? "word " : ""), | |
ins.oprs[i].offset); | |
segover = NULL; | |
} else if ( !(REGMEM & ~(*p)->opd[i]) ) { | |
int started = FALSE; | |
if ( (*p)->opd[i] & BITS8 ) | |
slen += sprintf(output+slen, "byte "); | |
if ( (*p)->opd[i] & BITS16 ) | |
slen += sprintf(output+slen, "word "); | |
if ( (*p)->opd[i] & BITS32 ) | |
slen += sprintf(output+slen, "dword "); | |
if ( (*p)->opd[i] & BITS64 ) | |
slen += sprintf(output+slen, "qword "); | |
if ( (*p)->opd[i] & BITS80 ) | |
slen += sprintf(output+slen, "tword "); | |
if ( (*p)->opd[i] & FAR ) | |
slen += sprintf(output+slen, "far "); | |
if ( (*p)->opd[i] & NEAR ) | |
slen += sprintf(output+slen, "near "); | |
output[slen++] = '['; | |
if (ins.oprs[i].addr_size) | |
slen += sprintf(output+slen, "%s", | |
(ins.oprs[i].addr_size == 32 ? "dword " : | |
ins.oprs[i].addr_size == 16 ? "word " : "")); | |
if (segover) { | |
slen += sprintf(output+slen, "%s:", segover); | |
segover = NULL; | |
} | |
if (ins.oprs[i].basereg != -1) { | |
slen += sprintf(output+slen, "%s", | |
reg_names[(ins.oprs[i].basereg - | |
EXPR_REG_START)]); | |
started = TRUE; | |
} | |
if (ins.oprs[i].indexreg != -1) { | |
if (started) | |
output[slen++] = '+'; | |
slen += sprintf(output+slen, "%s", | |
reg_names[(ins.oprs[i].indexreg - | |
EXPR_REG_START)]); | |
if (ins.oprs[i].scale > 1) | |
slen += sprintf(output+slen, "*%d", ins.oprs[i].scale); | |
started = TRUE; | |
} | |
if (ins.oprs[i].segment & SEG_DISP8) { | |
int sign = '+'; | |
if (ins.oprs[i].offset & 0x80) { | |
ins.oprs[i].offset = - (signed char) ins.oprs[i].offset; | |
sign = '-'; | |
} | |
slen += sprintf(output+slen, "%c0x%lx", sign, | |
ins.oprs[i].offset); | |
} else if (ins.oprs[i].segment & SEG_DISP16) { | |
if (started) | |
output[slen++] = '+'; | |
slen += sprintf(output+slen, "0x%lx", ins.oprs[i].offset); | |
} else if (ins.oprs[i].segment & SEG_DISP32) { | |
if (started) | |
output[slen++] = '+'; | |
slen += sprintf(output+slen, "0x%lx", ins.oprs[i].offset); | |
} | |
output[slen++] = ']'; | |
} else { | |
slen += sprintf(output+slen, "<operand%d>", i); | |
} | |
} | |
output[slen] = '\0'; | |
if (segover) { /* unused segment override */ | |
char *p = output; | |
int count = slen+1; | |
while (count--) | |
p[count+3] = p[count]; | |
strncpy (output, segover, 2); | |
output[2] = ' '; | |
} | |
return length; | |
} |