x86_disasm(3)
disassemble a
Description
x86_disasm
NAME
x86_disasm, x86_disasm_forward, x86_disasm_range - disassemble a bytestream to x86 assembly language instructions
SYNOPSIS
#include <libdis.h>
typedef void (*DISASM_CALLBACK)( x86_insn_t *, void * );
typedef long (*DISASM_RESOLVER)( x86_op_t *, x86_insn_t *, void * );
int x86_disasm( unsigned char *buf, unsigned int buf_len,
                unsigned long buf_rva, unsigned int offset,
                x86_insn_t * insn );
int x86_disasm_range( unsigned char *buf, unsigned long buf_rva,
                      unsigned int offset, unsigned int len,
                      DISASM_CALLBACK func, void *arg );
int x86_disasm_forward( unsigned char *buf, unsigned int buf_len,
                        unsigned long buf_rva, unsigned int offset,
                        DISASM_CALLBACK func, void *arg,
                        DISASM_RESOLVER resolver, void *r_arg );
DESCRIPTION
#define MAX_REGNAME 8
#define MAX_PREFIX_STR 32
#define MAX_MNEM_STR 16
#define MAX_INSN_SIZE 20 /* same as in i386.h */
#define MAX_OP_STRING 32 /* max possible operand size in string form */
#define MAX_OP_RAW_STRING 64 /* max possible operand size in raw form */
#define MAX_OP_XML_STRING 256 /* max possible operand size in xml form */
#define MAX_NUM_OPERANDS 8 /* max # implicit and explicit operands */
#define MAX_INSN_STRING 512 /* 2 * 8 * MAX_OP_STRING */
#define MAX_INSN_RAW_STRING 1024 /* 2 * 8 * MAX_OP_RAW_STRING */
#define MAX_INSN_XML_STRING 4096 /* 2 * 8 * MAX_OP_XML_STRING */
enum x86_reg_type { /* NOTE: these may be ORed together */
    reg_gen = 0x00001, /* general purpose */
    reg_in = 0x00002, /* incoming args, ala RISC */
    reg_out = 0x00004, /* args to calls, ala RISC */
    reg_local = 0x00008, /* local vars, ala RISC */
    reg_fpu = 0x00010, /* FPU data register */
    reg_seg = 0x00020, /* segment register */
    reg_simd = 0x00040, /* SIMD/MMX reg */
    reg_sys = 0x00080, /* restricted/system register */
    reg_sp = 0x00100, /* stack pointer */
    reg_fp = 0x00200, /* frame pointer */
    reg_pc = 0x00400, /* program counter */
    reg_retaddr = 0x00800, /* return addr for func */
    reg_cond = 0x01000, /* condition code / flags */
    reg_zero = 0x02000, /* zero register, ala RISC */
    reg_ret = 0x04000, /* return value */
    reg_src = 0x10000, /* array/rep source */
    reg_dest = 0x20000, /* array/rep destination */
    reg_count = 0x40000 /* array/rep/loop counter */
};
typedef struct {
    char name[MAX_REGNAME];
    enum x86_reg_type type; /* what register is used for */
    unsigned int size; /* size of register in bytes */
    unsigned int id; /* register ID #, for quick compares */
    unsigned int alias; /* ID of reg this is an alias of */
    unsigned int shift; /* amount to shift aliased reg by */
} x86_reg_t;
typedef struct {
    unsigned int scale; /* scale factor */
    x86_reg_t index, base; /* index, base registers */
    long disp; /* displacement */
    char disp_sign; /* is negative? 1/0 */
    char disp_size; /* 0, 1, 2, 4 */
} x86_ea_t;
enum x86_op_type { /* mutually exclusive */
    op_unused = 0, /* empty/unused operand */
    op_register = 1, /* CPU register */
    op_immediate = 2, /* Immediate Value */
    op_relative_near = 3, /* Relative offset from IP */
    op_relative_far = 4,
    op_absolute = 5, /* Absolute address (ptr16:32) */
    op_expression = 6, /* Address expression (scale/index/base/disp) */
    op_offset = 7, /* Offset from start of segment (m32) */
    op_unknown
};
enum x86_op_datatype { /* these use Intel's lame terminology */
    op_byte = 1, /* 1 byte integer */
    op_word = 2, /* 2 byte integer */
    op_dword = 3, /* 4 byte integer */
    op_qword = 4, /* 8 byte integer */
    op_dqword = 5, /* 16 byte integer */
    op_sreal = 6, /* 4 byte real (single real) */
    op_dreal = 7, /* 8 byte real (double real) */
    op_extreal = 8, /* 10 byte real (extended real) */
    op_bcd = 9, /* 10 byte binary-coded decimal */
    op_simd = 10, /* 16 byte packed (SIMD, MMX) */
    op_ssimd = 10, /* 16 byte : 4 packed single FP (SIMD, MMX) */
    op_dsimd = 11, /* 16 byte : 2 packed double FP (SIMD, MMX) */
    op_sssimd = 12, /* 4 byte : scalar single FP (SIMD, MMX) */
    op_sdsimd = 13, /* 8 byte : scalar double FP (SIMD, MMX) */
    op_descr32 = 14, /* 6 byte Intel descriptor 2:4 */
    op_descr16 = 15, /* 4 byte Intel descriptor 2:2 */
    op_pdescr32 = 16, /* 6 byte Intel pseudo-descriptor 32:16 */
    op_pdescr16 = 17, /* 6 byte Intel pseudo-descriptor 8:24:16 */
    op_fpuenv = 11 /* 28 byte FPU control/environment data */
};
enum x86_op_access { /* ORed together */
    op_read = 1,
    op_write = 2,
    op_execute = 4
};
enum x86_op_flags { /* ORed together, but segs are mutually exclusive */
    op_signed = 1, /* signed integer */
    op_string = 2, /* possible string or array */
    op_constant = 4, /* symbolic constant */
    op_pointer = 8, /* operand points to a memory address */
    op_sysref = 0x010, /* operand is a syscall number */
    op_implied = 0x020, /* operand is implicit in insn */
    op_hardcode = 0x040, /* operand is hardcoded in insn */
    op_es_seg = 0x100, /* ES segment override */
    op_cs_seg = 0x200, /* CS segment override */
    op_ss_seg = 0x300, /* SS segment override */
    op_ds_seg = 0x400, /* DS segment override */
    op_fs_seg = 0x500, /* FS segment override */
    op_gs_seg = 0x600 /* GS segment override */
};
typedef struct {
    enum x86_op_type type; /* operand type */
    enum x86_op_datatype datatype; /* operand size */
    enum x86_op_access access; /* operand access [RWX] */
    enum x86_op_flags flags; /* misc flags */
    union {
        /* immediate values */
        char sbyte;
        short sword;
        long sdword;
        qword sqword;
        unsigned char byte;
        unsigned short word;
        unsigned long dword;
        qword qword;
        float sreal;
        double dreal;
        /* misc large/non-native types */
        unsigned char extreal[10];
        unsigned char bcd[10];
        qword dqword[2];
        unsigned char simd[16];
        unsigned char fpuenv[28];
        /* absolute address */
        void * address;
        /* offset from segment */
        unsigned long offset;
        /* ID of CPU register */
        x86_reg_t reg;
        /* offsets from current insn */
        char relative_near;
        long relative_far;
        /* effective address [expression] */
        x86_ea_t expression;
    } data;
    void * insn;
} x86_op_t;
typedef struct x86_operand_list {
    x86_op_t op;
    struct x86_operand_list *next;
} x86_oplist_t;
enum x86_insn_group {
    insn_none = 0,
    insn_controlflow = 1,
    insn_arithmetic = 2,
    insn_logic = 3,
    insn_stack = 4,
    insn_comparison = 5,
    insn_move = 6,
    insn_string = 7,
    insn_bit_manip = 8,
    insn_flag_manip = 9,
    insn_fpu = 10,
    insn_interrupt = 13,
    insn_system = 14,
    insn_other = 15
};
enum x86_insn_type {
    insn_invalid = 0,
    /* insn_controlflow */
    insn_jmp = 0x1001,
    insn_jcc = 0x1002,
    insn_call = 0x1003,
    insn_callcc = 0x1004,
    insn_return = 0x1005,
    insn_loop = 0x1006,
    /* insn_arithmetic */
    insn_add = 0x2001,
    insn_sub = 0x2002,
    insn_mul = 0x2003,
    insn_div = 0x2004,
    insn_inc = 0x2005,
    insn_dec = 0x2006,
    insn_shl = 0x2007,
    insn_shr = 0x2008,
    insn_rol = 0x2009,
    insn_ror = 0x200A,
    /* insn_logic */
    insn_and = 0x3001,
    insn_or = 0x3002,
    insn_xor = 0x3003,
    insn_not = 0x3004,
    insn_neg = 0x3005,
    /* insn_stack */
    insn_push = 0x4001,
    insn_pop = 0x4002,
    insn_pushregs = 0x4003,
    insn_popregs = 0x4004,
    insn_pushflags = 0x4005,
    insn_popflags = 0x4006,
    insn_enter = 0x4007,
    insn_leave = 0x4008,
    /* insn_comparison */
    insn_test = 0x5001,
    insn_cmp = 0x5002,
    /* insn_move */
    insn_mov = 0x6001, /* move */
    insn_movcc = 0x6002, /* conditional move */
    insn_xchg = 0x6003, /* exchange */
    insn_xchgcc = 0x6004, /* conditional exchange */
    /* insn_string */
    insn_strcmp = 0x7001,
    insn_strload = 0x7002,
    insn_strmov = 0x7003,
    insn_strstore = 0x7004,
    insn_translate = 0x7005, /* xlat */
    /* insn_bit_manip */
    insn_bittest = 0x8001,
    insn_bitset = 0x8002,
    insn_bitclear = 0x8003,
    /* insn_flag_manip */
    insn_clear_carry = 0x9001,
    insn_clear_zero = 0x9002,
    insn_clear_oflow = 0x9003,
    insn_clear_dir = 0x9004,
    insn_clear_sign = 0x9005,
    insn_clear_parity = 0x9006,
    insn_set_carry = 0x9007,
    insn_set_zero = 0x9008,
    insn_set_oflow = 0x9009,
    insn_set_dir = 0x900A,
    insn_set_sign = 0x900B,
    insn_set_parity = 0x900C,
    insn_tog_carry = 0x9010,
    insn_tog_zero = 0x9020,
    insn_tog_oflow = 0x9030,
    insn_tog_dir = 0x9040,
    insn_tog_sign = 0x9050,
    insn_tog_parity = 0x9060,
    /* insn_fpu */
    insn_fmov = 0xA001,
    insn_fmovcc = 0xA002,
    insn_fneg = 0xA003,
    insn_fabs = 0xA004,
    insn_fadd = 0xA005,
    insn_fsub = 0xA006,
    insn_fmul = 0xA007,
    insn_fdiv = 0xA008,
    insn_fsqrt = 0xA009,
    insn_fcmp = 0xA00A,
    insn_fcos = 0xA00C,
    insn_fldpi = 0xA00D,
    insn_fldz = 0xA00E,
    insn_ftan = 0xA00F,
    insn_fsine = 0xA010,
    insn_fsys = 0xA020,
    /* insn_interrupt */
    insn_int = 0xD001,
    insn_intcc = 0xD002, /* not present in x86 ISA */
    insn_iret = 0xD003,
    insn_bound = 0xD004,
    insn_debug = 0xD005,
    insn_trace = 0xD006,
    insn_invalid_op = 0xD007,
    insn_oflow = 0xD008,
    /* insn_system */
    insn_halt = 0xE001,
    insn_in = 0xE002, /* input from port/bus */
    insn_out = 0xE003, /* output to port/bus */
    insn_cpuid = 0xE004,
    /* insn_other */
    insn_nop = 0xF001,
    insn_bcdconv = 0xF002, /* convert to or from BCD */
    insn_szconv = 0xF003 /* change size of operand */
};
enum x86_insn_note {
    insn_note_ring0 = 1, /* Only available in ring 0 */
    insn_note_smm = 2, /* "" in System Management Mode */
    insn_note_serial = 4 /* Serializing instruction */
};
enum x86_flag_status {
    insn_carry_set = 0x1,
    insn_zero_set = 0x2,
    insn_oflow_set = 0x4,
    insn_dir_set = 0x8,
    insn_sign_set = 0x10,
    insn_parity_set = 0x20,
    insn_carry_or_zero_set = 0x40,
    insn_zero_set_or_sign_ne_oflow = 0x80,
    insn_carry_clear = 0x100,
    insn_zero_clear = 0x200,
    insn_oflow_clear = 0x400,
    insn_dir_clear = 0x800,
    insn_sign_clear = 0x1000,
    insn_parity_clear = 0x2000,
    insn_sign_eq_oflow = 0x4000,
    insn_sign_ne_oflow = 0x8000
};
enum x86_insn_cpu {
    cpu_8086 = 1, /* Intel */
    cpu_80286 = 2,
    cpu_80386 = 3,
    cpu_80387 = 4,
    cpu_80486 = 5,
    cpu_pentium = 6,
    cpu_pentiumpro = 7,
    cpu_pentium2 = 8,
    cpu_pentium3 = 9,
    cpu_pentium4 = 10,
    cpu_k6 = 16, /* AMD */
    cpu_k7 = 32,
    cpu_athlon = 48
};
enum x86_insn_isa {
    isa_gp = 1, /* general purpose */
    isa_fp = 2, /* floating point */
    isa_fpumgt = 3, /* FPU/SIMD management */
    isa_mmx = 4, /* Intel MMX */
    isa_sse1 = 5, /* Intel SSE SIMD */
    isa_sse2 = 6, /* Intel SSE2 SIMD */
    isa_sse3 = 7, /* Intel SSE3 SIMD */
    isa_3dnow = 8, /* AMD 3DNow! SIMD */
    isa_sys = 9 /* system instructions */
};
enum x86_insn_prefix {
    insn_no_prefix = 0,
    insn_rep_zero = 1,
    insn_rep_notzero = 2,
    insn_lock = 4,
    insn_delay = 8
};
typedef struct {
    /* information about the instruction */
    unsigned long addr; /* load address */
    unsigned long offset; /* offset into file/buffer */
    enum x86_insn_group group; /* meta-type, e.g. INSN_EXEC */
    enum x86_insn_type type; /* type, e.g. INSN_BRANCH */
    unsigned char bytes[MAX_INSN_SIZE];
    unsigned char size; /* size of insn in bytes */
    enum x86_insn_prefix prefix;
    enum x86_flag_status flags_set; /* flags set or tested by insn */
    enum x86_flag_status flags_tested;
    /* the instruction proper */
    char prefix_string[32]; /* prefixes [might be truncated] */
    char mnemonic[8];
    x86_op_t operands[3];
    /* convenience fields for user */
    void *block; /* code block containing this insn */
    void *function; /* function containing this insn */
    void *tag; /* tag the insn as seen/processed */
} x86_insn_t;
#define X86_WILDCARD_BYTE 0xF4
typedef struct {
enum x86_op_type type; /* operand type */
enum x86_op_datatype datatype; /* operand size */
enum x86_op_access access; /* operand access [RWX] */
enum x86_op_flags flags; /* misc flags */
} x86_invariant_op_t;
typedef struct {
    unsigned char bytes[64]; /* invariant representation */
    unsigned int size; /* number of bytes in insn */
    enum x86_insn_group group; /* meta-type, e.g. INSN_EXEC */
    enum x86_insn_type type; /* type, e.g. INSN_BRANCH */
    x86_invariant_op_t operands[3]; /* use same ordering as x86_insn_t */
} x86_invariant_t;
EXAMPLES
The following sample callback outputs instructions in raw syntax:
void raw_print( x86_insn_t *insn, void *arg ) {
    char line[1024];
    x86_format_insn(insn, line, 1024, raw_syntax);
    printf( "%s\n", line);
}
The following sample resolver performs very limited checking on the operand of a jump or call to determine what program address the operand refers to:
long resolver( x86_op_t *op, x86_insn_t *insn, void *arg ) {
    long retval = -1;
    /* this is a flat ripoff of internal_resolver in libdis.c --
       we don't do any register or stack resolving, or check
       to see if we have already encountered this RVA */
    if ( op->type == op_absolute || op->type == op_offset ) {
        retval = op->data.sdword;
    } else if ( op->type == op_relative_near ||
                op->type == op_relative_far ) {
        if ( op->datatype == op_byte ) {
            retval = insn->addr + insn->size + op->data.sbyte;
        } else if ( op->datatype == op_word ) {
            retval = insn->addr + insn->size + op->data.sword;
        } else if ( op->datatype == op_dword ) {
            retval = insn->addr + insn->size + op->data.sdword;
        }
    }
    return( retval );
}
The following code snippets demonstrate how to use the various disassembly routines:
unsigned char *buf; /* buffer of bytes to disassemble */
unsigned int buf_len; /* length of buffer */
unsigned long buf_rva; /* load address of start of buffer */
unsigned int pos; /* position in buffer */
x86_insn_t insn; /* disassembled instruction */
/* disassemble entire buffer, printing automatically */
x86_disasm_range( buf, buf_rva, pos, buf_len,
                  raw_print, NULL );
/* disassemble a single instruction, then print it */
if (x86_disasm( buf, buf_len, buf_rva, pos, &insn ) ) {
    raw_print( &insn, NULL );
}
/* disassemble forward in 'buf' starting at 'pos' */
x86_disasm_forward( buf, buf_len, buf_rva, pos,
                    raw_print, NULL, resolver, NULL );
SEE ALSO
libdisasm(7), x86_format_insn(3), x86_init(3), x86dis(1)