Blame - src/windows/mini_disassembler.cc - RealtimeRoboticsGroup/test

blob: 0c620047cec1d77a15ae81c7dec72c2913bc5499 [file] [log] [blame]

Austin Schuh	745610d	2015-09-06 18:19:50 -0700	[diff] [blame^]	1	// -- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil --
				2	/* Copyright (c) 2007, Google Inc.
				3	* All rights reserved.
				4	*
				5	* Redistribution and use in source and binary forms, with or without
				6	* modification, are permitted provided that the following conditions are
				7	* met:
				8	*
				9	* * Redistributions of source code must retain the above copyright
				10	* notice, this list of conditions and the following disclaimer.
				11	* * Redistributions in binary form must reproduce the above
				12	* copyright notice, this list of conditions and the following disclaimer
				13	* in the documentation and/or other materials provided with the
				14	* distribution.
				15	* * Neither the name of Google Inc. nor the names of its
				16	* contributors may be used to endorse or promote products derived from
				17	* this software without specific prior written permission.
				18	*
				19	* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
				20	* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
				21	* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
				22	* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
				23	* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
				24	* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
				25	* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
				26	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
				27	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
				28	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
				29	* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
				30	*
				31	* ---
				32	* Author: Joi Sigurdsson
				33	*
				34	* Implementation of MiniDisassembler.
				35	*/
				36
				37	#include "mini_disassembler.h"
				38
				39	namespace sidestep {
				40
				41	MiniDisassembler::MiniDisassembler(bool operand_default_is_32_bits,
				42	bool address_default_is_32_bits)
				43	: operand_default_is_32_bits_(operand_default_is_32_bits),
				44	address_default_is_32_bits_(address_default_is_32_bits) {
				45	Initialize();
				46	}
				47
				48	MiniDisassembler::MiniDisassembler()
				49	: operand_default_is_32_bits_(true),
				50	address_default_is_32_bits_(true) {
				51	Initialize();
				52	}
				53
				54	InstructionType MiniDisassembler::Disassemble(
				55	unsigned char* start_byte,
				56	unsigned int& instruction_bytes) {
				57	// Clean up any state from previous invocations.
				58	Initialize();
				59
				60	// Start by processing any prefixes.
				61	unsigned char* current_byte = start_byte;
				62	unsigned int size = 0;
				63	InstructionType instruction_type = ProcessPrefixes(current_byte, size);
				64
				65	if (IT_UNKNOWN == instruction_type)
				66	return instruction_type;
				67
				68	current_byte += size;
				69	size = 0;
				70
				71	// Invariant: We have stripped all prefixes, and the operand_is_32_bits_
				72	// and address_is_32_bits_ flags are correctly set.
				73
				74	instruction_type = ProcessOpcode(current_byte, 0, size);
				75
				76	// Check for error processing instruction
				77	if ((IT_UNKNOWN == instruction_type_) \|\| (IT_UNUSED == instruction_type_)) {
				78	return IT_UNKNOWN;
				79	}
				80
				81	current_byte += size;
				82
				83	// Invariant: operand_bytes_ indicates the total size of operands
				84	// specified by the opcode and/or ModR/M byte and/or SIB byte.
				85	// pCurrentByte points to the first byte after the ModR/M byte, or after
				86	// the SIB byte if it is present (i.e. the first byte of any operands
				87	// encoded in the instruction).
				88
				89	// We get the total length of any prefixes, the opcode, and the ModR/M and
				90	// SIB bytes if present, by taking the difference of the original starting
				91	// address and the current byte (which points to the first byte of the
				92	// operands if present, or to the first byte of the next instruction if
				93	// they are not). Adding the count of bytes in the operands encoded in
				94	// the instruction gives us the full length of the instruction in bytes.
				95	instruction_bytes += operand_bytes_ + (current_byte - start_byte);
				96
				97	// Return the instruction type, which was set by ProcessOpcode().
				98	return instruction_type_;
				99	}
				100
				101	void MiniDisassembler::Initialize() {
				102	operand_is_32_bits_ = operand_default_is_32_bits_;
				103	address_is_32_bits_ = address_default_is_32_bits_;
				104	#ifdef _M_X64
				105	operand_default_support_64_bits_ = true;
				106	#else
				107	operand_default_support_64_bits_ = false;
				108	#endif
				109	operand_is_64_bits_ = false;
				110	operand_bytes_ = 0;
				111	have_modrm_ = false;
				112	should_decode_modrm_ = false;
				113	instruction_type_ = IT_UNKNOWN;
				114	got_f2_prefix_ = false;
				115	got_f3_prefix_ = false;
				116	got_66_prefix_ = false;
				117	}
				118
				119	InstructionType MiniDisassembler::ProcessPrefixes(unsigned char* start_byte,
				120	unsigned int& size) {
				121	InstructionType instruction_type = IT_GENERIC;
				122	const Opcode& opcode = s_ia32_opcode_map_[0].table_[*start_byte];
				123
				124	switch (opcode.type_) {
				125	case IT_PREFIX_ADDRESS:
				126	address_is_32_bits_ = !address_default_is_32_bits_;
				127	goto nochangeoperand;
				128	case IT_PREFIX_OPERAND:
				129	operand_is_32_bits_ = !operand_default_is_32_bits_;
				130	nochangeoperand:
				131	case IT_PREFIX:
				132
				133	if (0xF2 == (*start_byte))
				134	got_f2_prefix_ = true;
				135	else if (0xF3 == (*start_byte))
				136	got_f3_prefix_ = true;
				137	else if (0x66 == (*start_byte))
				138	got_66_prefix_ = true;
				139	else if (operand_default_support_64_bits_ && (*start_byte) & 0x48)
				140	operand_is_64_bits_ = true;
				141
				142	instruction_type = opcode.type_;
				143	size ++;
				144	// we got a prefix, so add one and check next byte
				145	ProcessPrefixes(start_byte + 1, size);
				146	default:
				147	break; // not a prefix byte
				148	}
				149
				150	return instruction_type;
				151	}
				152
				153	InstructionType MiniDisassembler::ProcessOpcode(unsigned char* start_byte,
				154	unsigned int table_index,
				155	unsigned int& size) {
				156	const OpcodeTable& table = s_ia32_opcode_map_[table_index]; // Get our table
				157	unsigned char current_byte = (*start_byte) >> table.shift_;
				158	current_byte = current_byte & table.mask_; // Mask out the bits we will use
				159
				160	// Check whether the byte we have is inside the table we have.
				161	if (current_byte < table.min_lim_ \|\| current_byte > table.max_lim_) {
				162	instruction_type_ = IT_UNKNOWN;
				163	return instruction_type_;
				164	}
				165
				166	const Opcode& opcode = table.table_[current_byte];
				167	if (IT_UNUSED == opcode.type_) {
				168	// This instruction is not used by the IA-32 ISA, so we indicate
				169	// this to the user. Probably means that we were pointed to
				170	// a byte in memory that was not the start of an instruction.
				171	instruction_type_ = IT_UNUSED;
				172	return instruction_type_;
				173	} else if (IT_REFERENCE == opcode.type_) {
				174	// We are looking at an opcode that has more bytes (or is continued
				175	// in the ModR/M byte). Recursively find the opcode definition in
				176	// the table for the opcode's next byte.
				177	size++;
				178	ProcessOpcode(start_byte + 1, opcode.table_index_, size);
				179	return instruction_type_;
				180	}
				181
				182	const SpecificOpcode* specific_opcode = (SpecificOpcode*)&opcode;
				183	if (opcode.is_prefix_dependent_) {
				184	if (got_f2_prefix_ && opcode.opcode_if_f2_prefix_.mnemonic_ != 0) {
				185	specific_opcode = &opcode.opcode_if_f2_prefix_;
				186	} else if (got_f3_prefix_ && opcode.opcode_if_f3_prefix_.mnemonic_ != 0) {
				187	specific_opcode = &opcode.opcode_if_f3_prefix_;
				188	} else if (got_66_prefix_ && opcode.opcode_if_66_prefix_.mnemonic_ != 0) {
				189	specific_opcode = &opcode.opcode_if_66_prefix_;
				190	}
				191	}
				192
				193	// Inv: The opcode type is known.
				194	instruction_type_ = specific_opcode->type_;
				195
				196	// Let's process the operand types to see if we have any immediate
				197	// operands, and/or a ModR/M byte.
				198
				199	ProcessOperand(specific_opcode->flag_dest_);
				200	ProcessOperand(specific_opcode->flag_source_);
				201	ProcessOperand(specific_opcode->flag_aux_);
				202
				203	// Inv: We have processed the opcode and incremented operand_bytes_
				204	// by the number of bytes of any operands specified by the opcode
				205	// that are stored in the instruction (not registers etc.). Now
				206	// we need to return the total number of bytes for the opcode and
				207	// for the ModR/M or SIB bytes if they are present.
				208
				209	if (table.mask_ != 0xff) {
				210	if (have_modrm_) {
				211	// we're looking at a ModR/M byte so we're not going to
				212	// count that into the opcode size
				213	ProcessModrm(start_byte, size);
				214	return IT_GENERIC;
				215	} else {
				216	// need to count the ModR/M byte even if it's just being
				217	// used for opcode extension
				218	size++;
				219	return IT_GENERIC;
				220	}
				221	} else {
				222	if (have_modrm_) {
				223	// The ModR/M byte is the next byte.
				224	size++;
				225	ProcessModrm(start_byte + 1, size);
				226	return IT_GENERIC;
				227	} else {
				228	size++;
				229	return IT_GENERIC;
				230	}
				231	}
				232	}
				233
				234	bool MiniDisassembler::ProcessOperand(int flag_operand) {
				235	bool succeeded = true;
				236	if (AM_NOT_USED == flag_operand)
				237	return succeeded;
				238
				239	// Decide what to do based on the addressing mode.
				240	switch (flag_operand & AM_MASK) {
				241	// No ModR/M byte indicated by these addressing modes, and no
				242	// additional (e.g. immediate) parameters.
				243	case AM_A: // Direct address
				244	case AM_F: // EFLAGS register
				245	case AM_X: // Memory addressed by the DS:SI register pair
				246	case AM_Y: // Memory addressed by the ES:DI register pair
				247	case AM_IMPLICIT: // Parameter is implicit, occupies no space in
				248	// instruction
				249	break;
				250
				251	// There is a ModR/M byte but it does not necessarily need
				252	// to be decoded.
				253	case AM_C: // reg field of ModR/M selects a control register
				254	case AM_D: // reg field of ModR/M selects a debug register
				255	case AM_G: // reg field of ModR/M selects a general register
				256	case AM_P: // reg field of ModR/M selects an MMX register
				257	case AM_R: // mod field of ModR/M may refer only to a general register
				258	case AM_S: // reg field of ModR/M selects a segment register
				259	case AM_T: // reg field of ModR/M selects a test register
				260	case AM_V: // reg field of ModR/M selects a 128-bit XMM register
				261	have_modrm_ = true;
				262	break;
				263
				264	// In these addressing modes, there is a ModR/M byte and it needs to be
				265	// decoded. No other (e.g. immediate) params than indicated in ModR/M.
				266	case AM_E: // Operand is either a general-purpose register or memory,
				267	// specified by ModR/M byte
				268	case AM_M: // ModR/M byte will refer only to memory
				269	case AM_Q: // Operand is either an MMX register or memory (complex
				270	// evaluation), specified by ModR/M byte
				271	case AM_W: // Operand is either a 128-bit XMM register or memory (complex
				272	// eval), specified by ModR/M byte
				273	have_modrm_ = true;
				274	should_decode_modrm_ = true;
				275	break;
				276
				277	// These addressing modes specify an immediate or an offset value
				278	// directly, so we need to look at the operand type to see how many
				279	// bytes.
				280	case AM_I: // Immediate data.
				281	case AM_J: // Jump to offset.
				282	case AM_O: // Operand is at offset.
				283	switch (flag_operand & OT_MASK) {
				284	case OT_B: // Byte regardless of operand-size attribute.
				285	operand_bytes_ += OS_BYTE;
				286	break;
				287	case OT_C: // Byte or word, depending on operand-size attribute.
				288	if (operand_is_32_bits_)
				289	operand_bytes_ += OS_WORD;
				290	else
				291	operand_bytes_ += OS_BYTE;
				292	break;
				293	case OT_D: // Doubleword, regardless of operand-size attribute.
				294	operand_bytes_ += OS_DOUBLE_WORD;
				295	break;
				296	case OT_DQ: // Double-quadword, regardless of operand-size attribute.
				297	operand_bytes_ += OS_DOUBLE_QUAD_WORD;
				298	break;
				299	case OT_P: // 32-bit or 48-bit pointer, depending on operand-size
				300	// attribute.
				301	if (operand_is_32_bits_)
				302	operand_bytes_ += OS_48_BIT_POINTER;
				303	else
				304	operand_bytes_ += OS_32_BIT_POINTER;
				305	break;
				306	case OT_PS: // 128-bit packed single-precision floating-point data.
				307	operand_bytes_ += OS_128_BIT_PACKED_SINGLE_PRECISION_FLOATING;
				308	break;
				309	case OT_Q: // Quadword, regardless of operand-size attribute.
				310	operand_bytes_ += OS_QUAD_WORD;
				311	break;
				312	case OT_S: // 6-byte pseudo-descriptor.
				313	operand_bytes_ += OS_PSEUDO_DESCRIPTOR;
				314	break;
				315	case OT_SD: // Scalar Double-Precision Floating-Point Value
				316	case OT_PD: // Unaligned packed double-precision floating point value
				317	operand_bytes_ += OS_DOUBLE_PRECISION_FLOATING;
				318	break;
				319	case OT_SS:
				320	// Scalar element of a 128-bit packed single-precision
				321	// floating data.
				322	// We simply return enItUnknown since we don't have to support
				323	// floating point
				324	succeeded = false;
				325	break;
				326	case OT_V: // Word, doubleword or quadword, depending on operand-size
				327	// attribute.
				328	if (operand_is_64_bits_ && flag_operand & AM_I &&
				329	flag_operand & IOS_64)
				330	operand_bytes_ += OS_QUAD_WORD;
				331	else if (operand_is_32_bits_)
				332	operand_bytes_ += OS_DOUBLE_WORD;
				333	else
				334	operand_bytes_ += OS_WORD;
				335	break;
				336	case OT_W: // Word, regardless of operand-size attribute.
				337	operand_bytes_ += OS_WORD;
				338	break;
				339
				340	// Can safely ignore these.
				341	case OT_A: // Two one-word operands in memory or two double-word
				342	// operands in memory
				343	case OT_PI: // Quadword MMX technology register (e.g. mm0)
				344	case OT_SI: // Doubleword integer register (e.g., eax)
				345	break;
				346
				347	default:
				348	break;
				349	}
				350	break;
				351
				352	default:
				353	break;
				354	}
				355
				356	return succeeded;
				357	}
				358
				359	bool MiniDisassembler::ProcessModrm(unsigned char* start_byte,
				360	unsigned int& size) {
				361	// If we don't need to decode, we just return the size of the ModR/M
				362	// byte (there is never a SIB byte in this case).
				363	if (!should_decode_modrm_) {
				364	size++;
				365	return true;
				366	}
				367
				368	// We never care about the reg field, only the combination of the mod
				369	// and r/m fields, so let's start by packing those fields together into
				370	// 5 bits.
				371	unsigned char modrm = (*start_byte);
				372	unsigned char mod = modrm & 0xC0; // mask out top two bits to get mod field
				373	modrm = modrm & 0x07; // mask out bottom 3 bits to get r/m field
				374	mod = mod >> 3; // shift the mod field to the right place
				375	modrm = mod \| modrm; // combine the r/m and mod fields as discussed
				376	mod = mod >> 3; // shift the mod field to bits 2..0
				377
				378	// Invariant: modrm contains the mod field in bits 4..3 and the r/m field
				379	// in bits 2..0, and mod contains the mod field in bits 2..0
				380
				381	const ModrmEntry* modrm_entry = 0;
				382	if (address_is_32_bits_)
				383	modrm_entry = &s_ia32_modrm_map_[modrm];
				384	else
				385	modrm_entry = &s_ia16_modrm_map_[modrm];
				386
				387	// Invariant: modrm_entry points to information that we need to decode
				388	// the ModR/M byte.
				389
				390	// Add to the count of operand bytes, if the ModR/M byte indicates
				391	// that some operands are encoded in the instruction.
				392	if (modrm_entry->is_encoded_in_instruction_)
				393	operand_bytes_ += modrm_entry->operand_size_;
				394
				395	// Process the SIB byte if necessary, and return the count
				396	// of ModR/M and SIB bytes.
				397	if (modrm_entry->use_sib_byte_) {
				398	size++;
				399	return ProcessSib(start_byte + 1, mod, size);
				400	} else {
				401	size++;
				402	return true;
				403	}
				404	}
				405
				406	bool MiniDisassembler::ProcessSib(unsigned char* start_byte,
				407	unsigned char mod,
				408	unsigned int& size) {
				409	// get the mod field from the 2..0 bits of the SIB byte
				410	unsigned char sib_base = (*start_byte) & 0x07;
				411	if (0x05 == sib_base) {
				412	switch (mod) {
				413	case 0x00: // mod == 00
				414	case 0x02: // mod == 10
				415	operand_bytes_ += OS_DOUBLE_WORD;
				416	break;
				417	case 0x01: // mod == 01
				418	operand_bytes_ += OS_BYTE;
				419	break;
				420	case 0x03: // mod == 11
				421	// According to the IA-32 docs, there does not seem to be a disp
				422	// value for this value of mod
				423	default:
				424	break;
				425	}
				426	}
				427
				428	size++;
				429	return true;
				430	}
				431
				432	}; // namespace sidestep