blob: 35d7a9d5ffb1e8c9623c5e9ef2430586d348ffe7 [file] [log] [blame]
// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
/* Copyright (c) 2007, Google Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *     * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following disclaimer
 * in the documentation and/or other materials provided with the
 * distribution.
 *     * Neither the name of Google Inc. nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * ---
 * Author: Joi Sigurdsson
 *
 * Implementation of MiniDisassembler.
 */
36
37#include "mini_disassembler.h"
38
39namespace sidestep {
40
41MiniDisassembler::MiniDisassembler(bool operand_default_is_32_bits,
42 bool address_default_is_32_bits)
43 : operand_default_is_32_bits_(operand_default_is_32_bits),
44 address_default_is_32_bits_(address_default_is_32_bits) {
45 Initialize();
46}
47
48MiniDisassembler::MiniDisassembler()
49 : operand_default_is_32_bits_(true),
50 address_default_is_32_bits_(true) {
51 Initialize();
52}
53
54InstructionType MiniDisassembler::Disassemble(
55 unsigned char* start_byte,
56 unsigned int& instruction_bytes) {
57 // Clean up any state from previous invocations.
58 Initialize();
59
60 // Start by processing any prefixes.
61 unsigned char* current_byte = start_byte;
62 unsigned int size = 0;
63 InstructionType instruction_type = ProcessPrefixes(current_byte, size);
64
65 if (IT_UNKNOWN == instruction_type)
66 return instruction_type;
67
68 current_byte += size;
69 size = 0;
70
71 // Invariant: We have stripped all prefixes, and the operand_is_32_bits_
72 // and address_is_32_bits_ flags are correctly set.
73
74 instruction_type = ProcessOpcode(current_byte, 0, size);
75
76 // Check for error processing instruction
77 if ((IT_UNKNOWN == instruction_type_) || (IT_UNUSED == instruction_type_)) {
78 return IT_UNKNOWN;
79 }
80
81 current_byte += size;
82
83 // Invariant: operand_bytes_ indicates the total size of operands
84 // specified by the opcode and/or ModR/M byte and/or SIB byte.
85 // pCurrentByte points to the first byte after the ModR/M byte, or after
86 // the SIB byte if it is present (i.e. the first byte of any operands
87 // encoded in the instruction).
88
89 // We get the total length of any prefixes, the opcode, and the ModR/M and
90 // SIB bytes if present, by taking the difference of the original starting
91 // address and the current byte (which points to the first byte of the
92 // operands if present, or to the first byte of the next instruction if
93 // they are not). Adding the count of bytes in the operands encoded in
94 // the instruction gives us the full length of the instruction in bytes.
95 instruction_bytes += operand_bytes_ + (current_byte - start_byte);
96
97 // Return the instruction type, which was set by ProcessOpcode().
98 return instruction_type_;
99}
100
101void MiniDisassembler::Initialize() {
102 operand_is_32_bits_ = operand_default_is_32_bits_;
103 address_is_32_bits_ = address_default_is_32_bits_;
104#ifdef _M_X64
105 operand_default_support_64_bits_ = true;
106#else
107 operand_default_support_64_bits_ = false;
108#endif
109 operand_is_64_bits_ = false;
110 operand_bytes_ = 0;
111 have_modrm_ = false;
112 should_decode_modrm_ = false;
113 instruction_type_ = IT_UNKNOWN;
114 got_f2_prefix_ = false;
115 got_f3_prefix_ = false;
116 got_66_prefix_ = false;
117}
118
119InstructionType MiniDisassembler::ProcessPrefixes(unsigned char* start_byte,
120 unsigned int& size) {
121 InstructionType instruction_type = IT_GENERIC;
122 const Opcode& opcode = s_ia32_opcode_map_[0].table_[*start_byte];
123
124 switch (opcode.type_) {
125 case IT_PREFIX_ADDRESS:
126 address_is_32_bits_ = !address_default_is_32_bits_;
127 goto nochangeoperand;
128 case IT_PREFIX_OPERAND:
129 operand_is_32_bits_ = !operand_default_is_32_bits_;
130 nochangeoperand:
131 case IT_PREFIX:
132
133 if (0xF2 == (*start_byte))
134 got_f2_prefix_ = true;
135 else if (0xF3 == (*start_byte))
136 got_f3_prefix_ = true;
137 else if (0x66 == (*start_byte))
138 got_66_prefix_ = true;
139 else if (operand_default_support_64_bits_ && (*start_byte) & 0x48)
140 operand_is_64_bits_ = true;
141
142 instruction_type = opcode.type_;
143 size ++;
144 // we got a prefix, so add one and check next byte
145 ProcessPrefixes(start_byte + 1, size);
146 default:
147 break; // not a prefix byte
148 }
149
150 return instruction_type;
151}
152
153InstructionType MiniDisassembler::ProcessOpcode(unsigned char* start_byte,
154 unsigned int table_index,
155 unsigned int& size) {
156 const OpcodeTable& table = s_ia32_opcode_map_[table_index]; // Get our table
157 unsigned char current_byte = (*start_byte) >> table.shift_;
158 current_byte = current_byte & table.mask_; // Mask out the bits we will use
159
160 // Check whether the byte we have is inside the table we have.
161 if (current_byte < table.min_lim_ || current_byte > table.max_lim_) {
162 instruction_type_ = IT_UNKNOWN;
163 return instruction_type_;
164 }
165
166 const Opcode& opcode = table.table_[current_byte];
167 if (IT_UNUSED == opcode.type_) {
168 // This instruction is not used by the IA-32 ISA, so we indicate
169 // this to the user. Probably means that we were pointed to
170 // a byte in memory that was not the start of an instruction.
171 instruction_type_ = IT_UNUSED;
172 return instruction_type_;
173 } else if (IT_REFERENCE == opcode.type_) {
174 // We are looking at an opcode that has more bytes (or is continued
175 // in the ModR/M byte). Recursively find the opcode definition in
176 // the table for the opcode's next byte.
177 size++;
178 ProcessOpcode(start_byte + 1, opcode.table_index_, size);
179 return instruction_type_;
180 }
181
182 const SpecificOpcode* specific_opcode = (SpecificOpcode*)&opcode;
183 if (opcode.is_prefix_dependent_) {
184 if (got_f2_prefix_ && opcode.opcode_if_f2_prefix_.mnemonic_ != 0) {
185 specific_opcode = &opcode.opcode_if_f2_prefix_;
186 } else if (got_f3_prefix_ && opcode.opcode_if_f3_prefix_.mnemonic_ != 0) {
187 specific_opcode = &opcode.opcode_if_f3_prefix_;
188 } else if (got_66_prefix_ && opcode.opcode_if_66_prefix_.mnemonic_ != 0) {
189 specific_opcode = &opcode.opcode_if_66_prefix_;
190 }
191 }
192
193 // Inv: The opcode type is known.
194 instruction_type_ = specific_opcode->type_;
195
196 // Let's process the operand types to see if we have any immediate
197 // operands, and/or a ModR/M byte.
198
199 ProcessOperand(specific_opcode->flag_dest_);
200 ProcessOperand(specific_opcode->flag_source_);
201 ProcessOperand(specific_opcode->flag_aux_);
202
203 // Inv: We have processed the opcode and incremented operand_bytes_
204 // by the number of bytes of any operands specified by the opcode
205 // that are stored in the instruction (not registers etc.). Now
206 // we need to return the total number of bytes for the opcode and
207 // for the ModR/M or SIB bytes if they are present.
208
209 if (table.mask_ != 0xff) {
210 if (have_modrm_) {
211 // we're looking at a ModR/M byte so we're not going to
212 // count that into the opcode size
213 ProcessModrm(start_byte, size);
214 return IT_GENERIC;
215 } else {
216 // need to count the ModR/M byte even if it's just being
217 // used for opcode extension
218 size++;
219 return IT_GENERIC;
220 }
221 } else {
222 if (have_modrm_) {
223 // The ModR/M byte is the next byte.
224 size++;
225 ProcessModrm(start_byte + 1, size);
226 return IT_GENERIC;
227 } else {
228 size++;
229 return IT_GENERIC;
230 }
231 }
232}
233
234bool MiniDisassembler::ProcessOperand(int flag_operand) {
235 bool succeeded = true;
236 if (AM_NOT_USED == flag_operand)
237 return succeeded;
238
239 // Decide what to do based on the addressing mode.
240 switch (flag_operand & AM_MASK) {
241 // No ModR/M byte indicated by these addressing modes, and no
242 // additional (e.g. immediate) parameters.
243 case AM_A: // Direct address
244 case AM_F: // EFLAGS register
245 case AM_X: // Memory addressed by the DS:SI register pair
246 case AM_Y: // Memory addressed by the ES:DI register pair
247 case AM_IMPLICIT: // Parameter is implicit, occupies no space in
248 // instruction
249 break;
250
251 // There is a ModR/M byte but it does not necessarily need
252 // to be decoded.
253 case AM_C: // reg field of ModR/M selects a control register
254 case AM_D: // reg field of ModR/M selects a debug register
255 case AM_G: // reg field of ModR/M selects a general register
256 case AM_P: // reg field of ModR/M selects an MMX register
257 case AM_R: // mod field of ModR/M may refer only to a general register
258 case AM_S: // reg field of ModR/M selects a segment register
259 case AM_T: // reg field of ModR/M selects a test register
260 case AM_V: // reg field of ModR/M selects a 128-bit XMM register
261 have_modrm_ = true;
262 break;
263
264 // In these addressing modes, there is a ModR/M byte and it needs to be
265 // decoded. No other (e.g. immediate) params than indicated in ModR/M.
266 case AM_E: // Operand is either a general-purpose register or memory,
267 // specified by ModR/M byte
268 case AM_M: // ModR/M byte will refer only to memory
269 case AM_Q: // Operand is either an MMX register or memory (complex
270 // evaluation), specified by ModR/M byte
271 case AM_W: // Operand is either a 128-bit XMM register or memory (complex
272 // eval), specified by ModR/M byte
273 have_modrm_ = true;
274 should_decode_modrm_ = true;
275 break;
276
277 // These addressing modes specify an immediate or an offset value
278 // directly, so we need to look at the operand type to see how many
279 // bytes.
280 case AM_I: // Immediate data.
281 case AM_J: // Jump to offset.
282 case AM_O: // Operand is at offset.
283 switch (flag_operand & OT_MASK) {
284 case OT_B: // Byte regardless of operand-size attribute.
285 operand_bytes_ += OS_BYTE;
286 break;
287 case OT_C: // Byte or word, depending on operand-size attribute.
288 if (operand_is_32_bits_)
289 operand_bytes_ += OS_WORD;
290 else
291 operand_bytes_ += OS_BYTE;
292 break;
293 case OT_D: // Doubleword, regardless of operand-size attribute.
294 operand_bytes_ += OS_DOUBLE_WORD;
295 break;
296 case OT_DQ: // Double-quadword, regardless of operand-size attribute.
297 operand_bytes_ += OS_DOUBLE_QUAD_WORD;
298 break;
299 case OT_P: // 32-bit or 48-bit pointer, depending on operand-size
300 // attribute.
301 if (operand_is_32_bits_)
302 operand_bytes_ += OS_48_BIT_POINTER;
303 else
304 operand_bytes_ += OS_32_BIT_POINTER;
305 break;
306 case OT_PS: // 128-bit packed single-precision floating-point data.
307 operand_bytes_ += OS_128_BIT_PACKED_SINGLE_PRECISION_FLOATING;
308 break;
309 case OT_Q: // Quadword, regardless of operand-size attribute.
310 operand_bytes_ += OS_QUAD_WORD;
311 break;
312 case OT_S: // 6-byte pseudo-descriptor.
313 operand_bytes_ += OS_PSEUDO_DESCRIPTOR;
314 break;
315 case OT_SD: // Scalar Double-Precision Floating-Point Value
316 case OT_PD: // Unaligned packed double-precision floating point value
317 operand_bytes_ += OS_DOUBLE_PRECISION_FLOATING;
318 break;
319 case OT_SS:
320 // Scalar element of a 128-bit packed single-precision
321 // floating data.
322 // We simply return enItUnknown since we don't have to support
323 // floating point
324 succeeded = false;
325 break;
Brian Silverman20350ac2021-11-17 18:19:55 -0800326 case OT_V: // Word, doubleword or quadword, depending on operand-size
Austin Schuh745610d2015-09-06 18:19:50 -0700327 // attribute.
328 if (operand_is_64_bits_ && flag_operand & AM_I &&
329 flag_operand & IOS_64)
330 operand_bytes_ += OS_QUAD_WORD;
331 else if (operand_is_32_bits_)
332 operand_bytes_ += OS_DOUBLE_WORD;
333 else
334 operand_bytes_ += OS_WORD;
335 break;
336 case OT_W: // Word, regardless of operand-size attribute.
337 operand_bytes_ += OS_WORD;
338 break;
339
340 // Can safely ignore these.
341 case OT_A: // Two one-word operands in memory or two double-word
342 // operands in memory
343 case OT_PI: // Quadword MMX technology register (e.g. mm0)
344 case OT_SI: // Doubleword integer register (e.g., eax)
345 break;
346
347 default:
348 break;
349 }
350 break;
351
352 default:
353 break;
354 }
355
356 return succeeded;
357}
358
359bool MiniDisassembler::ProcessModrm(unsigned char* start_byte,
360 unsigned int& size) {
361 // If we don't need to decode, we just return the size of the ModR/M
362 // byte (there is never a SIB byte in this case).
363 if (!should_decode_modrm_) {
364 size++;
365 return true;
366 }
367
368 // We never care about the reg field, only the combination of the mod
369 // and r/m fields, so let's start by packing those fields together into
370 // 5 bits.
371 unsigned char modrm = (*start_byte);
372 unsigned char mod = modrm & 0xC0; // mask out top two bits to get mod field
373 modrm = modrm & 0x07; // mask out bottom 3 bits to get r/m field
374 mod = mod >> 3; // shift the mod field to the right place
375 modrm = mod | modrm; // combine the r/m and mod fields as discussed
376 mod = mod >> 3; // shift the mod field to bits 2..0
377
378 // Invariant: modrm contains the mod field in bits 4..3 and the r/m field
379 // in bits 2..0, and mod contains the mod field in bits 2..0
380
381 const ModrmEntry* modrm_entry = 0;
382 if (address_is_32_bits_)
383 modrm_entry = &s_ia32_modrm_map_[modrm];
384 else
385 modrm_entry = &s_ia16_modrm_map_[modrm];
386
387 // Invariant: modrm_entry points to information that we need to decode
388 // the ModR/M byte.
389
390 // Add to the count of operand bytes, if the ModR/M byte indicates
391 // that some operands are encoded in the instruction.
392 if (modrm_entry->is_encoded_in_instruction_)
393 operand_bytes_ += modrm_entry->operand_size_;
394
395 // Process the SIB byte if necessary, and return the count
396 // of ModR/M and SIB bytes.
397 if (modrm_entry->use_sib_byte_) {
398 size++;
399 return ProcessSib(start_byte + 1, mod, size);
400 } else {
401 size++;
402 return true;
403 }
404}
405
406bool MiniDisassembler::ProcessSib(unsigned char* start_byte,
407 unsigned char mod,
408 unsigned int& size) {
409 // get the mod field from the 2..0 bits of the SIB byte
410 unsigned char sib_base = (*start_byte) & 0x07;
411 if (0x05 == sib_base) {
412 switch (mod) {
413 case 0x00: // mod == 00
414 case 0x02: // mod == 10
415 operand_bytes_ += OS_DOUBLE_WORD;
416 break;
417 case 0x01: // mod == 01
418 operand_bytes_ += OS_BYTE;
419 break;
420 case 0x03: // mod == 11
421 // According to the IA-32 docs, there does not seem to be a disp
422 // value for this value of mod
423 default:
424 break;
425 }
426 }
427
428 size++;
429 return true;
430}
431
432}; // namespace sidestep