Austin Schuh | 745610d | 2015-09-06 18:19:50 -0700 | [diff] [blame^] | 1 | // -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- |
| 2 | /* Copyright (c) 2007, Google Inc. |
| 3 | * All rights reserved. |
| 4 | * |
| 5 | * Redistribution and use in source and binary forms, with or without |
| 6 | * modification, are permitted provided that the following conditions are |
| 7 | * met: |
| 8 | * |
| 9 | * * Redistributions of source code must retain the above copyright |
| 10 | * notice, this list of conditions and the following disclaimer. |
| 11 | * * Redistributions in binary form must reproduce the above |
| 12 | * copyright notice, this list of conditions and the following disclaimer |
| 13 | * in the documentation and/or other materials provided with the |
| 14 | * distribution. |
| 15 | * * Neither the name of Google Inc. nor the names of its |
| 16 | * contributors may be used to endorse or promote products derived from |
| 17 | * this software without specific prior written permission. |
| 18 | * |
| 19 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 20 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 21 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 22 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| 23 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 24 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| 25 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 26 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 27 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 28 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 30 | * |
| 31 | * --- |
| 32 | * Author: Joi Sigurdsson |
| 33 | * |
| 34 | * Implementation of MiniDisassembler. |
| 35 | */ |
| 36 | |
| 37 | #include "mini_disassembler.h" |
| 38 | |
| 39 | namespace sidestep { |
| 40 | |
| 41 | MiniDisassembler::MiniDisassembler(bool operand_default_is_32_bits, |
| 42 | bool address_default_is_32_bits) |
| 43 | : operand_default_is_32_bits_(operand_default_is_32_bits), |
| 44 | address_default_is_32_bits_(address_default_is_32_bits) { |
| 45 | Initialize(); |
| 46 | } |
| 47 | |
| 48 | MiniDisassembler::MiniDisassembler() |
| 49 | : operand_default_is_32_bits_(true), |
| 50 | address_default_is_32_bits_(true) { |
| 51 | Initialize(); |
| 52 | } |
| 53 | |
| 54 | InstructionType MiniDisassembler::Disassemble( |
| 55 | unsigned char* start_byte, |
| 56 | unsigned int& instruction_bytes) { |
| 57 | // Clean up any state from previous invocations. |
| 58 | Initialize(); |
| 59 | |
| 60 | // Start by processing any prefixes. |
| 61 | unsigned char* current_byte = start_byte; |
| 62 | unsigned int size = 0; |
| 63 | InstructionType instruction_type = ProcessPrefixes(current_byte, size); |
| 64 | |
| 65 | if (IT_UNKNOWN == instruction_type) |
| 66 | return instruction_type; |
| 67 | |
| 68 | current_byte += size; |
| 69 | size = 0; |
| 70 | |
| 71 | // Invariant: We have stripped all prefixes, and the operand_is_32_bits_ |
| 72 | // and address_is_32_bits_ flags are correctly set. |
| 73 | |
| 74 | instruction_type = ProcessOpcode(current_byte, 0, size); |
| 75 | |
| 76 | // Check for error processing instruction |
| 77 | if ((IT_UNKNOWN == instruction_type_) || (IT_UNUSED == instruction_type_)) { |
| 78 | return IT_UNKNOWN; |
| 79 | } |
| 80 | |
| 81 | current_byte += size; |
| 82 | |
| 83 | // Invariant: operand_bytes_ indicates the total size of operands |
| 84 | // specified by the opcode and/or ModR/M byte and/or SIB byte. |
| 85 | // pCurrentByte points to the first byte after the ModR/M byte, or after |
| 86 | // the SIB byte if it is present (i.e. the first byte of any operands |
| 87 | // encoded in the instruction). |
| 88 | |
| 89 | // We get the total length of any prefixes, the opcode, and the ModR/M and |
| 90 | // SIB bytes if present, by taking the difference of the original starting |
| 91 | // address and the current byte (which points to the first byte of the |
| 92 | // operands if present, or to the first byte of the next instruction if |
| 93 | // they are not). Adding the count of bytes in the operands encoded in |
| 94 | // the instruction gives us the full length of the instruction in bytes. |
| 95 | instruction_bytes += operand_bytes_ + (current_byte - start_byte); |
| 96 | |
| 97 | // Return the instruction type, which was set by ProcessOpcode(). |
| 98 | return instruction_type_; |
| 99 | } |
| 100 | |
| 101 | void MiniDisassembler::Initialize() { |
| 102 | operand_is_32_bits_ = operand_default_is_32_bits_; |
| 103 | address_is_32_bits_ = address_default_is_32_bits_; |
| 104 | #ifdef _M_X64 |
| 105 | operand_default_support_64_bits_ = true; |
| 106 | #else |
| 107 | operand_default_support_64_bits_ = false; |
| 108 | #endif |
| 109 | operand_is_64_bits_ = false; |
| 110 | operand_bytes_ = 0; |
| 111 | have_modrm_ = false; |
| 112 | should_decode_modrm_ = false; |
| 113 | instruction_type_ = IT_UNKNOWN; |
| 114 | got_f2_prefix_ = false; |
| 115 | got_f3_prefix_ = false; |
| 116 | got_66_prefix_ = false; |
| 117 | } |
| 118 | |
| 119 | InstructionType MiniDisassembler::ProcessPrefixes(unsigned char* start_byte, |
| 120 | unsigned int& size) { |
| 121 | InstructionType instruction_type = IT_GENERIC; |
| 122 | const Opcode& opcode = s_ia32_opcode_map_[0].table_[*start_byte]; |
| 123 | |
| 124 | switch (opcode.type_) { |
| 125 | case IT_PREFIX_ADDRESS: |
| 126 | address_is_32_bits_ = !address_default_is_32_bits_; |
| 127 | goto nochangeoperand; |
| 128 | case IT_PREFIX_OPERAND: |
| 129 | operand_is_32_bits_ = !operand_default_is_32_bits_; |
| 130 | nochangeoperand: |
| 131 | case IT_PREFIX: |
| 132 | |
| 133 | if (0xF2 == (*start_byte)) |
| 134 | got_f2_prefix_ = true; |
| 135 | else if (0xF3 == (*start_byte)) |
| 136 | got_f3_prefix_ = true; |
| 137 | else if (0x66 == (*start_byte)) |
| 138 | got_66_prefix_ = true; |
| 139 | else if (operand_default_support_64_bits_ && (*start_byte) & 0x48) |
| 140 | operand_is_64_bits_ = true; |
| 141 | |
| 142 | instruction_type = opcode.type_; |
| 143 | size ++; |
| 144 | // we got a prefix, so add one and check next byte |
| 145 | ProcessPrefixes(start_byte + 1, size); |
| 146 | default: |
| 147 | break; // not a prefix byte |
| 148 | } |
| 149 | |
| 150 | return instruction_type; |
| 151 | } |
| 152 | |
| 153 | InstructionType MiniDisassembler::ProcessOpcode(unsigned char* start_byte, |
| 154 | unsigned int table_index, |
| 155 | unsigned int& size) { |
| 156 | const OpcodeTable& table = s_ia32_opcode_map_[table_index]; // Get our table |
| 157 | unsigned char current_byte = (*start_byte) >> table.shift_; |
| 158 | current_byte = current_byte & table.mask_; // Mask out the bits we will use |
| 159 | |
| 160 | // Check whether the byte we have is inside the table we have. |
| 161 | if (current_byte < table.min_lim_ || current_byte > table.max_lim_) { |
| 162 | instruction_type_ = IT_UNKNOWN; |
| 163 | return instruction_type_; |
| 164 | } |
| 165 | |
| 166 | const Opcode& opcode = table.table_[current_byte]; |
| 167 | if (IT_UNUSED == opcode.type_) { |
| 168 | // This instruction is not used by the IA-32 ISA, so we indicate |
| 169 | // this to the user. Probably means that we were pointed to |
| 170 | // a byte in memory that was not the start of an instruction. |
| 171 | instruction_type_ = IT_UNUSED; |
| 172 | return instruction_type_; |
| 173 | } else if (IT_REFERENCE == opcode.type_) { |
| 174 | // We are looking at an opcode that has more bytes (or is continued |
| 175 | // in the ModR/M byte). Recursively find the opcode definition in |
| 176 | // the table for the opcode's next byte. |
| 177 | size++; |
| 178 | ProcessOpcode(start_byte + 1, opcode.table_index_, size); |
| 179 | return instruction_type_; |
| 180 | } |
| 181 | |
| 182 | const SpecificOpcode* specific_opcode = (SpecificOpcode*)&opcode; |
| 183 | if (opcode.is_prefix_dependent_) { |
| 184 | if (got_f2_prefix_ && opcode.opcode_if_f2_prefix_.mnemonic_ != 0) { |
| 185 | specific_opcode = &opcode.opcode_if_f2_prefix_; |
| 186 | } else if (got_f3_prefix_ && opcode.opcode_if_f3_prefix_.mnemonic_ != 0) { |
| 187 | specific_opcode = &opcode.opcode_if_f3_prefix_; |
| 188 | } else if (got_66_prefix_ && opcode.opcode_if_66_prefix_.mnemonic_ != 0) { |
| 189 | specific_opcode = &opcode.opcode_if_66_prefix_; |
| 190 | } |
| 191 | } |
| 192 | |
| 193 | // Inv: The opcode type is known. |
| 194 | instruction_type_ = specific_opcode->type_; |
| 195 | |
| 196 | // Let's process the operand types to see if we have any immediate |
| 197 | // operands, and/or a ModR/M byte. |
| 198 | |
| 199 | ProcessOperand(specific_opcode->flag_dest_); |
| 200 | ProcessOperand(specific_opcode->flag_source_); |
| 201 | ProcessOperand(specific_opcode->flag_aux_); |
| 202 | |
| 203 | // Inv: We have processed the opcode and incremented operand_bytes_ |
| 204 | // by the number of bytes of any operands specified by the opcode |
| 205 | // that are stored in the instruction (not registers etc.). Now |
| 206 | // we need to return the total number of bytes for the opcode and |
| 207 | // for the ModR/M or SIB bytes if they are present. |
| 208 | |
| 209 | if (table.mask_ != 0xff) { |
| 210 | if (have_modrm_) { |
| 211 | // we're looking at a ModR/M byte so we're not going to |
| 212 | // count that into the opcode size |
| 213 | ProcessModrm(start_byte, size); |
| 214 | return IT_GENERIC; |
| 215 | } else { |
| 216 | // need to count the ModR/M byte even if it's just being |
| 217 | // used for opcode extension |
| 218 | size++; |
| 219 | return IT_GENERIC; |
| 220 | } |
| 221 | } else { |
| 222 | if (have_modrm_) { |
| 223 | // The ModR/M byte is the next byte. |
| 224 | size++; |
| 225 | ProcessModrm(start_byte + 1, size); |
| 226 | return IT_GENERIC; |
| 227 | } else { |
| 228 | size++; |
| 229 | return IT_GENERIC; |
| 230 | } |
| 231 | } |
| 232 | } |
| 233 | |
| 234 | bool MiniDisassembler::ProcessOperand(int flag_operand) { |
| 235 | bool succeeded = true; |
| 236 | if (AM_NOT_USED == flag_operand) |
| 237 | return succeeded; |
| 238 | |
| 239 | // Decide what to do based on the addressing mode. |
| 240 | switch (flag_operand & AM_MASK) { |
| 241 | // No ModR/M byte indicated by these addressing modes, and no |
| 242 | // additional (e.g. immediate) parameters. |
| 243 | case AM_A: // Direct address |
| 244 | case AM_F: // EFLAGS register |
| 245 | case AM_X: // Memory addressed by the DS:SI register pair |
| 246 | case AM_Y: // Memory addressed by the ES:DI register pair |
| 247 | case AM_IMPLICIT: // Parameter is implicit, occupies no space in |
| 248 | // instruction |
| 249 | break; |
| 250 | |
| 251 | // There is a ModR/M byte but it does not necessarily need |
| 252 | // to be decoded. |
| 253 | case AM_C: // reg field of ModR/M selects a control register |
| 254 | case AM_D: // reg field of ModR/M selects a debug register |
| 255 | case AM_G: // reg field of ModR/M selects a general register |
| 256 | case AM_P: // reg field of ModR/M selects an MMX register |
| 257 | case AM_R: // mod field of ModR/M may refer only to a general register |
| 258 | case AM_S: // reg field of ModR/M selects a segment register |
| 259 | case AM_T: // reg field of ModR/M selects a test register |
| 260 | case AM_V: // reg field of ModR/M selects a 128-bit XMM register |
| 261 | have_modrm_ = true; |
| 262 | break; |
| 263 | |
| 264 | // In these addressing modes, there is a ModR/M byte and it needs to be |
| 265 | // decoded. No other (e.g. immediate) params than indicated in ModR/M. |
| 266 | case AM_E: // Operand is either a general-purpose register or memory, |
| 267 | // specified by ModR/M byte |
| 268 | case AM_M: // ModR/M byte will refer only to memory |
| 269 | case AM_Q: // Operand is either an MMX register or memory (complex |
| 270 | // evaluation), specified by ModR/M byte |
| 271 | case AM_W: // Operand is either a 128-bit XMM register or memory (complex |
| 272 | // eval), specified by ModR/M byte |
| 273 | have_modrm_ = true; |
| 274 | should_decode_modrm_ = true; |
| 275 | break; |
| 276 | |
| 277 | // These addressing modes specify an immediate or an offset value |
| 278 | // directly, so we need to look at the operand type to see how many |
| 279 | // bytes. |
| 280 | case AM_I: // Immediate data. |
| 281 | case AM_J: // Jump to offset. |
| 282 | case AM_O: // Operand is at offset. |
| 283 | switch (flag_operand & OT_MASK) { |
| 284 | case OT_B: // Byte regardless of operand-size attribute. |
| 285 | operand_bytes_ += OS_BYTE; |
| 286 | break; |
| 287 | case OT_C: // Byte or word, depending on operand-size attribute. |
| 288 | if (operand_is_32_bits_) |
| 289 | operand_bytes_ += OS_WORD; |
| 290 | else |
| 291 | operand_bytes_ += OS_BYTE; |
| 292 | break; |
| 293 | case OT_D: // Doubleword, regardless of operand-size attribute. |
| 294 | operand_bytes_ += OS_DOUBLE_WORD; |
| 295 | break; |
| 296 | case OT_DQ: // Double-quadword, regardless of operand-size attribute. |
| 297 | operand_bytes_ += OS_DOUBLE_QUAD_WORD; |
| 298 | break; |
| 299 | case OT_P: // 32-bit or 48-bit pointer, depending on operand-size |
| 300 | // attribute. |
| 301 | if (operand_is_32_bits_) |
| 302 | operand_bytes_ += OS_48_BIT_POINTER; |
| 303 | else |
| 304 | operand_bytes_ += OS_32_BIT_POINTER; |
| 305 | break; |
| 306 | case OT_PS: // 128-bit packed single-precision floating-point data. |
| 307 | operand_bytes_ += OS_128_BIT_PACKED_SINGLE_PRECISION_FLOATING; |
| 308 | break; |
| 309 | case OT_Q: // Quadword, regardless of operand-size attribute. |
| 310 | operand_bytes_ += OS_QUAD_WORD; |
| 311 | break; |
| 312 | case OT_S: // 6-byte pseudo-descriptor. |
| 313 | operand_bytes_ += OS_PSEUDO_DESCRIPTOR; |
| 314 | break; |
| 315 | case OT_SD: // Scalar Double-Precision Floating-Point Value |
| 316 | case OT_PD: // Unaligned packed double-precision floating point value |
| 317 | operand_bytes_ += OS_DOUBLE_PRECISION_FLOATING; |
| 318 | break; |
| 319 | case OT_SS: |
| 320 | // Scalar element of a 128-bit packed single-precision |
| 321 | // floating data. |
| 322 | // We simply return enItUnknown since we don't have to support |
| 323 | // floating point |
| 324 | succeeded = false; |
| 325 | break; |
| 326 | case OT_V: // Word, doubleword or quadword, depending on operand-size |
| 327 | // attribute. |
| 328 | if (operand_is_64_bits_ && flag_operand & AM_I && |
| 329 | flag_operand & IOS_64) |
| 330 | operand_bytes_ += OS_QUAD_WORD; |
| 331 | else if (operand_is_32_bits_) |
| 332 | operand_bytes_ += OS_DOUBLE_WORD; |
| 333 | else |
| 334 | operand_bytes_ += OS_WORD; |
| 335 | break; |
| 336 | case OT_W: // Word, regardless of operand-size attribute. |
| 337 | operand_bytes_ += OS_WORD; |
| 338 | break; |
| 339 | |
| 340 | // Can safely ignore these. |
| 341 | case OT_A: // Two one-word operands in memory or two double-word |
| 342 | // operands in memory |
| 343 | case OT_PI: // Quadword MMX technology register (e.g. mm0) |
| 344 | case OT_SI: // Doubleword integer register (e.g., eax) |
| 345 | break; |
| 346 | |
| 347 | default: |
| 348 | break; |
| 349 | } |
| 350 | break; |
| 351 | |
| 352 | default: |
| 353 | break; |
| 354 | } |
| 355 | |
| 356 | return succeeded; |
| 357 | } |
| 358 | |
| 359 | bool MiniDisassembler::ProcessModrm(unsigned char* start_byte, |
| 360 | unsigned int& size) { |
| 361 | // If we don't need to decode, we just return the size of the ModR/M |
| 362 | // byte (there is never a SIB byte in this case). |
| 363 | if (!should_decode_modrm_) { |
| 364 | size++; |
| 365 | return true; |
| 366 | } |
| 367 | |
| 368 | // We never care about the reg field, only the combination of the mod |
| 369 | // and r/m fields, so let's start by packing those fields together into |
| 370 | // 5 bits. |
| 371 | unsigned char modrm = (*start_byte); |
| 372 | unsigned char mod = modrm & 0xC0; // mask out top two bits to get mod field |
| 373 | modrm = modrm & 0x07; // mask out bottom 3 bits to get r/m field |
| 374 | mod = mod >> 3; // shift the mod field to the right place |
| 375 | modrm = mod | modrm; // combine the r/m and mod fields as discussed |
| 376 | mod = mod >> 3; // shift the mod field to bits 2..0 |
| 377 | |
| 378 | // Invariant: modrm contains the mod field in bits 4..3 and the r/m field |
| 379 | // in bits 2..0, and mod contains the mod field in bits 2..0 |
| 380 | |
| 381 | const ModrmEntry* modrm_entry = 0; |
| 382 | if (address_is_32_bits_) |
| 383 | modrm_entry = &s_ia32_modrm_map_[modrm]; |
| 384 | else |
| 385 | modrm_entry = &s_ia16_modrm_map_[modrm]; |
| 386 | |
| 387 | // Invariant: modrm_entry points to information that we need to decode |
| 388 | // the ModR/M byte. |
| 389 | |
| 390 | // Add to the count of operand bytes, if the ModR/M byte indicates |
| 391 | // that some operands are encoded in the instruction. |
| 392 | if (modrm_entry->is_encoded_in_instruction_) |
| 393 | operand_bytes_ += modrm_entry->operand_size_; |
| 394 | |
| 395 | // Process the SIB byte if necessary, and return the count |
| 396 | // of ModR/M and SIB bytes. |
| 397 | if (modrm_entry->use_sib_byte_) { |
| 398 | size++; |
| 399 | return ProcessSib(start_byte + 1, mod, size); |
| 400 | } else { |
| 401 | size++; |
| 402 | return true; |
| 403 | } |
| 404 | } |
| 405 | |
| 406 | bool MiniDisassembler::ProcessSib(unsigned char* start_byte, |
| 407 | unsigned char mod, |
| 408 | unsigned int& size) { |
| 409 | // get the mod field from the 2..0 bits of the SIB byte |
| 410 | unsigned char sib_base = (*start_byte) & 0x07; |
| 411 | if (0x05 == sib_base) { |
| 412 | switch (mod) { |
| 413 | case 0x00: // mod == 00 |
| 414 | case 0x02: // mod == 10 |
| 415 | operand_bytes_ += OS_DOUBLE_WORD; |
| 416 | break; |
| 417 | case 0x01: // mod == 01 |
| 418 | operand_bytes_ += OS_BYTE; |
| 419 | break; |
| 420 | case 0x03: // mod == 11 |
| 421 | // According to the IA-32 docs, there does not seem to be a disp |
| 422 | // value for this value of mod |
| 423 | default: |
| 424 | break; |
| 425 | } |
| 426 | } |
| 427 | |
| 428 | size++; |
| 429 | return true; |
| 430 | } |
| 431 | |
| 432 | }; // namespace sidestep |