Squashed 'third_party/elfutils/' content from commit 555e15e

Change-Id: I61cde98949e47e5c8c09c33260de17f30921be79
git-subtree-dir: third_party/elfutils
git-subtree-split: 555e15ebe8bf1eb33d00747173cfc80cc65648a4
diff --git a/libcpu/ChangeLog b/libcpu/ChangeLog
new file mode 100644
index 0000000..86d2947
--- /dev/null
+++ b/libcpu/ChangeLog
@@ -0,0 +1,457 @@
+2018-02-09  Joshua Watt  <JPEWhacker@gmail.com>
+
+	* i386_disasm.c (i386_disasm): Use FALLTHROUGH macro instead of
+	comment.
+
+2017-08-18  Ulf Hermann  <ulf.hermann@qt.io>
+
+	* memory-access.h: Use attribute_packed.
+
+2017-02-27  Ulf Hermann  <ulf.hermann@qt.io>
+
+	* Makefile.am: Use fpic_CFLAGS.
+
+2017-07-18  Mark Wielaard  <mark@klomp.org>
+
+	* Makefile.am: Don't check HAVE_LINUX_BPF_H, just define libcpu_bpf.
+	* bpf_disasm.c: Include bpf.h instead of linux/bpf.h. Don't define
+	BPF_PSEUDO_MAP_FD.
+
+2017-04-20  Ulf Hermann <ulf.hermann@qt.io>
+
+	* Makefile.am: Add EXEEXT to gendis.
+
+2017-04-20  Ulf Hermann  <ulf.hermann@qt.io>
+
+	* i386_parse.y: Eliminate comparison_fn_t.
+
+2016-11-02  Mark Wielaard  <mjw@redhat.com>
+
+	* i386_disasm.c (i386_disasm): Add fallthrough comment.
+
+2016-10-11  Akihiko Odaki  <akihiko.odaki.4i@stu.hosei.ac.jp>
+
+	* i386_lex.l: Remove system.h include, add libeu.h include.
+	* i386_parse.y: Remove sys/param.h include, add libeu.h include.
+	* i386_disasm.c: Remove sys/param.h.
+
+2016-09-05  Mark Wielaard  <mjw@redhat.com>
+
+	* bpf_disasm.c: Define BPF_PSEUDO_MAP_FD if undefined.
+
+2016-08-10  Richard Henderson  <rth@redhat.com>
+
+	* bpf_disasm.c (bpf_disasm): Rearrange the printing of instructions
+	to use exactly the operands required.
+
+2016-06-28  Richard Henderson  <rth@redhat.com>
+
+	* Makefile.am (noinst_LIBRARIES): Add libcpu_bpf.a.
+	(libcpu_bpf_a_SOURCES, libcpu_bpf_a_CFLAGS): New.
+	* bpf_disasm.c: New file.
+	* i386_disasm.c (i386_disasm): Add ebl parameter.
+
+2015-10-05  Josh Stone  <jistone@redhat.com>
+
+	* Makefile.am (%_defs): Add AM_V_GEN and AM_V_at silencers.
+	($(srcdir)/%_dis.h): Ditto.
+	(%.mnemonics): Add AM_V_GEN silencer.
+
+2014-10-29  Jose E. Marchesi  <jose.marchesi@oracle.com>
+
+	* Makefile.am (AM_CFLAGS): Use -fPIC instead of -fpic to avoid
+	relocation overflows in some platforms.
+
+2014-04-13  Mark Wielaard  <mjw@redhat.com>
+
+	* Makefile.am (i386_gendis_LDADD): Remove libmudflap.
+
+2013-04-24  Mark Wielaard  <mjw@redhat.com>
+
+	* Makefile.am: Use AM_CPPFLAGS instead of INCLUDES.
+
+2012-10-10  Roland McGrath  <roland@hack.frob.com>
+
+	* Makefile.am (%_defs, $(srcdir)/%_dis.h): Redirect to temp file,
+	mv into place with separate command.
+
+2012-06-26  Roland McGrath  <roland@hack.frob.com>
+
+	* Makefile.am [!MAINTAINER_MODE] ($(srcdir)/%_dis.h): New rule.
+
+2012-02-24  Mark Wielaard  <mjw@redhat.com>
+
+	* Makefile.am (CLEANFILES): Move %_dis.h to...
+	(MAINTAINERCLEANFILES): here.
+
+2012-01-21  Ulrich Drepper  <drepper@gmail.com>
+
+	* i386_disasm.c (ADD_NSTRING): Define.
+	(i386_disasm): Print color codes in the appropriate places.
+
+2011-10-16  Roland McGrath  <roland@hack.frob.com>
+
+	* Makefile.am (libcpu_i386_a_SOURCES): Add i386_dis.h.
+	(libcpu_x86_64_a_SOURCES): Add x86_64_dis.h.
+	(i386_disasm.o, x86_64_disasm.o): Depend on those in $(srcdir).
+	(%_dis.h): Renamed target pattern to ...
+	($(srcdir)/%_dis.h): ... this.
+	(noinst_HEADERS, noinst_PROGRAMS): Put under [MAINTAINER_MODE].
+
+2010-08-16  Roland McGrath  <roland@redhat.com>
+
+	* Makefile.am (%_defs): New pattern rule.
+	(%_dis.h, %.mnemonics): Define as pattern rules using %_defs input.
+	(CLEANFILES): Include all those files.
+
+2010-02-15  Roland McGrath  <roland@redhat.com>
+
+	* Makefile.am: Use config/eu.am for common stuff.
+
+2009-04-14  Roland McGrath  <roland@redhat.com>
+
+	* Makefile.am (AM_CFLAGS): Add -fdollars-in-identifiers; it is not the
+	default on every machine.
+
+2009-01-23  Roland McGrath  <roland@redhat.com>
+
+	* Makefile.am (i386_parse_CFLAGS): Use quotes around command
+	substitution that can produce leading whitespace.
+
+2009-01-01  Ulrich Drepper  <drepper@redhat.com>
+
+	* i386_parse.y (instrtable_out): Optimize match_data table by not
+	emitting 0xff masks for leading bytes.
+	* i386_disasm.c (i386_disasm): Adjust reader of match_data.
+
+	* i386_disasm.c (i386_disasm): Reset bufcnt when not matched.  We
+	don't expect snprintf to fail.
+
+2008-12-31  Ulrich Drepper  <drepper@redhat.com>
+
+	* defs/i386: Add dppd, dpps, insertps, movntdqa, mpsadbw, packusdw,
+	pblendvb, pblendw, pcmpeqq, pcmpestri, pcmpestrm, pcmpistri, pcmpistrm,
+	pcmpgtq, phminposuw, pinsrb, pinsrd, pmaxsb, pmaxsd, pmaxud, pmaxuw,
+	pminsb, pminsd, pminud, pminuw, pmovsxbw, pmovsxbd, pmovsxbq, pmovsxwd,
+	pmovsxwq, pmovsxdq, pmovzxbw, pmovzxbd, pmovzxbq, pmovzxwd, pmovzxwq,
+	pmovzxdq, pmuldq, pmulld, popcnt, ptest, roundss, roundps, roundpd,
+	and roundsd opcodes.
+
+	* i386_disasm.c (i386_disasm): Correct resizing of buffer.
+
+	* i386_parse.y (struct argstring): Add off element.
+	(off_op_str): New global variable.
+	(print_op_str): Print strings as concatenated strings.  Keep track
+	of index and length.  Update ->off element.
+	(print_op_str_idx): New function.
+	(instrtable_out): Mark op%d_fct as const.
+	Emit two tables for the strings: the string itself (op%d_str) and the
+	index table (op%d_str_idx).
+	* i386_disasm.c (i386_disasm): Adjust for new op%d_str definition.
+
+	* i386_disasm.c [X86_64] (i386_disasm): Handle rex prefix when
+	printing only prefix.
+
+	* i386_disasm.c (i386_disasm): Minor optimizations.
+
+	* i386_parse.y (instrtable_out): No need to emit index, the reader can
+	keep track.
+	* i386_disasm.c (i386_disasm): The index is not emitted anymore, no
+	need to skip it.
+
+	* i386_disasm.c (amd3dnow): Mark as const.
+
+	* defs/i386: Add blendvpd and blendvps opcodes.
+
+2008-12-30  Ulrich Drepper  <drepper@redhat.com>
+
+	* defs/i386: Add blendpd and blendps opcodes.
+
+2008-12-19  Ulrich Drepper  <drepper@redhat.com>
+
+	* defs/i386: Add entry for AMD 3DNOW.
+	* i386_disasm.c: Implement AMD 3DNOW disassembly.
+
+2008-12-17  Ulrich Drepper  <drepper@redhat.com>
+
+	* i386_disasm.c (i386_disasm): If instruction matches prefix,
+	undoing the prefix match finishes the instruction.
+
+2008-01-21  Roland McGrath  <roland@redhat.com>
+
+	* defs/i386: Fix typo in comment.
+	* i386_disasm.c (i386_disasm): Handle cltq, cqto.
+
+	* i386_parse.y: Add sanity check for NMNES macro value.
+	* Makefile.am (i386_parse.o): Fix target in dependency rule.
+	(i386_parse.h): New target with empty commands.
+	(i386_lex.o): Depend on it in place of i386_parse.c.
+
+2008-01-21  Ulrich Drepper  <drepper@redhat.com>
+
+	* Makefile.am (EXTRA_DIST): Remove defs/x86_64.
+
+2008-01-14  Ulrich Drepper  <drepper@redhat.com>
+
+	* defs/i386: Add fixes for opcodes with register number in opcode,
+	64-bit immediate forms, nop with rex.B.
+	* i386_data.h [X86_64] (FCT_imm64$w): New function.
+	(FCT_oreg): New function.
+	(FCT_oreg$w): New function.
+	* i386_disasm.c (i386_disasm): Reinitialize fmt always before
+	starting the loop to process the string.  Handle 0x90 special for
+	x86-64.
+	* i386_parse.y (fillin_arg): Expand synonyms before concatenating to
+	form the function name.
+
+2008-01-11  Ulrich Drepper  <drepper@redhat.com>
+
+	* i386_disasm.c (struct output_buffer): Remove symcb and symcbarg.
+	(i386_disasm): Remove appropriate initializers.
+	Use symcb to lookup symbol strings.
+
+	* i386_disasm.c (struct output_buffer): Add labelbuf, labelbufsize,
+	symaddr_use, and symaddr fields.
+	(i386_disasm): Remove labelbuf and labelbufsize variables.
+	Add back %e format.  Implement %a and %l formats.
+
+	* i386_data.h (general_mod$r_m): Set symaddr_use and symaddr for %rip
+	base addressing.
+
+	* i386_disasm.c (i386_disasm): Resize output buffer if necessary.
+	Optimize output_data initialization.  Free buffers before return.
+	(struct output_data): Remove op1str field.  Adjust code.
+	(i386_disasm): Store final NUL byte at end of functions.
+
+2008-01-10  Ulrich Drepper  <drepper@redhat.com>
+
+	* i386_data.h (FCT_crdb): New function.
+	(FCT_ccc): Use FCT_crdb.
+	(FCT_ddd): Likewise.
+
+	* defs/i386: Fix a few instructions with immediate arguments.
+
+	* i386_disasm.c: Rewrite interface to callback functions for operands
+	to take a single pointer to a structure.
+	* i386_data.h: Adjust all functions.
+
+2008-01-08  Ulrich Drepper  <drepper@redhat.com>
+
+	* Makefile.am: Enable x86-64 again.
+	* defs/i386: Lots of changes for x86-64.
+	* i386_data.h: Add support for use in x86-64 disassembler.
+	* i386_disasm.c: Likewise.
+	* i386_parse.y: Likewise.
+	* defs/x86_64: Removed.
+
+2008-01-04  Ulrich Drepper  <drepper@redhat.com>
+
+	* defs/i386: Cleanups, remove masks which are not needed.
+	Add remaining Intel opcodes.
+	* i386_data.h (FCT_imm8): Check for input buffer overrun.
+	* i386_disasm.c (i386_disasm): Likewise.
+	* i386_parse.y: Remove suffixes which are not needed anymore.
+
+2008-01-03  Ulrich Drepper  <drepper@redhat.com>
+
+	* defs/i386: Add yet more SSE instructions.
+
+2008-01-02  Ulrich Drepper  <drepper@redhat.com>
+
+	* i386_disasm.c (i386_disasm): Extend matcher to allow tables to
+	contain instructions with prefixes.
+	* defs/i386: Use for many SSE operations.
+	* i386_data.h (FCT_mmxreg2): Removed.
+
+2008-01-01  Ulrich Drepper  <drepper@redhat.com>
+
+	* defs/i386: More 0f prefix support.
+	* i386_data.h (FCT_mmxreg): Implement.
+	(FCT_mmxreg2): Implement.
+	(FCT_mmreg): Remove.
+	* i386_disasm.c (i386_disasm): More special instructions.
+	Fix tttn suffix for cmov.
+	* i386_parse.y: Simplify test for mod/r_m mode.
+
+2007-12-31  Ulrich Drepper  <drepper@redhat.com>
+
+	* defs/i386: Fix order of arguments for mov of control/debug registers.
+	* i386_data.h (FCT_ccc): Implement.
+	(FCT_ddd): Implement.
+
+2007-12-30  Ulrich Drepper  <drepper@redhat.com>
+
+	* defs/i386: Fix 0f groups 6 and 7.
+	* i386_data.c (FCT_mod$16r_m): Implement.
+	* i386_disasm.c (i386_disasm): Third parameter can also have string.
+
+2007-12-29  Ulrich Drepper  <drepper@redhat.com>
+
+	* defs/i386: Add lots of floating point ops.
+	* i386_data.h (FCT_fmod$fr_m): Removed.
+	(FCT_freg): Implement.
+	* i386_disasm.c (i386_disasm): Implement suffix_D.
+	* i386_parse.y: Emit suffix_D.
+
+	* defs/i386: Use rel instead of dispA.
+	Fix lcall, dec, div, idiv, imul, inc, jmp, ljmp, mul, neg, not, push,
+	test.
+
+	* i386_data.h (FCT_dispA): Removed.
+	(FCT_ds_xx): Add test for end of input buffer.
+	* i386_disasm.c (ABORT_ENTRY): Removed.
+	(i386_disasm): Fix handling of SIB.  Pass correct address value to
+	operand callbacks.
+
+	* Makefile.am (*.mnemonics): Filter out INVALID entry.
+	* defs/i386: Define imms8 and use in appropriate places.
+	Add INVALID entries for special opcodes with special mnemonics.
+	Fix int3.  Fix typo in shl.  Correct xlat.
+	* i386_data.h (FCT_ds_xx): New function.
+	(FCT_ds_si): Use it.
+	(FCT_ds_bx): New function.
+	(FCT_imms8): New function.
+	* i386_disasm.c (MNE_INVALID): Define.
+	(i386_disasm): Handle invalid opcodes in mnemonics printing, not
+	separately.  Fix address value passed to operand handlers.
+	* i386_parse.y (bx_reg): Define.
+	(instrtable_out): Handle INVALID entries differently, just use
+	MNE_INVALID value for .mnemonic.
+
+2007-12-28  Ulrich Drepper  <drepper@redhat.com>
+
+	* defs/i386: Fix shift and mov immediate instructions.
+	* i386_data.h (FCT_imm16): Implement.
+
+	* defs/i386: Use absval instead of abs of lcall and ljmp.
+	Add parameters for cmps.  Fix test and mov immediate.
+	* i386_data.h: Implement FCT_absval.
+	* i386_disasm.c: Handle data16 for suffix_w  and FCT_imm.
+
+	* defs/i386: Move entries with 0x9b prefix together.
+	* i386_disasm.c (i386_disasm): Fix recognizing insufficient bytes in
+	input.  Handle data16 with suffix_W.
+
+	* i386_data.h (FCT_*): Add end parameter to all functions.  Check
+	before using more bytes.
+	(FCT_sel): Implement.
+	* i386_disasm.c (i386_disasm): Better handle end of input buffer.
+	Special opcode 0x99.
+
+	* Makefile.am: Use m4 to preprocess defs/* files.
+	* defs/i386: Adjust appropriately.
+	* i386_data.c (FCT_ax): Implement.
+	(FCT_ax$w): Use FCT_ax.
+	* i386_disasm.c (ADD_STRING): Use _len instead of len.
+	(i386_disasm): If no instruction can be matched because of lack of
+	input and prefixes have been matched, print prefixes.
+	Recognize abort entries.
+	Handle special cases.
+	* i386_gendis.c: Recognize - input file name.
+	* i386_lex.c: Recognize INVALID token.
+	* i386_parse.y: Handle INVALID token input.
+
+	* defs/i386: Fix mov, pop.
+	* i386_data.h (FCT_sreg3): Implement.
+
+2007-12-27  Ulrich Drepper  <drepper@redhat.com>
+
+	* defs/i386: Fix adc, add, cmp, or, sbb, sub, xchg, xor.
+	* i386_data.h (FCT_imms): New function.
+	(FCT_imm$s): Use FCT_imms for handling of signed values.
+	(FCT_imm8): Sign extend values.
+	* i386_disasm.c (i386_disasm): Implement suffix_w0.
+	* i386_parse.y: Emit suffix w0.
+
+	* i386_data.h (FCT_disp8): Add 0x prefix.
+	(FCT_ds_si): Implement.
+	* i386_disasm.c (i386_disasm): Increment addr for invalid prefixes.
+	Implement tttn suffix.
+	* i386_parse.y: Emit tttn suffix definition.
+
+2007-12-26  Ulrich Drepper  <drepper@redhat.com>
+
+	* i386_data.h (struct instr_enc): Use suffix field.
+	(FCT_dx): Fill in body.
+	(FCT_es_di): Likewise.
+	(FCT_imm$s): Sign-extended byte values.
+	* i386_disasm.c: Protect ADD_CHAR and ADD_STRING macros.  Adjust uses.
+	(i386_disasm): Handle suffix.
+	* i386_parse.y: Emit suffix information.
+	* defs/i386: Remove unnecessary suffixes.
+
+	* Makefile.am: Disable building x86-64 version for now.
+
+	* defs/i386: Fix and, bound, cmp, or, pop, sbb, sub, xor.
+	* i386_data.h: Pass pointer to prefix to functions.  If no prefixes
+	are consumed this means invalid input.
+	* i386_disasm.c: Fix prefix printing.  Adjust function calls for
+	parameter change.
+	* i386_parse.y: Recognize moda prefix.
+
+2007-12-21  Ulrich Drepper  <drepper@redhat.com>
+
+	* i386_data.h: Fix SIB handling.
+	* i386_disasm.c: Likewise.
+
+2007-12-19  Ulrich Drepper  <drepper@redhat.com>
+
+	* defs/i386: Fix up 'and' opcode.
+
+2007-10-31  Ulrich Drepper  <drepper@redhat.com>
+
+	* Makefile.am: Add dependencies of the generated files on the source
+	files.
+	(i386_lex_CFLAGS): Add -Wno-sign-compare.
+
+	* defs/i386: A lot more data.
+	* defs/x86_64: Likewise.
+	* i386_data.h (struct instr_enc): Add off1_3, off2_3, and off3_3
+	fields.
+	(opfct_t): Add parameter for third operand.
+	(FCT_*): Likewise.
+	(data_prefix): New function.
+	(FCT_abs): Implement.
+	(FCT_ax): Renamed to FCT_ax$w and implement.
+	(FCT_disp8): Implement.
+	(FCT_dispA): Implement.
+	(FCT_imm): Implement.
+	(FCT_imm$w): Implement.
+	(FCT_imm$s): Don't zero-pad numbers.
+	(FCT_imm8): Likewise.
+	(FCT_rel): Likewise.
+	(general_mod$r_m): New function.
+	(FCT_mod$r_m): Use it.
+	(FCT_mod$r_m$w): New function.
+	(FCT_mod$8r_m): New function.
+	(FCT_reg): Correctly handle 16-bit registers.
+	(FCT_reg$w): New function.
+	* i386_disasm.c (i386_disasm): Handle prefixes better.
+	Pass third parameter to operand functions.
+	* i386_parse.y (struct instruction): Add off3 field.
+	Handle third operand throughout.
+
+2007-02-05  Ulrich Drepper  <drepper@redhat.com>
+
+	* i386_disasm.c: New file.
+	* i386_data.h: New file.
+	* i386_gendis.c: New file.
+	* i386_lex.l: New file.
+	* i386_parse.y: New file.
+	* memory-access.h: New file.
+	* x86_64_disasm.c: New file.
+	* defs/i386: New file.
+	* defs/i386.doc: New file.
+	* defs/x86_64: New file.
+
+2005-02-15  Ulrich Drepper  <drepper@redhat.com>
+
+	* Makefile (AM_CFLAGS): Add -Wunused -Wextra -Wformat=2.
+
+2005-02-05  Ulrich Drepper  <drepper@redhat.com>
+
+	* Makefile.am (AM_CFLAGS): Define, instead of adding things to DEFS.
+
+2003-08-11  Ulrich Drepper  <drepper@redhat.com>
+
+	* Moved to CVS archive.
diff --git a/libcpu/Makefile.am b/libcpu/Makefile.am
new file mode 100644
index 0000000..4c8778d
--- /dev/null
+++ b/libcpu/Makefile.am
@@ -0,0 +1,92 @@
+## Process this file with automake to create Makefile.in
+##
+## Copyright (C) 2002-2012 Red Hat, Inc.
+## This file is part of elfutils.
+##
+## This file is free software; you can redistribute it and/or modify
+## it under the terms of either
+##
+##   * the GNU Lesser General Public License as published by the Free
+##     Software Foundation; either version 3 of the License, or (at
+##     your option) any later version
+##
+## or
+##
+##   * the GNU General Public License as published by the Free
+##     Software Foundation; either version 2 of the License, or (at
+##     your option) any later version
+##
+## or both in parallel, as here.
+##
+## elfutils is distributed in the hope that it will be useful, but
+## WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+## General Public License for more details.
+##
+## You should have received copies of the GNU General Public License and
+## the GNU Lesser General Public License along with this program.  If
+## not, see <http://www.gnu.org/licenses/>.
+##
+include $(top_srcdir)/config/eu.am
+AM_CPPFLAGS += -I$(srcdir)/../libelf -I$(srcdir)/../libebl \
+	    -I$(srcdir)/../libdw -I$(srcdir)/../libasm
+AM_CFLAGS += $(fpic_CFLAGS) -fdollars-in-identifiers
+LEXCOMPILE = $(LEX) $(LFLAGS) $(AM_LFLAGS) -P$(<F:lex.l=)
+LEX_OUTPUT_ROOT = lex.$(<F:lex.l=)
+AM_YFLAGS = -p$(<F:parse.y=)
+
+noinst_LIBRARIES = libcpu_i386.a libcpu_x86_64.a
+
+libcpu_i386_a_SOURCES = i386_disasm.c i386_dis.h
+libcpu_x86_64_a_SOURCES = x86_64_disasm.c x86_64_dis.h
+
+i386_gendis_SOURCES = i386_gendis.c i386_lex.l i386_parse.y
+
+i386_disasm.o: i386.mnemonics $(srcdir)/i386_dis.h
+x86_64_disasm.o: x86_64.mnemonics $(srcdir)/x86_64_dis.h
+
+noinst_LIBRARIES += libcpu_bpf.a
+libcpu_bpf_a_SOURCES = bpf_disasm.c
+libcpu_bpf_a_CFLAGS = $(AM_CFLAGS) -Wno-format-nonliteral
+
+%_defs: $(srcdir)/defs/i386
+	$(AM_V_GEN)m4 -D$* -DDISASSEMBLER $< > $@T
+	$(AM_V_at)mv -f $@T $@
+
+if MAINTAINER_MODE
+noinst_HEADERS = memory-access.h i386_parse.h i386_data.h
+
+noinst_PROGRAMS = i386_gendis$(EXEEXT)
+
+$(srcdir)/%_dis.h: %_defs i386_gendis$(EXEEXT)
+	$(AM_V_GEN)./i386_gendis$(EXEEXT) $< > $@T
+	$(AM_V_at)mv -f $@T $@
+
+else
+
+$(srcdir)/%_dis.h:
+	@echo '*** missing $@; configure with --enable-maintainer-mode'
+	@false
+
+endif
+
+%.mnemonics: %_defs
+	$(AM_V_GEN)sed '1,/^%%/d;/^#/d;/^[[:space:]]*$$/d;s/[^:]*:\([^[:space:]]*\).*/MNE(\1)/;s/{[^}]*}//g;/INVALID/d' \
+	  $< | sort -u > $@
+
+i386_lex_no_Werror = yes
+
+libeu = ../lib/libeu.a
+
+i386_lex_CFLAGS = -Wno-unused-label -Wno-unused-function -Wno-sign-compare
+i386_parse.o: i386_parse.c i386.mnemonics
+i386_parse_CFLAGS = -DNMNES="`wc -l < i386.mnemonics`"
+i386_lex.o: i386_parse.h
+i386_gendis_LDADD = $(libeu) -lm
+
+i386_parse.h: i386_parse.c ;
+
+EXTRA_DIST = defs/i386
+
+CLEANFILES += $(foreach P,i386 x86_64,$P_defs $P.mnemonics)
+MAINTAINERCLEANFILES = $(foreach P,i386 x86_64, $P_dis.h)
diff --git a/libcpu/bpf_disasm.c b/libcpu/bpf_disasm.c
new file mode 100644
index 0000000..054aba2
--- /dev/null
+++ b/libcpu/bpf_disasm.c
@@ -0,0 +1,480 @@
+/* Disassembler for BPF.
+   Copyright (C) 2016 Red Hat, Inc.
+   This file is part of elfutils.
+
+   This file is free software; you can redistribute it and/or modify
+   it under the terms of either
+
+     * the GNU Lesser General Public License as published by the Free
+       Software Foundation; either version 3 of the License, or (at
+       your option) any later version
+
+   or
+
+     * the GNU General Public License as published by the Free
+       Software Foundation; either version 2 of the License, or (at
+       your option) any later version
+
+   or both in parallel, as here.
+
+   elfutils is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received copies of the GNU General Public License and
+   the GNU Lesser General Public License along with this program.  If
+   not, see <http://www.gnu.org/licenses/>.  */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <assert.h>
+#include <string.h>
+#include <stdio.h>
+#include <gelf.h>
+#include <inttypes.h>
+#include "bpf.h"
+
+#include "../libelf/common.h"
+#include "../libebl/libeblP.h"
+
+static const char class_string[8][8] = {
+  [BPF_LD]    = "ld",
+  [BPF_LDX]   = "ldx",
+  [BPF_ST]    = "st",
+  [BPF_STX]   = "stx",
+  [BPF_ALU]   = "alu",
+  [BPF_JMP]   = "jmp",
+  [BPF_RET]   = "6",		/* completely unused in ebpf */
+  [BPF_ALU64] = "alu64",
+};
+
+
+#define REG(N)		"r%" #N "$d"
+#define REGU(N)		"(u32)" REG(N)
+#define REGS(N)		"(s64)" REG(N)
+
+#define IMMS(N)		"%" #N "$d"
+#define IMMX(N)		"%" #N "$#x"
+
+#define OFF(N)		"%" #N "$+d"
+#define JMP(N)		"%" #N "$#x"
+
+#define A32(O, S)	REG(1) " = " REGU(1) " " #O " " S
+#define A64(O, S)	REG(1) " " #O "= " S
+#define J64(D, O, S)	"if " D " " #O " " S " goto " JMP(3)
+#define LOAD(T)		REG(1) " = *(" #T " *)(" REG(2) OFF(3) ")"
+#define STORE(T, S)	"*(" #T " *)(" REG(1) OFF(3) ") = " S
+#define XADD(T, S)	"lock *(" #T " *)(" REG(1) OFF(3) ") += " S
+#define LDSKB(T, S)	"r0 = *(" #T " *)skb[" S "]"
+
+static void
+bswap_bpf_insn (struct bpf_insn *p)
+{
+  /* Note that the dst_reg and src_reg fields are 4-bit bitfields.
+     That means these two nibbles are (typically) laid out in the
+     opposite order between big- and little-endian hosts.  This is
+     not required by any standard, but does happen to be true for
+     at least ppc, s390, arm and mips as big-endian hosts.  */
+  int t = p->dst_reg;
+  p->dst_reg = p->src_reg;
+  p->src_reg = t;
+
+  /* The other 2 and 4 byte fields are trivially converted.  */
+  CONVERT (p->off);
+  CONVERT (p->imm);
+}
+
+int
+bpf_disasm (Ebl *ebl, const uint8_t **startp, const uint8_t *end,
+	    GElf_Addr addr, const char *fmt __attribute__((unused)),
+	    DisasmOutputCB_t outcb,
+	    DisasmGetSymCB_t symcb __attribute__((unused)),
+	    void *outcbarg,
+	    void *symcbarg __attribute__((unused)))
+{
+  const bool need_bswap = MY_ELFDATA != ebl->data;
+  const uint8_t *start = *startp;
+  char buf[128];
+  int len, retval = 0;
+
+  while (start + sizeof(struct bpf_insn) <= end)
+    {
+      struct bpf_insn i;
+      unsigned code, class, jmp;
+      const char *code_fmt;
+
+      memcpy(&i, start, sizeof(struct bpf_insn));
+      if (need_bswap)
+	bswap_bpf_insn (&i);
+
+      start += sizeof(struct bpf_insn);
+      addr += sizeof(struct bpf_insn);
+      jmp = addr + i.off * sizeof(struct bpf_insn);
+
+      code = i.code;
+      switch (code)
+	{
+	case BPF_LD | BPF_IMM | BPF_DW:
+	  {
+	    struct bpf_insn i2;
+	    uint64_t imm64;
+
+	    if (start + sizeof(struct bpf_insn) > end)
+	      {
+		start -= sizeof(struct bpf_insn);
+		*startp = start;
+		goto done;
+	      }
+	    memcpy(&i2, start, sizeof(struct bpf_insn));
+	    if (need_bswap)
+	      bswap_bpf_insn (&i2);
+	    start += sizeof(struct bpf_insn);
+	    addr += sizeof(struct bpf_insn);
+
+	    imm64 = (uint32_t)i.imm | ((uint64_t)i2.imm << 32);
+	    switch (i.src_reg)
+	      {
+	      case 0:
+		code_fmt = REG(1) " = %2$#" PRIx64;
+		break;
+	      case BPF_PSEUDO_MAP_FD:
+		code_fmt = REG(1) " = map_fd(%2$#" PRIx64 ")";
+		break;
+	      default:
+		code_fmt = REG(1) " = ld_pseudo(%3$d, %2$#" PRIx64 ")";
+		break;
+	      }
+	    len = snprintf(buf, sizeof(buf), code_fmt,
+			   i.dst_reg, imm64, i.src_reg);
+	  }
+	  break;
+
+	case BPF_JMP | BPF_EXIT:
+	  len = snprintf(buf, sizeof(buf), "exit");
+	  break;
+	case BPF_JMP | BPF_JA:
+	  len = snprintf(buf, sizeof(buf), "goto " JMP(1), jmp);
+	  break;
+	case BPF_JMP | BPF_CALL:
+	  code_fmt = "call " IMMS(1);
+	  goto do_imm;
+
+	case BPF_ALU | BPF_END | BPF_TO_LE:
+	  /* The imm field contains {16,32,64}.  */
+	  code_fmt = REG(1) " = le" IMMS(2) "(" REG(1) ")";
+	  goto do_dst_imm;
+	case BPF_ALU | BPF_END | BPF_TO_BE:
+	  code_fmt = REG(1) " = be" IMMS(2) "(" REG(1) ")";
+	  goto do_dst_imm;
+
+	case BPF_ALU | BPF_ADD | BPF_K:
+	  code_fmt = A32(+, IMMS(2));
+	  goto do_dst_imm;
+	case BPF_ALU | BPF_SUB | BPF_K:
+	  code_fmt = A32(-, IMMS(2));
+	  goto do_dst_imm;
+	case BPF_ALU | BPF_MUL | BPF_K:
+	  code_fmt = A32(*, IMMS(2));
+	  goto do_dst_imm;
+	case BPF_ALU | BPF_DIV | BPF_K:
+	  code_fmt = A32(/, IMMS(2));
+	  goto do_dst_imm;
+	case BPF_ALU | BPF_OR | BPF_K:
+	  code_fmt = A32(|, IMMX(2));
+	  goto do_dst_imm;
+	case BPF_ALU | BPF_AND | BPF_K:
+	  code_fmt = A32(&, IMMX(2));
+	  goto do_dst_imm;
+	case BPF_ALU | BPF_LSH | BPF_K:
+	  code_fmt = A32(<<, IMMS(2));
+	  goto do_dst_imm;
+	case BPF_ALU | BPF_RSH | BPF_K:
+	  code_fmt = A32(>>, IMMS(2));
+	  goto do_dst_imm;
+	case BPF_ALU | BPF_MOD | BPF_K:
+	  code_fmt = A32(%%, IMMS(2));
+	  goto do_dst_imm;
+	case BPF_ALU | BPF_XOR | BPF_K:
+	  code_fmt = A32(^, IMMX(2));
+	  goto do_dst_imm;
+	case BPF_ALU | BPF_MOV | BPF_K:
+	  code_fmt = REG(1) " = " IMMX(2);
+	  goto do_dst_imm;
+	case BPF_ALU | BPF_ARSH | BPF_K:
+	  code_fmt = REG(1) " = (u32)((s32)" REG(1) " >> " IMMS(2) ")";
+	  goto do_dst_imm;
+
+	case BPF_ALU | BPF_ADD | BPF_X:
+	  code_fmt = A32(+, REGU(2));
+	  goto do_dst_src;
+	case BPF_ALU | BPF_SUB | BPF_X:
+	  code_fmt = A32(-, REGU(2));
+	  goto do_dst_src;
+	case BPF_ALU | BPF_MUL | BPF_X:
+	  code_fmt = A32(*, REGU(2));
+	  goto do_dst_src;
+	case BPF_ALU | BPF_DIV | BPF_X:
+	  code_fmt = A32(/, REGU(2));
+	  goto do_dst_src;
+	case BPF_ALU | BPF_OR | BPF_X:
+	  code_fmt = A32(|, REGU(2));
+	  goto do_dst_src;
+	case BPF_ALU | BPF_AND | BPF_X:
+	  code_fmt = A32(&, REGU(2));
+	  goto do_dst_src;
+	case BPF_ALU | BPF_LSH | BPF_X:
+	  code_fmt = A32(<<, REGU(2));
+	  goto do_dst_src;
+	case BPF_ALU | BPF_RSH | BPF_X:
+	  code_fmt = A32(>>, REGU(2));
+	  goto do_dst_src;
+	case BPF_ALU | BPF_MOD | BPF_X:
+	  code_fmt = A32(%%, REGU(2));
+	  goto do_dst_src;
+	case BPF_ALU | BPF_XOR | BPF_X:
+	  code_fmt = A32(^, REGU(2));
+	  goto do_dst_src;
+	case BPF_ALU | BPF_MOV | BPF_X:
+	  code_fmt = REG(1) " = " REGU(2);
+	  goto do_dst_src;
+	case BPF_ALU | BPF_ARSH | BPF_X:
+	  code_fmt = REG(1) " = (u32)((s32)" REG(1) " >> " REG(2) ")";
+	  goto do_dst_src;
+
+	case BPF_ALU64 | BPF_ADD | BPF_K:
+	  code_fmt = A64(+, IMMS(2));
+	  goto do_dst_imm;
+	case BPF_ALU64 | BPF_SUB | BPF_K:
+	  code_fmt = A64(-, IMMS(2));
+	  goto do_dst_imm;
+	case BPF_ALU64 | BPF_MUL | BPF_K:
+	  code_fmt = A64(*, IMMS(2));
+	  goto do_dst_imm;
+	case BPF_ALU64 | BPF_DIV | BPF_K:
+	  code_fmt = A64(/, IMMS(2));
+	  goto do_dst_imm;
+	case BPF_ALU64 | BPF_OR | BPF_K:
+	  code_fmt = A64(|, IMMS(2));
+	  goto do_dst_imm;
+	case BPF_ALU64 | BPF_AND | BPF_K:
+	  code_fmt = A64(&, IMMS(2));
+	  goto do_dst_imm;
+	case BPF_ALU64 | BPF_LSH | BPF_K:
+	  code_fmt = A64(<<, IMMS(2));
+	  goto do_dst_imm;
+	case BPF_ALU64 | BPF_RSH | BPF_K:
+	  code_fmt = A64(>>, IMMS(2));
+	  goto do_dst_imm;
+	case BPF_ALU64 | BPF_MOD | BPF_K:
+	  code_fmt = A64(%%, IMMS(2));
+	  goto do_dst_imm;
+	case BPF_ALU64 | BPF_XOR | BPF_K:
+	  code_fmt = A64(^, IMMS(2));
+	  goto do_dst_imm;
+	case BPF_ALU64 | BPF_MOV | BPF_K:
+	  code_fmt = REG(1) " = " IMMS(2);
+	  goto do_dst_imm;
+	case BPF_ALU64 | BPF_ARSH | BPF_K:
+	  code_fmt = REG(1) " = (s64)" REG(1) " >> " IMMS(2);
+	  goto do_dst_imm;
+
+	case BPF_ALU64 | BPF_ADD | BPF_X:
+	  code_fmt = A64(+, REG(2));
+	  goto do_dst_src;
+	case BPF_ALU64 | BPF_SUB | BPF_X:
+	  code_fmt = A64(-, REG(2));
+	  goto do_dst_src;
+	case BPF_ALU64 | BPF_MUL | BPF_X:
+	  code_fmt = A64(*, REG(2));
+	  goto do_dst_src;
+	case BPF_ALU64 | BPF_DIV | BPF_X:
+	  code_fmt = A64(/, REG(2));
+	  goto do_dst_src;
+	case BPF_ALU64 | BPF_OR | BPF_X:
+	  code_fmt = A64(|, REG(2));
+	  goto do_dst_src;
+	case BPF_ALU64 | BPF_AND | BPF_X:
+	  code_fmt = A64(&, REG(2));
+	  goto do_dst_src;
+	case BPF_ALU64 | BPF_LSH | BPF_X:
+	  code_fmt = A64(<<, REG(2));
+	  goto do_dst_src;
+	case BPF_ALU64 | BPF_RSH | BPF_X:
+	  code_fmt = A64(>>, REG(2));
+	  goto do_dst_src;
+	case BPF_ALU64 | BPF_MOD | BPF_X:
+	  code_fmt = A64(%%, REG(2));
+	  goto do_dst_src;
+	case BPF_ALU64 | BPF_XOR | BPF_X:
+	  code_fmt = A64(^, REG(2));
+	  goto do_dst_src;
+	case BPF_ALU64 | BPF_MOV | BPF_X:
+	  code_fmt = REG(1) " = " REG(2);
+	  goto do_dst_src;
+	case BPF_ALU64 | BPF_ARSH | BPF_X:
+	  code_fmt = REG(1) " = (s64)" REG(1) " >> " REG(2);
+	  goto do_dst_src;
+
+	case BPF_ALU | BPF_NEG:
+	  code_fmt = REG(1) " = (u32)-" REG(1);
+	  goto do_dst_src;
+	case BPF_ALU64 | BPF_NEG:
+	  code_fmt = REG(1) " = -" REG(1);
+	  goto do_dst_src;
+
+	case BPF_JMP | BPF_JEQ | BPF_K:
+	  code_fmt = J64(REG(1), ==, IMMS(2));
+	  goto do_dst_imm_jmp;
+	case BPF_JMP | BPF_JGT | BPF_K:
+	  code_fmt = J64(REG(1), >, IMMS(2));
+	  goto do_dst_imm_jmp;
+	case BPF_JMP | BPF_JGE | BPF_K:
+	  code_fmt = J64(REG(1), >=, IMMS(2));
+	  goto do_dst_imm_jmp;
+	case BPF_JMP | BPF_JSET | BPF_K:
+	  code_fmt = J64(REG(1), &, IMMS(2));
+	  goto do_dst_imm_jmp;
+	case BPF_JMP | BPF_JNE | BPF_K:
+	  code_fmt = J64(REG(1), !=, IMMS(2));
+	  goto do_dst_imm_jmp;
+	case BPF_JMP | BPF_JSGT | BPF_K:
+	  code_fmt = J64(REGS(1), >, IMMS(2));
+	  goto do_dst_imm_jmp;
+	case BPF_JMP | BPF_JSGE | BPF_K:
+	  code_fmt = J64(REGS(1), >=, IMMS(2));
+	  goto do_dst_imm_jmp;
+
+	case BPF_JMP | BPF_JEQ | BPF_X:
+	  code_fmt = J64(REG(1), ==, REG(2));
+	  goto do_dst_src_jmp;
+	case BPF_JMP | BPF_JGT | BPF_X:
+	  code_fmt = J64(REG(1), >, REG(2));
+	  goto do_dst_src_jmp;
+	case BPF_JMP | BPF_JGE | BPF_X:
+	  code_fmt = J64(REG(1), >=, REG(2));
+	  goto do_dst_src_jmp;
+	case BPF_JMP | BPF_JSET | BPF_X:
+	  code_fmt = J64(REG(1), &, REG(2));
+	  goto do_dst_src_jmp;
+	case BPF_JMP | BPF_JNE | BPF_X:
+	  code_fmt = J64(REG(1), !=, REG(2));
+	  goto do_dst_src_jmp;
+	case BPF_JMP | BPF_JSGT | BPF_X:
+	  code_fmt = J64(REGS(1), >, REGS(2));
+	  goto do_dst_src_jmp;
+	case BPF_JMP | BPF_JSGE | BPF_X:
+	  code_fmt = J64(REGS(1), >=, REGS(2));
+	  goto do_dst_src_jmp;
+
+	case BPF_LDX | BPF_MEM | BPF_B:
+	  code_fmt = LOAD(u8);
+	  goto do_dst_src_off;
+	case BPF_LDX | BPF_MEM | BPF_H:
+	  code_fmt = LOAD(u16);
+	  goto do_dst_src_off;
+	case BPF_LDX | BPF_MEM | BPF_W:
+	  code_fmt = LOAD(u32);
+	  goto do_dst_src_off;
+	case BPF_LDX | BPF_MEM | BPF_DW:
+	  code_fmt = LOAD(u64);
+	  goto do_dst_src_off;
+
+	case BPF_STX | BPF_MEM | BPF_B:
+	  code_fmt = STORE(u8, REG(2));
+	  goto do_dst_src_off;
+	case BPF_STX | BPF_MEM | BPF_H:
+	  code_fmt = STORE(u16, REG(2));
+	  goto do_dst_src_off;
+	case BPF_STX | BPF_MEM | BPF_W:
+	  code_fmt = STORE(u32, REG(2));
+	  goto do_dst_src_off;
+	case BPF_STX | BPF_MEM | BPF_DW:
+	  code_fmt = STORE(u64, REG(2));
+	  goto do_dst_src_off;
+
+	case BPF_STX | BPF_XADD | BPF_W:
+	  code_fmt = XADD(u32, REG(2));
+	  goto do_dst_src_off;
+	case BPF_STX | BPF_XADD | BPF_DW:
+	  code_fmt = XADD(u64, REG(2));
+	  goto do_dst_src_off;
+
+	case BPF_ST | BPF_MEM | BPF_B:
+	  code_fmt = STORE(u8, IMMS(2));
+	  goto do_dst_imm_off;
+	case BPF_ST | BPF_MEM | BPF_H:
+	  code_fmt = STORE(u16, IMMS(2));
+	  goto do_dst_imm_off;
+	case BPF_ST | BPF_MEM | BPF_W:
+	  code_fmt = STORE(u32, IMMS(2));
+	  goto do_dst_imm_off;
+	case BPF_ST | BPF_MEM | BPF_DW:
+	  code_fmt = STORE(u64, IMMS(2));
+	  goto do_dst_imm_off;
+
+	case BPF_LD | BPF_ABS | BPF_B:
+	  code_fmt = LDSKB(u8, IMMS(1));
+	  goto do_imm;
+	case BPF_LD | BPF_ABS | BPF_H:
+	  code_fmt = LDSKB(u16, IMMS(1));
+	  goto do_imm;
+	case BPF_LD | BPF_ABS | BPF_W:
+	  code_fmt = LDSKB(u32, IMMS(1));
+	  goto do_imm;
+
+	case BPF_LD | BPF_IND | BPF_B:
+	  code_fmt = LDSKB(u8, REG(1) "+" IMMS(2));
+	  goto do_src_imm;
+	case BPF_LD | BPF_IND | BPF_H:
+	  code_fmt = LDSKB(u16, REG(1) "+" IMMS(2));
+	  goto do_src_imm;
+	case BPF_LD | BPF_IND | BPF_W:
+	  code_fmt = LDSKB(u32, REG(1) "+" IMMS(2));
+	  goto do_src_imm;
+
+	do_imm:
+	  len = snprintf(buf, sizeof(buf), code_fmt, i.imm);
+	  break;
+	do_dst_imm:
+	  len = snprintf(buf, sizeof(buf), code_fmt, i.dst_reg, i.imm);
+	  break;
+	do_src_imm:
+	  len = snprintf(buf, sizeof(buf), code_fmt, i.src_reg, i.imm);
+	  break;
+	do_dst_src:
+	  len = snprintf(buf, sizeof(buf), code_fmt, i.dst_reg, i.src_reg);
+	  break;
+	do_dst_imm_jmp:
+	  len = snprintf(buf, sizeof(buf), code_fmt, i.dst_reg, i.imm, jmp);
+	  break;
+	do_dst_src_jmp:
+	  len = snprintf(buf, sizeof(buf), code_fmt,
+			 i.dst_reg, i.src_reg, jmp);
+	  break;
+	do_dst_imm_off:
+	  len = snprintf(buf, sizeof(buf), code_fmt, i.dst_reg, i.imm, i.off);
+	  break;
+	do_dst_src_off:
+	  len = snprintf(buf, sizeof(buf), code_fmt,
+			 i.dst_reg, i.src_reg, i.off);
+	  break;
+
+	default:
+	  class = BPF_CLASS(code);
+	  len = snprintf(buf, sizeof(buf), "invalid class %s",
+			 class_string[class]);
+	  break;
+        }
+
+      *startp = start;
+      retval = outcb (buf, len, outcbarg);
+      if (retval != 0)
+	goto done;
+    }
+
+ done:
+  return retval;
+}
diff --git a/libcpu/defs/i386 b/libcpu/defs/i386
new file mode 100644
index 0000000..e0db28d
--- /dev/null
+++ b/libcpu/defs/i386
@@ -0,0 +1,970 @@
+%mask {s}	1
+%mask {w}	1
+%mask {w1}	1
+%mask {W1}	1
+%mask {W2}	1
+dnl floating point reg suffix
+%mask {D}	1
+%mask {imm8}	8
+%mask {imms8}	8
+%mask {imm16}	16
+%mask {reg}	3
+%mask {oreg}	3
+%mask {reg16}	3
+%mask {reg64}	3
+%mask {tttn}	4
+%mask {mod}	2
+%mask {moda}	2
+%mask {MOD}	2
+%mask {r_m}	3
+dnl like {r_m} but referencing byte register
+%mask {8r_m}	3
+dnl like {r_m} but referencing 16-bit register
+%mask {16r_m}	3
+dnl like {r_m} but referencing 32- or 64-bit register
+%mask {64r_m}	3
+%mask {disp8}	8
+dnl imm really is 8/16/32 bit depending on the situation.
+%mask {imm}	8
+%mask {imm64}	8
+%mask {imms}	8
+%mask {rel}	32
+%mask {abs}	32
+%mask {absval}	32
+%mask {sel}	16
+%mask {imm32}	32
+%mask {ccc}	3
+%mask {ddd}	3
+%mask {sreg3}	3
+%mask {sreg2}	2
+%mask {mmxreg}	3
+%mask {R_M}	3
+%mask {Mod}	2
+%mask {xmmreg}	3
+%mask {R_m}	3
+%mask {xmmreg1} 3
+%mask {xmmreg2} 3
+%mask {mmxreg1} 3
+%mask {mmxreg2} 3
+%mask {predps}	8
+%mask {freg}	3
+%mask {fmod}	2
+%mask {fr_m}	3
+%prefix {R}
+%prefix {RE}
+%suffix {W}
+%suffix {w0}
+%synonym {xmmreg1} {xmmreg}
+%synonym {xmmreg2} {xmmreg}
+%synonym {mmxreg1} {mmxreg}
+%synonym {mmxreg2} {mmxreg}
+ifdef(`i386',
+`%synonym {oreg} {reg}
+%synonym {imm64} {imm}
+')dnl
+
+%%
+ifdef(`i386',
+`00110111:aaa
+11010101,00001010:aad
+11010100,00001010:aam
+00111111:aas
+')dnl
+0001010{w},{imm}:adc {imm}{w},{ax}{w}
+1000000{w},{mod}010{r_m},{imm}:adc{w} {imm}{w},{mod}{r_m}{w}
+1000001{w},{mod}010{r_m},{imms8}:adc{w} {imms8},{mod}{r_m}
+0001000{w},{mod}{reg}{r_m}:adc {reg}{w},{mod}{r_m}{w}
+0001001{w},{mod}{reg}{r_m}:adc {mod}{r_m}{w},{reg}{w}
+0000010{w},{imm}:add {imm}{w},{ax}{w}
+1000000{w},{mod}000{r_m},{imm}:add{w} {imm}{w},{mod}{r_m}{w}
+10000011,{mod}000{r_m},{imms8}:add{w} {imms8},{mod}{r_m}
+0000000{w},{mod}{reg}{r_m}:add {reg}{w},{mod}{r_m}{w}
+0000001{w},{mod}{reg}{r_m}:add {mod}{r_m}{w},{reg}{w}
+01100110,00001111,11010000,{Mod}{xmmreg}{R_m}:addsubpd {Mod}{R_m},{xmmreg}
+11110010,00001111,11010000,{Mod}{xmmreg}{R_m}:addsubps {Mod}{R_m},{xmmreg}
+0010010{w},{imm}:and {imm}{w},{ax}{w}
+1000000{w},{mod}100{r_m},{imm}:and{w} {imm}{w},{mod}{r_m}{w}
+1000001{w},{mod}100{r_m},{imms8}:and{w} {imms8},{mod}{r_m}
+0010000{w},{mod}{reg}{r_m}:and {reg}{w},{mod}{r_m}{w}
+0010001{w},{mod}{reg}{r_m}:and {mod}{r_m}{w},{reg}{w}
+01100110,00001111,01010100,{Mod}{xmmreg}{R_m}:andpd {Mod}{R_m},{xmmreg}
+00001111,01010100,{Mod}{xmmreg}{R_m}:andps {Mod}{R_m},{xmmreg}
+01100110,00001111,01010101,{Mod}{xmmreg}{R_m}:andnpd {Mod}{R_m},{xmmreg}
+00001111,01010101,{Mod}{xmmreg}{R_m}:andnps {Mod}{R_m},{xmmreg}
+ifdef(`i386',
+`01100011,{mod}{reg16}{r_m}:arpl {reg16},{mod}{r_m}
+01100010,{moda}{reg}{r_m}:bound {reg},{moda}{r_m}
+',
+`01100011,{mod}{reg64}{r_m}:movslq {mod}{r_m},{reg64}
+')dnl
+00001111,10111100,{mod}{reg}{r_m}:bsf {mod}{r_m},{reg}
+00001111,10111101,{mod}{reg}{r_m}:bsr {mod}{r_m},{reg}
+00001111,11001{reg}:bswap {reg}
+00001111,10100011,{mod}{reg}{r_m}:bt {reg},{mod}{r_m}
+00001111,10111010,{mod}100{r_m},{imm8}:bt{w} {imm8},{mod}{r_m}
+00001111,10111011,{mod}{reg}{r_m}:btc {reg},{mod}{r_m}
+00001111,10111010,{mod}111{r_m},{imm8}:btc{w} {imm8},{mod}{r_m}
+00001111,10110011,{mod}{reg}{r_m}:btr {reg},{mod}{r_m}
+00001111,10111010,{mod}110{r_m},{imm8}:btr{w} {imm8},{mod}{r_m}
+00001111,10101011,{mod}{reg}{r_m}:bts {reg},{mod}{r_m}
+00001111,10111010,{mod}101{r_m},{imm8}:bts{w} {imm8},{mod}{r_m}
+11101000,{rel}:call{W} {rel}
+11111111,{mod}010{64r_m}:call{W} *{mod}{64r_m}
+ifdef(`i386',
+`10011010,{absval},{sel}:lcall {sel},{absval}
+')dnl
+11111111,{mod}011{64r_m}:lcall{W} *{mod}{64r_m}
+# SPECIAL 10011000:[{rex.w}?cltq:{dpfx}?cbtw:cwtl]
+10011000:INVALID
+# SPECIAL 10011001:[{rex.w}?cqto:{dpfx}?cltd:cwtd]
+10011001:INVALID
+11111000:clc
+11111100:cld
+11111010:cli
+00001111,00000101:syscall
+00001111,00000110:clts
+00001111,00000111:sysret
+00001111,00110100:sysenter
+00001111,00110101:sysexit
+11110101:cmc
+00001111,0100{tttn},{mod}{reg}{r_m}:cmov{tttn} {mod}{r_m},{reg}
+0011110{w},{imm}:cmp {imm}{w},{ax}{w}
+1000000{w},{mod}111{r_m},{imm}:cmp{w} {imm}{w},{mod}{r_m}{w}
+10000011,{mod}111{r_m},{imms8}:cmp{w} {imms8},{mod}{r_m}
+0011100{w},{mod}{reg}{r_m}:cmp {reg}{w},{mod}{r_m}{w}
+0011101{w},{mod}{reg}{r_m}:cmp {mod}{r_m}{w},{reg}{w}
+ifdef(`ASSEMBLER',
+`11110010,00001111,11000010,{Mod}{xmmreg}{R_m},{imm8}:cmpsd {imm8},{Mod}{R_m},{xmmreg}
+11110011,00001111,11000010,{Mod}{xmmreg}{R_m},{imm8}:cmpss {imm8},{Mod}{R_m},{xmmreg}
+01100110,00001111,11000010,{Mod}{xmmreg}{R_m},{imm8}:cmppd {imm8},{Mod}{R_m},{xmmreg}
+00001111,11000010,{Mod}{xmmreg}{R_m},{imm8}:cmpps {imm8},{Mod}{R_m},{xmmreg}
+',
+`11110010,00001111,11000010,{Mod}{xmmreg}{R_m},{imm8}:INVALID {Mod}{R_m},{xmmreg}
+11110011,00001111,11000010,{Mod}{xmmreg}{R_m},{imm8}:INVALID {Mod}{R_m},{xmmreg}
+01100110,00001111,11000010,{Mod}{xmmreg}{R_m},{imm8}:INVALID {Mod}{R_m},{xmmreg}
+00001111,11000010,{Mod}{xmmreg}{R_m},{imm8}:INVALID {Mod}{R_m},{xmmreg}
+')dnl
+1010011{w}:{RE}cmps{w} {es_di},{ds_si}
+00001111,1011000{w},{mod}{reg}{r_m}:cmpxchg {reg}{w},{mod}{r_m}{w}
+ifdef(`i386',
+`00001111,11000111,{mod}001{r_m}:cmpxchg8b {mod}{r_m}
+',
+`# SPECIAL 00001111,11000111,{mod}001{r_m}:[{rex.w}?cmpxchg16b:cmpxchg8b] {reg},{mod}{r_m}
+00001111,11000111,{mod}001{r_m}:INVALID {mod}{r_m}
+')dnl
+00001111,10100010:cpuid
+11110011,00001111,11100110,{Mod}{xmmreg}{R_m}:cvtdq2pd {Mod}{R_m},{xmmreg}
+11110010,00001111,11100110,{Mod}{xmmreg}{R_m}:cvtpd2dq {Mod}{R_m},{xmmreg}
+01100110,00001111,11100110,{Mod}{xmmreg}{R_m}:cvttpd2dq {Mod}{R_m},{xmmreg}
+ifdef(`i386',
+`00100111:daa
+00101111:das
+')dnl
+1111111{w},{mod}001{r_m}:dec{w} {mod}{r_m}{w}
+ifdef(`i386',
+`01001{reg}:dec {reg}
+')dnl
+1111011{w},{mod}110{r_m}:div{w} {mod}{r_m}{w}
+00001111,01110111:emms
+11001000,{imm16},{imm8}:enter{W} {imm16},{imm8}
+11011001,11010000:fnop
+11011001,11100000:fchs
+11011001,11100001:fabs
+11011001,11100100:ftst
+11011001,11100101:fxam
+11011001,11101000:fld1
+11011001,11101001:fldl2t
+11011001,11101010:fldl2e
+11011001,11101011:fldpi
+11011001,11101100:fldlg2
+11011001,11101101:fldln2
+11011001,11101110:fldz
+11011001,11110000:f2xm1
+11011001,11110001:fyl2x
+11011001,11110010:fptan
+11011001,11110011:fpatan
+11011001,11110100:fxtract
+11011001,11110101:fprem1
+11011001,11110110:fdecstp
+11011001,11110111:fincstp
+11011001,11111000:fprem
+11011001,11111001:fyl2xp1
+11011001,11111010:fsqrt
+11011001,11111011:fsincos
+11011001,11111100:frndint
+11011001,11111101:fscale
+11011001,11111110:fsin
+11011001,11111111:fcos
+# ORDER
+11011000,11000{freg}:fadd {freg},%st
+11011100,11000{freg}:fadd %st,{freg}
+11011{D}00,{mod}000{r_m}:fadd{D} {mod}{r_m}
+# ORDER END
+# ORDER
+11011000,11001{freg}:fmul {freg},%st
+11011100,11001{freg}:fmul %st,{freg}
+11011{D}00,{mod}001{r_m}:fmul{D} {mod}{r_m}
+# ORDER END
+# ORDER
+11011000,11100{freg}:fsub {freg},%st
+11011100,11100{freg}:fsub %st,{freg}
+11011{D}00,{mod}100{r_m}:fsub{D} {mod}{r_m}
+# ORDER END
+# ORDER
+11011000,11101{freg}:fsubr {freg},%st
+11011100,11101{freg}:fsubr %st,{freg}
+11011{D}00,{mod}101{r_m}:fsubr{D} {mod}{r_m}
+# ORDER END
+# ORDER
+11011101,11010{freg}:fst {freg}
+11011{D}01,{mod}010{r_m}:fst{D} {mod}{r_m}
+# ORDER END
+# ORDER
+11011101,11011{freg}:fstp {freg}
+11011{D}01,{mod}011{r_m}:fstp{D} {mod}{r_m}
+# ORDER END
+11011001,{mod}100{r_m}:fldenv {mod}{r_m}
+11011001,{mod}101{r_m}:fldcw {mod}{r_m}
+11011001,{mod}110{r_m}:fnstenv {mod}{r_m}
+11011001,{mod}111{r_m}:fnstcw {mod}{r_m}
+11011001,11001{freg}:fxch {freg}
+# ORDER
+11011110,11000{freg}:faddp %st,{freg}
+ifdef(`ASSEMBLER',
+`11011110,11000001:faddp
+')dnl
+# ORDER
+11011010,11000{freg}:fcmovb {freg},%st
+11011{w1}10,{mod}000{r_m}:fiadd{w1} {mod}{r_m}
+# ORDER END
+# ORDER
+11011010,11001{freg}:fcmove {freg},%st
+11011110,11001{freg}:fmulp %st,{freg}
+11011{w1}10,{mod}001{r_m}:fimul{w1} {mod}{r_m}
+# ORDER END
+# ORDER
+11011110,11100{freg}:fsubp %st,{freg}
+11011{w1}10,{mod}100{r_m}:fisub{w1} {mod}{r_m}
+# ORDER END
+# ORDER
+11011110,11101{freg}:fsubrp %st,{freg}
+11011{w1}10,{mod}101{r_m}:fisubr{w1} {mod}{r_m}
+# ORDER END
+# ORDER
+11011111,11100000:fnstsw %ax
+11011111,{mod}100{r_m}:fbld {mod}{r_m}
+# ORDER END
+# ORDER
+11011111,11110{freg}:fcomip {freg},%st
+11011111,{mod}110{r_m}:fbstp {mod}{r_m}
+# ORDER END
+11011001,11100000:fchs
+# ORDER
+10011011,11011011,11100010:fclex
+10011011,11011011,11100011:finit
+10011011:fwait
+# ORDER END
+11011011,11100010:fnclex
+11011010,11000{freg}:fcmovb {freg},%st
+11011010,11001{freg}:fcmove {freg},%st
+11011010,11010{freg}:fcmovbe {freg},%st
+11011010,11011{freg}:fcmovu {freg},%st
+11011011,11000{freg}:fcmovnb {freg},%st
+11011011,11001{freg}:fcmovne {freg},%st
+11011011,11010{freg}:fcmovnbe {freg},%st
+11011011,11011{freg}:fcmovnu {freg},%st
+# ORDER
+11011000,11010{freg}:fcom {freg}
+ifdef(`ASSEMBLER',
+`11011000,11010001:fcom
+')dnl
+11011{D}00,{mod}010{r_m}:fcom{D} {mod}{r_m}
+# ORDER END
+# ORDER
+11011000,11011{freg}:fcomp {freg}
+ifdef(`ASSEMBLER',
+`11011000,11011001:fcomp
+')dnl
+11011{D}00,{mod}011{r_m}:fcomp{D} {mod}{r_m}
+# ORDER END
+11011110,11011001:fcompp
+11011011,11110{freg}:fcomi {freg},%st
+11011111,11110{freg}:fcomip {freg},%st
+11011011,11101{freg}:fucomi {freg},%st
+11011111,11101{freg}:fucomip {freg},%st
+11011001,11111111:fcos
+11011001,11110110:fdecstp
+# ORDER
+11011000,11110{freg}:fdiv {freg},%st
+11011100,11110{freg}:fdiv %st,{freg}
+11011{D}00,{mod}110{r_m}:fdiv{D} {mod}{r_m}
+# ORDER END
+11011010,{mod}110{r_m}:fidivl {mod}{r_m}
+# ORDER
+11011110,11110{freg}:fdivp %st,{freg}
+11011110,{mod}110{r_m}:fidiv {mod}{r_m}
+# ORDER END
+11011110,11111{freg}:fdivrp %st,{freg}
+ifdef(`ASSEMBLER',
+`11011110,11111001:fdivp
+')dnl
+# ORDER
+11011000,11111{freg}:fdivr {freg},%st
+11011100,11111{freg}:fdivr %st,{freg}
+11011{D}00,{mod}111{r_m}:fdivr{D} {mod}{r_m}
+# ORDER END
+11011010,{mod}111{r_m}:fidivrl {mod}{r_m}
+11011110,{mod}111{r_m}:fidivr {mod}{r_m}
+11011110,11110{freg}:fdivrp %st,{freg}
+ifdef(`ASSEMBLER',
+`11011110,11110001:fdivrp
+')dnl
+11011101,11000{freg}:ffree {freg}
+11011010,11010{freg}:fcmovbe {freg}
+11011{w1}10,{mod}010{r_m}:ficom{w1} {mod}{r_m}
+11011010,11011{freg}:fcmovu {freg}
+11011{w1}10,{mod}011{r_m}:ficomp{w1} {mod}{r_m}
+11011111,{mod}000{r_m}:fild {mod}{r_m}
+11011011,{mod}000{r_m}:fildl {mod}{r_m}
+11011111,{mod}101{r_m}:fildll {mod}{r_m}
+11011001,11110111:fincstp
+11011011,11100011:fninit
+11011{w1}11,{mod}010{r_m}:fist{w1} {mod}{r_m}
+11011{w1}11,{mod}011{r_m}:fistp{w1} {mod}{r_m}
+11011111,{mod}111{r_m}:fistpll {mod}{r_m}
+11011{w1}11,{mod}001{r_m}:fisttp{w1} {mod}{r_m}
+11011101,{mod}001{r_m}:fisttpll {mod}{r_m}
+11011011,{mod}101{r_m}:fldt {mod}{r_m}
+11011011,{mod}111{r_m}:fstpt {mod}{r_m}
+# ORDER
+11011001,11000{freg}:fld {freg}
+11011{D}01,{mod}000{r_m}:fld{D} {mod}{r_m}
+# ORDER END
+# ORDER
+11011101,11100{freg}:fucom {freg}
+11011101,{mod}100{r_m}:frstor {mod}{r_m}
+# ORDER END
+11011101,11101{freg}:fucomp {freg}
+11011101,{mod}110{r_m}:fnsave {mod}{r_m}
+11011101,{mod}111{r_m}:fnstsw {mod}{r_m}
+#
+#
+#
+11110100:hlt
+1111011{w},{mod}111{r_m}:idiv{w} {mod}{r_m}{w}
+1111011{w},{mod}101{r_m}:imul{w} {mod}{r_m}{w}
+00001111,10101111,{mod}{reg}{r_m}:imul {mod}{r_m},{reg}
+011010{s}1,{mod}{reg}{r_m},{imm}:imul {imm}{s},{mod}{r_m},{reg}
+1110010{w},{imm8}:in {imm8},{ax}{w}
+1110110{w}:in {dx},{ax}{w}
+1111111{w},{mod}000{r_m}:inc{w} {mod}{r_m}{w}
+ifdef(`i386',
+`01000{reg}:inc {reg}
+')dnl
+0110110{w}:{R}ins{w} {dx},{es_di}
+11001101,{imm8}:int {imm8}
+11001100:int3
+ifdef(`i386',
+`11001110:into
+')dnl
+00001111,00001000:invd
+# ORDER
+00001111,00000001,11111000:swapgs
+00001111,00000001,{mod}111{r_m}:invlpg {mod}{r_m}
+# ORDER END
+11001111:iret{W1}
+0111{tttn},{disp8}:j{tttn} {disp8}
+00001111,1000{tttn},{rel}:j{tttn} {rel}
+00001111,1001{tttn},{mod}000{8r_m}:set{tttn} {mod}{8r_m}
+# SPECIAL 11100011,{disp8}:[{dpfx}?jcxz:jecxz] {disp8}
+11100011,{disp8}:INVALID {disp8}
+11101011,{disp8}:jmp {disp8}
+11101001,{rel}:jmp{W} {rel}
+11111111,{mod}100{64r_m}:jmp{W} *{mod}{64r_m}
+11101010,{absval},{sel}:ljmp {sel},{absval}
+11111111,{mod}101{64r_m}:ljmp{W} *{mod}{64r_m}
+10011111:lahf
+00001111,00000010,{mod}{reg}{16r_m}:lar {mod}{16r_m},{reg}
+ifdef(`i386',
+`11000101,{mod}{reg}{r_m}:lds {mod}{r_m},{reg}
+')dnl
+10001101,{mod}{reg}{r_m}:lea {mod}{r_m},{reg}
+11001001:leave{W}
+ifdef(`i386',
+`11000100,{mod}{reg}{r_m}:les {mod}{r_m},{reg}
+')dnl
+00001111,10110100,{mod}{reg}{r_m}:lfs {mod}{r_m},{reg}
+00001111,10110101,{mod}{reg}{r_m}:lgs {mod}{r_m},{reg}
+ifdef(`i386',
+`00001111,00000001,{mod}010{r_m}:lgdt{w0} {mod}{r_m}
+00001111,00000001,{mod}011{r_m}:lidt{w0} {mod}{r_m}
+',
+`00001111,00000001,{mod}010{r_m}:lgdt {mod}{r_m}
+00001111,00000001,{mod}011{r_m}:lidt {mod}{r_m}
+')dnl
+00001111,00000000,{mod}010{16r_m}:lldt {mod}{16r_m}
+00001111,00000001,{mod}110{16r_m}:lmsw {mod}{16r_m}
+11110000:lock
+1010110{w}:{R}lods {ds_si},{ax}{w}
+11100010,{disp8}:loop {disp8}
+11100001,{disp8}:loope {disp8}
+11100000,{disp8}:loopne {disp8}
+00001111,00000011,{mod}{reg}{16r_m}:lsl {mod}{16r_m},{reg}
+00001111,10110010,{mod}{reg}{r_m}:lss {mod}{r_m},{reg}
+00001111,00000000,{mod}011{16r_m}:ltr {mod}{16r_m}
+1000100{w},{mod}{reg}{r_m}:mov {reg}{w},{mod}{r_m}{w}
+1000101{w},{mod}{reg}{r_m}:mov {mod}{r_m}{w},{reg}{w}
+1100011{w},{mod}000{r_m},{imm}:mov{w} {imm}{w},{mod}{r_m}{w}
+1011{w}{oreg},{imm64}:mov {imm64}{w},{oreg}{w}
+1010000{w},{abs}:mov {abs},{ax}{w}
+1010001{w},{abs}:mov {ax}{w},{abs}
+00001111,00100000,11{ccc}{reg64}:mov {ccc},{reg64}
+00001111,00100010,11{ccc}{reg64}:mov {reg64},{ccc}
+00001111,00100001,11{ddd}{reg64}:mov {ddd},{reg64}
+00001111,00100011,11{ddd}{reg64}:mov {reg64},{ddd}
+10001100,{mod}{sreg3}{r_m}:mov {sreg3},{mod}{r_m}
+10001110,{mod}{sreg3}{r_m}:mov {mod}{r_m},{sreg3}
+1010010{w}:{R}movs{w} {ds_si},{es_di}
+00001111,10111110,{mod}{reg}{8r_m}:movsbl {mod}{8r_m},{reg}
+00001111,10111111,{mod}{reg}{16r_m}:movswl {mod}{16r_m},{reg}
+00001111,10110110,{mod}{reg}{8r_m}:movzbl {mod}{8r_m},{reg}
+00001111,10110111,{mod}{reg}{16r_m}:movzwl {mod}{16r_m},{reg}
+1111011{w},{mod}100{r_m}:mul{w} {mod}{r_m}{w}
+1111011{w},{mod}011{r_m}:neg{w} {mod}{r_m}{w}
+11110011,10010000:pause
+ifdef(`i386',
+`10010000:nop
+',
+`10010000:INVALID
+')dnl
+# ORDER before out
+11110011,00001111,10111000,{mod}{reg}{r_m}:popcnt {mod}{r_m},{reg}
+# ORDER END
+1111011{w},{mod}010{r_m}:not{w} {mod}{r_m}{w}
+0000100{w},{mod}{reg}{r_m}:or {reg}{w},{mod}{r_m}{w}
+0000101{w},{mod}{reg}{r_m}:or {mod}{r_m}{w},{reg}{w}
+1000000{w},{mod}001{r_m},{imm}:or{w} {imm}{w},{mod}{r_m}{w}
+1000001{w},{mod}001{r_m},{imms8}:or{w} {imms8},{mod}{r_m}{w}
+0000110{w},{imm}:or {imm}{w},{ax}{w}
+1110011{w},{imm8}:out {ax}{w},{imm8}
+1110111{w}:out {ax}{w},{dx}
+0110111{w}:{R}outs{w} {ds_si},{dx}
+ifdef(`i386',
+`10001111,{mod}000{r_m}:pop{w} {mod}{r_m}
+',
+# XXX This is not the cleanest way...
+`10001111,11000{reg64}:pop {reg64}
+10001111,{mod}000{r_m}:pop{W} {mod}{r_m}
+')dnl
+00001111,10{sreg3}001:pop{W} {sreg3}
+10011101:popf{W}
+# XXX This is not the cleanest way...
+ifdef(`i386',
+`11111111,{mod}110{r_m}:push{w} {mod}{r_m}
+',
+`11111111,11110{reg64}:push {reg64}
+11111111,{mod}110{r_m}:pushq {mod}{r_m}
+')dnl
+ifdef(`i386',
+`01010{reg}:push {reg}
+01011{reg}:pop {reg}
+',
+`01010{reg64}:push {reg64}
+01011{reg64}:pop {reg64}
+')dnl
+011010{s}0,{imm}:push{W} {imm}{s}
+000{sreg2}110:push {sreg2}
+00001111,10{sreg3}000:push{W} {sreg3}
+ifdef(`i386',
+`01100000:pusha{W}
+01100001:popa{W}
+')dnl
+10011100:pushf{W}
+1101000{w},{mod}010{r_m}:rcl{w} {mod}{r_m}{w}
+1101001{w},{mod}010{r_m}:rcl{w} %cl,{mod}{r_m}{w}
+1100000{w},{mod}010{r_m},{imm8}:rcl{w} {imm8},{mod}{r_m}{w}
+1101000{w},{mod}011{r_m}:rcr{w} {mod}{r_m}{w}
+1101001{w},{mod}011{r_m}:rcr{w} %cl,{mod}{r_m}{w}
+1100000{w},{mod}011{r_m},{imm8}:rcr{w} {imm8},{mod}{r_m}{w}
+00001111,00110010:rdmsr
+00001111,00110011:rdpmc
+00001111,00110001:rdtsc
+11000011:ret{W}
+11000010,{imm16}:ret{W} {imm16}
+11001011:lret
+11001010,{imm16}:lret {imm16}
+1101000{w},{mod}000{r_m}:rol{w} {mod}{r_m}{w}
+1101001{w},{mod}000{r_m}:rol{w} %cl,{mod}{r_m}{w}
+1100000{w},{mod}000{r_m},{imm8}:rol{w} {imm8},{mod}{r_m}{w}
+1101000{w},{mod}001{r_m}:ror{w} {mod}{r_m}{w}
+1101001{w},{mod}001{r_m}:ror{w} %cl,{mod}{r_m}{w}
+1100000{w},{mod}001{r_m},{imm8}:ror{w} {imm8},{mod}{r_m}{w}
+00001111,10101010:rsm
+10011110:sahf
+1101000{w},{mod}111{r_m}:sar{w} {mod}{r_m}{w}
+1101001{w},{mod}111{r_m}:sar{w} %cl,{mod}{r_m}{w}
+1100000{w},{mod}111{r_m},{imm8}:sar{w} {imm8},{mod}{r_m}{w}
+0001100{w},{mod}{reg}{r_m}:sbb {reg}{w},{mod}{r_m}{w}
+0001101{w},{mod}{reg}{r_m}:sbb {mod}{r_m}{w},{reg}{w}
+0001110{w},{imm}:sbb {imm}{w},{ax}{w}
+1000000{w},{mod}011{r_m},{imm}:sbb{w} {imm}{w},{mod}{r_m}{w}
+1000001{w},{mod}011{r_m},{imms8}:sbb{w} {imms8},{mod}{r_m}
+1010111{w}:{RE}scas {es_di},{ax}{w}
+00001111,1001{tttn},{mod}000{r_m}:set{tttn} {mod}{r_m}
+1101000{w},{mod}100{r_m}:shl{w} {mod}{r_m}{w}
+1101001{w},{mod}100{r_m}:shl{w} %cl,{mod}{r_m}{w}
+1100000{w},{mod}100{r_m},{imm8}:shl{w} {imm8},{mod}{r_m}{w}
+1101000{w},{mod}101{r_m}:shr{w} {mod}{r_m}{w}
+00001111,10100100,{mod}{reg}{r_m},{imm8}:shld {imm8},{reg},{mod}{r_m}
+00001111,10100101,{mod}{reg}{r_m}:shld %cl,{reg},{mod}{r_m}
+1101001{w},{mod}101{r_m}:shr{w} %cl,{mod}{r_m}{w}
+1100000{w},{mod}101{r_m},{imm8}:shr{w} {imm8},{mod}{r_m}{w}
+00001111,10101100,{mod}{reg}{r_m},{imm8}:shrd {imm8},{reg},{mod}{r_m}
+00001111,10101101,{mod}{reg}{r_m}:shrd %cl,{reg},{mod}{r_m}
+# ORDER
+00001111,00000001,11000001:vmcall
+00001111,00000001,11000010:vmlaunch
+00001111,00000001,11000011:vmresume
+00001111,00000001,11000100:vmxoff
+00001111,01111000,{mod}{reg64}{64r_m}:vmread {reg64},{mod}{64r_m}
+00001111,01111001,{mod}{reg64}{64r_m}:vmwrite {mod}{64r_m},{reg64}
+ifdef(`i386',
+`00001111,00000001,{mod}000{r_m}:sgdtl {mod}{r_m}
+',
+`00001111,00000001,{mod}000{r_m}:sgdt {mod}{r_m}
+')dnl
+# ORDER END
+# ORDER
+ifdef(`i386',
+`00001111,00000001,11001000:monitor %eax,%ecx,%edx
+00001111,00000001,11001001:mwait %eax,%ecx
+',
+`00001111,00000001,11001000:monitor %rax,%rcx,%rdx
+00001111,00000001,11001001:mwait %rax,%rcx
+')dnl
+ifdef(`i386',
+`00001111,00000001,{mod}001{r_m}:sidtl {mod}{r_m}
+',
+`00001111,00000001,{mod}001{r_m}:sidt {mod}{r_m}
+')dnl
+# ORDER END
+00001111,00000000,{mod}000{r_m}:sldt {mod}{r_m}
+00001111,00000001,{mod}100{r_m}:smsw {mod}{r_m}
+11111001:stc
+11111101:std
+11111011:sti
+1010101{w}:{R}stos {ax}{w},{es_di}
+00001111,00000000,{mod}001{r_m}:str {mod}{r_m}
+0010100{w},{mod}{reg}{r_m}:sub {reg}{w},{mod}{r_m}{w}
+0010101{w},{mod}{reg}{r_m}:sub {mod}{r_m}{w},{reg}{w}
+0010110{w},{imm}:sub {imm}{w},{ax}{w}
+1000000{w},{mod}101{r_m},{imm}:sub{w} {imm}{w},{mod}{r_m}{w}
+1000001{w},{mod}101{r_m},{imms8}:sub{w} {imms8},{mod}{r_m}
+1000010{w},{mod}{reg}{r_m}:test {reg}{w},{mod}{r_m}{w}
+1010100{w},{imm}:test {imm}{w},{ax}{w}
+1111011{w},{mod}000{r_m},{imm}:test{w} {imm}{w},{mod}{r_m}{w}
+00001111,00001011:ud2a
+00001111,00000000,{mod}100{16r_m}:verr {mod}{16r_m}
+00001111,00000000,{mod}101{16r_m}:verw {mod}{16r_m}
+00001111,00001001:wbinvd
+00001111,00001101,{mod}000{8r_m}:prefetch {mod}{8r_m}
+00001111,00001101,{mod}001{8r_m}:prefetchw {mod}{8r_m}
+00001111,00011000,{mod}000{r_m}:prefetchnta {mod}{r_m}
+00001111,00011000,{mod}001{r_m}:prefetcht0 {mod}{r_m}
+00001111,00011000,{mod}010{r_m}:prefetcht1 {mod}{r_m}
+00001111,00011000,{mod}011{r_m}:prefetcht2 {mod}{r_m}
+00001111,00011111,{mod}{reg}{r_m}:nop{w} {mod}{r_m}
+00001111,00110000:wrmsr
+00001111,1100000{w},{mod}{reg}{r_m}:xadd {reg}{w},{mod}{r_m}{w}
+1000011{w},{mod}{reg}{r_m}:xchg {reg}{w},{mod}{r_m}{w}
+10010{oreg}:xchg {ax},{oreg}
+11010111:xlat {ds_bx}
+0011000{w},{mod}{reg}{r_m}:xor {reg}{w},{mod}{r_m}{w}
+0011001{w},{mod}{reg}{r_m}:xor {mod}{r_m}{w},{reg}{w}
+0011010{w},{imm}:xor {imm}{w},{ax}{w}
+1000000{w},{mod}110{r_m},{imm}:xor{w} {imm}{w},{mod}{r_m}{w}
+1000001{w},{mod}110{r_m},{imms8}:xor{w} {imms8},{mod}{r_m}
+00001111,01110111:emms
+01100110,00001111,11011011,{Mod}{xmmreg}{R_m}:pand {Mod}{R_m},{xmmreg}
+00001111,11011011,{MOD}{mmxreg}{R_M}:pand {MOD}{R_M},{mmxreg}
+01100110,00001111,11011111,{Mod}{xmmreg}{R_m}:pandn {Mod}{R_m},{xmmreg}
+00001111,11011111,{MOD}{mmxreg}{R_M}:pandn {MOD}{R_M},{mmxreg}
+01100110,00001111,11110101,{Mod}{xmmreg}{R_m}:pmaddwd {Mod}{R_m},{xmmreg}
+00001111,11110101,{MOD}{mmxreg}{R_M}:pmaddwd {MOD}{R_M},{mmxreg}
+01100110,00001111,11101011,{Mod}{xmmreg}{R_m}:por {Mod}{R_m},{xmmreg}
+00001111,11101011,{MOD}{mmxreg}{R_M}:por {MOD}{R_M},{mmxreg}
+01100110,00001111,11101111,{Mod}{xmmreg}{R_m}:pxor {Mod}{R_m},{xmmreg}
+00001111,11101111,{MOD}{mmxreg}{R_M}:pxor {MOD}{R_M},{mmxreg}
+00001111,01010101,{Mod}{xmmreg}{R_m}:andnps {Mod}{R_m},{xmmreg}
+00001111,01010100,{Mod}{xmmreg}{R_m}:andps {Mod}{R_m},{xmmreg}
+00001111,11000010,{Mod}{xmmreg}{R_m},00000000:cmpeqps {Mod}{R_m},{xmmreg}
+00001111,11000010,{Mod}{xmmreg}{R_m},00000001:cmpltps {Mod}{R_m},{xmmreg}
+00001111,11000010,{Mod}{xmmreg}{R_m},00000010:cmpleps {Mod}{R_m},{xmmreg}
+00001111,11000010,{Mod}{xmmreg}{R_m},00000011:cmpunordps {Mod}{R_m},{xmmreg}
+00001111,11000010,{Mod}{xmmreg}{R_m},00000100:cmpneqps {Mod}{R_m},{xmmreg}
+00001111,11000010,{Mod}{xmmreg}{R_m},00000101:cmpnltps {Mod}{R_m},{xmmreg}
+00001111,11000010,{Mod}{xmmreg}{R_m},00000110:cmpnleps {Mod}{R_m},{xmmreg}
+00001111,11000010,{Mod}{xmmreg}{R_m},00000111:cmpordps {Mod}{R_m},{xmmreg}
+11110011,00001111,11000010,{Mod}{xmmreg}{R_m},00000000:cmpeqss {Mod}{R_m},{xmmreg}
+11110011,00001111,11000010,{Mod}{xmmreg}{R_m},00000001:cmpltss {Mod}{R_m},{xmmreg}
+11110011,00001111,11000010,{Mod}{xmmreg}{R_m},00000010:cmpless {Mod}{R_m},{xmmreg}
+11110011,00001111,11000010,{Mod}{xmmreg}{R_m},00000011:cmpunordss {Mod}{R_m},{xmmreg}
+11110011,00001111,11000010,{Mod}{xmmreg}{R_m},00000100:cmpneqss {Mod}{R_m},{xmmreg}
+11110011,00001111,11000010,{Mod}{xmmreg}{R_m},00000101:cmpnltss {Mod}{R_m},{xmmreg}
+11110011,00001111,11000010,{Mod}{xmmreg}{R_m},00000110:cmpnless {Mod}{R_m},{xmmreg}
+11110011,00001111,11000010,{Mod}{xmmreg}{R_m},00000111:cmpordss {Mod}{R_m},{xmmreg}
+00001111,10101110,{mod}001{r_m}:fxrstor {mod}{r_m}
+00001111,10101110,{mod}000{r_m}:fxsave {mod}{r_m}
+00001111,10101110,{mod}010{r_m}:ldmxcsr {mod}{r_m}
+00001111,10101110,{mod}011{r_m}:stmxcsr {mod}{r_m}
+11110010,00001111,00010000,{Mod}{xmmreg}{R_m}:movsd {Mod}{R_m},{xmmreg}
+11110011,00001111,00010000,{Mod}{xmmreg}{R_m}:movss {Mod}{R_m},{xmmreg}
+01100110,00001111,00010000,{Mod}{xmmreg}{R_m}:movupd {Mod}{R_m},{xmmreg}
+00001111,00010000,{Mod}{xmmreg}{R_m}:movups {Mod}{R_m},{xmmreg}
+11110010,00001111,00010001,{Mod}{xmmreg}{R_m}:movsd {xmmreg},{Mod}{R_m}
+11110011,00001111,00010001,{Mod}{xmmreg}{R_m}:movss {xmmreg},{Mod}{R_m}
+01100110,00001111,00010001,{Mod}{xmmreg}{R_m}:movupd {xmmreg},{Mod}{R_m}
+00001111,00010001,{Mod}{xmmreg}{R_m}:movups {xmmreg},{Mod}{R_m}
+11110010,00001111,00010010,{Mod}{xmmreg}{R_m}:movddup {Mod}{R_m},{xmmreg}
+11110011,00001111,00010010,{Mod}{xmmreg}{R_m}:movsldup {Mod}{R_m},{xmmreg}
+01100110,00001111,00010010,{Mod}{xmmreg}{R_m}:movlpd {Mod}{R_m},{xmmreg}
+00001111,00010010,11{xmmreg1}{xmmreg2}:movhlps {xmmreg2},{xmmreg1}
+00001111,00010010,{Mod}{xmmreg}{R_m}:movlps {Mod}{R_m},{xmmreg}
+01100110,00001111,00010011,11{xmmreg1}{xmmreg2}:movhlpd {xmmreg1},{xmmreg2}
+00001111,00010011,11{xmmreg1}{xmmreg2}:movhlps {xmmreg1},{xmmreg2}
+01100110,00001111,00010011,{Mod}{xmmreg}{R_m}:movlpd {xmmreg},{Mod}{R_m}
+00001111,00010011,{Mod}{xmmreg}{R_m}:movlps {xmmreg},{Mod}{R_m}
+01100110,00001111,00010100,{Mod}{xmmreg}{R_m}:unpcklpd {Mod}{R_m},{xmmreg}
+00001111,00010100,{Mod}{xmmreg}{R_m}:unpcklps {Mod}{R_m},{xmmreg}
+01100110,00001111,00010101,{Mod}{xmmreg}{R_m}:unpckhpd {Mod}{R_m},{xmmreg}
+00001111,00010101,{Mod}{xmmreg}{R_m}:unpckhps {Mod}{R_m},{xmmreg}
+11110011,00001111,00010110,{Mod}{xmmreg}{R_m}:movshdup {Mod}{R_m},{xmmreg}
+01100110,00001111,00010110,{Mod}{xmmreg}{R_m}:movhpd {Mod}{R_m},{xmmreg}
+00001111,00010110,11{xmmreg1}{xmmreg2}:movlhps {xmmreg2},{xmmreg1}
+00001111,00010110,{Mod}{xmmreg}{R_m}:movhps {Mod}{R_m},{xmmreg}
+01100110,00001111,00010111,11{xmmreg1}{xmmreg2}:movlhpd {xmmreg1},{xmmreg2}
+00001111,00010111,11{xmmreg1}{xmmreg2}:movlhps {xmmreg1},{xmmreg2}
+01100110,00001111,00010111,{Mod}{xmmreg}{R_m}:movhpd {xmmreg},{Mod}{R_m}
+00001111,00010111,{Mod}{xmmreg}{R_m}:movhps {xmmreg},{Mod}{R_m}
+01100110,00001111,00101000,{Mod}{xmmreg}{R_m}:movapd {Mod}{R_m},{xmmreg}
+00001111,00101000,{Mod}{xmmreg}{R_m}:movaps {Mod}{R_m},{xmmreg}
+01100110,00001111,00101001,{Mod}{xmmreg}{R_m}:movapd {xmmreg},{Mod}{R_m}
+00001111,00101001,{Mod}{xmmreg}{R_m}:movaps {xmmreg},{Mod}{R_m}
+11110010,00001111,00101010,{mod}{xmmreg}{r_m}:cvtsi2sd {mod}{r_m},{xmmreg}
+11110011,00001111,00101010,{mod}{xmmreg}{r_m}:cvtsi2ss {mod}{r_m},{xmmreg}
+01100110,00001111,00101010,{MOD}{xmmreg}{R_M}:cvtpi2pd {MOD}{R_M},{xmmreg}
+00001111,00101010,{MOD}{xmmreg}{R_M}:cvtpi2ps {MOD}{R_M},{xmmreg}
+01100110,00001111,00101011,{mod}{xmmreg}{r_m}:movntpd {xmmreg},{mod}{r_m}
+00001111,00101011,{mod}{xmmreg}{r_m}:movntps {xmmreg},{mod}{r_m}
+11110010,00001111,00101100,{Mod}{reg}{R_m}:cvttsd2si {Mod}{R_m},{reg}
+11110011,00001111,00101100,{Mod}{reg}{R_m}:cvttss2si {Mod}{R_m},{reg}
+01100110,00001111,00101100,{Mod}{mmxreg}{R_m}:cvttpd2pi {Mod}{R_m},{mmxreg}
+00001111,00101100,{Mod}{mmxreg}{R_m}:cvttps2pi {Mod}{R_m},{mmxreg}
+01100110,00001111,00101101,{Mod}{mmxreg}{R_m}:cvtpd2pi {Mod}{R_m},{mmxreg}
+11110010,00001111,00101101,{Mod}{reg}{R_m}:cvtsd2si {Mod}{R_m},{reg}
+11110011,00001111,00101101,{Mod}{reg}{R_m}:cvtss2si {Mod}{R_m},{reg}
+00001111,00101101,{Mod}{mmxreg}{R_m}:cvtps2pi {Mod}{R_m},{mmxreg}
+01100110,00001111,00101110,{Mod}{xmmreg}{R_m}:ucomisd {Mod}{R_m},{xmmreg}
+00001111,00101110,{Mod}{xmmreg}{R_m}:ucomiss {Mod}{R_m},{xmmreg}
+01100110,00001111,00101111,{Mod}{xmmreg}{R_m}:comisd {Mod}{R_m},{xmmreg}
+00001111,00101111,{Mod}{xmmreg}{R_m}:comiss {Mod}{R_m},{xmmreg}
+00001111,00110111:getsec
+01100110,00001111,01010000,11{reg}{xmmreg}:movmskpd {xmmreg},{reg}
+00001111,01010000,11{reg}{xmmreg}:movmskps {xmmreg},{reg}
+01100110,00001111,01010001,{Mod}{xmmreg}{R_m}:sqrtpd {Mod}{R_m},{xmmreg}
+11110010,00001111,01010001,{Mod}{xmmreg}{R_m}:sqrtsd {Mod}{R_m},{xmmreg}
+11110011,00001111,01010001,{Mod}{xmmreg}{R_m}:sqrtss {Mod}{R_m},{xmmreg}
+00001111,01010001,{Mod}{xmmreg}{R_m}:sqrtps {Mod}{R_m},{xmmreg}
+11110011,00001111,01010010,{Mod}{xmmreg}{R_m}:rsqrtss {Mod}{R_m},{xmmreg}
+00001111,01010010,{Mod}{xmmreg}{R_m}:rsqrtps {Mod}{R_m},{xmmreg}
+11110011,00001111,01010011,{Mod}{xmmreg}{R_m}:rcpss {Mod}{R_m},{xmmreg}
+00001111,01010011,{Mod}{xmmreg}{R_m}:rcpps {Mod}{R_m},{xmmreg}
+01100110,00001111,01010100,{Mod}{xmmreg}{R_m}:andpd {Mod}{R_m},{xmmreg}
+00001111,01010100,{Mod}{xmmreg}{R_m}:andps {Mod}{R_m},{xmmreg}
+01100110,00001111,01010101,{Mod}{xmmreg}{R_m}:andnpd {Mod}{R_m},{xmmreg}
+00001111,01010101,{Mod}{xmmreg}{R_m}:andnps {Mod}{R_m},{xmmreg}
+01100110,00001111,01010110,{Mod}{xmmreg}{R_m}:orpd {Mod}{R_m},{xmmreg}
+00001111,01010110,{Mod}{xmmreg}{R_m}:orps {Mod}{R_m},{xmmreg}
+01100110,00001111,01010111,{Mod}{xmmreg}{R_m}:xorpd {Mod}{R_m},{xmmreg}
+00001111,01010111,{Mod}{xmmreg}{R_m}:xorps {Mod}{R_m},{xmmreg}
+11110010,00001111,01011000,{Mod}{xmmreg}{R_m}:addsd {Mod}{R_m},{xmmreg}
+11110011,00001111,01011000,{Mod}{xmmreg}{R_m}:addss {Mod}{R_m},{xmmreg}
+01100110,00001111,01011000,{Mod}{xmmreg}{R_m}:addpd {Mod}{R_m},{xmmreg}
+00001111,01011000,{Mod}{xmmreg}{R_m}:addps {Mod}{R_m},{xmmreg}
+11110010,00001111,01011001,{Mod}{xmmreg}{R_m}:mulsd {Mod}{R_m},{xmmreg}
+11110011,00001111,01011001,{Mod}{xmmreg}{R_m}:mulss {Mod}{R_m},{xmmreg}
+01100110,00001111,01011001,{Mod}{xmmreg}{R_m}:mulpd {Mod}{R_m},{xmmreg}
+00001111,01011001,{Mod}{xmmreg}{R_m}:mulps {Mod}{R_m},{xmmreg}
+11110010,00001111,01011010,{Mod}{xmmreg}{R_m}:cvtsd2ss {Mod}{R_m},{xmmreg}
+11110011,00001111,01011010,{Mod}{xmmreg}{R_m}:cvtss2sd {Mod}{R_m},{xmmreg}
+01100110,00001111,01011010,{Mod}{xmmreg}{R_m}:cvtpd2ps {Mod}{R_m},{xmmreg}
+00001111,01011010,{Mod}{xmmreg}{R_m}:cvtps2pd {Mod}{R_m},{xmmreg}
+01100110,00001111,01011011,{Mod}{xmmreg}{R_m}:cvtps2dq {Mod}{R_m},{xmmreg}
+11110011,00001111,01011011,{Mod}{xmmreg}{R_m}:cvttps2dq {Mod}{R_m},{xmmreg}
+00001111,01011011,{Mod}{xmmreg}{R_m}:cvtdq2ps {Mod}{R_m},{xmmreg}
+11110010,00001111,01011100,{Mod}{xmmreg}{R_m}:subsd {Mod}{R_m},{xmmreg}
+11110011,00001111,01011100,{Mod}{xmmreg}{R_m}:subss {Mod}{R_m},{xmmreg}
+01100110,00001111,01011100,{Mod}{xmmreg}{R_m}:subpd {Mod}{R_m},{xmmreg}
+00001111,01011100,{Mod}{xmmreg}{R_m}:subps {Mod}{R_m},{xmmreg}
+11110010,00001111,01011101,{Mod}{xmmreg}{R_m}:minsd {Mod}{R_m},{xmmreg}
+11110011,00001111,01011101,{Mod}{xmmreg}{R_m}:minss {Mod}{R_m},{xmmreg}
+01100110,00001111,01011101,{Mod}{xmmreg}{R_m}:minpd {Mod}{R_m},{xmmreg}
+00001111,01011101,{Mod}{xmmreg}{R_m}:minps {Mod}{R_m},{xmmreg}
+11110010,00001111,01011110,{Mod}{xmmreg}{R_m}:divsd {Mod}{R_m},{xmmreg}
+11110011,00001111,01011110,{Mod}{xmmreg}{R_m}:divss {Mod}{R_m},{xmmreg}
+01100110,00001111,01011110,{Mod}{xmmreg}{R_m}:divpd {Mod}{R_m},{xmmreg}
+00001111,01011110,{Mod}{xmmreg}{R_m}:divps {Mod}{R_m},{xmmreg}
+11110010,00001111,01011111,{Mod}{xmmreg}{R_m}:maxsd {Mod}{R_m},{xmmreg}
+11110011,00001111,01011111,{Mod}{xmmreg}{R_m}:maxss {Mod}{R_m},{xmmreg}
+01100110,00001111,01011111,{Mod}{xmmreg}{R_m}:maxpd {Mod}{R_m},{xmmreg}
+00001111,01011111,{Mod}{xmmreg}{R_m}:maxps {Mod}{R_m},{xmmreg}
+01100110,00001111,01100000,{Mod}{xmmreg}{R_m}:punpcklbw {Mod}{R_m},{xmmreg}
+00001111,01100000,{MOD}{mmxreg}{R_M}:punpcklbw {MOD}{R_M},{mmxreg}
+01100110,00001111,01100001,{Mod}{xmmreg}{R_m}:punpcklwd {Mod}{R_m},{xmmreg}
+00001111,01100001,{MOD}{mmxreg}{R_M}:punpcklwd {MOD}{R_M},{mmxreg}
+01100110,00001111,01100010,{Mod}{xmmreg}{R_m}:punpckldq {Mod}{R_m},{xmmreg}
+00001111,01100010,{MOD}{mmxreg}{R_M}:punpckldq {MOD}{R_M},{mmxreg}
+01100110,00001111,01100011,{Mod}{xmmreg}{R_m}:packsswb {Mod}{R_m},{xmmreg}
+00001111,01100011,{MOD}{mmxreg}{R_M}:packsswb {MOD}{R_M},{mmxreg}
+01100110,00001111,01100100,{Mod}{xmmreg}{R_m}:pcmpgtb {Mod}{R_m},{xmmreg}
+00001111,01100100,{MOD}{mmxreg}{R_M}:pcmpgtb {MOD}{R_M},{mmxreg}
+01100110,00001111,01100101,{Mod}{xmmreg}{R_m}:pcmpgtw {Mod}{R_m},{xmmreg}
+00001111,01100101,{MOD}{mmxreg}{R_M}:pcmpgtw {MOD}{R_M},{mmxreg}
+01100110,00001111,01100110,{Mod}{xmmreg}{R_m}:pcmpgtd {Mod}{R_m},{xmmreg}
+00001111,01100110,{MOD}{mmxreg}{R_M}:pcmpgtd {MOD}{R_M},{mmxreg}
+01100110,00001111,01100111,{Mod}{xmmreg}{R_m}:packuswb {Mod}{R_m},{xmmreg}
+00001111,01100111,{MOD}{mmxreg}{R_M}:packuswb {MOD}{R_M},{mmxreg}
+01100110,00001111,01101000,{Mod}{xmmreg}{R_m}:punpckhbw {Mod}{R_m},{xmmreg}
+00001111,01101000,{MOD}{mmxreg}{R_M}:punpckhbw {MOD}{R_M},{mmxreg}
+01100110,00001111,01101001,{Mod}{xmmreg}{R_m}:punpckhwd {Mod}{R_m},{xmmreg}
+00001111,01101001,{MOD}{mmxreg}{R_M}:punpckhwd {MOD}{R_M},{mmxreg}
+01100110,00001111,01101010,{Mod}{xmmreg}{R_m}:punpckhdq {Mod}{R_m},{xmmreg}
+00001111,01101010,{MOD}{mmxreg}{R_M}:punpckhdq {MOD}{R_M},{mmxreg}
+01100110,00001111,01101011,{Mod}{xmmreg}{R_m}:packssdw {Mod}{R_m},{xmmreg}
+00001111,01101011,{MOD}{mmxreg}{R_M}:packssdw {MOD}{R_M},{mmxreg}
+01100110,00001111,01101100,{Mod}{xmmreg}{R_m}:punpcklqdq {Mod}{R_m},{xmmreg}
+01100110,00001111,01101101,{Mod}{xmmreg}{R_m}:punpckhqdq {Mod}{R_m},{xmmreg}
+01100110,00001111,01101110,{mod}{xmmreg}{r_m}:movd {mod}{r_m},{xmmreg}
+00001111,01101110,{mod}{mmxreg}{r_m}:movd {mod}{r_m},{mmxreg}
+01100110,00001111,01101111,{Mod}{xmmreg}{R_m}:movdqa {Mod}{R_m},{xmmreg}
+11110011,00001111,01101111,{Mod}{xmmreg}{R_m}:movdqu {Mod}{R_m},{xmmreg}
+00001111,01101111,{MOD}{mmxreg}{R_M}:movq {MOD}{R_M},{mmxreg}
+01100110,00001111,01110000,{Mod}{xmmreg}{R_m},{imm8}:pshufd {imm8},{Mod}{R_m},{xmmreg}
+11110010,00001111,01110000,{Mod}{xmmreg}{R_m},{imm8}:pshuflw {imm8},{Mod}{R_m},{xmmreg}
+11110011,00001111,01110000,{Mod}{xmmreg}{R_m},{imm8}:pshufhw {imm8},{Mod}{R_m},{xmmreg}
+00001111,01110000,{MOD}{mmxreg}{R_M},{imm8}:pshufw {imm8},{MOD}{R_M},{mmxreg}
+01100110,00001111,01110100,{Mod}{xmmreg}{R_m}:pcmpeqb {Mod}{R_m},{xmmreg}
+00001111,01110100,{MOD}{mmxreg}{R_M}:pcmpeqb {MOD}{R_M},{mmxreg}
+01100110,00001111,01110101,{Mod}{xmmreg}{R_m}:pcmpeqw {Mod}{R_m},{xmmreg}
+00001111,01110101,{MOD}{mmxreg}{R_M}:pcmpeqw {MOD}{R_M},{mmxreg}
+01100110,00001111,01110110,{Mod}{xmmreg}{R_m}:pcmpeqd {Mod}{R_m},{xmmreg}
+00001111,01110110,{MOD}{mmxreg}{R_M}:pcmpeqd {MOD}{R_M},{mmxreg}
+01100110,00001111,01111100,{Mod}{xmmreg}{R_m}:haddpd {Mod}{R_m},{xmmreg}
+11110010,00001111,01111100,{Mod}{xmmreg}{R_m}:haddps {Mod}{R_m},{xmmreg}
+01100110,00001111,01111101,{Mod}{xmmreg}{R_m}:hsubpd {Mod}{R_m},{xmmreg}
+11110010,00001111,01111101,{Mod}{xmmreg}{R_m}:hsubps {Mod}{R_m},{xmmreg}
+01100110,00001111,01111110,{mod}{xmmreg}{r_m}:movd {xmmreg},{mod}{r_m}
+11110011,00001111,01111110,{Mod}{xmmreg}{R_m}:movq {Mod}{R_m},{xmmreg}
+00001111,01111110,{mod}{mmxreg}{r_m}:movd {mmxreg},{mod}{r_m}
+01100110,00001111,01111111,{Mod}{xmmreg}{R_m}:movdqa {xmmreg},{Mod}{R_m}
+11110011,00001111,01111111,{Mod}{xmmreg}{R_m}:movdqu {xmmreg},{Mod}{R_m}
+00001111,01111111,{MOD}{mmxreg}{R_M}:movq {mmxreg},{MOD}{R_M}
+00001111,11000011,{mod}{reg}{r_m}:movnti {reg},{mod}{r_m}
+01100110,00001111,11000100,{mod}{xmmreg}{r_m},{imm8}:pinsrw {imm8},{mod}{r_m},{xmmreg}
+00001111,11000100,{mod}{mmxreg}{r_m},{imm8}:pinsrw {imm8},{mod}{r_m},{mmxreg}
+01100110,00001111,11000101,11{reg}{xmmreg},{imm8}:pextrw {imm8},{xmmreg},{reg}
+00001111,11000101,11{reg}{mmxreg},{imm8}:pextrw {imm8},{mmxreg},{reg}
+01100110,00001111,11000110,{Mod}{xmmreg}{R_m},{imm8}:shufpd {imm8},{Mod}{R_m},{xmmreg}
+00001111,11000110,{Mod}{xmmreg}{R_m},{imm8}:shufps {imm8},{Mod}{R_m},{xmmreg}
+01100110,00001111,11010001,{Mod}{xmmreg}{R_m}:psrlw {Mod}{R_m},{xmmreg}
+00001111,11010001,{MOD}{mmxreg}{R_M}:psrlw {MOD}{R_M},{mmxreg}
+01100110,00001111,11010010,{Mod}{xmmreg}{R_m}:psrld {Mod}{R_m},{xmmreg}
+00001111,11010010,{MOD}{mmxreg}{R_M}:psrld {MOD}{R_M},{mmxreg}
+01100110,00001111,11010011,{Mod}{xmmreg}{R_m}:psrlq {Mod}{R_m},{xmmreg}
+00001111,11010011,{MOD}{mmxreg}{R_M}:psrlq {MOD}{R_M},{mmxreg}
+01100110,00001111,11010100,{Mod}{xmmreg}{R_m}:paddq {Mod}{R_m},{xmmreg}
+00001111,11010100,{MOD}{mmxreg}{R_M}:paddq {MOD}{R_M},{mmxreg}
+01100110,00001111,11010101,{Mod}{xmmreg}{R_m}:pmullw {Mod}{R_m},{xmmreg}
+00001111,11010101,{MOD}{mmxreg}{R_M}:pmullw {MOD}{R_M},{mmxreg}
+01100110,00001111,11010110,{Mod}{xmmreg}{R_m}:movq {xmmreg},{Mod}{R_m}
+11110010,00001111,11010110,11{mmxreg}{xmmreg}:movdq2q {xmmreg},{mmxreg}
+11110011,00001111,11010110,11{xmmreg}{mmxreg}:movq2dq {mmxreg},{xmmreg}
+01100110,00001111,11010111,11{reg}{xmmreg}:pmovmskb {xmmreg},{reg}
+00001111,11010111,11{reg}{mmxreg}:pmovmskb {mmxreg},{reg}
+01100110,00001111,11011000,{Mod}{xmmreg}{R_m}:psubusb {Mod}{R_m},{xmmreg}
+00001111,11011000,{MOD}{mmxreg}{R_M}:psubusb {MOD}{R_M},{mmxreg}
+01100110,00001111,11011001,{Mod}{xmmreg}{R_m}:psubusw {Mod}{R_m},{xmmreg}
+00001111,11011001,{MOD}{mmxreg}{R_M}:psubusw {MOD}{R_M},{mmxreg}
+01100110,00001111,11011010,{Mod}{xmmreg}{R_m}:pminub {Mod}{R_m},{xmmreg}
+00001111,11011010,{MOD}{mmxreg}{R_M}:pminub {MOD}{R_M},{mmxreg}
+01100110,00001111,11011100,{Mod}{xmmreg}{R_m}:paddusb {Mod}{R_m},{xmmreg}
+00001111,11011100,{MOD}{mmxreg}{R_M}:paddusb {MOD}{R_M},{mmxreg}
+01100110,00001111,11011101,{Mod}{xmmreg}{R_m}:paddusw {Mod}{R_m},{xmmreg}
+00001111,11011101,{MOD}{mmxreg}{R_M}:paddusw {MOD}{R_M},{mmxreg}
+01100110,00001111,11011110,{Mod}{xmmreg}{R_m}:pmaxub {Mod}{R_m},{xmmreg}
+00001111,11011110,{MOD}{mmxreg}{R_M}:pmaxub {MOD}{R_M},{mmxreg}
+01100110,00001111,11100000,{Mod}{xmmreg}{R_m}:pavgb {Mod}{R_m},{xmmreg}
+00001111,11100000,{MOD}{mmxreg}{R_M}:pavgb {MOD}{R_M},{mmxreg}
+01100110,00001111,11100001,{Mod}{xmmreg}{R_m}:psraw {Mod}{R_m},{xmmreg}
+00001111,11100001,{MOD}{mmxreg}{R_M}:psraw {MOD}{R_M},{mmxreg}
+01100110,00001111,11100010,{Mod}{xmmreg}{R_m}:psrad {Mod}{R_m},{xmmreg}
+00001111,11100010,{MOD}{mmxreg}{R_M}:psrad {MOD}{R_M},{mmxreg}
+01100110,00001111,11100011,{Mod}{xmmreg}{R_m}:pavgw {Mod}{R_m},{xmmreg}
+00001111,11100011,{MOD}{mmxreg}{R_M}:pavgw {MOD}{R_M},{mmxreg}
+01100110,00001111,11100100,{Mod}{xmmreg}{R_m}:pmulhuw {Mod}{R_m},{xmmreg}
+00001111,11100100,{MOD}{mmxreg}{R_M}:pmulhuw {MOD}{R_M},{mmxreg}
+01100110,00001111,11100101,{Mod}{xmmreg}{R_m}:pmulhw {Mod}{R_m},{xmmreg}
+00001111,11100101,{MOD}{mmxreg}{R_M}:pmulhw {MOD}{R_M},{mmxreg}
+01100110,00001111,11100111,{Mod}{xmmreg}{R_m}:movntdq {xmmreg},{Mod}{R_m}
+00001111,11100111,{MOD}{mmxreg}{R_M}:movntq {mmxreg},{MOD}{R_M}
+01100110,00001111,11101000,{Mod}{xmmreg}{R_m}:psubsb {Mod}{R_m},{xmmreg}
+00001111,11101000,{MOD}{mmxreg}{R_M}:psubsb {MOD}{R_M},{mmxreg}
+01100110,00001111,11101001,{Mod}{xmmreg}{R_m}:psubsw {Mod}{R_m},{xmmreg}
+00001111,11101001,{MOD}{mmxreg}{R_M}:psubsw {MOD}{R_M},{mmxreg}
+01100110,00001111,11101010,{Mod}{xmmreg}{R_m}:pminsw {Mod}{R_m},{xmmreg}
+00001111,11101010,{MOD}{mmxreg}{R_M}:pminsw {MOD}{R_M},{mmxreg}
+01100110,00001111,11101100,{Mod}{xmmreg}{R_m}:paddsb {Mod}{R_m},{xmmreg}
+00001111,11101100,{MOD}{mmxreg}{R_M}:paddsb {MOD}{R_M},{mmxreg}
+01100110,00001111,11101101,{Mod}{xmmreg}{R_m}:paddsw {Mod}{R_m},{xmmreg}
+00001111,11101101,{MOD}{mmxreg}{R_M}:paddsw {MOD}{R_M},{mmxreg}
+01100110,00001111,11101110,{Mod}{xmmreg}{R_m}:pmaxsw {Mod}{R_m},{xmmreg}
+00001111,11101110,{MOD}{mmxreg}{R_M}:pmaxsw {MOD}{R_M},{mmxreg}
+11110010,00001111,11110000,{mod}{xmmreg}{r_m}:lddqu {mod}{r_m},{xmmreg}
+01100110,00001111,11110001,{Mod}{xmmreg}{R_m}:psllw {Mod}{R_m},{xmmreg}
+00001111,11110001,{MOD}{mmxreg}{R_M}:psllw {MOD}{R_M},{mmxreg}
+01100110,00001111,11110010,{Mod}{xmmreg}{R_m}:pslld {Mod}{R_m},{xmmreg}
+00001111,11110010,{MOD}{mmxreg}{R_M}:pslld {MOD}{R_M},{mmxreg}
+01100110,00001111,11110011,{Mod}{xmmreg}{R_m}:psllq {Mod}{R_m},{xmmreg}
+00001111,11110011,{MOD}{mmxreg}{R_M}:psllq {MOD}{R_M},{mmxreg}
+01100110,00001111,11110100,{Mod}{xmmreg}{R_m}:pmuludq {Mod}{R_m},{xmmreg}
+00001111,11110100,{MOD}{mmxreg}{R_M}:pmuludq {MOD}{R_M},{mmxreg}
+01100110,00001111,11110110,{Mod}{xmmreg}{R_m}:psadbw {Mod}{R_m},{xmmreg}
+00001111,11110110,{MOD}{mmxreg}{R_M}:psadbw {MOD}{R_M},{mmxreg}
+01100110,00001111,11110111,11{xmmreg1}{xmmreg2}:maskmovdqu {xmmreg2},{xmmreg1}
+00001111,11110111,11{mmxreg1}{mmxreg2}:maskmovq {mmxreg2},{mmxreg1}
+01100110,00001111,11111000,{Mod}{xmmreg}{R_m}:psubb {Mod}{R_m},{xmmreg}
+00001111,11111000,{MOD}{mmxreg}{R_M}:psubb {MOD}{R_M},{mmxreg}
+01100110,00001111,11111001,{Mod}{xmmreg}{R_m}:psubw {Mod}{R_m},{xmmreg}
+00001111,11111001,{MOD}{mmxreg}{R_M}:psubw {MOD}{R_M},{mmxreg}
+01100110,00001111,11111010,{Mod}{xmmreg}{R_m}:psubd {Mod}{R_m},{xmmreg}
+00001111,11111010,{MOD}{mmxreg}{R_M}:psubd {MOD}{R_M},{mmxreg}
+01100110,00001111,11111011,{Mod}{xmmreg}{R_m}:psubq {Mod}{R_m},{xmmreg}
+00001111,11111011,{MOD}{mmxreg}{R_M}:psubq {MOD}{R_M},{mmxreg}
+01100110,00001111,11111100,{Mod}{xmmreg}{R_m}:paddb {Mod}{R_m},{xmmreg}
+00001111,11111100,{MOD}{mmxreg}{R_M}:paddb {MOD}{R_M},{mmxreg}
+01100110,00001111,11111101,{Mod}{xmmreg}{R_m}:paddw {Mod}{R_m},{xmmreg}
+00001111,11111101,{MOD}{mmxreg}{R_M}:paddw {MOD}{R_M},{mmxreg}
+01100110,00001111,11111110,{Mod}{xmmreg}{R_m}:paddd {Mod}{R_m},{xmmreg}
+00001111,11111110,{MOD}{mmxreg}{R_M}:paddd {MOD}{R_M},{mmxreg}
+01100110,00001111,00111000,00000000,{Mod}{xmmreg}{R_m}:pshufb {Mod}{R_m},{xmmreg}
+00001111,00111000,00000000,{MOD}{mmxreg}{R_M}:pshufb {MOD}{R_M},{mmxreg}
+01100110,00001111,00111000,00000001,{Mod}{xmmreg}{R_m}:phaddw {Mod}{R_m},{xmmreg}
+00001111,00111000,00000001,{MOD}{mmxreg}{R_M}:phaddw {MOD}{R_M},{mmxreg}
+01100110,00001111,00111000,00000010,{Mod}{xmmreg}{R_m}:phaddd {Mod}{R_m},{xmmreg}
+00001111,00111000,00000010,{MOD}{mmxreg}{R_M}:phaddd {MOD}{R_M},{mmxreg}
+01100110,00001111,00111000,00000011,{Mod}{xmmreg}{R_m}:phaddsw {Mod}{R_m},{xmmreg}
+00001111,00111000,00000011,{MOD}{mmxreg}{R_M}:phaddsw {MOD}{R_M},{mmxreg}
+01100110,00001111,00111000,00000100,{Mod}{xmmreg}{R_m}:pmaddubsw {Mod}{R_m},{xmmreg}
+00001111,00111000,00000100,{MOD}{mmxreg}{R_M}:pmaddubsw {MOD}{R_M},{mmxreg}
+01100110,00001111,00111000,00000101,{Mod}{xmmreg}{R_m}:phsubw {Mod}{R_m},{xmmreg}
+00001111,00111000,00000101,{MOD}{mmxreg}{R_M}:phsubw {MOD}{R_M},{mmxreg}
+01100110,00001111,00111000,00000110,{Mod}{xmmreg}{R_m}:phsubd {Mod}{R_m},{xmmreg}
+00001111,00111000,00000110,{MOD}{mmxreg}{R_M}:phsubd {MOD}{R_M},{mmxreg}
+01100110,00001111,00111000,00000111,{Mod}{xmmreg}{R_m}:phsubsw {Mod}{R_m},{xmmreg}
+00001111,00111000,00000111,{MOD}{mmxreg}{R_M}:phsubsw {MOD}{R_M},{mmxreg}
+01100110,00001111,00111000,00001000,{Mod}{xmmreg}{R_m}:psignb {Mod}{R_m},{xmmreg}
+00001111,00111000,00001000,{MOD}{mmxreg}{R_M}:psignb {MOD}{R_M},{mmxreg}
+01100110,00001111,00111000,00001001,{Mod}{xmmreg}{R_m}:psignw {Mod}{R_m},{xmmreg}
+00001111,00111000,00001001,{MOD}{mmxreg}{R_M}:psignw {MOD}{R_M},{mmxreg}
+01100110,00001111,00111000,00001010,{Mod}{xmmreg}{R_m}:psignd {Mod}{R_m},{xmmreg}
+00001111,00111000,00001010,{MOD}{mmxreg}{R_M}:psignd {MOD}{R_M},{mmxreg}
+01100110,00001111,00111000,00001011,{Mod}{xmmreg}{R_m}:pmulhrsw {Mod}{R_m},{xmmreg}
+00001111,00111000,00001011,{MOD}{mmxreg}{R_M}:pmulhrsw {MOD}{R_M},{mmxreg}
+01100110,00001111,00111000,00011100,{Mod}{xmmreg}{R_m}:pabsb {Mod}{R_m},{xmmreg}
+00001111,00111000,00011100,{MOD}{mmxreg}{R_M}:pabsb {MOD}{R_M},{mmxreg}
+01100110,00001111,00111000,00011101,{Mod}{xmmreg}{R_m}:pabsw {Mod}{R_m},{xmmreg}
+00001111,00111000,00011101,{MOD}{mmxreg}{R_M}:pabsw {MOD}{R_M},{mmxreg}
+01100110,00001111,00111000,00011110,{Mod}{xmmreg}{R_m}:pabsd {Mod}{R_m},{xmmreg}
+00001111,00111000,00011110,{MOD}{mmxreg}{R_M}:pabsd {MOD}{R_M},{mmxreg}
+01100110,00001111,00111010,00001111,{Mod}{xmmreg}{R_m},{imm8}:palignr {imm8},{Mod}{R_m},{xmmreg}
+00001111,00111010,00001111,{MOD}{mmxreg}{R_M},{imm8}:palignr {imm8},{MOD}{R_M},{mmxreg}
+01100110,00001111,11000111,{mod}110{r_m}:vmclear {mod}{r_m}
+11110011,00001111,11000111,{mod}110{r_m}:vmxon {mod}{r_m}
+00001111,11000111,{mod}110{r_m}:vmptrld {mod}{r_m}
+00001111,11000111,{mod}111{r_m}:vmptrst {mod}{r_m}
+01100110,00001111,01110001,11010{xmmreg},{imm8}:psrlw {imm8},{xmmreg}
+00001111,01110001,11010{mmxreg},{imm8}:psrlw {imm8},{mmxreg}
+01100110,00001111,01110001,11100{xmmreg},{imm8}:psraw {imm8},{xmmreg}
+00001111,01110001,11100{mmxreg},{imm8}:psraw {imm8},{mmxreg}
+01100110,00001111,01110001,11110{xmmreg},{imm8}:psllw {imm8},{xmmreg}
+00001111,01110001,11110{mmxreg},{imm8}:psllw {imm8},{mmxreg}
+01100110,00001111,01110010,11010{xmmreg},{imm8}:psrld {imm8},{xmmreg}
+00001111,01110010,11010{mmxreg},{imm8}:psrld {imm8},{mmxreg}
+01100110,00001111,01110010,11100{xmmreg},{imm8}:psrad {imm8},{xmmreg}
+00001111,01110010,11100{mmxreg},{imm8}:psrad {imm8},{mmxreg}
+01100110,00001111,01110010,11110{xmmreg},{imm8}:pslld {imm8},{xmmreg}
+00001111,01110010,11110{mmxreg},{imm8}:pslld {imm8},{mmxreg}
+01100110,00001111,01110011,11010{xmmreg},{imm8}:psrlq {imm8},{xmmreg}
+00001111,01110011,11010{mmxreg},{imm8}:psrlq {imm8},{mmxreg}
+01100110,00001111,01110011,11011{xmmreg},{imm8}:psrldq {imm8},{xmmreg}
+01100110,00001111,01110011,11110{xmmreg},{imm8}:psllq {imm8},{xmmreg}
+00001111,01110011,11110{mmxreg},{imm8}:psllq {imm8},{mmxreg}
+01100110,00001111,01110011,11111{xmmreg},{imm8}:pslldq {imm8},{xmmreg}
+00001111,10101110,11101000:lfence
+00001111,10101110,11110000:mfence
+00001111,10101110,11111000:sfence
+00001111,10101110,{mod}111{r_m}:clflush {mod}{r_m}
+00001111,00001111,{MOD}{mmxreg}{R_M}:INVALID {MOD}{R_M},{mmxreg}
+01100110,00001111,00111010,00001100,{Mod}{xmmreg}{R_m},{imm8}:blendps {imm8},{Mod}{R_m},{xmmreg}
+01100110,00001111,00111010,00001101,{Mod}{xmmreg}{R_m},{imm8}:blendpd {imm8},{Mod}{R_m},{xmmreg}
+01100110,00001111,00111000,00010100,{Mod}{xmmreg}{R_m}:blendvps %xmm0,{Mod}{R_m},{xmmreg}
+01100110,00001111,00111000,00010101,{Mod}{xmmreg}{R_m}:blendvpd %xmm0,{Mod}{R_m},{xmmreg}
+01100110,00001111,00111010,01000000,{Mod}{xmmreg}{R_m},{imm8}:dpps {imm8},{Mod}{R_m},{xmmreg}
+01100110,00001111,00111010,01000001,{Mod}{xmmreg}{R_m},{imm8}:dppd {imm8},{Mod}{R_m},{xmmreg}
+01100110,00001111,00111010,00100001,{Mod}{xmmreg}{R_m},{imm8}:insertps {imm8},{Mod}{R_m},{xmmreg}
+# Mod == 11 is not valid
+01100110,00001111,00111000,00101010,{Mod}{xmmreg}{R_m}:movntdqa {Mod}{R_m},{xmmreg}
+01100110,00001111,00111010,01000010,{Mod}{xmmreg}{R_m},{imm8}:mpsadbw {imm8},{Mod}{R_m},{xmmreg}
+01100110,00001111,00111000,00101011,{Mod}{xmmreg}{R_m}:packusdw {Mod}{R_m},{xmmreg}
+01100110,00001111,00111000,00010000,{Mod}{xmmreg}{R_m}:pblendvb %xmm0,{Mod}{R_m},{xmmreg}
+01100110,00001111,00111010,00001110,{Mod}{xmmreg}{R_m},{imm8}:pblendw {imm8},{Mod}{R_m},{xmmreg}
+01100110,00001111,00111000,00101001,{Mod}{xmmreg}{R_m}:pcmpeqq {Mod}{R_m},{xmmreg}
+01100110,00001111,00111010,01100001,{Mod}{xmmreg}{R_m},{imm8}:pcmpestri {imm8},{Mod}{R_m},{xmmreg}
+01100110,00001111,00111010,01100000,{Mod}{xmmreg}{R_m},{imm8}:pcmpestrm {imm8},{Mod}{R_m},{xmmreg}
+01100110,00001111,00111010,01100011,{Mod}{xmmreg}{R_m},{imm8}:pcmpistri {imm8},{Mod}{R_m},{xmmreg}
+01100110,00001111,00111010,01100010,{Mod}{xmmreg}{R_m},{imm8}:pcmpistrm {imm8},{Mod}{R_m},{xmmreg}
+01100110,00001111,00111000,00110111,{Mod}{xmmreg}{R_m}:pcmpgtq {Mod}{R_m},{xmmreg}
+01100110,00001111,00111000,01000001,{Mod}{xmmreg}{R_m}:phminposuw {Mod}{R_m},{xmmreg}
+01100110,00001111,00111010,00100000,{mod}{xmmreg}{r_m},{imm8}:pinsrb {imm8},{mod}{r_m},{xmmreg}
+01100110,00001111,00111010,00100010,{mod}{xmmreg}{r_m},{imm8}:pinsrd {imm8},{mod}{r_m},{xmmreg}
+01100110,00001111,00111000,00111100,{Mod}{xmmreg}{R_m}:pmaxsb {Mod}{R_m},{xmmreg}
+01100110,00001111,00111000,00111101,{Mod}{xmmreg}{R_m}:pmaxsd {Mod}{R_m},{xmmreg}
+01100110,00001111,00111000,00111111,{Mod}{xmmreg}{R_m}:pmaxud {Mod}{R_m},{xmmreg}
+01100110,00001111,00111000,00111110,{Mod}{xmmreg}{R_m}:pmaxuw {Mod}{R_m},{xmmreg}
+01100110,00001111,00111000,00111000,{Mod}{xmmreg}{R_m}:pminsb {Mod}{R_m},{xmmreg}
+01100110,00001111,00111000,00111001,{Mod}{xmmreg}{R_m}:pminsd {Mod}{R_m},{xmmreg}
+01100110,00001111,00111000,00111011,{Mod}{xmmreg}{R_m}:pminud {Mod}{R_m},{xmmreg}
+01100110,00001111,00111000,00111010,{Mod}{xmmreg}{R_m}:pminuw {Mod}{R_m},{xmmreg}
+01100110,00001111,00111000,00100000,{Mod}{xmmreg}{R_m}:pmovsxbw {Mod}{R_m},{xmmreg}
+01100110,00001111,00111000,00100001,{Mod}{xmmreg}{R_m}:pmovsxbd {Mod}{R_m},{xmmreg}
+01100110,00001111,00111000,00100010,{Mod}{xmmreg}{R_m}:pmovsxbq {Mod}{R_m},{xmmreg}
+01100110,00001111,00111000,00100011,{Mod}{xmmreg}{R_m}:pmovsxwd {Mod}{R_m},{xmmreg}
+01100110,00001111,00111000,00100100,{Mod}{xmmreg}{R_m}:pmovsxwq {Mod}{R_m},{xmmreg}
+01100110,00001111,00111000,00100101,{Mod}{xmmreg}{R_m}:pmovsxdq {Mod}{R_m},{xmmreg}
+01100110,00001111,00111000,00110000,{Mod}{xmmreg}{R_m}:pmovzxbw {Mod}{R_m},{xmmreg}
+01100110,00001111,00111000,00110001,{Mod}{xmmreg}{R_m}:pmovzxbd {Mod}{R_m},{xmmreg}
+01100110,00001111,00111000,00110010,{Mod}{xmmreg}{R_m}:pmovzxbq {Mod}{R_m},{xmmreg}
+01100110,00001111,00111000,00110011,{Mod}{xmmreg}{R_m}:pmovzxwd {Mod}{R_m},{xmmreg}
+01100110,00001111,00111000,00110100,{Mod}{xmmreg}{R_m}:pmovzxwq {Mod}{R_m},{xmmreg}
+01100110,00001111,00111000,00110101,{Mod}{xmmreg}{R_m}:pmovzxdq {Mod}{R_m},{xmmreg}
+01100110,00001111,00111000,00101000,{Mod}{xmmreg}{R_m}:pmuldq {Mod}{R_m},{xmmreg}
+01100110,00001111,00111000,01000000,{Mod}{xmmreg}{R_m}:pmulld {Mod}{R_m},{xmmreg}
+01100110,00001111,00111000,00010111,{Mod}{xmmreg}{R_m}:ptest {Mod}{R_m},{xmmreg}
+01100110,00001111,00111010,00001000,{Mod}{xmmreg}{R_m},{imm8}:roundps {imm8},{Mod}{R_m},{xmmreg}
+01100110,00001111,00111010,00001001,{Mod}{xmmreg}{R_m},{imm8}:roundpd {imm8},{Mod}{R_m},{xmmreg}
+01100110,00001111,00111010,00001010,{Mod}{xmmreg}{R_m},{imm8}:roundss {imm8},{Mod}{R_m},{xmmreg}
+01100110,00001111,00111010,00001011,{Mod}{xmmreg}{R_m},{imm8}:roundsd {imm8},{Mod}{R_m},{xmmreg}
+# ORDER:
+dnl Many previous entries depend on this being last.
+000{sreg2}111:pop {sreg2}
+# ORDER END:
diff --git a/libcpu/defs/i386.doc b/libcpu/defs/i386.doc
new file mode 100644
index 0000000..732cd23
--- /dev/null
+++ b/libcpu/defs/i386.doc
@@ -0,0 +1,74 @@
+{imm} only parameter:
+  - is {s} in opcode: {s} == 0, unsigned (8/)16/32 bit immediate
+                      {s} == 1, signed 8 bit immediate
+
+{es:di}: segment register normally %es, can be overwritten
+         edi/di depending on apfx
+
+{ds:si}: segment register normally %ds, can be overridden
+         esi/si depending on apfx
+
+{ax}     al/ax/eax depending on dpfx and w
+
+{dx}     (%edx) or (%dx) depending on apfx
+
+
+{w}      0 = b, 1 = { no dpfx = l, dpfx = w }
+
+{W}      no dpfx = <empty>, dpfx = w
+{WW}     no dpfx = l, dpfx = w
+
+{R} rep prefix possible
+{RE} repe or repne prefix possible
+
+{ccc} CRx registers
+{ddd} DRx registers
+
+{gg}  00 = b, 01 = w, 10 = d, 11 = <illegal>
+{0g}  00 = b, 01 = w, 10 = <illegal>, 11 = <illegal>
+{GG}  00 = <illegal>, 01 = w, 10 = d, 11 = q
+{gG}  00 = <illegal>, 01 = w, 10 = d, 11 = <illegal>
+
+{modr/m} normal registers
+{MODR/M} MMX registers
+{ModR/m} XMM registers
+
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Special opcodes (prefixes):
+
+
+01100111:{apfx}
+01100110:{dpfx}
+
+00101110:{cs}
+00111110:{ds}
+00100110:{es}
+01100100:{fs}
+01100101:{gs}
+
+
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+table format
+
+1bit RE flag
+1bit R flag
+16bit mnemonic
+3bit suffix
+
+5bit fct
+2bit string
+6bit offset1
+5bit offset2
+
+4bit fct
+1bit string
+6bit offset1
+4bit offset2
+
+2bit fct
+1bit string
+3bit offset1
+1bit offset2
+
+61bit
diff --git a/libcpu/i386_data.h b/libcpu/i386_data.h
new file mode 100644
index 0000000..b8a34c3
--- /dev/null
+++ b/libcpu/i386_data.h
@@ -0,0 +1,1418 @@
+/* Helper routines for disassembler for x86/x86-64.
+   Copyright (C) 2007, 2008 Red Hat, Inc.
+   This file is part of elfutils.
+   Written by Ulrich Drepper <drepper@redhat.com>, 2007.
+
+   This file is free software; you can redistribute it and/or modify
+   it under the terms of either
+
+     * the GNU Lesser General Public License as published by the Free
+       Software Foundation; either version 3 of the License, or (at
+       your option) any later version
+
+   or
+
+     * the GNU General Public License as published by the Free
+       Software Foundation; either version 2 of the License, or (at
+       your option) any later version
+
+   or both in parallel, as here.
+
+   elfutils is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received copies of the GNU General Public License and
+   the GNU Lesser General Public License along with this program.  If
+   not, see <http://www.gnu.org/licenses/>.  */
+
+#include <inttypes.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <libasm.h>
+
+struct instr_enc
+{
+  /* The mnemonic.  Especially encoded for the optimized table.  */
+  unsigned int mnemonic : MNEMONIC_BITS;
+
+  /* One-bit flags for the rep and repe prefixes.  */
+  unsigned int rep : 1;
+  unsigned int repe : 1;
+
+  /* Mnemonic suffix; the field width comes from SUFFIX_BITS.  */
+  unsigned int suffix : SUFFIX_BITS;
+
+  /* Nonzero if the instruction uses modr/m.  */
+  unsigned int modrm : 1;
+
+  /* 1st parameter; str1 exists only when STR1_BITS is defined.  */
+  unsigned int fct1 : FCT1_BITS;
+#ifdef STR1_BITS
+  unsigned int str1 : STR1_BITS;
+#endif
+  unsigned int off1_1 : OFF1_1_BITS;
+  unsigned int off1_2 : OFF1_2_BITS;
+  unsigned int off1_3 : OFF1_3_BITS;
+
+  /* 2nd parameter; str2 exists only when STR2_BITS is defined.  */
+  unsigned int fct2 : FCT2_BITS;
+#ifdef STR2_BITS
+  unsigned int str2 : STR2_BITS;
+#endif
+  unsigned int off2_1 : OFF2_1_BITS;
+  unsigned int off2_2 : OFF2_2_BITS;
+  unsigned int off2_3 : OFF2_3_BITS;
+
+  /* 3rd parameter; str3, off3_2 and off3_3 are all conditional.  */
+  unsigned int fct3 : FCT3_BITS;
+#ifdef STR3_BITS
+  unsigned int str3 : STR3_BITS;
+#endif
+  unsigned int off3_1 : OFF3_1_BITS;
+#ifdef OFF3_2_BITS
+  unsigned int off3_2 : OFF3_2_BITS;
+#endif
+#ifdef OFF3_3_BITS
+  unsigned int off3_3 : OFF3_3_BITS;
+#endif
+};
+
+
+typedef int (*opfct_t) (struct output_data *);	/* signature shared by the FCT_* routines below */
+
+
+static int
+data_prefix (struct output_data *d)
+{
+  /* Segment prefix bits in the order they are tested, each paired
+     with the first letter of the segment register name it stands
+     for.  */
+  const struct
+  {
+    int mask;
+    char letter;
+  } segments[] =
+    {
+      { has_cs, 'c' },
+      { has_ds, 'd' },
+      { has_es, 'e' },
+      { has_fs, 'f' },
+      { has_gs, 'g' },
+      { has_ss, 's' },
+    };
+  const size_t nsegments = sizeof (segments) / sizeof (segments[0]);
+
+  /* Locate the first segment prefix that is still set.  */
+  size_t idx = 0;
+  while (idx < nsegments && (*d->prefixes & segments[idx].mask) == 0)
+    ++idx;
+
+  /* Without any segment prefix there is nothing to print.  */
+  if (idx == nsegments)
+    return 0;
+
+  /* The bit is cleared even if the text turns out not to fit.  */
+  *d->prefixes &= ~segments[idx].mask;
+
+  /* Report how many bytes are missing when "%Xs:" does not fit.  */
+  size_t used = *d->bufcntp;
+  if (used + 4 > d->bufsize)
+    return used + 4 - d->bufsize;
+
+  char *out = &d->bufp[used];
+  out[0] = '%';
+  out[1] = segments[idx].letter;
+  out[2] = 's';
+  out[3] = ':';
+  *d->bufcntp = used + 4;
+
+  return 0;
+}
+
+#ifdef X86_64
+static const char hiregs[8][4] =	/* r8-r15, picked when a REX bit is set */
+  {
+    "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
+  };
+static const char aregs[8][4] =		/* 64-bit register names */
+  {
+    "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi"
+  };
+static const char dregs[8][4] =		/* 32-bit register names */
+  {
+    "eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi"
+  };
+#else	/* !X86_64 */
+static const char aregs[8][4] =		/* 32-bit register names */
+  {
+    "eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi"
+  };
+# define dregs aregs	/* both tables are the same in the 32-bit build */
+#endif	/* X86_64 */
+
+static int
+general_mod$r_m (struct output_data *d)
+{
+  int r = data_prefix (d);
+  if (r != 0)
+    return r;
+  /* Print the memory operand of the ModR/M byte at opoff1; returns 0 or the missing buffer bytes.  */
+  int prefixes = *d->prefixes;
+  const uint8_t *data = &d->data[d->opoff1 / 8];
+  char *bufp = d->bufp;
+  size_t *bufcntp = d->bufcntp;
+  size_t bufsize = d->bufsize;
+
+  uint_fast8_t modrm = data[0];
+#ifndef X86_64	/* the 16-bit addressing forms exist only in the 32-bit build */
+  if (unlikely ((prefixes & has_addr16) != 0))
+    {
+      int16_t disp = 0;
+      bool nodisp = false;
+
+      if ((modrm & 0xc7) == 6 || (modrm & 0xc0) == 0x80)
+	/* 16 bit displacement.  */
+	disp = read_2sbyte_unaligned (&data[1]);
+      else if ((modrm & 0xc0) == 0x40)
+	/* 8 bit displacement.  */
+	disp = *(const int8_t *) &data[1];
+      else if ((modrm & 0xc0) == 0)
+	nodisp = true;
+
+      char tmpbuf[sizeof ("-0x1234(%rr,%rr)")];
+      int n;
+      if ((modrm & 0xc7) == 6)
+	n = snprintf (tmpbuf, sizeof (tmpbuf), "0x%" PRIx16, disp);
+      else
+	{
+	  n = 0;
+	  if (!nodisp)
+	    n = snprintf (tmpbuf, sizeof (tmpbuf), "%s0x%" PRIx16,
+			  disp < 0 ? "-" : "", disp < 0 ? -disp : disp);
+
+	  if ((modrm & 0x4) == 0)
+	    n += snprintf (tmpbuf + n, sizeof (tmpbuf) - n, "(%%b%c,%%%ci)",
+			   "xp"[(modrm >> 1) & 1], "sd"[modrm & 1]);
+	  else
+	    n += snprintf (tmpbuf + n, sizeof (tmpbuf) - n, "(%%%s)",
+			   ((const char [4][3]) { "si", "di", "bp", "bx" })[modrm & 3]);
+	}
+
+      if (*bufcntp + n + 1 > bufsize)
+	return *bufcntp + n + 1 - bufsize;
+
+      memcpy (&bufp[*bufcntp], tmpbuf, n + 1);
+      *bufcntp += n;
+    }
+  else
+#endif
+    {
+      if ((modrm & 7) != 4)	/* r/m != 100: no SIB byte */
+	{
+	  int32_t disp = 0;
+	  bool nodisp = false;
+
+	  if ((modrm & 0xc7) == 5 || (modrm & 0xc0) == 0x80)
+	    /* 32 bit displacement.  */
+	    disp = read_4sbyte_unaligned (&data[1]);
+	  else if ((modrm & 0xc0) == 0x40)
+	    /* 8 bit displacement.  */
+	    disp = *(const int8_t *) &data[1];
+	  else if ((modrm & 0xc0) == 0)
+	    nodisp = true;
+
+	  char tmpbuf[sizeof ("-0x12345678(%rrrr)")];
+	  int n;
+	  if (nodisp)
+	    {
+	      n = snprintf (tmpbuf, sizeof (tmpbuf), "(%%%s)",
+#ifdef X86_64
+			    (prefixes & has_rex_b) ? hiregs[modrm & 7] :
+#endif
+			    aregs[modrm & 7]);
+#ifdef X86_64
+	      if (prefixes & has_addr16)
+		{
+		  if (prefixes & has_rex_b)
+		    memcpy (&tmpbuf[n++ - 1], "d)", 3);	/* "(%r8)" -> "(%r8d)", keeps the NUL */
+		  else
+		    tmpbuf[2] = 'e';
+		}
+#endif
+	    }
+	  else if ((modrm & 0xc7) != 5)
+	    {
+	      int p;
+	      n = snprintf (tmpbuf, sizeof (tmpbuf), "%s0x%" PRIx32 "(%%%n%s)",
+			    disp < 0 ? "-" : "", disp < 0 ? -disp : disp, &p,
+#ifdef X86_64
+			    (prefixes & has_rex_b) ? hiregs[modrm & 7] :
+#endif
+			    aregs[modrm & 7]);
+#ifdef X86_64
+	      if (prefixes & has_addr16)
+		{
+		  if (prefixes & has_rex_b)
+		    memcpy (&tmpbuf[n++ - 1], "d)", 3);	/* suffix goes inside the parentheses */
+		  else
+		    tmpbuf[p] = 'e';
+		}
+#endif
+	    }
+	  else
+	    {
+#ifdef X86_64
+	      n = snprintf (tmpbuf, sizeof (tmpbuf), "%s0x%" PRIx32 "(%%rip)",
+			    disp < 0 ? "-" : "", disp < 0 ? -disp : disp);
+
+	      d->symaddr_use = addr_rel_always;
+	      d->symaddr = disp;
+#else
+	      n = snprintf (tmpbuf, sizeof (tmpbuf), "0x%" PRIx32, disp);
+#endif
+	    }
+
+	  if (*bufcntp + n + 1 > bufsize)
+	    return *bufcntp + n + 1 - bufsize;
+
+	  memcpy (&bufp[*bufcntp], tmpbuf, n + 1);
+	  *bufcntp += n;
+	}
+      else
+	{
+	  /* r/m == 100: a SIB byte follows the ModR/M byte.  */
+	  uint_fast8_t sib = data[1];
+	  int32_t disp = 0;
+	  bool nodisp = false;
+
+	  if ((modrm & 0xc7) == 5 || (modrm & 0xc0) == 0x80
+	      || ((modrm & 0xc7) == 0x4 && (sib & 0x7) == 0x5))
+	    /* 32 bit displacement.  */
+	    disp = read_4sbyte_unaligned (&data[2]);
+	  else if ((modrm & 0xc0) == 0x40)
+	    /* 8 bit displacement.  */
+	    disp = *(const int8_t *) &data[2];
+	  else
+	    nodisp = true;
+
+	  char tmpbuf[sizeof ("-0x12345678(%rrrr,%rrrr,N)")];
+	  char *cp = tmpbuf;
+	  int n;
+	  if ((modrm & 0xc0) != 0 || (sib & 0x3f) != 0x25
+#ifdef X86_64
+	      || (prefixes & has_rex_x) != 0
+#endif
+	      )
+	    {
+	      if (!nodisp)
+		{
+		  n = snprintf (cp, sizeof (tmpbuf), "%s0x%" PRIx32,
+				disp < 0 ? "-" : "", disp < 0 ? -disp : disp);
+		  cp += n;
+		}
+
+	      *cp++ = '(';
+
+	      if ((modrm & 0xc7) != 0x4 || (sib & 0x7) != 0x5)
+		{
+		  *cp++ = '%';
+		  cp = stpcpy (cp,
+#ifdef X86_64
+			       (prefixes & has_rex_b) ? hiregs[sib & 7] :
+			       (prefixes & has_addr16) ? dregs[sib & 7] :
+#endif
+			       aregs[sib & 7]);
+#ifdef X86_64
+		  if ((prefixes & (has_rex_b | has_addr16))
+		      == (has_rex_b | has_addr16))
+		    *cp++ = 'd';
+#endif
+		}
+
+	      if ((sib & 0x38) != 0x20
+#ifdef X86_64
+		  || (prefixes & has_rex_x) != 0
+#endif
+		  )
+		{
+		  *cp++ = ',';
+		  *cp++ = '%';
+		  cp = stpcpy (cp,
+#ifdef X86_64
+			       (prefixes & has_rex_x)
+			       ? hiregs[(sib >> 3) & 7] :
+			       (prefixes & has_addr16)
+			       ? dregs[(sib >> 3) & 7] :
+#endif
+			       aregs[(sib >> 3) & 7]);
+#ifdef X86_64
+		  if ((prefixes & (has_rex_x | has_addr16))	/* index uses REX.X, not REX.B */
+		      == (has_rex_x | has_addr16))
+		    *cp++ = 'd';
+#endif
+
+		  *cp++ = ',';
+		  *cp++ = '0' + (1 << (sib >> 6));
+		}
+
+	      *cp++ = ')';
+	    }
+	  else
+	    {
+	      assert (! nodisp);
+#ifdef X86_64
+	      if ((prefixes & has_addr16) == 0)
+		n = snprintf (cp, sizeof (tmpbuf), "0x%" PRIx64,
+			      (int64_t) disp);
+	      else
+#endif
+		n = snprintf (cp, sizeof (tmpbuf), "0x%" PRIx32, disp);
+	      cp += n;
+	    }
+
+	  if (*bufcntp + (cp - tmpbuf) > bufsize)
+	    return *bufcntp + (cp - tmpbuf) - bufsize;
+
+	  memcpy (&bufp[*bufcntp], tmpbuf, cp - tmpbuf);
+	  *bufcntp += cp - tmpbuf;
+	}
+    }
+  return 0;
+}
+
+
+static int
+FCT_MOD$R_M (struct output_data *d)
+{
+  assert (d->opoff1 % 8 == 0);
+  const uint_fast8_t modrm_byte = d->data[d->opoff1 / 8];
+
+  /* Only mod == 11 names a register; everything else is a memory operand.  */
+  if ((modrm_byte & 0xc0) != 0xc0)
+    return general_mod$r_m (d);
+
+  assert (d->opoff1 / 8 == d->opoff2 / 8);
+  assert (d->opoff2 % 8 == 5);
+  /* The r/m field (low three bits) selects the register.  */
+  const uint_fast8_t regno = modrm_byte & 7;
+  const size_t used = *d->bufcntp;
+  const size_t room = d->bufsize - used;
+  char *const out = d->bufp + used;
+
+  /* With a rep or repne prefix the name comes from dregs, otherwise it is %mmN.  */
+  const int use_dregs = (*d->prefixes & (has_rep | has_repne)) != 0;
+  const int len = (use_dregs
+		   ? snprintf (out, room, "%%%s", dregs[regno])
+		   : snprintf (out, room, "%%mm%" PRIxFAST8, regno));
+  if ((size_t) len > room)
+    return len - room;
+  *d->bufcntp = used + len;
+  return 0;
+}
+
+
+static int
+FCT_Mod$R_m (struct output_data *d)
+{
+  assert (d->opoff1 % 8 == 0);
+  const uint_fast8_t modrm_byte = d->data[d->opoff1 / 8];
+
+  /* Only mod == 11 names a register; everything else is a memory operand.  */
+  if ((modrm_byte & 0xc0) != 0xc0)
+    return general_mod$r_m (d);
+
+  assert (d->opoff1 / 8 == d->opoff2 / 8);
+  assert (d->opoff2 % 8 == 5);
+
+  /* The r/m field (low three bits) is the XMM register number.  */
+  const uint_fast8_t regno = modrm_byte & 7;
+  const size_t used = *d->bufcntp;
+  const size_t room = d->bufsize - used;
+  const int len = snprintf (d->bufp + used, room, "%%xmm%" PRIxFAST8, regno);
+  if ((size_t) len > room)
+    return len - room;
+
+  *d->bufcntp = used + len;
+  return 0;
+}
+
+static int
+generic_abs (struct output_data *d, const char *absstring
+#ifdef X86_64
+	     , int abslen	/* width of the value in bytes; 8 selects the 64-bit read */
+#else
+# define abslen 4	/* no parameter in the 32-bit build: the width is fixed */
+#endif
+	     )
+{
+  int r = data_prefix (d);
+  if (r != 0)
+    return r;
+  /* Prints ABSSTRING followed by the value as "0x<hex>"; -1 if the bytes run past d->end.  */
+  assert (d->opoff1 % 8 == 0);
+  assert (d->opoff1 / 8 == 1);
+  if (*d->param_start + abslen > d->end)
+    return -1;
+  *d->param_start += abslen;	/* consume the value bytes */
+#ifndef X86_64
+  uint32_t absval;
+# define ABSPRIFMT PRIx32
+#else
+  uint64_t absval;
+# define ABSPRIFMT PRIx64
+  if (abslen == 8)
+    absval = read_8ubyte_unaligned (&d->data[1]);
+  else
+#endif
+    absval = read_4ubyte_unaligned (&d->data[1]);	/* value starts at byte 1 (see asserts) */
+  size_t *bufcntp = d->bufcntp;
+  size_t avail = d->bufsize - *bufcntp;
+  int needed = snprintf (&d->bufp[*bufcntp], avail, "%s0x%" ABSPRIFMT,
+			 absstring, absval);
+  if ((size_t) needed > avail)
+    return needed - avail;	/* number of bytes the buffer is short */
+  *bufcntp += needed;
+  return 0;
+}
+
+
+static int
+FCT_absval (struct output_data *d)
+{
+  return generic_abs (d, "$"	/* "$" prefix; on x86-64 the value is 4 bytes wide */
+#ifdef X86_64
+		      , 4
+#endif
+		      );
+}
+
+static int
+FCT_abs (struct output_data *d)
+{
+  return generic_abs (d, ""	/* no prefix; on x86-64 the value is 8 bytes wide */
+#ifdef X86_64
+		      , 8
+#endif
+		      );
+}
+
+static int
+FCT_ax (struct output_data *d)
+{
+  /* Assemble "%ax", "%eax" or (x86-64 with has_rex_w set) "%rax" locally first.  */
+  char name[4];
+  size_t len = 0;
+  name[len++] = '%';
+  if ((*d->prefixes & has_data16) == 0)
+#ifdef X86_64
+    name[len++] = (*d->prefixes & has_rex_w) ? 'r' : 'e';
+#else
+    name[len++] = 'e';
+#endif
+  name[len++] = 'a';
+  name[len++] = 'x';
+
+  const size_t used = *d->bufcntp;
+  if (used + len > d->bufsize)
+    return used + len - d->bufsize;
+
+  memcpy (&d->bufp[used], name, len);
+  *d->bufcntp = used + len;
+  return 0;
+}
+
+
+static int
+FCT_ax$w (struct output_data *d)
+{
+  /* Test the single bit opoff2 points at, counting from the MSB.  */
+  const unsigned int wbyte = d->data[d->opoff2 / 8];
+  const unsigned int wmask = 1u << (7 - (d->opoff2 & 7));
+  if ((wbyte & wmask) != 0)
+    return FCT_ax (d);
+
+  /* Bit clear: the byte-sized register.  */
+  static const char al[] = "%al";
+  const size_t len = sizeof (al) - 1;
+  const size_t used = *d->bufcntp;
+  if (used + len > d->bufsize)
+    return used + len - d->bufsize;
+  memcpy (&d->bufp[used], al, len);
+  *d->bufcntp = used + len;
+  return 0;
+}
+
+
+static int
+__attribute__ ((noinline))
+FCT_crdb (struct output_data *d, const char *regstr)
+{
+  /* Refuse when the data16 prefix is set.  */
+  if ((*d->prefixes & has_data16) != 0)
+    return -1;
+
+  // XXX If this assert is true, use absolute offset below
+  assert (d->opoff1 / 8 == 2);
+  assert (d->opoff1 % 8 == 2);
+  /* Register number: bits 5..3 of the byte opoff1 points into.  */
+  const uint32_t regno = (uint32_t) (d->data[d->opoff1 / 8] >> 3) & 7;
+  const size_t room = d->bufsize - *d->bufcntp;
+  const int len = snprintf (d->bufp + *d->bufcntp, room, "%%%s%" PRIx32, regstr, regno);
+  if ((size_t) len > room)
+    return len - room;
+  *d->bufcntp += len;
+  return 0;
+}
+
+
+static int
+FCT_ccc (struct output_data *d)
+{
+  return FCT_crdb (d, "cr");	/* prints "%cr" followed by the register number */
+}
+
+
+static int
+FCT_ddd (struct output_data *d)
+{
+  return FCT_crdb (d, "db");	/* prints "%db" followed by the register number */
+}
+
+
+static int
+FCT_disp8 (struct output_data *d)
+{
+  assert (d->opoff1 % 8 == 0);
+  if (! (*d->param_start < d->end))
+    return -1;
+
+  /* Consume the signed byte; the target is relative to the address just past it.  */
+  const int32_t rel = *(const int8_t *) *d->param_start;
+  ++*d->param_start;
+  const uint32_t target = (uint32_t) (d->addr + (*d->param_start - d->data) + rel);
+  const size_t room = d->bufsize - *d->bufcntp;
+  const int len = snprintf (&d->bufp[*d->bufcntp], room, "0x%" PRIx32, target);
+  if ((size_t) len > room)
+    return len - room;
+  *d->bufcntp += len;
+  return 0;
+}
+
+
+static int
+__attribute__ ((noinline))
+FCT_ds_xx (struct output_data *d, const char *reg)
+{
+  /* Emit "%seg:(%XREG)": the segment defaults to %ds and X is the width
+     letter chosen by the address-size prefix.  Returns 0, the number of
+     missing buffer bytes, or -1 if several segment prefixes are set.  */
+  int prefix = *d->prefixes & SEGMENT_PREFIXES;
+  if (prefix == 0)
+    *d->prefixes |= prefix = has_ds;
+  /* Make sure only one bit is set.  */
+  else if ((prefix - 1) & prefix)
+    return -1;
+
+  int r = data_prefix (d);
+  assert ((*d->prefixes & prefix) == 0);
+
+  if (r != 0)
+    return r;
+
+  size_t *bufcntp = d->bufcntp;
+  size_t avail = d->bufsize - *bufcntp;
+  int needed = snprintf (&d->bufp[*bufcntp], avail, "(%%%s%s)",
+#ifdef X86_64
+			 *d->prefixes & has_addr16 ? "e" : "r",	/* mask, not the idx_ bit number */
+#else
+			 *d->prefixes & has_addr16 ? "" : "e",
+#endif
+			 reg);
+  if ((size_t) needed > avail)
+    return (size_t) needed - avail;
+  *bufcntp += needed;
+  return 0;
+}
+
+
+/* Print the "bx"-based indirect operand via FCT_ds_xx.  */
+static int
+FCT_ds_bx (struct output_data *d)
+{
+  static const char base[] = "bx";
+
+  return FCT_ds_xx (d, base);
+}
+
+
+/* Print the "si"-based indirect operand via FCT_ds_xx.  */
+static int
+FCT_ds_si (struct output_data *d)
+{
+  static const char base[] = "si";
+
+  return FCT_ds_xx (d, base);
+}
+
+
+/* Append the fixed operand "(%dx)".  Returns 0 on success or the
+   number of buffer bytes that are missing.  */
+static int
+FCT_dx (struct output_data *d)
+{
+  static const char operand[] = "(%dx)";
+  /* Reserve exactly what is written; the terminating NUL is not
+     copied.  */
+  const size_t len = sizeof operand - 1;
+  size_t *bufcntp = d->bufcntp;
+
+  if (*bufcntp + len > d->bufsize)
+    return *bufcntp + len - d->bufsize;
+
+  memcpy (&d->bufp[*bufcntp], operand, len);
+  *bufcntp += len;
+
+  return 0;
+}
+
+
+/* Append "%es:(%<size>di)".  The size letter depends on the
+   has_addr16 prefix bit: "e"/"r" when X86_64 is defined, ""/"e"
+   otherwise.  Returns 0 on success, -1 if snprintf fails, or the
+   positive number of additional buffer bytes required.  */
+static int
+FCT_es_di (struct output_data *d)
+{
+  size_t *bufcntp = d->bufcntp;
+  size_t avail = d->bufsize - *bufcntp;
+  /* Test the has_addr16 mask; idx_addr16 is only the bit number and
+     would test unrelated (REX) bits.  */
+  int needed = snprintf (&d->bufp[*bufcntp], avail, "%%es:(%%%sdi)",
+#ifdef X86_64
+			 *d->prefixes & has_addr16 ? "e" : "r"
+#else
+			 *d->prefixes & has_addr16 ? "" : "e"
+#endif
+			 );
+  if (needed < 0)
+    return -1;
+  /* snprintf stores at most AVAIL - 1 characters followed by a NUL, so
+     a result equal to AVAIL also means the text was truncated.  */
+  if ((size_t) needed >= avail)
+    return (size_t) needed - avail + 1;
+  *bufcntp += needed;
+
+  return 0;
+}
+
+
+/* Print an immediate operand "$0x...".  With has_data16 a 16-bit
+   value is read; otherwise a signed 32-bit value is read and, when
+   X86_64 is defined and has_rex_w is set, printed sign-extended to 64
+   bits.  The bytes are consumed from *d->param_start.  Returns 0 on
+   success, -1 if the immediate would run past d->end or snprintf
+   fails, or the positive number of additional buffer bytes
+   required.  */
+static int
+FCT_imm (struct output_data *d)
+{
+  size_t *bufcntp = d->bufcntp;
+  size_t avail = d->bufsize - *bufcntp;
+  int needed;
+  if (*d->prefixes & has_data16)
+    {
+      if (*d->param_start + 2 > d->end)
+	return -1;
+      uint16_t word = read_2ubyte_unaligned_inc (*d->param_start);
+      needed = snprintf (&d->bufp[*bufcntp], avail, "$0x%" PRIx16, word);
+    }
+  else
+    {
+      if (*d->param_start + 4 > d->end)
+	return -1;
+      int32_t word = read_4sbyte_unaligned_inc (*d->param_start);
+#ifdef X86_64
+      if (*d->prefixes & has_rex_w)
+	needed = snprintf (&d->bufp[*bufcntp], avail, "$0x%" PRIx64,
+			   (int64_t) word);
+      else
+#endif
+	needed = snprintf (&d->bufp[*bufcntp], avail, "$0x%" PRIx32, word);
+    }
+  if (needed < 0)
+    return -1;
+  /* snprintf stores at most AVAIL - 1 characters followed by a NUL, so
+     a result equal to AVAIL also means the text was truncated.  */
+  if ((size_t) needed >= avail)
+    return (size_t) needed - avail + 1;
+  *bufcntp += needed;
+  return 0;
+}
+
+
+/* Print an immediate whose size depends on a width bit: if the bit at
+   bit offset opoff2 is set FCT_imm does the work, otherwise a single
+   byte is consumed from *d->param_start and printed as "$0x..".
+   Returns 0 on success, -1 if no input byte is left or snprintf
+   fails, or the positive number of additional buffer bytes
+   required.  */
+static int
+FCT_imm$w (struct output_data *d)
+{
+  if ((d->data[d->opoff2 / 8] & (1 << (7 - (d->opoff2 & 7)))) != 0)
+    return FCT_imm (d);
+
+  size_t *bufcntp = d->bufcntp;
+  size_t avail = d->bufsize - *bufcntp;
+  if (*d->param_start >= d->end)
+    return -1;
+  uint_fast8_t word = *(*d->param_start)++;
+  int needed = snprintf (&d->bufp[*bufcntp], avail, "$0x%" PRIxFAST8, word);
+  if (needed < 0)
+    return -1;
+  /* snprintf stores at most AVAIL - 1 characters followed by a NUL, so
+     a result equal to AVAIL also means the text was truncated.  */
+  if ((size_t) needed >= avail)
+    return (size_t) needed - avail + 1;
+  *bufcntp += needed;
+  return 0;
+}
+
+
+#ifdef X86_64
+/* Print an immediate that may be 64 bits wide.  If the width bit at
+   bit offset opoff2 is clear or has_data16 is set, FCT_imm$w does the
+   work.  Otherwise an 8-byte value is read when has_rex_w is set and
+   a signed 4-byte value when it is not.  Returns 0 on success, -1 if
+   the immediate would run past d->end or snprintf fails, or the
+   positive number of additional buffer bytes required.  */
+static int
+FCT_imm64$w (struct output_data *d)
+{
+  if ((d->data[d->opoff2 / 8] & (1 << (7 - (d->opoff2 & 7)))) == 0
+      || (*d->prefixes & has_data16) != 0)
+    return FCT_imm$w (d);
+
+  size_t *bufcntp = d->bufcntp;
+  size_t avail = d->bufsize - *bufcntp;
+  int needed;
+  if (*d->prefixes & has_rex_w)
+    {
+      if (*d->param_start + 8 > d->end)
+	return -1;
+      uint64_t word = read_8ubyte_unaligned_inc (*d->param_start);
+      needed = snprintf (&d->bufp[*bufcntp], avail, "$0x%" PRIx64, word);
+    }
+  else
+    {
+      if (*d->param_start + 4 > d->end)
+	return -1;
+      int32_t word = read_4sbyte_unaligned_inc (*d->param_start);
+      needed = snprintf (&d->bufp[*bufcntp], avail, "$0x%" PRIx32, word);
+    }
+  if (needed < 0)
+    return -1;
+  /* snprintf stores at most AVAIL - 1 characters followed by a NUL, so
+     a result equal to AVAIL also means the text was truncated.  */
+  if ((size_t) needed >= avail)
+    return (size_t) needed - avail + 1;
+  *bufcntp += needed;
+  return 0;
+}
+#endif
+
+
+/* Print a signed 8-bit immediate, sign-extended to 64 bits when
+   X86_64 is defined and to 32 bits otherwise, as "$0x...".  The byte
+   is consumed from *d->param_start.  Returns 0 on success, -1 if no
+   input byte is left or snprintf fails, or the positive number of
+   additional buffer bytes required.  */
+static int
+FCT_imms (struct output_data *d)
+{
+  size_t *bufcntp = d->bufcntp;
+  size_t avail = d->bufsize - *bufcntp;
+  if (*d->param_start >= d->end)
+    return -1;
+  int8_t byte = *(*d->param_start)++;
+#ifdef X86_64
+  int needed = snprintf (&d->bufp[*bufcntp], avail, "$0x%" PRIx64,
+			 (int64_t) byte);
+#else
+  int needed = snprintf (&d->bufp[*bufcntp], avail, "$0x%" PRIx32,
+			 (int32_t) byte);
+#endif
+  if (needed < 0)
+    return -1;
+  /* snprintf stores at most AVAIL - 1 characters followed by a NUL, so
+     a result equal to AVAIL also means the text was truncated.  */
+  if ((size_t) needed >= avail)
+    return (size_t) needed - avail + 1;
+  *bufcntp += needed;
+  return 0;
+}
+
+
+/* Print an immediate whose encoding depends on bit 1 of the byte at
+   opoff2 / 8 (presumably the opcode's sign-extension bit): when set,
+   FCT_imms prints a sign-extended byte.  Otherwise a 16-bit value is
+   read if has_data16 is set and a signed 32-bit value if not (printed
+   with 64 bits when X86_64 is defined).  Returns 0 on success, -1 if
+   the immediate would run past d->end or snprintf fails, or the
+   positive number of additional buffer bytes required.  */
+static int
+FCT_imm$s (struct output_data *d)
+{
+  uint_fast8_t opcode = d->data[d->opoff2 / 8];
+  if ((opcode & 2) != 0)
+    return FCT_imms (d);
+
+  size_t *bufcntp = d->bufcntp;
+  size_t avail = d->bufsize - *bufcntp;
+  int needed;
+  if ((*d->prefixes & has_data16) == 0)
+    {
+      if (*d->param_start + 4 > d->end)
+	return -1;
+      int32_t word = read_4sbyte_unaligned_inc (*d->param_start);
+#ifdef X86_64
+      needed = snprintf (&d->bufp[*bufcntp], avail, "$0x%" PRIx64,
+			 (int64_t) word);
+#else
+      needed = snprintf (&d->bufp[*bufcntp], avail, "$0x%" PRIx32, word);
+#endif
+    }
+  else
+    {
+      if (*d->param_start + 2 > d->end)
+	return -1;
+      uint16_t word = read_2ubyte_unaligned_inc (*d->param_start);
+      needed = snprintf (&d->bufp[*bufcntp], avail, "$0x%" PRIx16, word);
+    }
+  if (needed < 0)
+    return -1;
+  /* snprintf stores at most AVAIL - 1 characters followed by a NUL, so
+     a result equal to AVAIL also means the text was truncated.  */
+  if ((size_t) needed >= avail)
+    return (size_t) needed - avail + 1;
+  *bufcntp += needed;
+  return 0;
+}
+
+
+/* Print a 16-bit immediate as "$0x....".  Two bytes are consumed from
+   *d->param_start.  Returns 0 on success, -1 if the immediate would
+   run past d->end or snprintf fails, or the positive number of
+   additional buffer bytes required.  */
+static int
+FCT_imm16 (struct output_data *d)
+{
+  if (*d->param_start + 2 > d->end)
+    return -1;
+  uint16_t word = read_2ubyte_unaligned_inc (*d->param_start);
+  size_t *bufcntp = d->bufcntp;
+  size_t avail = d->bufsize - *bufcntp;
+  int needed = snprintf (&d->bufp[*bufcntp], avail, "$0x%" PRIx16, word);
+  if (needed < 0)
+    return -1;
+  /* snprintf stores at most AVAIL - 1 characters followed by a NUL, so
+     a result equal to AVAIL also means the text was truncated.  */
+  if ((size_t) needed >= avail)
+    return (size_t) needed - avail + 1;
+  *bufcntp += needed;
+  return 0;
+}
+
+
+/* Print a signed 8-bit immediate as "$0x...", sign-extended to 64
+   bits when X86_64 is defined and has_rex_w is set, and to 32 bits
+   otherwise.  The byte is consumed from *d->param_start.  Returns 0
+   on success, -1 if no input byte is left or snprintf fails, or the
+   positive number of additional buffer bytes required.  */
+static int
+FCT_imms8 (struct output_data *d)
+{
+  size_t *bufcntp = d->bufcntp;
+  size_t avail = d->bufsize - *bufcntp;
+  if (*d->param_start >= d->end)
+    return -1;
+  /* Exactly eight bits, as in FCT_imms: int_fast8_t may be wider than
+     a byte, in which case the value would not be sign-extended.  */
+  int8_t byte = *(*d->param_start)++;
+  int needed;
+#ifdef X86_64
+  if (*d->prefixes & has_rex_w)
+    needed = snprintf (&d->bufp[*bufcntp], avail, "$0x%" PRIx64,
+		       (int64_t) byte);
+  else
+#endif
+    needed = snprintf (&d->bufp[*bufcntp], avail, "$0x%" PRIx32,
+		       (int32_t) byte);
+  if (needed < 0)
+    return -1;
+  /* snprintf stores at most AVAIL - 1 characters followed by a NUL, so
+     a result equal to AVAIL also means the text was truncated.  */
+  if ((size_t) needed >= avail)
+    return (size_t) needed - avail + 1;
+  *bufcntp += needed;
+  return 0;
+}
+
+
+/* Print an unsigned 8-bit immediate as "$0x..".  The byte is consumed
+   from *d->param_start.  Returns 0 on success, -1 if no input byte is
+   left or snprintf fails, or the positive number of additional buffer
+   bytes required.  */
+static int
+FCT_imm8 (struct output_data *d)
+{
+  size_t *bufcntp = d->bufcntp;
+  size_t avail = d->bufsize - *bufcntp;
+  if (*d->param_start >= d->end)
+    return -1;
+  uint_fast8_t byte = *(*d->param_start)++;
+  int needed = snprintf (&d->bufp[*bufcntp], avail, "$0x%" PRIx32,
+			 (uint32_t) byte);
+  if (needed < 0)
+    return -1;
+  /* snprintf stores at most AVAIL - 1 characters followed by a NUL, so
+     a result equal to AVAIL also means the text was truncated.  */
+  if ((size_t) needed >= avail)
+    return (size_t) needed - avail + 1;
+  *bufcntp += needed;
+  return 0;
+}
+
+
+/* Print the target of a branch with a signed 32-bit displacement.
+   Four bytes are consumed from *d->param_start; the target is d->addr
+   plus the offset of the byte after the displacement plus the
+   displacement, printed with 64 bits when X86_64 is defined and 32
+   bits otherwise.  Returns 0 on success, -1 if the displacement would
+   run past d->end or snprintf fails, or the positive number of
+   additional buffer bytes required.  */
+static int
+FCT_rel (struct output_data *d)
+{
+  size_t *bufcntp = d->bufcntp;
+  size_t avail = d->bufsize - *bufcntp;
+  if (*d->param_start + 4 > d->end)
+    return -1;
+  int32_t rel = read_4sbyte_unaligned_inc (*d->param_start);
+#ifdef X86_64
+  int needed = snprintf (&d->bufp[*bufcntp], avail, "0x%" PRIx64,
+			 (uint64_t) (d->addr + rel
+				     + (*d->param_start - d->data)));
+#else
+  int needed = snprintf (&d->bufp[*bufcntp], avail, "0x%" PRIx32,
+			 (uint32_t) (d->addr + rel
+				     + (*d->param_start - d->data)));
+#endif
+  if (needed < 0)
+    return -1;
+  /* snprintf stores at most AVAIL - 1 characters followed by a NUL, so
+     a result equal to AVAIL also means the text was truncated.  */
+  if ((size_t) needed >= avail)
+    return (size_t) needed - avail + 1;
+  *bufcntp += needed;
+  return 0;
+}
+
+
+/* Print "%mm<n>", where N is the 3-bit field starting at bit offset
+   opoff1 % 8 (which must be 2 or 5) of the byte at opoff1 / 8.
+   Returns 0 on success, -1 if snprintf fails, or the positive number
+   of additional buffer bytes required.  */
+static int
+FCT_mmxreg (struct output_data *d)
+{
+  uint_fast8_t byte = d->data[d->opoff1 / 8];
+  assert (d->opoff1 % 8 == 2 || d->opoff1 % 8 == 5);
+  byte = (byte >> (5 - d->opoff1 % 8)) & 7;
+  size_t *bufcntp =  d->bufcntp;
+  size_t avail = d->bufsize - *bufcntp;
+  int needed = snprintf (&d->bufp[*bufcntp], avail, "%%mm%" PRIxFAST8, byte);
+  if (needed < 0)
+    return -1;
+  /* snprintf stores at most AVAIL - 1 characters followed by a NUL, so
+     a result equal to AVAIL also means the text was truncated.  */
+  if ((size_t) needed >= avail)
+    return (size_t) needed - avail + 1;
+  *bufcntp += needed;
+  return 0;
+}
+
+/* Print the operand described by the ModR/M byte at opoff1 / 8.  When
+   the two top bits are both set the operand is a register: '%' plus a
+   name from hiregs or dregs (both defined outside this view) is
+   appended, adjusted by the has_data16, has_rex_b and has_rex_w
+   prefix bits.  Every other form is passed to general_mod$r_m.
+   Returns 0 on success, -1 if has_addr16 is set for a register
+   operand, or the number of buffer bytes that are missing.
+   NOTE(review): when has_rex_w and has_data16 are both set, the 'r'
+   store overwrites the first letter of the 16-bit name; FCT_reg
+   guards the same store with !is_16bit -- confirm which is meant.  */
+static int
+FCT_mod$r_m (struct output_data *d)
+{
+  assert (d->opoff1 % 8 == 0);
+  uint_fast8_t modrm = d->data[d->opoff1 / 8];
+  if ((modrm & 0xc0) == 0xc0)
+    {
+      int prefixes = *d->prefixes;
+      if (prefixes & has_addr16)
+	return -1;
+
+      int is_16bit = (prefixes & has_data16) != 0;
+
+      size_t *bufcntp = d->bufcntp;
+      char *bufp = d->bufp;
+      if (*bufcntp + 5 - is_16bit > d->bufsize)
+	return *bufcntp + 5 - is_16bit - d->bufsize;
+      bufp[(*bufcntp)++] = '%';
+
+      char *cp; /* One past the last character written.  */
+#ifdef X86_64
+      if ((prefixes & has_rex_b) != 0 && !is_16bit)
+	{
+	  cp = stpcpy (&bufp[*bufcntp], hiregs[modrm & 7]);
+	  if ((prefixes & has_rex_w) == 0)
+	    *cp++ = 'd'; /* Replaces the NUL stored by stpcpy.  */
+	}
+      else
+#endif
+	{
+	  cp = stpcpy (&bufp[*bufcntp], dregs[modrm & 7] + is_16bit);
+#ifdef X86_64
+	  if ((prefixes & has_rex_w) != 0)
+	    bufp[*bufcntp] = 'r'; /* Overwrite the name's first letter.  */
+#endif
+	}
+      *bufcntp = cp - bufp;
+      return 0;
+    }
+
+  return general_mod$r_m (d);
+}
+
+
+#ifndef X86_64
+/* Print the operand described by the ModR/M byte at opoff1 / 8.
+   Register forms (both top bits set) are printed as the placeholder
+   "???", or rejected with -1 when has_addr16 is set; every other form
+   goes to general_mod$r_m.  Returns 0 on success or the number of
+   buffer bytes that are missing.  */
+static int
+FCT_moda$r_m (struct output_data *d)
+{
+  assert (d->opoff1 % 8 == 0);
+
+  if ((d->data[d->opoff1 / 8] & 0xc0) != 0xc0)
+    return general_mod$r_m (d);
+
+  if (*d->prefixes & has_addr16)
+    return -1;
+
+  static const char placeholder[] = "???";
+  const size_t len = sizeof placeholder - 1;
+  size_t pos = *d->bufcntp;
+
+  if (pos + len > d->bufsize)
+    return pos + len - d->bufsize;
+
+  memcpy (d->bufp + pos, placeholder, len);
+  *d->bufcntp = pos + len;
+  return 0;
+}
+#endif
+
+/* Name stems indexed by 3-bit register number.  FCT_reg$w appends 'l'
+   to the stem when has_rex is set and has_rex_r is not.  */
+#ifdef X86_64
+static const char rex_8bit[8][3] =
+  {
+    [0] = "a", [1] = "c", [2] = "d", [3] = "b",
+    [4] = "sp", [5] = "bp", [6] = "si", [7] = "di"
+  };
+#endif
+
+
+/* Print the operand described by the ModR/M byte at opoff1 / 8 for an
+   instruction with a width bit at bit offset opoff3.  Register forms
+   (both top bits set) are printed here: an 8-bit register name when
+   the width bit is clear, otherwise a 16/32/64-bit name chosen by the
+   has_data16, has_rex_b and has_rex_w prefix bits.  Every other form
+   goes to general_mod$r_m.  Returns 0 on success, -1 if has_addr16 is
+   set for a register operand, or the number of buffer bytes that are
+   missing.
+   NOTE(review): the 8-bit path tests has_rex_r and appends 'l' to a
+   hiregs name, while FCT_mod$r_m uses has_rex_b for the same field and
+   FCT_reg$w uses rex_8bit for the stems -- confirm the intent.  */
+static int
+FCT_mod$r_m$w (struct output_data *d)
+{
+  assert (d->opoff1 % 8 == 0);
+  const uint8_t *data = d->data;
+  uint_fast8_t modrm = data[d->opoff1 / 8];
+  if ((modrm & 0xc0) == 0xc0)
+    {
+      int prefixes = *d->prefixes;
+
+      if (prefixes & has_addr16)
+	return -1;
+
+      size_t *bufcntp = d->bufcntp;
+      char *bufp = d->bufp;
+      /* The longest output is "%r15b" (five characters); snprintf and
+	 stpcpy also store a terminating NUL, so six bytes must be
+	 available or the last character would be lost.  */
+      if (*bufcntp + 6 > d->bufsize)
+	return *bufcntp + 6 - d->bufsize;
+
+      if ((data[d->opoff3 / 8] & (1 << (7 - (d->opoff3 & 7)))) == 0)
+	{
+	  bufp[(*bufcntp)++] = '%';
+
+#ifdef X86_64
+	  if (prefixes & has_rex)
+	    {
+	      if (prefixes & has_rex_r)
+		*bufcntp += snprintf (bufp + *bufcntp, d->bufsize - *bufcntp,
+				      "r%db", 8 + (modrm & 7));
+	      else
+		{
+		  char *cp = stpcpy (bufp + *bufcntp, hiregs[modrm & 7]);
+		  *cp++ = 'l';
+		  *bufcntp = cp - bufp;
+		}
+	    }
+	  else
+#endif
+	    {
+	      bufp[(*bufcntp)++] = "acdb"[modrm & 3];
+	      bufp[(*bufcntp)++] = "lh"[(modrm & 4) >> 2];
+	    }
+	}
+      else
+	{
+	  int is_16bit = (prefixes & has_data16) != 0;
+
+	  bufp[(*bufcntp)++] = '%';
+
+	  char *cp;
+#ifdef X86_64
+	  if ((prefixes & has_rex_b) != 0 && !is_16bit)
+	    {
+	      cp = stpcpy (&bufp[*bufcntp], hiregs[modrm & 7]);
+	      if ((prefixes & has_rex_w) == 0)
+		*cp++ = 'd';
+	    }
+	  else
+#endif
+	    {
+	      cp = stpcpy (&bufp[*bufcntp], dregs[modrm & 7] + is_16bit);
+#ifdef X86_64
+	      if ((prefixes & has_rex_w) != 0)
+		bufp[*bufcntp] = 'r';
+#endif
+	    }
+	  *bufcntp = cp - bufp;
+	}
+      return 0;
+    }
+
+  return general_mod$r_m (d);
+}
+
+
+/* Print the operand described by the ModR/M byte at opoff1 / 8 as an
+   8-bit operand.  Register forms (both top bits set) yield '%', one
+   of "acdb" selected by the low two bits, and 'l' or 'h' selected by
+   bit 2; every other form goes to general_mod$r_m.  Returns 0 on
+   success or the number of buffer bytes that are missing.  */
+static int
+FCT_mod$8r_m (struct output_data *d)
+{
+  assert (d->opoff1 % 8 == 0);
+  const uint_fast8_t modrm = d->data[d->opoff1 / 8];
+
+  if ((modrm & 0xc0) != 0xc0)
+    return general_mod$r_m (d);
+
+  const size_t pos = *d->bufcntp;
+  if (pos + 3 > d->bufsize)
+    return pos + 3 - d->bufsize;
+
+  char *out = d->bufp + pos;
+  out[0] = '%';
+  out[1] = "acdb"[modrm & 3];
+  out[2] = "lh"[(modrm & 4) >> 2];
+  *d->bufcntp = pos + 3;
+  return 0;
+}
+
+
+/* Print the operand described by the ModR/M byte at opoff1 / 8 as a
+   16-bit operand.  Register forms (both top bits set) yield '%' plus
+   the dregs name with its first letter skipped; every other form goes
+   to general_mod$r_m.  Returns 0 on success or the number of buffer
+   bytes that are missing.  */
+static int
+FCT_mod$16r_m (struct output_data *d)
+{
+  assert (d->opoff1 % 8 == 0);
+  uint_fast8_t modrm = d->data[d->opoff1 / 8];
+  if ((modrm & 0xc0) == 0xc0)
+    {
+      assert (d->opoff1 / 8 == d->opoff2 / 8);
+      //uint_fast8_t byte = data[opoff2 / 8] & 7;
+      uint_fast8_t byte = modrm & 7;
+
+      size_t *bufcntp = d->bufcntp;
+      if (*bufcntp + 3 > d->bufsize)
+	return *bufcntp + 3 - d->bufsize;
+      d->bufp[(*bufcntp)++] = '%';
+      /* Copy exactly the two characters that are reserved above and
+	 counted below.  Copying sizeof (dregs[0]) - 1 bytes would also
+	 store the byte(s) after the name and could write past the end
+	 of the buffer when exactly three bytes are free.  */
+      memcpy (&d->bufp[*bufcntp], dregs[byte] + 1, 2);
+      *bufcntp += 2;
+      return 0;
+    }
+
+  return general_mod$r_m (d);
+}
+
+
+#ifdef X86_64
+/* Print the operand described by the ModR/M byte at opoff1 / 8 as a
+   64-bit operand.  Register forms (both top bits set) yield '%' plus
+   a name from hiregs when has_rex_b is set and from aregs otherwise;
+   every other form goes to general_mod$r_m.  Returns 0 on success or
+   the number of buffer bytes that are missing.  When X86_64 is not
+   defined this name is an alias of FCT_mod$r_m.  */
+static int
+FCT_mod$64r_m (struct output_data *d)
+{
+  assert (d->opoff1 % 8 == 0);
+  uint_fast8_t modrm = d->data[d->opoff1 / 8];
+  if ((modrm & 0xc0) == 0xc0)
+    {
+      assert (d->opoff1 / 8 == d->opoff2 / 8);
+      //uint_fast8_t byte = data[opoff2 / 8] & 7;
+      uint_fast8_t byte = modrm & 7;
+
+      size_t *bufcntp = d->bufcntp;
+      /* '%', the register name and the NUL that stpcpy stores.  This
+	 assumes names of at most three characters; the tables are
+	 defined outside this view -- TODO confirm.  */
+      if (*bufcntp + 5 > d->bufsize)
+	return *bufcntp + 5 - d->bufsize;
+      char *cp = &d->bufp[*bufcntp];
+      *cp++ = '%';
+      cp = stpcpy (cp,
+		   (*d->prefixes & has_rex_b) ? hiregs[byte] : aregs[byte]);
+      *bufcntp = cp - d->bufp;
+      return 0;
+    }
+
+  return general_mod$r_m (d);
+}
+#else
+static typeof (FCT_mod$r_m) FCT_mod$64r_m __attribute__ ((alias ("FCT_mod$r_m")));
+#endif
+
+
+/* Print the register selected by the 3-bit field starting at bit
+   offset opoff1 % 8 of the byte at opoff1 / 8.  The name comes from
+   dregs, with the first letter skipped when has_data16 is set.  When
+   X86_64 is defined, has_rex_r selects "r<8+n>" (with a 'd' suffix
+   unless has_rex_w is set) and has_rex_w replaces the first letter by
+   'r'; neither applies to the 16-bit case.  Returns 0 on success or
+   the number of buffer bytes that are missing.  */
+static int
+FCT_reg (struct output_data *d)
+{
+  assert (d->opoff1 % 8 + 3 <= 8);
+  const unsigned int shift = 8 - (d->opoff1 % 8 + 3);
+  const uint_fast8_t regno = (d->data[d->opoff1 / 8] >> shift) & 7;
+  const int prefixes = *d->prefixes;
+  const int is_16bit = (prefixes & has_data16) != 0;
+
+  size_t pos = *d->bufcntp;
+  if (pos + 5 > d->bufsize)
+    return pos + 5 - d->bufsize;
+
+  char *out = d->bufp;
+  out[pos++] = '%';
+#ifdef X86_64
+  if ((prefixes & has_rex_r) != 0 && !is_16bit)
+    {
+      pos += snprintf (out + pos, d->bufsize - pos, "r%d", 8 + regno);
+      if ((prefixes & has_rex_w) == 0)
+	out[pos++] = 'd';
+    }
+  else
+#endif
+    {
+      const size_t namelen = 3 - is_16bit;
+      memcpy (out + pos, dregs[regno] + is_16bit, namelen);
+#ifdef X86_64
+      if ((prefixes & has_rex_w) != 0 && !is_16bit)
+	out[pos] = 'r';
+#endif
+      pos += namelen;
+    }
+  *d->bufcntp = pos;
+  return 0;
+}
+
+
+#ifdef X86_64
+/* Register operand encoded in the opcode byte: rex.B extends the
+   register number while rex.R and rex.X are ignored.  Implemented by
+   temporarily moving the rex.B flag into the rex.R position, calling
+   FCT_reg and restoring the prefix flags afterwards.  */
+static int
+FCT_oreg (struct output_data *d)
+{
+  const int saved = *d->prefixes;
+  const int b_as_r = (saved & has_rex_b) << (idx_rex_r - idx_rex_b);
+
+  *d->prefixes = (saved & ~has_rex_r) | b_as_r;
+  const int result = FCT_reg (d);
+  *d->prefixes = saved;
+
+  return result;
+}
+#endif
+
+
+/* Print the register selected by the 3-bit field starting at bit
+   offset opoff1 % 8 of the byte at opoff1 / 8, taking the name from
+   aregs.  When X86_64 is defined and has_rex_r is set, "r<8+n>" is
+   printed instead, with a 'd' suffix unless has_rex_w is set.
+   Returns 0 on success, -1 if has_data16 is set, or the number of
+   buffer bytes that are missing.  */
+static int
+FCT_reg64 (struct output_data *d)
+{
+  assert (d->opoff1 % 8 + 3 <= 8);
+  const unsigned int shift = 8 - (d->opoff1 % 8 + 3);
+  const uint_fast8_t regno = (d->data[d->opoff1 / 8] >> shift) & 7;
+  const int prefixes = *d->prefixes;
+
+  if ((prefixes & has_data16) != 0)
+    return -1;
+
+  size_t pos = *d->bufcntp;
+  if (pos + 5 > d->bufsize)
+    return pos + 5 - d->bufsize;
+
+  char *out = d->bufp;
+  out[pos++] = '%';
+#ifdef X86_64
+  if ((prefixes & has_rex_r) != 0)
+    {
+      pos += snprintf (out + pos, d->bufsize - pos, "r%d", 8 + regno);
+      if ((prefixes & has_rex_w) == 0)
+	out[pos++] = 'd';
+    }
+  else
+#endif
+    {
+      memcpy (out + pos, aregs[regno], 3);
+      pos += 3;
+    }
+  *d->bufcntp = pos;
+  return 0;
+}
+
+
+/* Print a register operand for an instruction with a width bit.  If
+   the bit at bit offset opoff2 is set FCT_reg does the work.
+   Otherwise the 8-bit register selected by the 3-bit field starting
+   at bit offset opoff1 % 8 of the byte at opoff1 / 8 is printed:
+   one of "acdb" plus 'l' or 'h', or, when X86_64 is defined and
+   has_rex is set, "r<8+n>b" (has_rex_r) or a rex_8bit stem plus 'l'.
+   Returns 0 on success or the number of buffer bytes that are
+   missing.  */
+static int
+FCT_reg$w (struct output_data *d)
+{
+  if (d->data[d->opoff2 / 8] & (1 << (7 - (d->opoff2 & 7))))
+    return FCT_reg (d);
+
+  uint_fast8_t byte = d->data[d->opoff1 / 8];
+  assert (d->opoff1 % 8 + 3 <= 8);
+  byte >>= 8 - (d->opoff1 % 8 + 3);
+  byte &= 7;
+
+  size_t *bufcntp = d->bufcntp;
+  /* The longest output is "%r15b" (five characters) and snprintf also
+     stores a terminating NUL.  With less room the text is truncated
+     while the unchecked return value still advances *bufcntp, which
+     could then exceed d->bufsize.  */
+  if (*bufcntp + 6 > d->bufsize)
+    return *bufcntp + 6 - d->bufsize;
+
+  d->bufp[(*bufcntp)++] = '%';
+
+#ifdef X86_64
+  if (*d->prefixes & has_rex)
+    {
+      if (*d->prefixes & has_rex_r)
+	*bufcntp += snprintf (d->bufp + *bufcntp, d->bufsize - *bufcntp,
+			      "r%db", 8 + byte);
+      else
+	{
+	  char* cp = stpcpy (d->bufp + *bufcntp, rex_8bit[byte]);
+	  *cp++ = 'l';
+	  *bufcntp = cp - d->bufp;
+	}
+    }
+  else
+#endif
+    {
+      d->bufp[(*bufcntp)++] = "acdb"[byte & 3];
+      d->bufp[(*bufcntp)++] = "lh"[byte >> 2];
+    }
+  return 0;
+}
+
+
+#ifdef X86_64
+/* Register operand encoded in the opcode byte, for instructions with
+   a width bit: rex.B extends the register number while rex.R and
+   rex.X are ignored.  Implemented by temporarily moving the rex.B
+   flag into the rex.R position, calling FCT_reg$w and restoring the
+   prefix flags afterwards.  */
+static int
+FCT_oreg$w (struct output_data *d)
+{
+  const int saved = *d->prefixes;
+  const int b_as_r = (saved & has_rex_b) << (idx_rex_r - idx_rex_b);
+
+  *d->prefixes = (saved & ~has_rex_r) | b_as_r;
+  const int result = FCT_reg$w (d);
+  *d->prefixes = saved;
+
+  return result;
+}
+#endif
+
+
+/* Print "%st(<n>)", where N is the low three bits of the second
+   instruction byte (d->data[1]).  Returns 0 on success, -1 if
+   snprintf fails, or the positive number of additional buffer bytes
+   required.  */
+static int
+FCT_freg (struct output_data *d)
+{
+  assert (d->opoff1 / 8 == 1);
+  assert (d->opoff1 % 8 == 5);
+  size_t *bufcntp = d->bufcntp;
+  size_t avail = d->bufsize - *bufcntp;
+  int needed = snprintf (&d->bufp[*bufcntp], avail, "%%st(%" PRIx32 ")",
+			 (uint32_t) (d->data[1] & 7));
+  if (needed < 0)
+    return -1;
+  /* snprintf stores at most AVAIL - 1 characters followed by a NUL, so
+     a result equal to AVAIL also means the text was truncated.  */
+  if ((size_t) needed >= avail)
+    return (size_t) needed - avail + 1;
+  *bufcntp += needed;
+  return 0;
+}
+
+
+#ifndef X86_64
+/* Print a register operand that is always 16 bits wide: has_data16 is
+   set in the prefix flags (and left set) before FCT_reg is called.
+   Returns -1 if has_data16 was already set, otherwise whatever
+   FCT_reg returns.  */
+static int
+FCT_reg16 (struct output_data *d)
+{
+  int *flags = d->prefixes;
+
+  if ((*flags & has_data16) != 0)
+    return -1;
+
+  *flags |= has_data16;
+  return FCT_reg (d);
+}
+#endif
+
+
+/* Print the 16-bit value stored at d->data[5] as "$0x....".  Two
+   bytes are consumed from *d->param_start.  Returns 0 on success, -1
+   if those bytes would run past d->end or snprintf fails, or the
+   positive number of additional buffer bytes required.  */
+static int
+FCT_sel (struct output_data *d)
+{
+  assert (d->opoff1 % 8 == 0);
+  assert (d->opoff1 / 8 == 5);
+  if (*d->param_start + 2 > d->end)
+    return -1;
+  *d->param_start += 2;
+  uint16_t absval = read_2ubyte_unaligned (&d->data[5]);
+
+  size_t *bufcntp = d->bufcntp;
+  size_t avail = d->bufsize - *bufcntp;
+  int needed = snprintf (&d->bufp[*bufcntp], avail, "$0x%" PRIx16, absval);
+  if (needed < 0)
+    return -1;
+  /* snprintf stores at most AVAIL - 1 characters followed by a NUL, so
+     a result equal to AVAIL also means the text was truncated.  */
+  if ((size_t) needed >= avail)
+    return (size_t) needed - avail + 1;
+  *bufcntp += needed;
+  return 0;
+}
+
+
+/* Print the register named by a 2-bit field starting at bit offset
+   opoff1 % 8 of the byte at opoff1 / 8: '%', one of "ecsd", 's'.
+   Returns 0 on success or the number of buffer bytes that are
+   missing.  */
+static int
+FCT_sreg2 (struct output_data *d)
+{
+  assert (d->opoff1 % 8 + 3 <= 8);
+  const unsigned int shift = 8 - (d->opoff1 % 8 + 2);
+  const unsigned int segno = (d->data[d->opoff1 / 8] >> shift) & 3;
+
+  const size_t pos = *d->bufcntp;
+  if (pos + 3 > d->bufsize)
+    return pos + 3 - d->bufsize;
+
+  char *out = d->bufp + pos;
+  out[0] = '%';
+  out[1] = "ecsd"[segno];
+  out[2] = 's';
+  *d->bufcntp = pos + 3;
+
+  return 0;
+}
+
+
+/* Print the register named by a 3-bit field starting at bit offset
+   opoff1 % 8 of the byte at opoff1 / 8: '%', one of "ecsdfg", 's'.
+   Returns 0 on success, -1 for the field values 6 and 7, or the
+   number of buffer bytes that are missing.  */
+static int
+FCT_sreg3 (struct output_data *d)
+{
+  assert (d->opoff1 % 8 + 4 <= 8);
+  const unsigned int shift = 8 - (d->opoff1 % 8 + 3);
+  const unsigned int segno = (d->data[d->opoff1 / 8] >> shift) & 7;
+
+  if (segno >= 6)
+    return -1;
+
+  const size_t pos = *d->bufcntp;
+  if (pos + 3 > d->bufsize)
+    return pos + 3 - d->bufsize;
+
+  char *out = d->bufp + pos;
+  out[0] = '%';
+  out[1] = "ecsdfg"[segno];
+  out[2] = 's';
+  *d->bufcntp = pos + 3;
+
+  return 0;
+}
+
+/* Operand handler that emits nothing and always reports success.  */
+static int
+FCT_string (struct output_data *d __attribute__ ((unused)))
+{
+  return 0;
+}
+
+
+/* Print "%xmm<n>", where N is the 3-bit field starting at bit offset
+   opoff1 % 8 (which must be 2 or 5) of the byte at opoff1 / 8.
+   Returns 0 on success, -1 if snprintf fails, or the positive number
+   of additional buffer bytes required.  */
+static int
+FCT_xmmreg (struct output_data *d)
+{
+  uint_fast8_t byte = d->data[d->opoff1 / 8];
+  assert (d->opoff1 % 8 == 2 || d->opoff1 % 8 == 5);
+  byte = (byte >> (5 - d->opoff1 % 8)) & 7;
+
+  size_t *bufcntp = d->bufcntp;
+  size_t avail = d->bufsize - *bufcntp;
+  int needed = snprintf (&d->bufp[*bufcntp], avail, "%%xmm%" PRIxFAST8, byte);
+  if (needed < 0)
+    return -1;
+  /* snprintf stores at most AVAIL - 1 characters followed by a NUL, so
+     a result equal to AVAIL also means the text was truncated.  */
+  if ((size_t) needed >= avail)
+    return (size_t) needed - avail + 1;
+  *bufcntp += needed;
+  return 0;
+}
diff --git a/libcpu/i386_disasm.c b/libcpu/i386_disasm.c
new file mode 100644
index 0000000..a7e03f9
--- /dev/null
+++ b/libcpu/i386_disasm.c
@@ -0,0 +1,1149 @@
+/* Disassembler for x86.
+   Copyright (C) 2007, 2008, 2009, 2011 Red Hat, Inc.
+   This file is part of elfutils.
+   Written by Ulrich Drepper <drepper@redhat.com>, 2007.
+
+   This file is free software; you can redistribute it and/or modify
+   it under the terms of either
+
+     * the GNU Lesser General Public License as published by the Free
+       Software Foundation; either version 3 of the License, or (at
+       your option) any later version
+
+   or
+
+     * the GNU General Public License as published by the Free
+       Software Foundation; either version 2 of the License, or (at
+       your option) any later version
+
+   or both in parallel, as here.
+
+   elfutils is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received copies of the GNU General Public License and
+   the GNU Lesser General Public License along with this program.  If
+   not, see <http://www.gnu.org/licenses/>.  */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <assert.h>
+#include <config.h>
+#include <ctype.h>
+#include <endian.h>
+#include <errno.h>
+#include <gelf.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../libebl/libeblP.h"
+
+#define MACHINE_ENCODING __LITTLE_ENDIAN
+#include "memory-access.h"
+
+
+#ifndef MNEFILE
+# define MNEFILE "i386.mnemonics"
+#endif
+/* All mnemonic strings, stored back to back in one object.  MNEFILE
+   is included twice with different definitions of MNE: first to
+   declare one char array per entry, named str<line> after the line
+   the entry is on (MNESTRFIELD needs two levels so that __LINE__ is
+   expanded before pasting), then to provide the initializers.  The
+   STR member overlays the struct for access by byte offset.  */
+#define MNESTRFIELD(line) MNESTRFIELD1 (line)
+#define MNESTRFIELD1(line) str##line
+static const union mnestr_t
+{
+  struct
+  {
+#define MNE(name) char MNESTRFIELD (__LINE__)[sizeof (#name)];
+#include MNEFILE
+#undef MNE
+  };
+  char str[0];
+} mnestr =
+  {
+    {
+#define MNE(name) #name,
+#include MNEFILE
+#undef MNE
+    }
+  };
+
+/* One MNE_<name> constant per entry of MNEFILE, in file order,
+   followed by MNE_INVALID.  The index can be stored in the
+   instrtab.  */
+enum
+  {
+#define MNE(name) MNE_##name,
+#include MNEFILE
+#undef MNE
+    MNE_INVALID
+  };
+/* Byte offset of each mnemonic's string within mnestr, indexed by
+   its MNE_<name> constant.  */
+static const unsigned short int mneidx[] =
+  {
+#define MNE(name) \
+  [MNE_##name] = offsetof (union mnestr_t, MNESTRFIELD (__LINE__)),
+#include MNEFILE
+#undef MNE
+  };
+
+/* Bit numbers of the flags kept in the prefix word.  The four lowest
+   are in the order of the low four bits of a REX byte, which
+   i386_disasm ORs into the word unchanged.  */
+enum
+  {
+    idx_rex_b = 0,
+    idx_rex_x,
+    idx_rex_r,
+    idx_rex_w,
+    idx_rex,
+    idx_cs,
+    idx_ds,
+    idx_es,
+    idx_fs,
+    idx_gs,
+    idx_ss,
+    idx_data16,
+    idx_addr16,
+    idx_rep,
+    idx_repne,
+    idx_lock
+  };
+/* Bit masks for the prefix word: has_<name> is 1 << idx_<name>.  Use
+   these, not the idx_* bit numbers, when testing the word.
+   SEGMENT_PREFIXES is the union of the six segment masks.  */
+enum
+  {
+#define prefbit(pref) has_##pref = 1 << idx_##pref
+    prefbit (rex_b),
+    prefbit (rex_x),
+    prefbit (rex_r),
+    prefbit (rex_w),
+    prefbit (rex),
+    prefbit (cs),
+    prefbit (ds),
+    prefbit (es),
+    prefbit (fs),
+    prefbit (gs),
+    prefbit (ss),
+    prefbit (data16),
+    prefbit (addr16),
+    prefbit (rep),
+    prefbit (repne),
+    prefbit (lock)
+#undef prefbit
+  };
+#define SEGMENT_PREFIXES \
+  (has_cs | has_ds | has_es | has_fs | has_gs | has_ss)
+/* Byte values of the instruction prefixes recognized by the decoder.  */
+#define prefix_cs	0x2e
+#define prefix_ds	0x3e
+#define prefix_es	0x26
+#define prefix_fs	0x64
+#define prefix_gs	0x65
+#define prefix_ss	0x36
+#define prefix_data16	0x66
+#define prefix_addr16	0x67
+#define prefix_rep	0xf3
+#define prefix_repne	0xf2
+#define prefix_lock	0xf0
+
+/* Prefix byte for each flag bit number.  The slots below idx_cs (the
+   REX flags) have no initializer; i386_disasm starts its search at
+   idx_cs and stops at nknown_prefixes.  */
+static const uint8_t known_prefixes[] =
+  {
+#define newpref(pref) [idx_##pref] = prefix_##pref
+    newpref (cs),
+    newpref (ds),
+    newpref (es),
+    newpref (fs),
+    newpref (gs),
+    newpref (ss),
+    newpref (data16),
+    newpref (addr16),
+    newpref (rep),
+    newpref (repne),
+    newpref (lock)
+#undef newpref
+  };
+#define nknown_prefixes (sizeof (known_prefixes) / sizeof (known_prefixes[0]))
+
+/* Compiled out by #if 0: a name string for each prefix bit number.  */
+#if 0
+static const char *prefix_str[] =
+  {
+#define newpref(pref) [idx_##pref] = #pref
+    newpref (cs),
+    newpref (ds),
+    newpref (es),
+    newpref (fs),
+    newpref (gs),
+    newpref (ss),
+    newpref (data16),
+    newpref (addr16),
+    newpref (rep),
+    newpref (repne),
+    newpref (lock)
+#undef newpref
+  };
+#endif
+
+/* The 3DNow! mnemonics as NUL-separated strings in one array.  Each
+   MNE_3DNOW_* value is the offset of its name plus one ("pavgusb"
+   starts at offset 0 but MNE_3DNOW_PAVGUSB is 1) -- presumably so
+   that 0 can mark unused entries of amd3dnow; verify at the use
+   site.  */
+static const char amd3dnowstr[] =
+#define MNE_3DNOW_PAVGUSB 1
+  "pavgusb\0"
+#define MNE_3DNOW_PFADD (MNE_3DNOW_PAVGUSB + 8)
+  "pfadd\0"
+#define MNE_3DNOW_PFSUB (MNE_3DNOW_PFADD + 6)
+  "pfsub\0"
+#define MNE_3DNOW_PFSUBR (MNE_3DNOW_PFSUB + 6)
+  "pfsubr\0"
+#define MNE_3DNOW_PFACC (MNE_3DNOW_PFSUBR + 7)
+  "pfacc\0"
+#define MNE_3DNOW_PFCMPGE (MNE_3DNOW_PFACC + 6)
+  "pfcmpge\0"
+#define MNE_3DNOW_PFCMPGT (MNE_3DNOW_PFCMPGE + 8)
+  "pfcmpgt\0"
+#define MNE_3DNOW_PFCMPEQ (MNE_3DNOW_PFCMPGT + 8)
+  "pfcmpeq\0"
+#define MNE_3DNOW_PFMIN (MNE_3DNOW_PFCMPEQ + 8)
+  "pfmin\0"
+#define MNE_3DNOW_PFMAX (MNE_3DNOW_PFMIN + 6)
+  "pfmax\0"
+#define MNE_3DNOW_PI2FD (MNE_3DNOW_PFMAX + 6)
+  "pi2fd\0"
+#define MNE_3DNOW_PF2ID (MNE_3DNOW_PI2FD + 6)
+  "pf2id\0"
+#define MNE_3DNOW_PFRCP (MNE_3DNOW_PF2ID + 6)
+  "pfrcp\0"
+#define MNE_3DNOW_PFRSQRT (MNE_3DNOW_PFRCP + 6)
+  "pfrsqrt\0"
+#define MNE_3DNOW_PFMUL (MNE_3DNOW_PFRSQRT + 8)
+  "pfmul\0"
+#define MNE_3DNOW_PFRCPIT1 (MNE_3DNOW_PFMUL + 6)
+  "pfrcpit1\0"
+#define MNE_3DNOW_PFRSQIT1 (MNE_3DNOW_PFRCPIT1 + 9)
+  "pfrsqit1\0"
+#define MNE_3DNOW_PFRCPIT2 (MNE_3DNOW_PFRSQIT1 + 9)
+  "pfrcpit2\0"
+#define MNE_3DNOW_PMULHRW (MNE_3DNOW_PFRCPIT2 + 9)
+  "pmulhrw";
+/* Table from a byte value VAL (index AMD3DNOW_IDX (VAL), i.e. VAL
+   minus AMD3DNOW_LOW_IDX) to the MNE_3DNOW_* value of its mnemonic.
+   Entries without an initializer are 0.  AMD3DNOW_HIGH_IDX is the
+   largest VAL the table covers.  */
+#define AMD3DNOW_LOW_IDX 0x0d
+#define AMD3DNOW_HIGH_IDX (sizeof (amd3dnow) + AMD3DNOW_LOW_IDX - 1)
+#define AMD3DNOW_IDX(val) ((val) - AMD3DNOW_LOW_IDX)
+static const unsigned char amd3dnow[] =
+  {
+    [AMD3DNOW_IDX (0xbf)] = MNE_3DNOW_PAVGUSB,
+    [AMD3DNOW_IDX (0x9e)] = MNE_3DNOW_PFADD,
+    [AMD3DNOW_IDX (0x9a)] = MNE_3DNOW_PFSUB,
+    [AMD3DNOW_IDX (0xaa)] = MNE_3DNOW_PFSUBR,
+    [AMD3DNOW_IDX (0xae)] = MNE_3DNOW_PFACC,
+    [AMD3DNOW_IDX (0x90)] = MNE_3DNOW_PFCMPGE,
+    [AMD3DNOW_IDX (0xa0)] = MNE_3DNOW_PFCMPGT,
+    [AMD3DNOW_IDX (0xb0)] = MNE_3DNOW_PFCMPEQ,
+    [AMD3DNOW_IDX (0x94)] = MNE_3DNOW_PFMIN,
+    [AMD3DNOW_IDX (0xa4)] = MNE_3DNOW_PFMAX,
+    [AMD3DNOW_IDX (0x0d)] = MNE_3DNOW_PI2FD,
+    [AMD3DNOW_IDX (0x1d)] = MNE_3DNOW_PF2ID,
+    [AMD3DNOW_IDX (0x96)] = MNE_3DNOW_PFRCP,
+    [AMD3DNOW_IDX (0x97)] = MNE_3DNOW_PFRSQRT,
+    [AMD3DNOW_IDX (0xb4)] = MNE_3DNOW_PFMUL,
+    [AMD3DNOW_IDX (0xa6)] = MNE_3DNOW_PFRCPIT1,
+    [AMD3DNOW_IDX (0xa7)] = MNE_3DNOW_PFRSQIT1,
+    [AMD3DNOW_IDX (0xb6)] = MNE_3DNOW_PFRCPIT2,
+    [AMD3DNOW_IDX (0xb7)] = MNE_3DNOW_PMULHRW
+  };
+
+/* Per-instruction state that i386_disasm hands to the operand
+   handlers pulled in through DISFILE.  */
+struct output_data
+{
+  GElf_Addr addr;		/* Address of the byte DATA points to.  */
+  int *prefixes;		/* Prefix flag word (has_* bits).  */
+  size_t opoff1;		/* Bit offsets into DATA: handlers use  */
+  size_t opoff2;		/* off / 8 as the byte index and off % 8  */
+  size_t opoff3;		/* as the bit position within that byte.  */
+  char *bufp;			/* Output buffer.  */
+  size_t *bufcntp;		/* Number of bytes of BUFP in use.  */
+  size_t bufsize;		/* Capacity of BUFP.  */
+  const uint8_t *data;		/* Instruction bytes after the prefixes.  */
+  const uint8_t **param_start;	/* Next unread operand byte.  */
+  const uint8_t *end;		/* End of the input.  */
+  char *labelbuf;
+  size_t labelbufsize;
+  enum
+    {
+      addr_none = 0,
+      addr_abs_symbolic,
+      addr_abs_always,
+      addr_rel_symbolic,
+      addr_rel_always
+    } symaddr_use;
+  GElf_Addr symaddr;
+};
+
+
+#ifndef DISFILE
+# define DISFILE "i386_dis.h"
+#endif
+#include DISFILE
+
+/* Helpers that append to BUF at position BUFCNT.  They expect BUF,
+   BUFCNT and BUFSIZE to be in scope and jump to the label enomem
+   when the data does not fit.  */
+#define ADD_CHAR(ch) \
+  do {									      \
+    if (unlikely (bufcnt == bufsize))					      \
+      goto enomem;							      \
+    buf[bufcnt++] = (ch);						      \
+  } while (0)
+
+#define ADD_STRING(str) \
+  do {									      \
+    const char *_str0 = (str);						      \
+    size_t _len0 = strlen (_str0);					      \
+    ADD_NSTRING (_str0, _len0);						      \
+  } while (0)
+
+#define ADD_NSTRING(str, len) \
+  do {									      \
+    const char *_str = (str);						      \
+    size_t _len = (len);						      \
+    if (unlikely (bufcnt + _len > bufsize))				      \
+      goto enomem;							      \
+    memcpy (buf + bufcnt, _str, _len);					      \
+    bufcnt += _len;							      \
+  } while (0)
+
+
+int
+i386_disasm (Ebl *ebl __attribute__((unused)),
+	     const uint8_t **startp, const uint8_t *end, GElf_Addr addr,
+	     const char *fmt, DisasmOutputCB_t outcb, DisasmGetSymCB_t symcb,
+	     void *outcbarg, void *symcbarg)
+{
+  const char *save_fmt = fmt;
+
+#define BUFSIZE 512
+  char initbuf[BUFSIZE];
+  int prefixes;
+  size_t bufcnt;
+  size_t bufsize = BUFSIZE;
+  char *buf = initbuf;
+  const uint8_t *param_start;
+
+  struct output_data output_data =
+    {
+      .prefixes = &prefixes,
+      .bufp = buf,
+      .bufsize = bufsize,
+      .bufcntp = &bufcnt,
+      .param_start = &param_start,
+      .end = end
+    };
+
+  int retval = 0;
+  while (1)
+    {
+      prefixes = 0;
+
+      const uint8_t *data = *startp;
+      const uint8_t *begin = data;
+
+      /* Recognize all prefixes.  */
+      int last_prefix_bit = 0;
+      while (data < end)
+	{
+	  unsigned int i;
+	  for (i = idx_cs; i < nknown_prefixes; ++i)
+	    if (known_prefixes[i] == *data)
+	      break;
+	  if (i == nknown_prefixes)
+	    break;
+
+	  prefixes |= last_prefix_bit = 1 << i;
+
+	  ++data;
+	}
+
+#ifdef X86_64
+      if (data < end && (*data & 0xf0) == 0x40)
+	prefixes |= ((*data++) & 0xf) | has_rex;
+#endif
+
+      bufcnt = 0;
+      size_t cnt = 0;
+
+      const uint8_t *curr = match_data;
+      const uint8_t *const match_end = match_data + sizeof (match_data);
+
+      assert (data <= end);
+      if (data == end)
+	{
+	  if (prefixes != 0)
+	    goto print_prefix;
+
+	  retval = -1;
+	  goto do_ret;
+	}
+
+    next_match:
+      while (curr < match_end)
+	{
+	  uint_fast8_t len = *curr++;
+	  uint_fast8_t clen = len >> 4;
+	  len &= 0xf;
+	  const uint8_t *next_curr = curr + clen + (len - clen) * 2;
+
+	  assert (len > 0);
+	  assert (curr + clen + 2 * (len - clen) <= match_end);
+
+	  const uint8_t *codep = data;
+	  int correct_prefix = 0;
+	  int opoff = 0;
+
+	  if (data > begin && codep[-1] == *curr && clen > 0)
+	    {
+	      /* We match a prefix byte.  This is exactly one byte and
+		 is matched exactly, without a mask.  */
+	      --len;
+	      --clen;
+	      opoff = 8;
+
+	      ++curr;
+
+	      assert (last_prefix_bit != 0);
+	      correct_prefix = last_prefix_bit;
+	    }
+
+	  size_t avail = len;
+	  while (clen > 0)
+	    {
+	      if (*codep++ != *curr++)
+		goto not;
+	      --avail;
+	      --clen;
+	      if (codep == end && avail > 0)
+		goto do_ret;
+	    }
+
+	  while (avail > 0)
+	    {
+	      uint_fast8_t masked = *codep++ & *curr++;
+	      if (masked != *curr++)
+		{
+		not:
+		  curr = next_curr;
+		  ++cnt;
+		  bufcnt = 0;
+		  goto next_match;
+		}
+
+	      --avail;
+	      if (codep == end && avail > 0)
+		goto do_ret;
+	    }
+
+	  if (len > end - data)
+	    /* There is not enough data for the entire instruction.  The
+	       caller can figure this out by looking at the pointer into
+	       the input data.  */
+	    goto do_ret;
+
+	  assert (correct_prefix == 0
+		  || (prefixes & correct_prefix) != 0);
+	  prefixes ^= correct_prefix;
+
+	  if (0)
+	    {
+	      /* Resize the buffer.  */
+	      char *oldbuf;
+	    enomem:
+	      oldbuf = buf;
+	      if (buf == initbuf)
+		buf = malloc (2 * bufsize);
+	      else
+		buf = realloc (buf, 2 * bufsize);
+	      if (buf == NULL)
+		{
+		  buf = oldbuf;
+		  retval = ENOMEM;
+		  goto do_ret;
+		}
+	      bufsize *= 2;
+
+	      output_data.bufp = buf;
+	      output_data.bufsize = bufsize;
+	      bufcnt = 0;
+
+	      if (data == end)
+		{
+		  assert (prefixes != 0);
+		  goto print_prefix;
+		}
+
+	      /* gcc is not clever enough to see the following variables
+		 are not used uninitialized.  */
+	      asm (""
+		   : "=mr" (opoff), "=mr" (correct_prefix), "=mr" (codep),
+		     "=mr" (next_curr), "=mr" (len));
+	    }
+
+	  size_t prefix_size = 0;
+
+	  // XXXonly print as prefix if valid?
+	  if ((prefixes & has_lock) != 0)
+	    {
+	      ADD_STRING ("lock ");
+	      prefix_size += 5;
+	    }
+
+	  if (instrtab[cnt].rep)
+	    {
+	      if ((prefixes & has_rep) !=  0)
+		{
+		  ADD_STRING ("rep ");
+		  prefix_size += 4;
+		}
+	    }
+	  else if (instrtab[cnt].repe
+		   && (prefixes & (has_rep | has_repne)) != 0)
+	    {
+	      if ((prefixes & has_repne) != 0)
+		{
+		  ADD_STRING ("repne ");
+		  prefix_size += 6;
+		}
+	      else if ((prefixes & has_rep) != 0)
+		{
+		  ADD_STRING ("repe ");
+		  prefix_size += 5;
+		}
+	    }
+	  else if ((prefixes & (has_rep | has_repne)) != 0)
+	    {
+	      uint_fast8_t byte;
+	    print_prefix:
+	      bufcnt = 0;
+	      byte = *begin;
+	      /* This is a prefix byte.  Print it.  */
+	      switch (byte)
+		{
+		case prefix_rep:
+		  ADD_STRING ("rep");
+		  break;
+		case prefix_repne:
+		  ADD_STRING ("repne");
+		  break;
+		case prefix_cs:
+		  ADD_STRING ("cs");
+		  break;
+		case prefix_ds:
+		  ADD_STRING ("ds");
+		  break;
+		case prefix_es:
+		  ADD_STRING ("es");
+		  break;
+		case prefix_fs:
+		  ADD_STRING ("fs");
+		  break;
+		case prefix_gs:
+		  ADD_STRING ("gs");
+		  break;
+		case prefix_ss:
+		  ADD_STRING ("ss");
+		  break;
+		case prefix_data16:
+		  ADD_STRING ("data16");
+		  break;
+		case prefix_addr16:
+		  ADD_STRING ("addr16");
+		  break;
+		case prefix_lock:
+		  ADD_STRING ("lock");
+		  break;
+#ifdef X86_64
+		case 0x40 ... 0x4f:
+		  ADD_STRING ("rex");
+		  if (byte != 0x40)
+		    {
+		      ADD_CHAR ('.');
+		      if (byte & 0x8)
+			ADD_CHAR ('w');
+		      if (byte & 0x4)
+			ADD_CHAR ('r');
+		      if (byte & 0x3)
+			ADD_CHAR ('x');
+		      if (byte & 0x1)
+			ADD_CHAR ('b');
+		    }
+		  break;
+#endif
+		default:
+		  /* Cannot happen.  */
+		  puts ("unknown prefix");
+		  abort ();
+		}
+	      data = begin + 1;
+	      ++addr;
+
+	      goto out;
+	    }
+
+	  /* We have a match.  First determine how many bytes are
+	     needed for the adressing mode.  */
+	  param_start = codep;
+	  if (instrtab[cnt].modrm)
+	    {
+	      uint_fast8_t modrm = codep[-1];
+
+#ifndef X86_64
+	      if (likely ((prefixes & has_addr16) != 0))
+		{
+		  /* Account for displacement.  */
+		  if ((modrm & 0xc7) == 6 || (modrm & 0xc0) == 0x80)
+		    param_start += 2;
+		  else if ((modrm & 0xc0) == 0x40)
+		    param_start += 1;
+		}
+	      else
+#endif
+		{
+		  /* Account for SIB.  */
+		  if ((modrm & 0xc0) != 0xc0 && (modrm & 0x7) == 0x4)
+		    param_start += 1;
+
+		  /* Account for displacement.  */
+		  if ((modrm & 0xc7) == 5 || (modrm & 0xc0) == 0x80
+		      || ((modrm & 0xc7) == 0x4 && (codep[0] & 0x7) == 0x5))
+		    param_start += 4;
+		  else if ((modrm & 0xc0) == 0x40)
+		    param_start += 1;
+		}
+
+	      if (unlikely (param_start > end))
+		goto not;
+	    }
+
+	  output_data.addr = addr + (data - begin);
+	  output_data.data = data;
+
+	  unsigned long string_end_idx = 0;
+	  fmt = save_fmt;
+	  const char *deferred_start = NULL;
+	  size_t deferred_len = 0;
+	  // XXX Can we get this from color.c?
+	  static const char color_off[] = "\e[0m";
+	  while (*fmt != '\0')
+	    {
+	      if (*fmt != '%')
+		{
+		  char ch = *fmt++;
+		  if (ch == '\\')
+		    {
+		      switch ((ch = *fmt++))
+			{
+			case '0' ... '7':
+			  {
+			    int val = ch - '0';
+			    ch = *fmt;
+			    if (ch >= '0' && ch <= '7')
+			      {
+				val *= 8;
+				val += ch - '0';
+				ch = *++fmt;
+				if (ch >= '0' && ch <= '7' && val < 32)
+				  {
+				    val *= 8;
+				    val += ch - '0';
+				    ++fmt;
+				  }
+			      }
+			    ch = val;
+			  }
+			  break;
+
+			case 'n':
+			  ch = '\n';
+			  break;
+
+			case 't':
+			  ch = '\t';
+			  break;
+
+			default:
+			  retval = EINVAL;
+			  goto do_ret;
+			}
+		    }
+		  else if (ch == '\e' && *fmt == '[')
+		    {
+		      deferred_start = fmt - 1;
+		      do
+			++fmt;
+		      while (*fmt != 'm' && *fmt != '\0');
+
+		      if (*fmt == 'm')
+			{
+			  deferred_len = ++fmt - deferred_start;
+			  continue;
+			}
+
+		      fmt = deferred_start + 1;
+		      deferred_start = NULL;
+		    }
+		  ADD_CHAR (ch);
+		  continue;
+		}
+	      ++fmt;
+
+	      int width = 0;
+	      while (isdigit (*fmt))
+		width = width * 10 + (*fmt++ - '0');
+
+	      int prec = 0;
+	      if (*fmt == '.')
+		while (isdigit (*++fmt))
+		  prec = prec * 10 + (*fmt - '0');
+
+	      size_t start_idx = bufcnt;
+	      size_t non_printing = 0;
+	      switch (*fmt++)
+		{
+		  char mnebuf[16];
+		  const char *str;
+
+		case 'm':
+		  /* Mnemonic.  */
+
+		  if (unlikely (instrtab[cnt].mnemonic == MNE_INVALID))
+		    {
+		      switch (*data)
+			{
+#ifdef X86_64
+			case 0x90:
+			  if (prefixes & has_rex_b)
+			    goto not;
+			  str = "nop";
+			  break;
+#endif
+
+			case 0x98:
+#ifdef X86_64
+			  if (prefixes == (has_rex_w | has_rex))
+			    {
+			      str = "cltq";
+			      break;
+			    }
+#endif
+			  if (prefixes & ~has_data16)
+			    goto print_prefix;
+			  str = prefixes & has_data16 ? "cbtw" : "cwtl";
+			  break;
+
+			case 0x99:
+#ifdef X86_64
+			  if (prefixes == (has_rex_w | has_rex))
+			    {
+			      str = "cqto";
+			      break;
+			    }
+#endif
+			  if (prefixes & ~has_data16)
+			    goto print_prefix;
+			  str = prefixes & has_data16 ? "cwtd" : "cltd";
+			  break;
+
+			case 0xe3:
+			  if (prefixes & ~has_addr16)
+			    goto print_prefix;
+#ifdef X86_64
+			  str = prefixes & has_addr16 ? "jecxz" : "jrcxz";
+#else
+			  str = prefixes & has_addr16 ? "jcxz" : "jecxz";
+#endif
+			  break;
+
+			case 0x0f:
+			  if (data[1] == 0x0f)
+			    {
+			      /* AMD 3DNOW.  We need one more byte.  */
+			      if (param_start >= end)
+				goto not;
+			      if (*param_start < AMD3DNOW_LOW_IDX
+				  || *param_start > AMD3DNOW_HIGH_IDX)
+				goto not;
+			      unsigned int idx
+				= amd3dnow[AMD3DNOW_IDX (*param_start)];
+			      if (idx == 0)
+				goto not;
+			      str = amd3dnowstr + idx - 1;
+			      /* Eat the immediate byte indicating the
+				 operation.  */
+			      ++param_start;
+			      break;
+			    }
+#ifdef X86_64
+			  if (data[1] == 0xc7)
+			    {
+			      str = ((prefixes & has_rex_w)
+				     ? "cmpxchg16b" : "cmpxchg8b");
+			      break;
+			    }
+#endif
+			  if (data[1] == 0xc2)
+			    {
+			      if (param_start >= end)
+				goto not;
+			      if (*param_start > 7)
+				goto not;
+			      static const char cmpops[][9] =
+				{
+				  [0] = "cmpeq",
+				  [1] = "cmplt",
+				  [2] = "cmple",
+				  [3] = "cmpunord",
+				  [4] = "cmpneq",
+				  [5] = "cmpnlt",
+				  [6] = "cmpnle",
+				  [7] = "cmpord"
+				};
+			      char *cp = stpcpy (mnebuf, cmpops[*param_start]);
+			      if (correct_prefix & (has_rep | has_repne))
+				*cp++ = 's';
+			      else
+				*cp++ = 'p';
+			      if (correct_prefix & (has_data16 | has_repne))
+				*cp++ = 'd';
+			      else
+				*cp++ = 's';
+			      *cp = '\0';
+			      str = mnebuf;
+			      /* Eat the immediate byte indicating the
+				 operation.  */
+			      ++param_start;
+			      break;
+			    }
+			  FALLTHROUGH;
+			default:
+			  assert (! "INVALID not handled");
+			}
+		    }
+		  else
+		    str = mnestr.str + mneidx[instrtab[cnt].mnemonic];
+
+		  if (deferred_start != NULL)
+		    {
+		      ADD_NSTRING (deferred_start, deferred_len);
+		      non_printing += deferred_len;
+		    }
+
+		  ADD_STRING (str);
+
+		  switch (instrtab[cnt].suffix)
+		    {
+		    case suffix_none:
+		      break;
+
+		    case suffix_w:
+		      if ((codep[-1] & 0xc0) != 0xc0)
+			{
+			  char ch;
+
+			  if (data[0] & 1)
+			    {
+			      if (prefixes & has_data16)
+				ch = 'w';
+#ifdef X86_64
+			      else if (prefixes & has_rex_w)
+				ch = 'q';
+#endif
+			      else
+				ch = 'l';
+			    }
+			  else
+			    ch = 'b';
+
+			  ADD_CHAR (ch);
+			}
+		      break;
+
+		    case suffix_w0:
+		      if ((codep[-1] & 0xc0) != 0xc0)
+			ADD_CHAR ('l');
+		      break;
+
+		    case suffix_w1:
+		      if ((data[0] & 0x4) == 0)
+			ADD_CHAR ('l');
+		      break;
+
+		    case suffix_W:
+		      if (prefixes & has_data16)
+			{
+			  ADD_CHAR ('w');
+			  prefixes &= ~has_data16;
+			}
+#ifdef X86_64
+		      else
+			ADD_CHAR ('q');
+#endif
+		      break;
+
+		    case suffix_W1:
+		      if (prefixes & has_data16)
+			{
+			  ADD_CHAR ('w');
+			  prefixes &= ~has_data16;
+			}
+#ifdef X86_64
+		      else if (prefixes & has_rex_w)
+			ADD_CHAR ('q');
+#endif
+		      break;
+
+		    case suffix_tttn:;
+		      static const char tttn[16][3] =
+			{
+			  "o", "no", "b", "ae", "e", "ne", "be", "a",
+			  "s", "ns", "p", "np", "l", "ge", "le", "g"
+			};
+		      ADD_STRING (tttn[codep[-1 - instrtab[cnt].modrm] & 0x0f]);
+		      break;
+
+		    case suffix_D:
+		      if ((codep[-1] & 0xc0) != 0xc0)
+			ADD_CHAR ((data[0] & 0x04) == 0 ? 's' : 'l');
+		      break;
+
+		    default:
+		      printf("unknown suffix %d\n", instrtab[cnt].suffix);
+		      abort ();
+		    }
+
+		  if (deferred_start != NULL)
+		    {
+		      ADD_STRING (color_off);
+		      non_printing += strlen (color_off);
+		    }
+
+		  string_end_idx = bufcnt;
+		  break;
+
+		case 'o':
+		  if (prec == 1 && instrtab[cnt].fct1 != 0)
+		    {
+		      /* First parameter.  */
+		      if (deferred_start != NULL)
+			{
+			  ADD_NSTRING (deferred_start, deferred_len);
+			  non_printing += deferred_len;
+			}
+
+		      if (instrtab[cnt].str1 != 0)
+			ADD_STRING (op1_str
+				    + op1_str_idx[instrtab[cnt].str1 - 1]);
+
+		      output_data.opoff1 = (instrtab[cnt].off1_1
+					    + OFF1_1_BIAS - opoff);
+		      output_data.opoff2 = (instrtab[cnt].off1_2
+					    + OFF1_2_BIAS - opoff);
+		      output_data.opoff3 = (instrtab[cnt].off1_3
+					    + OFF1_3_BIAS - opoff);
+		      int r = op1_fct[instrtab[cnt].fct1] (&output_data);
+		      if (r < 0)
+			goto not;
+		      if (r > 0)
+			goto enomem;
+
+		      if (deferred_start != NULL)
+			{
+			  ADD_STRING (color_off);
+			  non_printing += strlen (color_off);
+			}
+
+		      string_end_idx = bufcnt;
+		    }
+		  else if (prec == 2 && instrtab[cnt].fct2 != 0)
+		    {
+		      /* Second parameter.  */
+		      if (deferred_start != NULL)
+			{
+			  ADD_NSTRING (deferred_start, deferred_len);
+			  non_printing += deferred_len;
+			}
+
+		      if (instrtab[cnt].str2 != 0)
+			ADD_STRING (op2_str
+				    + op2_str_idx[instrtab[cnt].str2 - 1]);
+
+		      output_data.opoff1 = (instrtab[cnt].off2_1
+					    + OFF2_1_BIAS - opoff);
+		      output_data.opoff2 = (instrtab[cnt].off2_2
+					    + OFF2_2_BIAS - opoff);
+		      output_data.opoff3 = (instrtab[cnt].off2_3
+					    + OFF2_3_BIAS - opoff);
+		      int r = op2_fct[instrtab[cnt].fct2] (&output_data);
+		      if (r < 0)
+			goto not;
+		      if (r > 0)
+			goto enomem;
+
+		      if (deferred_start != NULL)
+			{
+			  ADD_STRING (color_off);
+			  non_printing += strlen (color_off);
+			}
+
+		      string_end_idx = bufcnt;
+		    }
+		  else if (prec == 3 && instrtab[cnt].fct3 != 0)
+		    {
+		      /* Third parameter.  */
+		      if (deferred_start != NULL)
+			{
+			  ADD_NSTRING (deferred_start, deferred_len);
+			  non_printing += deferred_len;
+			}
+
+		      if (instrtab[cnt].str3 != 0)
+			ADD_STRING (op3_str
+				    + op3_str_idx[instrtab[cnt].str3 - 1]);
+
+		      output_data.opoff1 = (instrtab[cnt].off3_1
+					    + OFF3_1_BIAS - opoff);
+		      output_data.opoff2 = (instrtab[cnt].off3_2
+					    + OFF3_2_BIAS - opoff);
+#ifdef OFF3_3_BITS
+		      output_data.opoff3 = (instrtab[cnt].off3_3
+					    + OFF3_3_BIAS - opoff);
+#else
+		      output_data.opoff3 = 0;
+#endif
+		      int r = op3_fct[instrtab[cnt].fct3] (&output_data);
+		      if (r < 0)
+			goto not;
+		      if (r > 0)
+			goto enomem;
+
+		      if (deferred_start != NULL)
+			{
+			  ADD_STRING (color_off);
+			  non_printing += strlen (color_off);
+			}
+
+		      string_end_idx = bufcnt;
+		    }
+		  else
+		    bufcnt = string_end_idx;
+		  break;
+
+		case 'e':
+		  string_end_idx = bufcnt;
+		  break;
+
+		case 'a':
+		  /* Pad to requested column.  */
+		  while (bufcnt - non_printing < (size_t) width)
+		    ADD_CHAR (' ');
+		  width = 0;
+		  break;
+
+		case 'l':
+		  if (deferred_start != NULL)
+		    {
+		      ADD_NSTRING (deferred_start, deferred_len);
+		      non_printing += deferred_len;
+		    }
+
+		  if (output_data.labelbuf != NULL
+		      && output_data.labelbuf[0] != '\0')
+		    {
+		      ADD_STRING (output_data.labelbuf);
+		      output_data.labelbuf[0] = '\0';
+		      string_end_idx = bufcnt;
+		    }
+		  else if (output_data.symaddr_use != addr_none)
+		    {
+		      GElf_Addr symaddr = output_data.symaddr;
+		      if (output_data.symaddr_use >= addr_rel_symbolic)
+			symaddr += addr + param_start - begin;
+
+		      // XXX Lookup symbol based on symaddr
+		      const char *symstr = NULL;
+		      if (symcb != NULL
+			  && symcb (0 /* XXX */, 0 /* XXX */, symaddr,
+				    &output_data.labelbuf,
+				    &output_data.labelbufsize, symcbarg) == 0)
+			symstr = output_data.labelbuf;
+
+		      size_t bufavail = bufsize - bufcnt;
+		      int r = 0;
+		      if (symstr != NULL)
+			r = snprintf (&buf[bufcnt], bufavail, "# <%s>",
+				      symstr);
+		      else if (output_data.symaddr_use == addr_abs_always
+			       || output_data.symaddr_use == addr_rel_always)
+			r = snprintf (&buf[bufcnt], bufavail, "# %#" PRIx64,
+				      (uint64_t) symaddr);
+
+		      assert (r >= 0);
+		      if ((size_t) r >= bufavail)
+			goto enomem;
+		      bufcnt += r;
+		      string_end_idx = bufcnt;
+
+		      output_data.symaddr_use = addr_none;
+		    }
+		  if (deferred_start != NULL)
+		    {
+		      ADD_STRING (color_off);
+		      non_printing += strlen (color_off);
+		    }
+		  break;
+
+		default:
+		  abort ();
+		}
+
+	      deferred_start = NULL;
+
+	      /* Pad according to the specified width.  */
+	      while (bufcnt + prefix_size - non_printing < start_idx + width)
+		ADD_CHAR (' ');
+	      prefix_size = 0;
+	    }
+
+	  if ((prefixes & SEGMENT_PREFIXES) != 0)
+	    goto print_prefix;
+
+	  assert (string_end_idx != ~0ul);
+	  bufcnt = string_end_idx;
+
+	  addr += param_start - begin;
+	  data = param_start;
+
+	  goto out;
+	}
+
+      /* Invalid (or at least unhandled) opcode.  */
+      if (prefixes != 0)
+	goto print_prefix;
+      assert (*startp == data);
+      ++data;
+      ADD_STRING ("(bad)");
+      addr += data - begin;
+
+    out:
+      if (bufcnt == bufsize)
+	goto enomem;
+      buf[bufcnt] = '\0';
+
+      *startp = data;
+      retval = outcb (buf, bufcnt, outcbarg);
+      if (retval != 0)
+	goto do_ret;
+    }
+
+ do_ret:
+  free (output_data.labelbuf);
+  if (buf != initbuf)
+    free (buf);
+
+  return retval;
+}
diff --git a/libcpu/i386_gendis.c b/libcpu/i386_gendis.c
new file mode 100644
index 0000000..aae5eae
--- /dev/null
+++ b/libcpu/i386_gendis.c
@@ -0,0 +1,72 @@
+/* Generate tables for x86 disassembler.
+   Copyright (C) 2007, 2008 Red Hat, Inc.
+   This file is part of elfutils.
+   Written by Ulrich Drepper <drepper@redhat.com>, 2007.
+
+   This file is free software; you can redistribute it and/or modify
+   it under the terms of either
+
+     * the GNU Lesser General Public License as published by the Free
+       Software Foundation; either version 3 of the License, or (at
+       your option) any later version
+
+   or
+
+     * the GNU General Public License as published by the Free
+       Software Foundation; either version 2 of the License, or (at
+       your option) any later version
+
+   or both in parallel, as here.
+
+   elfutils is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received copies of the GNU General Public License and
+   the GNU Lesser General Public License along with this program.  If
+   not, see <http://www.gnu.org/licenses/>.  */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <error.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+
+extern int i386_parse (void);
+
+
+extern FILE *i386_in;
+extern int i386_debug;
+char *infname;
+
+FILE *outfile;
+
+/* Entry point of the table generator.
+
+   ARGV[1] names the mnemonic definition file; "-" selects standard
+   input.  OUTFILE is pointed at standard output before parsing starts.
+   Returns nonzero if any diagnostic was counted in
+   error_message_count, zero otherwise.  Fatal problems terminate the
+   process through error().  */
+int
+main (int argc, char *argv[argc])
+{
+  outfile = stdout;
+
+  /* Test for "fewer than two" rather than "exactly one": with an empty
+     argument vector argv[1] must not be read at all.  */
+  if (argc < 2)
+    error (EXIT_FAILURE, 0, "usage: %s <MNEDEFFILE>",
+	   argc > 0 ? argv[0] : "i386_gendis");
+
+  //i386_debug = 1;
+  infname = argv[1];
+  if (strcmp (infname, "-") == 0)
+    i386_in = stdin;
+  else
+    {
+      i386_in = fopen (infname, "r");
+      if (i386_in == NULL)
+	error (EXIT_FAILURE, errno, "cannot open %s", argv[1]);
+    }
+
+  i386_parse ();
+
+  /* A failed or short write must not go unnoticed: the caller would
+     otherwise keep a truncated table and see a zero exit status.  */
+  if (fflush (outfile) != 0 || ferror (outfile))
+    error (EXIT_FAILURE, 0, "error writing output");
+
+  return error_message_count != 0;
+}
diff --git a/libcpu/i386_lex.l b/libcpu/i386_lex.l
new file mode 100644
index 0000000..ef1b53b
--- /dev/null
+++ b/libcpu/i386_lex.l
@@ -0,0 +1,129 @@
+%{
+/* Copyright (C) 2004, 2005, 2007, 2008 Red Hat, Inc.
+   Written by Ulrich Drepper <drepper@redhat.com>, 2004.
+
+   This file is free software; you can redistribute it and/or modify
+   it under the terms of either
+
+     * the GNU Lesser General Public License as published by the Free
+       Software Foundation; either version 3 of the License, or (at
+       your option) any later version
+
+   or
+
+     * the GNU General Public License as published by the Free
+       Software Foundation; either version 2 of the License, or (at
+       your option) any later version
+
+   or both in parallel, as here.
+
+   elfutils is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received copies of the GNU General Public License and
+   the GNU Lesser General Public License along with this program.  If
+   not, see <http://www.gnu.org/licenses/>.  */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <ctype.h>
+#include <error.h>
+#include <libintl.h>
+
+#include <libeu.h>
+#include "i386_parse.h"
+
+
+static void eat_to_eol (void);
+static void invalid_char (int ch);
+%}
+
+ID              [a-zA-Z_][a-zA-Z0-9_/]*
+ID2             [a-zA-Z0-9_:/]*
+NUMBER		[0-9]+
+WHITE		[[:space:]]+
+
+%option yylineno
+%option never-interactive
+%option noyywrap
+
+
+%x MAIN
+
+%%
+
+"%mask"				{ return kMASK; }
+
+"%prefix"			{ return kPREFIX; }
+"%suffix"			{ return kSUFFIX; }
+
+"%synonym"			{ return kSYNONYM; }
+
+{NUMBER}			{ i386_lval.num = strtoul (yytext, NULL, 10);
+				  return kNUMBER; }
+
+"%%"				{ BEGIN (MAIN); return kPERCPERC; }
+
+
+<MAIN>"0"			{ return '0'; }
+<MAIN>"1"			{ return '1'; }
+
+<INITIAL,MAIN>"{"{ID2}"}"	{ i386_lval.str = xstrndup (yytext + 1,
+							    yyleng - 2);
+				  return kBITFIELD; }
+
+<MAIN>"INVALID"			{ i386_lval.str = (void *) -1l;
+				  return kID; }
+
+<MAIN>{ID}			{ i386_lval.str = xstrndup (yytext, yyleng);
+				  return kID; }
+
+<MAIN>","			{ return ','; }
+
+<MAIN>":"			{ return ':'; }
+
+<INITIAL,MAIN>^"\n"		{ /* IGNORE */ }
+
+<INITIAL,MAIN>"\n"		{ return '\n'; }
+
+<INITIAL,MAIN>^"#"		{ eat_to_eol (); }
+
+{WHITE}				{ /* IGNORE */ }
+
+<MAIN>{WHITE}			{ return kSPACE; }
+
+<MAIN>.				{ i386_lval.ch = *yytext; return kCHAR; }
+
+.				{ invalid_char (*yytext); }
+
+
+%%
+
+/* Discard the remainder of the current input line, including the
+   terminating newline.  Called for lines that start with '#'.  */
+static void
+eat_to_eol (void)
+{
+  while (1)
+    {
+      int c = input ();
+
+      /* Stop at the newline or at end of input.  Depending on the flex
+	 release input() reports end of input as EOF or as 0; accept
+	 both so that a comment on an unterminated last line cannot
+	 keep this loop spinning.  */
+      if (c == EOF || c == 0 || c == '\n')
+	break;
+    }
+}
+
+/* Report a character that no scanner rule accepts.  The diagnostic is
+   issued with status 0, so scanning continues afterwards.  Printable
+   characters are shown literally, everything else as an octal
+   escape.  */
+static void
+invalid_char (int ch)
+{
+  /* The scanner passes a plain char, which can be negative.  Reduce it
+     to its byte value so that the octal escape shows the byte itself
+     and the <ctype.h> argument is in range.  */
+  unsigned int byte = (unsigned char) ch;
+
+  /* isprint instead of isascii: control characters would otherwise be
+     written raw into the message.  */
+  error (0, 0, (isprint (byte)
+		? gettext ("invalid character '%c' at line %d; ignored")
+		: gettext ("invalid character '\\%o' at line %d; ignored")),
+	 byte, yylineno);
+}
+
+// Local Variables:
+// mode: C
+// End:
diff --git a/libcpu/i386_parse.y b/libcpu/i386_parse.y
new file mode 100644
index 0000000..5fc0682
--- /dev/null
+++ b/libcpu/i386_parse.y
@@ -0,0 +1,1687 @@
+%{
+/* Parser for i386 CPU description.
+   Copyright (C) 2004, 2005, 2007, 2008, 2009 Red Hat, Inc.
+   Written by Ulrich Drepper <drepper@redhat.com>, 2004.
+
+   This file is free software; you can redistribute it and/or modify
+   it under the terms of either
+
+     * the GNU Lesser General Public License as published by the Free
+       Software Foundation; either version 3 of the License, or (at
+       your option) any later version
+
+   or
+
+     * the GNU General Public License as published by the Free
+       Software Foundation; either version 2 of the License, or (at
+       your option) any later version
+
+   or both in parallel, as here.
+
+   elfutils is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received copies of the GNU General Public License and
+   the GNU Lesser General Public License along with this program.  If
+   not, see <http://www.gnu.org/licenses/>.  */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <assert.h>
+#include <ctype.h>
+#include <errno.h>
+#include <error.h>
+#include <inttypes.h>
+#include <libintl.h>
+#include <math.h>
+#include <obstack.h>
+#include <search.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <libeu.h>
+#include <system.h>
+
+#define obstack_chunk_alloc xmalloc
+#define obstack_chunk_free free
+
+/* The error handler.  */
+static void yyerror (const char *s);
+
+extern int yylex (void);
+extern int i386_lineno;
+extern char *infname;
+
+
+/* A bitfield declared in the definitions section with %mask, %prefix
+   or %suffix.  */
+struct known_bitfield
+{
+  /* Name as written in the description file, without the braces.  */
+  char *name;
+  /* Width in bits for %mask entries.  The grammar passes -1 for
+     %prefix and -2 for %suffix entries, converted to unsigned long.  */
+  unsigned long int bits;
+  /* Scratch marker: check_duplicates stores its pass counter here to
+     notice a bitfield that occurs twice in one instruction.  */
+  int tmp;
+};
+
+
+/* One element of an opcode byte pattern: a literal bit or a reference
+   to a bitfield.  The elements of an instruction form a singly linked
+   list through NEXT.  */
+struct bitvalue
+{
+  /* zeroone: VALUE holds the literal bit.  field: FIELD points to the
+     referenced bitfield.  failure: the bitfield name was not found.  */
+  enum bittype { zeroone, field, failure } type;
+  union
+  {
+    unsigned int value;
+    struct known_bitfield *field;
+  };
+  struct bitvalue *next;
+};
+
+
+/* One component of an operand description.  Components of the same
+   operand are linked through NEXT.  */
+struct argname
+{
+  /* string: STR holds literal text.  nfield: FIELD references a
+     bitfield, or is NULL if the name could not be resolved.  */
+  enum nametype { string, nfield } type;
+  union
+  {
+    char *str;
+    struct known_bitfield *field;
+  };
+  struct argname *next;
+};
+
+
+/* One operand of an instruction.  Operands are linked through NEXT in
+   the order in which they were written.  */
+struct argument
+{
+  /* Component list of this operand, as returned by combine ().  */
+  struct argname *name;
+  struct argument *next;
+};
+
+
+/* One instruction description.  The `instr' grammar rule allocates it
+   zero-filled and prepends it to the `instructions' list.  */
+struct instruction
+{
+  /* The byte encoding.  */
+  struct bitvalue *bytes;
+
+  /* Prefix possible.  REPE is set for an {RE} prefix bitfield, REP for
+     an {R} prefix bitfield.  */
+  int repe;
+  int rep;
+
+  /* Mnemonic.  For the INVALID keyword the scanner delivers the
+     marker value (void *) -1l instead of a string.  */
+  char *mnemonic;
+
+  /* Suffix.  Selected from the name of the suffix bitfield; stays
+     suffix_none when the instruction has none.  */
+  enum { suffix_none = 0, suffix_w, suffix_w0, suffix_W, suffix_tttn,
+	 suffix_w1, suffix_W1, suffix_D } suffix;
+
+  /* Flag set if modr/m is used.  */
+  int modrm;
+
+  /* Operands.  NOTE(review): presumably filled in by fillin_arg, whose
+     body is not part of this excerpt -- confirm there.  */
+  struct operand
+  {
+    char *fct;
+    char *str;
+    int off1;
+    int off2;
+    int off3;
+  } operands[3];
+
+  struct instruction *next;
+};
+
+
+/* A %synonym definition: the two bitfield names in the order in which
+   they appear on the line.  */
+struct synonym
+{
+  char *from;
+  char *to;
+};
+
+
+/* A suffix bitfield that is used by at least one instruction.  */
+struct suffix
+{
+  /* Name of the suffix bitfield.  */
+  char *name;
+  /* Number assigned on first use, counting from 1.  */
+  int idx;
+};
+
+
+/* A string with an index and an offset attached.
+   NOTE(review): no user of this type is visible in this part of the
+   file; presumably it describes entries of the generated operand
+   string tables -- confirm against the output code.  */
+struct argstring
+{
+  char *str;
+  int idx;
+  int off;
+};
+
+
+/* Pseudo bitfields for operand names that need no bits in the
+   encoding.  The `argcomp' rule substitutes them when a name is not a
+   declared bitfield but equals one of the names below, and
+   check_argsdef exempts them from the "must occur in the byte
+   pattern" test.  */
+static struct known_bitfield ax_reg =
+  {
+    .name = "ax", .bits = 0, .tmp = 0
+  };
+
+static struct known_bitfield dx_reg =
+  {
+    .name = "dx", .bits = 0, .tmp = 0
+  };
+
+static struct known_bitfield di_reg =
+  {
+    .name = "es_di", .bits = 0, .tmp = 0
+  };
+
+static struct known_bitfield si_reg =
+  {
+    .name = "ds_si", .bits = 0, .tmp = 0
+  };
+
+static struct known_bitfield bx_reg =
+  {
+    .name = "ds_bx", .bits = 0, .tmp = 0
+  };
+
+
+static int bitfield_compare (const void *p1, const void *p2);
+static void new_bitfield (char *name, unsigned long int num);
+static void check_bits (struct bitvalue *value);
+static int check_duplicates (struct bitvalue *val);
+static int check_argsdef (struct bitvalue *bitval, struct argument *args);
+static int check_bitsused (struct bitvalue *bitval,
+			   struct known_bitfield *suffix,
+			   struct argument *args);
+static struct argname *combine (struct argname *name);
+static void fillin_arg (struct bitvalue *bytes, struct argname *name,
+			struct instruction *instr, int n);
+static void find_numbers (void);
+static int compare_syn (const void *p1, const void *p2);
+static int compare_suf (const void *p1, const void *p2);
+static void instrtable_out (void);
+#if 0
+static void create_mnemonic_table (void);
+#endif
+
+static void *bitfields;
+static struct instruction *instructions;
+static size_t ninstructions;
+static void *synonyms;
+static void *suffixes;
+static int nsuffixes;
+static void *mnemonics;
+size_t nmnemonics;
+extern FILE *outfile;
+
+/* Number of bits used for mnemonics.  */
+#if 0
+static size_t best_mnemonic_bits;
+#endif
+%}
+
+%union {
+  unsigned long int num;
+  char *str;
+  char ch;
+  struct known_bitfield *field;
+  struct bitvalue *bit;
+  struct argname *name;
+  struct argument *arg;
+}
+
+%token kMASK
+%token kPREFIX
+%token kSUFFIX
+%token kSYNONYM
+%token <str> kID
+%token <num> kNUMBER
+%token kPERCPERC
+%token <str> kBITFIELD
+%token <ch> kCHAR
+%token kSPACE
+
+%type <bit> bit byte bytes
+%type <field> bitfieldopt
+%type <name> argcomp arg
+%type <arg> args optargs
+
+%defines
+
+%%
+
+/* A complete description file: the definition lines, the "%%"
+   separator followed by a newline, and the instruction lines.  The
+   tables are only written if no error has been reported so far.  */
+spec:		  masks kPERCPERC '\n' instrs
+		    {
+		      if (error_message_count != 0)
+			error (EXIT_FAILURE, 0,
+			       "terminated due to previous error");
+
+		      instrtable_out ();
+		    }
+		;
+
+/* Newline-separated list of definition lines.  */
+masks:		  masks '\n' mask
+		| mask
+		;
+
+/* One definition line.  %mask declares a bitfield with the given
+   width; %prefix and %suffix declare bitfields with the marker widths
+   -1 and -2; %synonym records a pair of names, rejecting a pair that
+   compare_syn considers already present.  The line may also be
+   empty.  */
+mask:		  kMASK kBITFIELD kNUMBER
+		    { new_bitfield ($2, $3); }
+		| kPREFIX kBITFIELD
+		    { new_bitfield ($2, -1); }
+		| kSUFFIX kBITFIELD
+		    { new_bitfield ($2, -2); }
+		| kSYNONYM kBITFIELD kBITFIELD
+		    {
+		      struct synonym *newp = xmalloc (sizeof (*newp));
+		      newp->from = $2;
+		      newp->to = $3;
+		      if (tfind (newp, &synonyms, compare_syn) != NULL)
+			error (0, 0,
+			       "%d: duplicate definition for synonym '%s'",
+			       i386_lineno, $2);
+		      else if (tsearch ( newp, &synonyms, compare_syn) == NULL)
+			error (EXIT_FAILURE, 0, "tsearch");
+		    }
+		|
+		;
+
+/* Newline-separated list of instruction lines.  */
+instrs:		  instrs '\n' instr
+		| instr
+		;
+
+/* One instruction line: the byte pattern, a ':', an optional prefix
+   bitfield (only R and RE are accepted), the mnemonic, an optional
+   suffix bitfield and optional operands.  If check_duplicates,
+   check_argsdef and check_bitsused all succeed, a zero-filled
+   struct instruction is set up and prepended to `instructions';
+   previously unseen mnemonics and suffixes are entered into their
+   search trees.  The line may also be empty.  */
+instr:		  bytes ':' bitfieldopt kID bitfieldopt optargs
+		    {
+		      if ($3 != NULL && strcmp ($3->name, "RE") != 0
+			  && strcmp ($3->name, "R") != 0)
+			{
+			  error (0, 0, "%d: only 'R' and 'RE' prefix allowed",
+				 i386_lineno - 1);
+			}
+		      if (check_duplicates ($1) == 0
+			  && check_argsdef ($1, $6) == 0
+			  && check_bitsused ($1, $5, $6) == 0)
+			{
+			  struct instruction *newp = xcalloc (sizeof (*newp),
+							      1);
+			  if ($3 != NULL)
+			    {
+			      if (strcmp ($3->name, "RE") == 0)
+				newp->repe = 1;
+			      else if (strcmp ($3->name, "R") == 0)
+				newp->rep = 1;
+			    }
+
+			  newp->bytes = $1;
+			  newp->mnemonic = $4;
+			  /* (void *) -1l is the scanner's marker for the
+			     INVALID keyword; it is not a string and must
+			     not be entered into the mnemonic tree.  */
+			  if (newp->mnemonic != (void *) -1l
+			      && tfind ($4, &mnemonics,
+					(int (*)(const void *, const void *)) strcmp) == NULL)
+			    {
+			      if (tsearch ($4, &mnemonics,
+					   (int (*)(const void *, const void *)) strcmp) == NULL)
+				error (EXIT_FAILURE, errno, "tsearch");
+			      ++nmnemonics;
+			    }
+
+			  if ($5 != NULL)
+			    {
+			      if (strcmp ($5->name, "w") == 0)
+				newp->suffix = suffix_w;
+			      else if (strcmp ($5->name, "w0") == 0)
+				newp->suffix = suffix_w0;
+			      else if (strcmp ($5->name, "tttn") == 0)
+				newp->suffix = suffix_tttn;
+			      else if (strcmp ($5->name, "w1") == 0)
+				newp->suffix = suffix_w1;
+			      else if (strcmp ($5->name, "W") == 0)
+				newp->suffix = suffix_W;
+			      else if (strcmp ($5->name, "W1") == 0)
+				newp->suffix = suffix_W1;
+			      else if (strcmp ($5->name, "D") == 0)
+				newp->suffix = suffix_D;
+			      else
+				error (EXIT_FAILURE, 0,
+				       "%s: %d: unknown suffix '%s'",
+				       infname, i386_lineno - 1, $5->name);
+
+			      /* Remember each distinct suffix once and
+				 number it in order of first use.  */
+			      struct suffix search = { .name = $5->name };
+			      if (tfind (&search, &suffixes, compare_suf)
+				  == NULL)
+				{
+				  struct suffix *ns = xmalloc (sizeof (*ns));
+				  ns->name = $5->name;
+				  ns->idx = ++nsuffixes;
+				  if (tsearch (ns, &suffixes, compare_suf)
+				      == NULL)
+				    error (EXIT_FAILURE, errno, "tsearch");
+				}
+			    }
+
+			  /* Hand every operand to fillin_arg together
+			     with its position, counting from 0.  */
+			  struct argument *args = $6;
+			  int n = 0;
+			  while (args != NULL)
+			    {
+			      fillin_arg ($1, args->name, newp, n);
+
+			      args = args->next;
+			      ++n;
+			    }
+
+			  newp->next = instructions;
+			  instructions = newp;
+			  ++ninstructions;
+			}
+		    }
+		|
+		;
+
+/* Optional bitfield reference.  Yields the matching known_bitfield, or
+   NULL if the reference is absent or names an unknown bitfield (the
+   latter is also reported).  */
+bitfieldopt:	  kBITFIELD
+		    {
+		      struct known_bitfield search;
+		      search.name = $1;
+		      struct known_bitfield **res;
+		      res = tfind (&search, &bitfields, bitfield_compare);
+		      if (res == NULL)
+			{
+			  error (0, 0, "%d: unknown bitfield '%s'",
+				 i386_lineno, search.name);
+			  $$ = NULL;
+			}
+		      else
+			$$ = *res;
+		    }
+		|
+		    { $$ = NULL; }
+		;
+
+/* Comma-separated list of bytes.  Each byte is checked with
+   check_bits and then appended to the chain of the preceding bytes, so
+   the result is one list of bit elements for the whole pattern.  */
+bytes:		  bytes ',' byte
+		    {
+		      check_bits ($3);
+
+		      struct bitvalue *runp = $1;
+		      while (runp->next != NULL)
+			runp = runp->next;
+		      runp->next = $3;
+		      $$ = $1;
+		    }
+		| byte
+		    {
+		      check_bits ($1);
+		      $$ = $1;
+		    }
+		;
+
+/* Sequence of bit elements, chained in the order in which they were
+   written.  */
+byte:		  byte bit
+		    {
+		      struct bitvalue *runp = $1;
+		      while (runp->next != NULL)
+			runp = runp->next;
+		      runp->next = $2;
+		      $$ = $1;
+		    }
+		| bit
+		    { $$ = $1; }
+		;
+
+/* A single element of a byte pattern: a literal '0' or '1' bit, or a
+   reference to a declared bitfield.  An unknown bitfield name is
+   reported and yields an element of type `failure'.  */
+bit:		  '0'
+		    {
+		      $$ = xmalloc (sizeof (struct bitvalue));
+		      $$->type = zeroone;
+		      $$->value = 0;
+		      $$->next = NULL;
+		    }
+		| '1'
+		    {
+		      $$ = xmalloc (sizeof (struct bitvalue));
+		      $$->type = zeroone;
+		      $$->value = 1;
+		      $$->next = NULL;
+		    }
+		| kBITFIELD
+		    {
+		      $$ = xmalloc (sizeof (struct bitvalue));
+		      struct known_bitfield search;
+		      search.name = $1;
+		      struct known_bitfield **res;
+		      res = tfind (&search, &bitfields, bitfield_compare);
+		      if (res == NULL)
+			{
+			  error (0, 0, "%d: unknown bitfield '%s'",
+				 i386_lineno, search.name);
+			  $$->type = failure;
+			  /* Nothing else initialises the union of this
+			     node.  Give it a defined value: check_bits
+			     tests FIELD for NULL to recognise elements
+			     whose bitfield is not known.  */
+			  $$->field = NULL;
+			}
+		      else
+			{
+			  $$->type = field;
+			  $$->field = *res;
+			}
+		      $$->next = NULL;
+		    }
+		;
+
+/* Optional operand list, separated from what precedes it by white
+   space.  Yields NULL if there are no operands.  */
+optargs:	  kSPACE args
+		    { $$ = $2; }
+		|
+		    { $$ = NULL; }
+		;
+
+/* Comma-separated operands.  Every operand gets its own struct
+   argument, appended at the end of the list; its component list is
+   passed through combine () first.  */
+args:		  args ',' arg
+		    {
+		      struct argument *runp = $1;
+		      while (runp->next != NULL)
+			runp = runp->next;
+		      runp->next = xmalloc (sizeof (struct argument));
+		      runp->next->name = combine ($3);
+		      runp->next->next = NULL;
+		      $$ = $1;
+		    }
+		| arg
+		    {
+		      $$ = xmalloc (sizeof (struct argument));
+		      $$->name = combine ($1);
+		      $$->next = NULL;
+		    }
+		;
+
+/* One operand: a sequence of components, chained in the order in which
+   they were written.  */
+arg:		  arg argcomp
+		    {
+		      struct argname *runp = $1;
+		      while (runp->next != NULL)
+			runp = runp->next;
+		      runp->next = $2;
+		      $$ = $1;
+		    }
+		| argcomp
+		    { $$ = $1; }
+		;
+/* One operand component: a bitfield reference, a single character, an
+   identifier or a ':'.  The latter three become literal text.  A
+   bitfield name that was not declared but equals one of the fixed
+   names ax, dx, es_di, ds_si or ds_bx is mapped to the corresponding
+   static pseudo bitfield; any other unknown name is reported and
+   leaves FIELD set to NULL.  */
+argcomp:	  kBITFIELD
+		    {
+		      $$ = xmalloc (sizeof (struct argname));
+		      $$->type = nfield;
+		      $$->next = NULL;
+
+		      struct known_bitfield search;
+		      search.name = $1;
+		      struct known_bitfield **res;
+		      res = tfind (&search, &bitfields, bitfield_compare);
+		      if (res == NULL)
+			{
+			  if (strcmp ($1, "ax") == 0)
+			    $$->field = &ax_reg;
+			  else if (strcmp ($1, "dx") == 0)
+			    $$->field = &dx_reg;
+			  else if (strcmp ($1, "es_di") == 0)
+			    $$->field = &di_reg;
+			  else if (strcmp ($1, "ds_si") == 0)
+			    $$->field = &si_reg;
+			  else if (strcmp ($1, "ds_bx") == 0)
+			    $$->field = &bx_reg;
+			  else
+			    {
+			      error (0, 0, "%d: unknown bitfield '%s'",
+				     i386_lineno, search.name);
+			      $$->field = NULL;
+			    }
+			}
+		      else
+			$$->field = *res;
+		    }
+		| kCHAR
+		    {
+		      $$ = xmalloc (sizeof (struct argname));
+		      $$->type = string;
+		      $$->next = NULL;
+		      $$->str = xmalloc (2);
+		      $$->str[0] = $1;
+		      $$->str[1] = '\0';
+		    }
+		| kID
+		    {
+		      $$ = xmalloc (sizeof (struct argname));
+		      $$->type = string;
+		      $$->next = NULL;
+		      $$->str = $1;
+		    }
+		| ':'
+		    {
+		      $$ = xmalloc (sizeof (struct argname));
+		      $$->type = string;
+		      $$->next = NULL;
+		      $$->str = xmalloc (2);
+		      $$->str[0] = ':';
+		      $$->str[1] = '\0';
+		    }
+		;
+
+%%
+
+static void
+yyerror (const char *s)
+{
+  error (0, 0, gettext ("while reading i386 CPU description: %s at line %d"),
+         gettext (s), i386_lineno);
+}
+
+
+/* Ordering function for the bitfield search tree: compares two
+   struct known_bitfield objects by name.  */
+static int
+bitfield_compare (const void *p1, const void *p2)
+{
+  const struct known_bitfield *lhs = p1;
+  const struct known_bitfield *rhs = p2;
+
+  return strcmp (lhs->name, rhs->name);
+}
+
+
+/* Enter a bitfield called NAME into the bitfield tree.  NUM is the
+   width in bits, or one of the marker values the grammar passes for
+   %prefix and %suffix definitions.
+
+   On success the new record keeps NAME.  If a bitfield of that name
+   already exists, the duplicate is reported, NAME is freed and the
+   tree is left unchanged.  Failure to insert is fatal.  */
+static void
+new_bitfield (char *name, unsigned long int num)
+{
+  /* Look the name up with a temporary key first, so that nothing has
+     to be allocated -- and nothing can be leaked -- for a duplicate.  */
+  struct known_bitfield key = { .name = name };
+  if (tfind (&key, &bitfields, bitfield_compare) != NULL)
+    {
+      error (0, 0, "%d: duplicated definition of bitfield '%s'",
+	     i386_lineno, name);
+      free (name);
+      return;
+    }
+
+  struct known_bitfield *newp = xmalloc (sizeof (struct known_bitfield));
+  newp->name = name;
+  newp->bits = num;
+  newp->tmp = 0;
+
+  if (tsearch (newp, &bitfields, bitfield_compare) == NULL)
+    error (EXIT_FAILURE, errno, "%d: cannot insert new bitfield '%s'",
+	   i386_lineno, name);
+}
+
+
+/* Check that the number of bits is a multiple of 8.
+
+   VAL is the chain of bit elements to add up: literal bits count as
+   one bit, bitfield references as the width of the bitfield.  If the
+   chain contains an element whose bitfield is not known, nothing is
+   checked.  A total that is not a multiple of 8 is reported together
+   with a textual rendering of the chain; the diagnostic is
+   non-fatal.  */
+static void
+check_bits (struct bitvalue *val)
+{
+  unsigned int total = 0;
+
+  for (struct bitvalue *runp = val; runp != NULL; runp = runp->next)
+    {
+      if (runp->type == zeroone)
+	++total;
+      else if (runp->type == failure || runp->field == NULL)
+	/* No sense doing anything, the field is not known.  The type is
+	   tested first because an element of type `failure' need not
+	   have a valid FIELD member at all.  */
+	return;
+      else
+	total += runp->field->bits;
+    }
+
+  if (total % 8 != 0)
+    {
+      struct obstack os;
+      obstack_init (&os);
+
+      /* Every element is either a literal bit or a known bitfield at
+	 this point, otherwise the loop above would have returned.  */
+      while (val != NULL)
+	{
+	  if (val->type == zeroone)
+	    obstack_printf (&os, "%u", val->value);
+	  else
+	    obstack_printf (&os, "{%s}", val->field->name);
+	  val = val->next;
+	}
+      obstack_1grow (&os, '\0');
+
+      error (0, 0, "%d: field '%s' not a multiple of 8 bits in size",
+	     i386_lineno, (char *) obstack_finish (&os));
+
+      obstack_free (&os, NULL);
+    }
+}
+
+/* Return 1 (after a diagnostic) if VAL names the same bitfield twice.  */
+static int
+check_duplicates (struct bitvalue *val)
+{
+  /* Each call gets a fresh stamp; a field already carrying the current
+     stamp has been seen earlier in this very list.  */
+  static int testcnt;
+  const int stamp = ++testcnt;
+  int found = 0;
+
+  for (; val != NULL; val = val->next)
+    {
+      if (val->type != field || val->field == NULL)
+	continue;
+
+      if (val->field->tmp == stamp)
+	{
+	  error (0, 0, "%d: bitfield '%s' used more than once",
+		 i386_lineno - 1, val->field->name);
+	  found = 1;
+	}
+      val->field->tmp = stamp;
+    }
+
+  return found;
+}
+
+/* Verify that every bitfield named in the output format ARGS also occurs
+   in the encoding BITVAL.  Returns 1 after a diagnostic, otherwise 0.  */
+static int
+check_argsdef (struct bitvalue *bitval, struct argument *args)
+{
+  int result = 0;
+
+  for (; args != NULL; args = args->next)
+    for (struct argname *name = args->name; name != NULL; name = name->next)
+      {
+	/* Only references to known bitfields are looked up.  */
+	if (name->type != nfield || name->field == NULL)
+	  continue;
+
+	/* The *_reg objects are exempt from the lookup.  */
+	if (name->field == &ax_reg || name->field == &dx_reg
+	    || name->field == &di_reg || name->field == &si_reg
+	    || name->field == &bx_reg)
+	  continue;
+
+	struct bitvalue *runp = bitval;
+	while (runp != NULL
+	       && (runp->type != field || runp->field != name->field))
+	  runp = runp->next;
+
+	if (runp == NULL)
+	  {
+	    error (0, 0, "%d: unknown bitfield '%s' used in output format",
+		   i386_lineno - 1, name->field->name);
+	    result = 1;
+	  }
+      }
+
+  return result;
+}
+
+/* Look for bitfields in BITVAL which no argument in ARGS refers to.  The
+   diagnostic is compiled out, so the result is currently always 0.  */
+static int
+check_bitsused (struct bitvalue *bitval, struct known_bitfield *suffix,
+		struct argument *args)
+{
+  int result = 0;
+
+  for (; bitval != NULL; bitval = bitval->next)
+    {
+      /* Skip everything but known fields other than SUFFIX.  */
+      if (bitval->type != field || bitval->field == NULL
+	  || bitval->field == suffix)
+	continue;
+
+      /* {w} is handled special.  */
+      if (strcmp (bitval->field->name, "w") == 0)
+	continue;
+
+      /* Find the first argument mentioning this field, if any.  */
+      struct argument *runp;
+      for (runp = args; runp != NULL; runp = runp->next)
+	{
+	  struct argname *name = runp->name;
+	  while (name != NULL
+		 && (name->type != nfield || name->field != bitval->field))
+	    name = name->next;
+
+	  if (name != NULL)
+	    break;
+	}
+
+#if 0
+      if (runp == NULL)
+	{
+	  error (0, 0, "%d: bitfield '%s' not used",
+		 i386_lineno - 1, bitval->field->name);
+	  result = 1;
+	}
+#endif
+    }
+
+  return result;
+}
+
+/* Merge runs of adjacent string elements of NAME into the first element
+   of each run.  The absorbed elements are unlinked but not freed.  */
+static struct argname *
+combine (struct argname *name)
+{
+  struct argname *head = NULL;	/* First string of the current run.  */
+  struct argname *cur = name;
+
+  while (cur != NULL)
+    {
+      if (cur->type != string)
+	head = NULL;
+      else if (head == NULL)
+	head = cur;
+      else
+	{
+	  size_t need = strlen (head->str) + strlen (cur->str) + 1;
+	  head->str = xrealloc (head->str, need);
+	  strcat (head->str, cur->str);
+	  head->next = cur->next;
+	}
+      cur = cur->next;
+    }
+  return name;
+}
+
+/* Append the strlen (STR) bytes of STR to OB; STR is expanded twice.  */
+#define obstack_grow_str(ob, str) obstack_grow (ob, str, strlen (str))
+
+
+/* Fill in operand N of INSTR from the argument description NAME: record
+   the operand string, the bit offsets of the referenced fields within
+   BYTES, and the name of the handler function built from the fields.  */
+static void
+fillin_arg (struct bitvalue *bytes, struct argname *name,
+	    struct instruction *instr, int n)
+{
+  /* The handler names are accumulated in one obstack with static
+     storage which this function never frees.  */
+  static struct obstack ob;
+  static int initialized;
+  if (! initialized)
+    {
+      obstack_init (&ob);
+      initialized = 1;
+    }
+
+  int nfields = 0;
+  for (struct argname *arg = name; arg != NULL; arg = arg->next)
+    {
+      /* We ignore strings in the function name.  */
+      if (arg->type == string)
+	{
+	  if (instr->operands[n].str != NULL)
+	    error (EXIT_FAILURE, 0,
+		   "%d: cannot have more than one string parameter",
+		   i386_lineno - 1);
+
+	  instr->operands[n].str = arg->str;
+	  continue;
+	}
+
+      assert (arg->type == nfield);
+
+      /* Construct the function name; fields are joined with '$'.  */
+      if (nfields++ > 0)
+	obstack_1grow (&ob, '$');
+
+      if (arg->field == NULL)
+	/* Add some string which contains invalid characters.  */
+	obstack_grow_str (&ob, "!!!INVALID!!!");
+      else
+	{
+	  /* A synonym, if one is registered, replaces the field name.  */
+	  struct synonym search = { .from = arg->field->name };
+	  struct synonym **res = tfind (&search, &synonyms, compare_syn);
+	  char *fieldname = res != NULL ? (*res)->to : arg->field->name;
+
+	  obstack_grow_str (&ob, fieldname);
+	}
+
+      /* Now compute the bit offset of the field.  */
+      int bitoff = 0;
+      if (arg->field != NULL)
+	for (struct bitvalue *b = bytes; b != NULL; b = b->next)
+	  {
+	    if (b->type != field || b->field == NULL)
+	      {
+		++bitoff;
+		continue;
+	      }
+	    if (strcmp (b->field->name, arg->field->name) == 0)
+	      break;
+	    bitoff += b->field->bits;
+	  }
+
+      /* Use the first offset slot which is still zero.  */
+      if (instr->operands[n].off1 == 0)
+	instr->operands[n].off1 = bitoff;
+      else if (instr->operands[n].off2 == 0)
+	instr->operands[n].off2 = bitoff;
+      else if (instr->operands[n].off3 == 0)
+	instr->operands[n].off3 = bitoff;
+      else
+	error (EXIT_FAILURE, 0,
+	       "%d: cannot have more than three fields in parameter",
+	       i386_lineno - 1);
+
+      /* A field whose name starts with "mod" sets the modrm flag.  */
+      if (arg->field != NULL
+	  && strncasecmp (arg->field->name, "mod", 3) == 0)
+	instr->modrm = 1;
+    }
+
+  /* No field at all: the handler name defaults to "string".  */
+  if (obstack_object_size (&ob) == 0)
+    obstack_grow_str (&ob, "string");
+  obstack_1grow (&ob, '\0');
+
+  char *fct = obstack_finish (&ob);
+  instr->operands[n].fct = fct;
+}
+
+#if 0	/* Debugging aid for the commented-out twalk in find_numbers.  */
+static void
+nameout (const void *nodep, VISIT value, int level)
+{
+  if (value != leaf && value != postorder)
+    return;
+  printf ("  %s\n", *(const char **) nodep);
+}
+#endif
+
+/* Comparison callback for the argstring trees: order by the string.  */
+static int
+compare_argstring (const void *p1, const void *p2)
+{
+  const struct argstring *lhs = p1;
+  const struct argstring *rhs = p2;
+
+  return strcmp (lhs->str, rhs->str);
+}
+
+/* Filled in by find_numbers; the arrays are indexed by operand (0-2).  */
+static int maxoff[3][3];
+static int minoff[3][3] = { { 1000, 1000, 1000 },	/* 1000: none seen */
+			    { 1000, 1000, 1000 },
+			    { 1000, 1000, 1000 } };
+static int nbitoff[3][3];
+static void *fct_names[3];
+static int nbitfct[3];
+static int nbitsuf;
+static void *strs[3];
+static int nbitstr[3];
+static int total_bits = 2;	// Already counted the rep/repe bits.
+/* Gather per-operand handler names, strings and bit offset ranges.  */
+static void
+find_numbers (void)
+{
+  int nfct_names[3] = { 0, 0, 0 };
+  int nstrs[3] = { 0, 0, 0 };
+
+  /* We reverse the order of the instruction list while processing it.
+     Later phases need it in the order in which the input file has
+     them.  */
+  struct instruction *reversed = NULL;
+
+  struct instruction *runp = instructions;
+  while (runp != NULL)
+    {
+      for (int i = 0; i < 3; ++i)
+	if (runp->operands[i].fct != NULL)
+	  {
+	    struct argstring search = { .str = runp->operands[i].fct };
+	    if (tfind (&search, &fct_names[i], compare_argstring) == NULL)
+	      {
+		struct argstring *newp = xmalloc (sizeof (*newp));
+		newp->str = runp->operands[i].fct;
+		newp->idx = 0;
+		if (tsearch (newp, &fct_names[i], compare_argstring) == NULL)
+		  error (EXIT_FAILURE, errno, "tsearch");
+		++nfct_names[i];
+	      }
+	    /* Likewise collect the operand's string, if it has one.  */
+	    if (runp->operands[i].str != NULL)
+	      {
+		search.str = runp->operands[i].str;
+		if (tfind (&search, &strs[i], compare_argstring) == NULL)
+		  {
+		    struct argstring *newp = xmalloc (sizeof (*newp));
+		    newp->str = runp->operands[i].str;
+		    newp->idx = 0;
+		    if (tsearch (newp, &strs[i], compare_argstring) == NULL)
+		      error (EXIT_FAILURE, errno, "tsearch");
+		    ++nstrs[i];
+		  }
+	      }
+	    /* Track the extreme values seen in each of the three slots.  */
+	    maxoff[i][0] = MAX (maxoff[i][0], runp->operands[i].off1);
+	    maxoff[i][1] = MAX (maxoff[i][1], runp->operands[i].off2);
+	    maxoff[i][2] = MAX (maxoff[i][2], runp->operands[i].off3);
+	    /* A zero offset does not take part in the minimum.  */
+	    if (runp->operands[i].off1 > 0)
+	      minoff[i][0] = MIN (minoff[i][0], runp->operands[i].off1);
+	    if (runp->operands[i].off2 > 0)
+	      minoff[i][1] = MIN (minoff[i][1], runp->operands[i].off2);
+	    if (runp->operands[i].off3 > 0)
+	      minoff[i][2] = MIN (minoff[i][2], runp->operands[i].off3);
+	  }
+      /* Unlink the node and push it on the reversed list.  */
+      struct instruction *old = runp;
+      runp = runp->next;
+
+      old->next = reversed;
+      reversed = old;
+    }
+  instructions = reversed;
+  /* Derive table bit widths from the ranges and counts found above.  */
+  int d;
+  int c;
+  for (int i = 0; i < 3; ++i)
+    {
+      // printf ("min1 = %d, min2 = %d, min3 = %d\n", minoff[i][0], minoff[i][1], minoff[i][2]);
+      // printf ("max1 = %d, max2 = %d, max3 = %d\n", maxoff[i][0], maxoff[i][1], maxoff[i][2]);
+      /* 1000 is the initial value: no nonzero offset was seen.  */
+      if (minoff[i][0] == 1000)
+	nbitoff[i][0] = 0;
+      else
+	{
+	  nbitoff[i][0] = 1;
+	  d = maxoff[i][0] - minoff[i][0];
+	  c = 1;
+	  while (c < d)
+	    {
+	      ++nbitoff[i][0];
+	      c *= 2;
+	    }
+	  total_bits += nbitoff[i][0];
+	}
+
+      if (minoff[i][1] == 1000)
+	nbitoff[i][1] = 0;
+      else
+	{
+	  nbitoff[i][1] = 1;
+	  d = maxoff[i][1] - minoff[i][1];
+	  c = 1;
+	  while (c < d)
+	    {
+	      ++nbitoff[i][1];
+	      c *= 2;
+	    }
+	  total_bits += nbitoff[i][1];
+	}
+
+      if (minoff[i][2] == 1000)
+	nbitoff[i][2] = 0;
+      else
+	{
+	  nbitoff[i][2] = 1;
+	  d = maxoff[i][2] - minoff[i][2];
+	  c = 1;
+	  while (c < d)
+	    {
+	      ++nbitoff[i][2];
+	      c *= 2;
+	    }
+	  total_bits += nbitoff[i][2];
+	}
+      // printf ("off1 = %d, off2 = %d, off3 = %d\n", nbitoff[i][0], nbitoff[i][1], nbitoff[i][2]);
+      /* At least one bit is always reserved for the handler index.  */
+      nbitfct[i] = 1;
+      d = nfct_names[i];
+      c = 1;
+      while (c < d)
+	{
+	  ++nbitfct[i];
+	  c *= 2;
+	}
+      total_bits += nbitfct[i];
+      // printf ("%d fct[%d], %d bits\n", nfct_names[i], i, nbitfct[i]);
+      /* Strings get bits only if the operand has any.  */
+      if (nstrs[i] != 0)
+	{
+	  nbitstr[i] = 1;
+	  d = nstrs[i];
+	  c = 1;
+	  while (c < d)
+	    {
+	      ++nbitstr[i];
+	      c *= 2;
+	    }
+	  total_bits += nbitstr[i];
+	}
+
+      // twalk (fct_names[i], nameout);
+    }
+
+  nbitsuf = 0;
+  d = nsuffixes;
+  c = 1;
+  while (c < d)
+    {
+      ++nbitsuf;
+      c *= 2;
+    }
+  total_bits += nbitsuf;
+  // printf ("%d suffixes, %d bits\n", nsuffixes, nbitsuf);
+}
+
+/* Tree comparison callback for synonyms, keyed on the FROM name.  */
+static int
+compare_syn (const void *p1, const void *p2)
+{
+  const struct synonym *lhs = p1;
+  const struct synonym *rhs = p2;
+
+  return strcmp (lhs->from, rhs->from);
+}
+
+/* Tree comparison callback for suffixes, keyed on the suffix name.  */
+static int
+compare_suf (const void *p1, const void *p2)
+{
+  const struct suffix *lhs = p1;
+  const struct suffix *rhs = p2;
+
+  return strcmp (lhs->name, rhs->name);
+}
+
+/* Counters shared by the twalk callbacks below; the caller resets them.  */
+static int count_op_str;
+static int off_op_str;
+static void
+print_op_str (const void *nodep, VISIT value,
+	      int level __attribute__ ((unused)))
+{
+  if (value != leaf && value != postorder)
+    return;
+  struct argstring *arg = *(struct argstring **) nodep;
+  /* Every string but the first begins by closing its predecessor.  */
+  const char *sep = count_op_str == 0 ? "" : "\\0\"";
+  fprintf (outfile, "%s\n  \"%s", sep, arg->str);
+  arg->idx = ++count_op_str;
+  arg->off = off_op_str;
+  off_op_str += strlen (arg->str) + 1;
+}
+
+/* twalk callback: emit a node's string offset to OUTFILE (not stdout).  */
+static void
+print_op_str_idx (const void *nodep, VISIT value,
+		  int level __attribute__ ((unused)))
+{
+  if (value == leaf || value == postorder)
+    fprintf (outfile, "  %d,\n", (*(struct argstring **) nodep)->off);
+}
+
+/* twalk callback: emit one FCT_* entry and store the node's index.  */
+static void
+print_op_fct (const void *nodep, VISIT value,
+	      int level __attribute__ ((unused)))
+{
+  if (value != leaf && value != postorder)
+    return;
+  struct argstring *arg = *(struct argstring **) nodep;
+  fprintf (outfile, "  FCT_%s,\n", arg->str);
+  arg->idx = ++count_op_str;
+}
+
+
+#if NMNES < 2
+# error "bogus NMNES value"
+#endif
+/* Write the generated tables and their size macros to OUTFILE.  */
+static void
+instrtable_out (void)
+{
+  find_numbers ();
+
+#if 0
+  create_mnemonic_table ();
+
+  fprintf (outfile, "#define MNEMONIC_BITS %zu\n", best_mnemonic_bits);
+#else
+  fprintf (outfile, "#define MNEMONIC_BITS %ld\n",
+	   lrint (ceil (log2 (NMNES))));
+#endif
+  fprintf (outfile, "#define SUFFIX_BITS %d\n", nbitsuf);
+  for (int i = 0; i < 3; ++i)
+    {
+      fprintf (outfile, "#define FCT%d_BITS %d\n", i + 1, nbitfct[i]);
+      if (nbitstr[i] != 0)
+	fprintf (outfile, "#define STR%d_BITS %d\n", i + 1, nbitstr[i]);
+      fprintf (outfile, "#define OFF%d_1_BITS %d\n", i + 1, nbitoff[i][0]);
+      fprintf (outfile, "#define OFF%d_1_BIAS %d\n", i + 1, minoff[i][0]);
+      if (nbitoff[i][1] != 0)
+	{
+	  fprintf (outfile, "#define OFF%d_2_BITS %d\n", i + 1, nbitoff[i][1]);
+	  fprintf (outfile, "#define OFF%d_2_BIAS %d\n", i + 1, minoff[i][1]);
+	}
+      if (nbitoff[i][2] != 0)
+	{
+	  fprintf (outfile, "#define OFF%d_3_BITS %d\n", i + 1, nbitoff[i][2]);
+	  fprintf (outfile, "#define OFF%d_3_BIAS %d\n", i + 1, minoff[i][2]);
+	}
+    }
+
+  fputs ("\n#include <i386_data.h>\n\n", outfile);
+
+  /* Publish the numeric values of the suffix_* constants as macros.  */
+#define APPEND(a, b) APPEND_ (a, b)
+#define APPEND_(a, b) a##b
+#define EMIT_SUFFIX(suf) \
+  fprintf (outfile, "#define suffix_%s %d\n", #suf, APPEND (suffix_, suf))
+  EMIT_SUFFIX (none);
+  EMIT_SUFFIX (w);
+  EMIT_SUFFIX (w0);
+  EMIT_SUFFIX (W);
+  EMIT_SUFFIX (tttn);
+  EMIT_SUFFIX (D);
+  EMIT_SUFFIX (w1);
+  EMIT_SUFFIX (W1);
+
+  fputc_unlocked ('\n', outfile);
+  /* Per operand: the handler table and, if used, the string tables.  */
+  for (int i = 0; i < 3; ++i)
+    {
+      /* Functions.  */
+      count_op_str = 0;
+      fprintf (outfile, "static const opfct_t op%d_fct[] =\n{\n  NULL,\n",
+	       i + 1);
+      twalk (fct_names[i], print_op_fct);
+      fputs ("};\n", outfile);
+
+      /* The operand strings.  */
+      if (nbitstr[i] != 0)
+	{
+	  count_op_str = 0;
+	  off_op_str = 0;
+	  fprintf (outfile, "static const char op%d_str[] =", i + 1);
+	  twalk (strs[i], print_op_str);
+	  fputs ("\";\n", outfile);
+
+	  fprintf (outfile, "static const uint8_t op%d_str_idx[] = {\n",
+		   i + 1);
+	  twalk (strs[i], print_op_str_idx);
+	  fputs ("};\n", outfile);
+	}
+    }
+
+  /* One instrtab entry per element of the instruction list.  */
+  fputs ("static const struct instr_enc instrtab[] =\n{\n", outfile);
+  struct instruction *instr;
+  for (instr = instructions; instr != NULL; instr = instr->next)
+    {
+      fputs ("  {", outfile);
+      if (instr->mnemonic == (void *) -1l)
+	fputs (" .mnemonic = MNE_INVALID,", outfile);
+      else
+	fprintf (outfile, " .mnemonic = MNE_%s,", instr->mnemonic);
+      fprintf (outfile, " .rep = %d,", instr->rep);
+      fprintf (outfile, " .repe = %d,", instr->repe);
+      fprintf (outfile, " .suffix = %d,", instr->suffix);
+      fprintf (outfile, " .modrm = %d,", instr->modrm);
+
+      for (int i = 0; i < 3; ++i)
+	{
+	  int idx = 0;	/* Index 0 stands for "no handler" / "no string".  */
+	  if (instr->operands[i].fct != NULL)
+	    {
+	      struct argstring search = { .str = instr->operands[i].fct };
+	      struct argstring **res = tfind (&search, &fct_names[i],
+					      compare_argstring);
+	      assert (res != NULL);
+	      idx = (*res)->idx;
+	    }
+	  fprintf (outfile, " .fct%d = %d,", i + 1, idx);
+
+	  idx = 0;
+	  if (instr->operands[i].str != NULL)
+	    {
+	      struct argstring search = { .str = instr->operands[i].str };
+	      struct argstring **res = tfind (&search, &strs[i],
+					      compare_argstring);
+	      assert (res != NULL);
+	      idx = (*res)->idx;
+	    }
+	  if (nbitstr[i] != 0)
+	    fprintf (outfile, " .str%d = %d,", i + 1, idx);
+	  /* Offsets are stored relative to the minimum, clamped at zero.  */
+	  fprintf (outfile, " .off%d_1 = %d,", i + 1,
+		   MAX (0, instr->operands[i].off1 - minoff[i][0]));
+
+	  if (nbitoff[i][1] != 0)
+	    fprintf (outfile, " .off%d_2 = %d,", i + 1,
+		     MAX (0, instr->operands[i].off2 - minoff[i][1]));
+
+	  if (nbitoff[i][2] != 0)
+	    fprintf (outfile, " .off%d_3 = %d,", i + 1,
+		     MAX (0, instr->operands[i].off3 - minoff[i][2]));
+	}
+
+      fputs (" },\n", outfile);
+    }
+  fputs ("};\n", outfile);
+  /* match_data: per instruction a header byte, then mask/value data.  */
+  fputs ("static const uint8_t match_data[] =\n{\n", outfile);
+  size_t cnt = 0;
+  for (instr = instructions; instr != NULL; instr = instr->next, ++cnt)
+    {
+      /* First count the number of bytes.  */
+      size_t totalbits = 0;
+      size_t zerobits = 0;	/* Field bits since the last literal bit.  */
+      bool leading_p = true;
+      size_t leadingbits = 0;	/* Literal bits before the first field.  */
+      struct bitvalue *b = instr->bytes;
+      while (b != NULL)
+	{
+	  if (b->type == zeroone)
+	    {
+	      ++totalbits;
+	      zerobits = 0;
+	      if (leading_p)
+		++leadingbits;
+	    }
+	  else
+	    {
+	      totalbits += b->field->bits;
+	      /* We must always count the mod/rm byte.  */
+	      if (strncasecmp (b->field->name, "mod", 3) == 0)
+		zerobits = 0;
+	      else
+		zerobits += b->field->bits;
+	      leading_p = false;
+	    }
+	  b = b->next;
+	}
+      size_t nbytes = (totalbits - zerobits + 7) / 8;
+      assert (nbytes > 0);
+      size_t leadingbytes = leadingbits / 8;
+      /* Header byte: NBYTES or-ed with LEADINGBYTES shifted left by 4.  */
+      fprintf (outfile, "  %#zx,", nbytes | (leadingbytes << 4));
+
+      /* Now create the mask and byte values.  */
+      uint8_t byte = 0;
+      uint8_t mask = 0;
+      int nbits = 0;
+      b = instr->bytes;
+      while (b != NULL)
+	{
+	  if (b->type == zeroone)
+	    {
+	      byte = (byte << 1) | b->value;
+	      mask = (mask << 1) | 1;
+	      if (++nbits == 8)
+		{
+		  if (leadingbytes > 0)
+		    {
+		      assert (mask == 0xff);
+		      fprintf (outfile, " %#" PRIx8 ",", byte);
+		      --leadingbytes;
+		    }
+		  else
+		    fprintf (outfile, " %#" PRIx8 ", %#" PRIx8 ",",
+			     mask, byte);
+		  byte = mask = nbits = 0;
+		  if (--nbytes == 0)
+		    break;
+		}
+	    }
+	  else
+	    {
+	      assert (leadingbytes == 0);
+	      /* Field bits only shift: they add zeros to mask and byte.  */
+	      unsigned long int remaining = b->field->bits;
+	      while (nbits + remaining > 8)
+		{
+		  fprintf (outfile, " %#" PRIx8 ", %#" PRIx8 ",",
+			   mask << (8 - nbits), byte << (8 - nbits));
+		  remaining = nbits + remaining - 8;
+		  byte = mask = nbits = 0;
+		  if (--nbytes == 0)
+		    break;	/* NOTE(review): exits only the inner loop.  */
+		}
+	      byte <<= remaining;
+	      mask <<= remaining;
+	      nbits += remaining;
+	      if (nbits == 8)
+		{
+		  fprintf (outfile, " %#" PRIx8 ", %#" PRIx8 ",", mask, byte);
+		  byte = mask = nbits = 0;
+		  if (--nbytes == 0)
+		    break;
+		}
+	    }
+	  b = b->next;
+	}
+
+      fputc_unlocked ('\n', outfile);
+    }
+  fputs ("};\n", outfile);
+}
+
+
+#if 0
+/* Count the distinct bytes used by the NSTR strings; track length bounds.  */
+static size_t mnemonic_maxlen;
+static size_t mnemonic_minlen;
+static size_t
+which_chars (const char *str[], size_t nstr)
+{
+  char seen[256] = { 0 };
+  mnemonic_maxlen = 0;
+  mnemonic_minlen = 10000;
+  for (size_t s = 0; s < nstr; ++s)
+    {
+      const unsigned char *cp = (const unsigned char *) str[s];
+      size_t len = strlen ((const char *) cp);
+      mnemonic_maxlen = MAX (mnemonic_maxlen, len);
+      mnemonic_minlen = MIN (mnemonic_minlen, len);
+      do
+	seen[*cp++] = 1;
+      while (*cp != '\0');
+    }
+  size_t ndistinct = 0;
+  for (size_t c = 0; c < sizeof (seen); ++c)
+    ndistinct += seen[c] != 0;
+  return ndistinct;
+}
+
+static const char **mnemonic_strs;	/* Filled in by add_mnemonics.  */
+static size_t nmnemonic_strs;
+static void
+add_mnemonics (const void *nodep, VISIT value,
+	       int level __attribute__ ((unused)))
+{
+  if (value != leaf && value != postorder)
+    return;
+  mnemonic_strs[nmnemonic_strs++] = *(const char **) nodep;
+}
+
+/* A byte value and the number of strings in which it was counted.  */
+struct charfreq
+{
+  char ch;
+  int freq;
+};
+static struct charfreq pfxfreq[256];
+static struct charfreq sfxfreq[256];
+
+/* qsort callback: order charfreq records by descending frequency.  */
+static int
+compare_freq (const void *p1, const void *p2)
+{
+  const struct charfreq *lhs = p1;
+  const struct charfreq *rhs = p2;
+
+  /* The higher count sorts first; equal counts compare equal.  */
+  if (lhs->freq != rhs->freq)
+    return lhs->freq > rhs->freq ? -1 : 1;
+
+  return 0;
+}
+
+/* Count how often each byte value starts one of the NSTR strings STR.
+   Sorts pfxfreq by falling count; returns the number of used entries.  */
+static size_t
+compute_pfxfreq (const char *str[], size_t nstr)
+{
+  memset (pfxfreq, '\0', sizeof (pfxfreq));
+  /* Label all 256 slots, not just the first NSTR, before sorting.  */
+  for (size_t i = 0; i < 256; ++i)
+    pfxfreq[i].ch = i;
+
+  for (size_t i = 0; i < nstr; ++i)
+    ++pfxfreq[*((const unsigned char *) str[i])].freq;
+
+  qsort (pfxfreq, 256, sizeof (struct charfreq), compare_freq);
+  size_t n = 0;
+  while (n < 256 && pfxfreq[n].freq != 0)
+    ++n;
+  return n;
+}
+
+/* A string together with its length (set from strlen by the user).  */
+struct strsnlen
+{
+  const char *str;
+  size_t len;
+};
+/* Count how often each byte value ends one of the NSTR strings.  Sorts
+   sfxfreq by falling count; returns the number of used entries.  */
+static size_t
+compute_sfxfreq (size_t nstr, struct strsnlen *strsnlen)
+{
+  memset (sfxfreq, '\0', sizeof (sfxfreq));
+  /* Label all 256 slots, not just the first NSTR, before sorting.  */
+  for (size_t i = 0; i < 256; ++i)
+    sfxfreq[i].ch = i;
+
+  for (size_t i = 0; i < nstr; ++i)
+    ++sfxfreq[((const unsigned char *) strchrnul (strsnlen[i].str, '\0'))[-1]].freq;
+
+  qsort (sfxfreq, 256, sizeof (struct charfreq), compare_freq);
+  size_t n = 0;
+  while (n < 256 && sfxfreq[n].freq != 0)
+    ++n;
+  return n;
+}
+
+/* Search for a compact mnemonic table encoding and write it to OUTFILE.  */
+static void
+create_mnemonic_table (void)
+{
+  mnemonic_strs = xmalloc (nmnemonics * sizeof (char *));
+
+  twalk (mnemonics, add_mnemonics);
+
+  (void) which_chars (mnemonic_strs, nmnemonic_strs);
+
+  size_t best_so_far = 100000000;
+  char *best_prefix = NULL;
+  char *best_suffix = NULL;
+  char *best_table = NULL;
+  size_t best_table_size = 0;
+  size_t best_table_bits = 0;
+  size_t best_prefix_bits = 0;
+
+  /* We can precompute the prefix characters.  */
+  size_t npfx_char = compute_pfxfreq (mnemonic_strs, nmnemonic_strs);
+
+  /* Compute best size for string representation including explicit NUL.  */
+  for (size_t pfxbits = 0; (1u << pfxbits) < 2 * npfx_char; ++pfxbits)
+    {
+      char prefix[1 << pfxbits];
+      size_t i;
+      for (i = 0; i < (1u << pfxbits) - 1; ++i)
+	prefix[i] = pfxfreq[i].ch;
+      prefix[i] = '\0';
+
+      struct strsnlen strsnlen[nmnemonic_strs];
+
+      for (i = 0; i < nmnemonic_strs; ++i)
+	{
+	  if (strchr (prefix, *mnemonic_strs[i]) != NULL)
+	    strsnlen[i].str = mnemonic_strs[i] + 1;
+	  else
+	    strsnlen[i].str = mnemonic_strs[i];
+	  strsnlen[i].len = strlen (strsnlen[i].str);
+	}
+
+      /* With the prefixes gone, try to combine strings.  */
+      size_t nstrsnlen = 1;
+      for (i = 1; i < nmnemonic_strs; ++i)
+	{
+	  size_t j;
+	  for (j = 0; j < nstrsnlen; ++j)
+	    if (strsnlen[i].len > strsnlen[j].len
+		&& strcmp (strsnlen[j].str,
+			   strsnlen[i].str + (strsnlen[i].len
+					      - strsnlen[j].len)) == 0)
+	      {
+		strsnlen[j] = strsnlen[i];
+		break;
+	      }
+	    else if (strsnlen[i].len < strsnlen[j].len
+		     && strcmp (strsnlen[i].str,
+				strsnlen[j].str + (strsnlen[j].len
+						   - strsnlen[i].len)) == 0)
+	      break;
+;
+	  if (j == nstrsnlen)
+	      strsnlen[nstrsnlen++] = strsnlen[i];
+	}
+
+      size_t nsfx_char = compute_sfxfreq (nstrsnlen, strsnlen);
+
+      for (size_t sfxbits = 0; (1u << sfxbits) < 2 * nsfx_char; ++sfxbits)
+	{
+	  char suffix[1 << sfxbits];
+
+	  for (i = 0; i < (1u << sfxbits) - 1; ++i)
+	    suffix[i] = sfxfreq[i].ch;
+	  suffix[i] = '\0';
+	  /* Drop a final character which is in the suffix set.  */
+	  size_t newlen[nstrsnlen];
+
+	  for (i = 0; i < nstrsnlen; ++i)
+	    if (strchr (suffix, strsnlen[i].str[strsnlen[i].len - 1]) != NULL)
+	      newlen[i] = strsnlen[i].len - 1;
+	    else
+	      newlen[i] = strsnlen[i].len;
+
+	  char charused[256];
+	  memset (charused, '\0', sizeof (charused));
+	  size_t ncharused = 0;
+	  /* Merge strings which are tails of other table strings.  */
+	  const char *tablestr[nstrsnlen];
+	  size_t ntablestr = 1;
+	  tablestr[0] = strsnlen[0].str;
+	  size_t table = newlen[0] + 1;
+	  for (i = 1; i < nstrsnlen; ++i)
+	    {
+	      size_t j;
+	      for (j = 0; j < ntablestr; ++j)
+		if (newlen[i] > newlen[j]
+		    && memcmp (tablestr[j],
+			       strsnlen[i].str + (newlen[i] - newlen[j]),
+			       newlen[j]) == 0)
+		  {
+		    table += newlen[i] - newlen[j];
+		    tablestr[j] = strsnlen[i].str;
+		    newlen[j] = newlen[i];
+		    break;
+		  }
+		else if (newlen[i] < newlen[j]
+		     && memcmp (strsnlen[i].str,
+				tablestr[j] + (newlen[j] - newlen[i]),
+				newlen[i]) == 0)
+		  break;
+
+	      if (j == ntablestr)
+		{
+		  table += newlen[i] + 1;
+		  tablestr[ntablestr] = strsnlen[i].str;
+		  newlen[ntablestr] = newlen[i];
+
+		  ++ntablestr;
+		}
+
+	      for (size_t x = 0; x < newlen[j]; ++x)
+		if (charused[((const unsigned char *) tablestr[j])[x]]++ == 0)
+		  ++ncharused;
+	    }
+
+	  size_t ncharused_bits = 0;
+	  i = 1;
+	  while (i < ncharused)
+	    {
+	      i *= 2;
+	      ++ncharused_bits;
+	    }
+
+	  size_t table_bits = 0;
+	  i = 1;
+	  while (i < table)
+	    {
+	      i *= 2;
+	      ++table_bits;
+	    }
+	  /* Size estimate used to compare prefix/suffix combinations.  */
+	  size_t mnemonic_bits = table_bits + pfxbits + sfxbits;
+	  size_t new_total = (((table + 7) / 8) * ncharused_bits + ncharused
+			      + (pfxbits == 0 ? 0 : (1 << pfxbits) - 1)
+			      + (sfxbits == 0 ? 0 : (1 << sfxbits) - 1)
+			      + (((total_bits + mnemonic_bits + 7) / 8)
+				 * ninstructions));
+
+	  if (new_total < best_so_far)
+	    {
+	      best_so_far = new_total;
+	      best_mnemonic_bits = mnemonic_bits;
+
+	      free (best_suffix);
+	      best_suffix = xstrdup (suffix);
+
+	      free (best_prefix);
+	      best_prefix = xstrdup (prefix);
+	      best_prefix_bits = pfxbits;
+
+	      best_table_size = table;
+	      best_table_bits = table_bits;
+	      char *cp = best_table = xrealloc (best_table, table);
+	      for (i = 0; i < ntablestr; ++i)
+		{
+		  assert (cp + newlen[i] + 1 <= best_table + table);
+		  cp = mempcpy (cp, tablestr[i], newlen[i]);
+		  *cp++ = '\0';
+		}
+	      assert (cp == best_table + table);
+	    }
+	}
+    }
+  /* Emit the winning table; non-printable bytes as octal escapes.  */
+  fputs ("static const char mnemonic_table[] =\n\"", outfile);
+  for (size_t i = 0; i < best_table_size; ++i)
+    {
+      if (((i + 1) % 60) == 0)
+	fputs ("\"\n\"", outfile);
+      if (!isascii (best_table[i]) || !isprint (best_table[i]))
+	fprintf (outfile, "\\%03o", best_table[i]);
+      else
+	fputc (best_table[i], outfile);
+    }
+  fputs ("\";\n", outfile);
+
+  if (best_prefix[0] != '\0')
+    fprintf (outfile,
+	     "static const char prefix[%zu] = \"%s\";\n"
+	     "#define PREFIXCHAR_BITS %zu\n",
+	     strlen (best_prefix), best_prefix, best_prefix_bits);
+  else
+    fputs ("#define NO_PREFIX\n", outfile);
+
+  if (best_suffix[0] != '\0')
+    fprintf (outfile, "static const char suffix[%zu] = \"%s\";\n",
+	     strlen (best_suffix), best_suffix);
+  else
+    fputs ("#define NO_SUFFIX\n", outfile);
+  /* Emit one MNE_* macro per mnemonic.  */
+  for (size_t i = 0; i < nmnemonic_strs; ++i)
+    {
+      const char *mne = mnemonic_strs[i];
+
+      size_t pfxval = 0;
+      char *cp = strchr (best_prefix, *mne);
+      if (cp != NULL)
+	{
+	  pfxval = 1 + (cp - best_prefix);
+	  ++mne;
+	}
+
+      size_t l = strlen (mne);
+
+      size_t sfxval = 0;
+      cp = strchr (best_suffix, mne[l - 1]);
+      if (cp != NULL)
+	{
+	  sfxval = 1 + (cp - best_suffix);
+	  --l;
+	}
+      /* NOTE(review): retries keep the full table size as haystack length.  */
+      char *off = memmem (best_table, best_table_size, mne, l);
+      while (off[l] != '\0')
+	{
+	  off = memmem (off + 1, best_table_size, mne, l);
+	  assert (off != NULL);
+	}
+
+      fprintf (outfile, "#define MNE_%s %#zx\n",
+	       mnemonic_strs[i],
+	       (off - best_table)
+	       + ((pfxval + (sfxval << best_prefix_bits)) << best_table_bits));
+    }
+}
+#endif
diff --git a/libcpu/memory-access.h b/libcpu/memory-access.h
new file mode 100644
index 0000000..779825f
--- /dev/null
+++ b/libcpu/memory-access.h
@@ -0,0 +1,182 @@
+/* Unaligned memory access functionality.
+   Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2008 Red Hat, Inc.
+   Written by Ulrich Drepper <drepper@redhat.com>, 2001.
+
+   This file is free software; you can redistribute it and/or modify
+   it under the terms of either
+
+     * the GNU Lesser General Public License as published by the Free
+       Software Foundation; either version 3 of the License, or (at
+       your option) any later version
+
+   or
+
+     * the GNU General Public License as published by the Free
+       Software Foundation; either version 2 of the License, or (at
+       your option) any later version
+
+   or both in parallel, as here.
+
+   elfutils is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received copies of the GNU General Public License and
+   the GNU Lesser General Public License along with this program.  If
+   not, see <http://www.gnu.org/licenses/>.  */
+
+#ifndef _MEMORY_ACCESS_H
+#define _MEMORY_ACCESS_H 1
+
+#include <byteswap.h>
+#include <endian.h>
+#include <limits.h>
+#include <stdint.h>
+
+
+/* Before this file is included the macro MACHINE_ENCODING must be
+   defined to __BIG_ENDIAN or __LITTLE_ENDIAN to signal the endianness
+   of the architecture being handled.  */
+#ifndef MACHINE_ENCODING
+# error "MACHINE_ENCODING needs to be defined"
+#endif
+#if MACHINE_ENCODING != __BIG_ENDIAN && MACHINE_ENCODING != __LITTLE_ENDIAN
+# error "MACHINE_ENCODING must signal either big or little endian"
+#endif
+
+
+/* We use simple memory access functions in case the hardware allows it.
+   The caller has to make sure we don't have alias problems.  */
+#if ALLOW_UNALIGNED
+/* Direct loads via a cast pointer, swapped if encoding != host order.  */
+# define read_2ubyte_unaligned(Addr) \
+  (unlikely (MACHINE_ENCODING != __BYTE_ORDER)				      \
+   ? bswap_16 (*((const uint16_t *) (Addr)))				      \
+   : *((const uint16_t *) (Addr)))
+# define read_2sbyte_unaligned(Addr) \
+  (unlikely (MACHINE_ENCODING != __BYTE_ORDER)				      \
+   ? (int16_t) bswap_16 (*((const int16_t *) (Addr)))			      \
+   : *((const int16_t *) (Addr)))
+/* The _noncvt variant never swaps.  */
+# define read_4ubyte_unaligned_noncvt(Addr) \
+   *((const uint32_t *) (Addr))
+# define read_4ubyte_unaligned(Addr) \
+  (unlikely (MACHINE_ENCODING != __BYTE_ORDER)				      \
+   ? bswap_32 (*((const uint32_t *) (Addr)))				      \
+   : *((const uint32_t *) (Addr)))
+# define read_4sbyte_unaligned(Addr) \
+  (unlikely (MACHINE_ENCODING != __BYTE_ORDER)				      \
+   ? (int32_t) bswap_32 (*((const int32_t *) (Addr)))			      \
+   : *((const int32_t *) (Addr)))
+
+# define read_8ubyte_unaligned(Addr) \
+  (unlikely (MACHINE_ENCODING != __BYTE_ORDER)				      \
+   ? bswap_64 (*((const uint64_t *) (Addr)))				      \
+   : *((const uint64_t *) (Addr)))
+# define read_8sbyte_unaligned(Addr) \
+  (unlikely (MACHINE_ENCODING != __BYTE_ORDER)				      \
+   ? (int64_t) bswap_64 (*((const int64_t *) (Addr)))			      \
+   : *((const int64_t *) (Addr)))
+
+#else
+/* Overlay through which the functions below load 2, 4 or 8 byte values.  */
+union unaligned
+  {
+    void *p;
+    uint16_t u2;
+    uint32_t u4;
+    uint64_t u8;
+    int16_t s2;
+    int32_t s4;
+    int64_t s8;
+  } attribute_packed;	/* Presumably the packed attribute -- confirm.  */
+/* Load an unaligned 16-bit unsigned value stored in MACHINE_ENCODING.  */
+static inline uint16_t
+read_2ubyte_unaligned (const void *p)
+{
+  const union unaligned *up = p;
+  const uint16_t raw = up->u2;
+
+  return MACHINE_ENCODING != __BYTE_ORDER ? bswap_16 (raw) : raw;
+}
+/* Signed variant: swap the unsigned view, else read the signed member.  */
+static inline int16_t
+read_2sbyte_unaligned (const void *p)
+{
+  const union unaligned *up = p;
+  return (MACHINE_ENCODING != __BYTE_ORDER
+	  ? (int16_t) bswap_16 (up->u2) : up->s2);
+}
+/* Load an unaligned 32-bit value exactly as stored, without any byte
+   swapping.  */
+static inline uint32_t
+read_4ubyte_unaligned_noncvt (const void *p)
+{
+  return ((const union unaligned *) p)->u4;
+}
+/* Load an unaligned 32-bit unsigned value stored in MACHINE_ENCODING.  */
+static inline uint32_t
+read_4ubyte_unaligned (const void *p)
+{
+  const uint32_t raw = ((const union unaligned *) p)->u4;
+  const int swap = MACHINE_ENCODING != __BYTE_ORDER;
+  return swap ? bswap_32 (raw) : raw;
+}
+/* Signed variant: swap the unsigned view, else read the signed member.  */
+static inline int32_t
+read_4sbyte_unaligned (const void *p)
+{
+  const union unaligned *up = p;
+  return (MACHINE_ENCODING != __BYTE_ORDER
+	  ? (int32_t) bswap_32 (up->u4) : up->s4);
+}
+/* Load an unaligned 64-bit unsigned value stored in MACHINE_ENCODING.  */
+static inline uint64_t
+read_8ubyte_unaligned (const void *p)
+{
+  const union unaligned *up = p;
+  const uint64_t raw = up->u8;
+
+  return MACHINE_ENCODING != __BYTE_ORDER ? bswap_64 (raw) : raw;
+}
+/* Signed variant: swap the unsigned view, else read the signed member.  */
+static inline int64_t
+read_8sbyte_unaligned (const void *p)
+{
+  const union unaligned *up = p;
+  return (MACHINE_ENCODING != __BYTE_ORDER
+	  ? (int64_t) bswap_64 (up->u8) : up->s8);
+}
+
+#endif	/* allow unaligned */
+
+/* Read as above, then advance the lvalue Addr past the value read.  */
+#define read_2ubyte_unaligned_inc(Addr) \
+  ({ uint16_t t_ = read_2ubyte_unaligned (Addr);			      \
+     Addr = (__typeof (Addr)) (((uintptr_t) (Addr)) + 2);		      \
+     t_; })
+#define read_2sbyte_unaligned_inc(Addr) \
+  ({ int16_t t_ = read_2sbyte_unaligned (Addr);				      \
+     Addr = (__typeof (Addr)) (((uintptr_t) (Addr)) + 2);		      \
+     t_; })
+
+#define read_4ubyte_unaligned_inc(Addr) \
+  ({ uint32_t t_ = read_4ubyte_unaligned (Addr);			      \
+     Addr = (__typeof (Addr)) (((uintptr_t) (Addr)) + 4);		      \
+     t_; })
+#define read_4sbyte_unaligned_inc(Addr) \
+  ({ int32_t t_ = read_4sbyte_unaligned (Addr);				      \
+     Addr = (__typeof (Addr)) (((uintptr_t) (Addr)) + 4);		      \
+     t_; })
+
+#define read_8ubyte_unaligned_inc(Addr) \
+  ({ uint64_t t_ = read_8ubyte_unaligned (Addr);			      \
+     Addr = (__typeof (Addr)) (((uintptr_t) (Addr)) + 8);		      \
+     t_; })
+#define read_8sbyte_unaligned_inc(Addr) \
+  ({ int64_t t_ = read_8sbyte_unaligned (Addr);				      \
+     Addr = (__typeof (Addr)) (((uintptr_t) (Addr)) + 8);		      \
+     t_; })
+
+#endif	/* memory-access.h */
diff --git a/libcpu/x86_64_disasm.c b/libcpu/x86_64_disasm.c
new file mode 100644
index 0000000..947bc94
--- /dev/null
+++ b/libcpu/x86_64_disasm.c
@@ -0,0 +1,34 @@
+/* Disassembler for x86-64.
+   Copyright (C) 2007, 2008 Red Hat, Inc.
+   This file is part of elfutils.
+   Written by Ulrich Drepper <drepper@redhat.com>, 2007.
+
+   This file is free software; you can redistribute it and/or modify
+   it under the terms of either
+
+     * the GNU Lesser General Public License as published by the Free
+       Software Foundation; either version 3 of the License, or (at
+       your option) any later version
+
+   or
+
+     * the GNU General Public License as published by the Free
+       Software Foundation; either version 2 of the License, or (at
+       your option) any later version
+
+   or both in parallel, as here.
+
+   elfutils is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received copies of the GNU General Public License and
+   the GNU Lesser General Public License along with this program.  If
+   not, see <http://www.gnu.org/licenses/>.  */
+/* Compile i386_disasm.c with its entry point renamed to x86_64_disasm.  */
+#define i386_disasm x86_64_disasm
+#define DISFILE "x86_64_dis.h"	/* Presumably the generated table header.  */
+#define MNEFILE "x86_64.mnemonics"
+#define X86_64
+#include "i386_disasm.c"