Blame - third_party/gmp/mpn/x86/x86-defs.m4 - RealtimeRoboticsGroup/test

blob: 81309b22427849357b6d87417ddd8a80665cdf12 [file] [log] [blame]

Austin Schuh	dace2a6	2020-08-18 10:56:48 -0700	[diff] [blame]	1	divert(-1)
				2
				3	dnl m4 macros for x86 assembler.
				4
				5	dnl Copyright 1999-2003, 2007, 2010, 2012, 2014 Free Software Foundation, Inc.
				6
				7	dnl This file is part of the GNU MP Library.
				8	dnl
				9	dnl The GNU MP Library is free software; you can redistribute it and/or modify
				10	dnl it under the terms of either:
				11	dnl
				12	dnl * the GNU Lesser General Public License as published by the Free
				13	dnl Software Foundation; either version 3 of the License, or (at your
				14	dnl option) any later version.
				15	dnl
				16	dnl or
				17	dnl
				18	dnl * the GNU General Public License as published by the Free Software
				19	dnl Foundation; either version 2 of the License, or (at your option) any
				20	dnl later version.
				21	dnl
				22	dnl or both in parallel, as here.
				23	dnl
				24	dnl The GNU MP Library is distributed in the hope that it will be useful, but
				25	dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
				26	dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
				27	dnl for more details.
				28	dnl
				29	dnl You should have received copies of the GNU General Public License and the
				30	dnl GNU Lesser General Public License along with the GNU MP Library. If not,
				31	dnl see https://www.gnu.org/licenses/.
				32
				33
				34	dnl Notes:
				35	dnl
				36	dnl m4 isn't perfect for processing BSD style x86 assembler code, the main
				37	dnl problems are,
				38	dnl
				39	dnl 1. Doing define(foo,123) and then using foo in an addressing mode like
				40	dnl foo(%ebx) expands as a macro rather than a constant. This is worked
				41	dnl around by using deflit() from asm-defs.m4, instead of define().
				42	dnl
				43	dnl 2. Immediates in macro definitions need a space or `' to stop the $
				44	dnl looking like a macro parameter. For example,
				45	dnl
				46	dnl define(foo, `mov $ 123, %eax')
				47	dnl
				48	dnl This is only a problem in macro definitions, not in ordinary text,
				49	dnl and not in macro parameters like text passed to forloop() or ifdef().
				50
				51
				52	deflit(GMP_LIMB_BYTES, 4)
				53
				54
				55	dnl Libtool gives -DPIC -DDLL_EXPORT to indicate a cygwin or mingw DLL. We
				56	dnl undefine PIC since we don't need to be position independent in this
				57	dnl case and definitely don't want the ELF style _GLOBAL_OFFSET_TABLE_ etc.
				58
				59	ifdef(`DLL_EXPORT',`undefine(`PIC')')
				60
				61
				62	dnl Usage: CPUVEC_FUNCS_LIST
				63	dnl
				64	dnl A list of the functions from gmp-impl.h x86 struct cpuvec_t, in the
				65	dnl order they appear in that structure.
				66
				67	define(CPUVEC_FUNCS_LIST,
				68	``add_n',
				69	`addlsh1_n',
				70	`addlsh2_n',
				71	`addmul_1',
				72	`addmul_2',
				73	`bdiv_dbm1c',
				74	`cnd_add_n',
				75	`cnd_sub_n',
				76	`com',
				77	`copyd',
				78	`copyi',
				79	`divexact_1',
				80	`divrem_1',
				81	`gcd_11',
				82	`lshift',
				83	`lshiftc',
				84	`mod_1',
				85	`mod_1_1p',
				86	`mod_1_1p_cps',
				87	`mod_1s_2p',
				88	`mod_1s_2p_cps',
				89	`mod_1s_4p',
				90	`mod_1s_4p_cps',
				91	`mod_34lsub1',
				92	`modexact_1c_odd',
				93	`mul_1',
				94	`mul_basecase',
				95	`mullo_basecase',
				96	`preinv_divrem_1',
				97	`preinv_mod_1',
				98	`redc_1',
				99	`redc_2',
				100	`rshift',
				101	`sqr_basecase',
				102	`sub_n',
				103	`sublsh1_n',
				104	`submul_1'')
				105
				106
				107	dnl Called: PROLOGUE_cpu(GSYM_PREFIX`'foo)
				108	dnl
				109	dnl In the x86 code we use explicit TEXT and ALIGN() calls in the code,
				110	dnl since different alignments are wanted in various circumstances. So for
				111	dnl instance,
				112	dnl
				113	dnl TEXT
				114	dnl ALIGN(16)
				115	dnl PROLOGUE(mpn_add_n)
				116	dnl ...
				117	dnl EPILOGUE()
				118
				119	define(`PROLOGUE_cpu',
				120	m4_assert_numargs(1)
				121	m4_assert_defined(`WANT_PROFILING')
				122	`GLOBL $1
				123	TYPE($1,`function')
				124	COFF_TYPE($1)
				125	$1:
				126	ifelse(WANT_PROFILING,`prof', ` call_mcount')
				127	ifelse(WANT_PROFILING,`gprof', ` call_mcount')
				128	ifelse(WANT_PROFILING,`instrument',` call_instrument(enter)')
				129	')
				130
				131
				132	dnl Usage: COFF_TYPE(GSYM_PREFIX`'foo)
				133	dnl
				134	dnl Emit COFF style ".def ... .endef" type information for a function, when
				135	dnl supported. The argument should include any GSYM_PREFIX.
				136	dnl
				137	dnl See autoconf macro GMP_ASM_COFF_TYPE for HAVE_COFF_TYPE.
				138
				139	define(COFF_TYPE,
				140	m4_assert_numargs(1)
				141	m4_assert_defined(`HAVE_COFF_TYPE')
				142	`ifelse(HAVE_COFF_TYPE,yes,
				143	`.def $1
				144	.scl 2
				145	.type 32
				146	.endef')')
				147
				148
				149	dnl Usage: call_mcount
				150	dnl
				151	dnl For `gprof' style profiling, %ebp is set up as a frame pointer. None of
				152	dnl the assembler routines use %ebp this way, so it's done only for the
				153	dnl benefit of mcount. glibc sysdeps/i386/i386-mcount.S shows how mcount
				154	dnl gets the current function from (%esp) and the parent from 4(%ebp).
				155	dnl
				156	dnl For `prof' style profiling gcc generates mcount calls without setting
				157	dnl up %ebp, and the same is done here.
				158
				159	define(`call_mcount',
				160	m4_assert_numargs(-1)
				161	m4_assert_defined(`WANT_PROFILING')
				162	m4_assert_defined(`MCOUNT_PIC_REG')
				163	m4_assert_defined(`MCOUNT_NONPIC_REG')
				164	m4_assert_defined(`MCOUNT_PIC_CALL')
				165	m4_assert_defined(`MCOUNT_NONPIC_CALL')
				166	`ifelse(ifdef(`PIC',`MCOUNT_PIC_REG',`MCOUNT_NONPIC_REG'),,,
				167	` DATA
				168	ALIGN(4)
				169	L(mcount_data_`'mcount_counter):
				170	W32 0
				171	TEXT
				172	')dnl
				173	ifelse(WANT_PROFILING,`gprof',
				174	` pushl %ebp
				175	movl %esp, %ebp
				176	')dnl
				177	ifdef(`PIC',
				178	` pushl %ebx
				179	call_movl_eip_to_ebx
				180	L(mcount_here_`'mcount_counter):
				181	addl $_GLOBAL_OFFSET_TABLE_+[.-L(mcount_here_`'mcount_counter)], %ebx
				182	ifelse(MCOUNT_PIC_REG,,,
				183	` leal L(mcount_data_`'mcount_counter)@GOTOFF(%ebx), MCOUNT_PIC_REG')
				184	MCOUNT_PIC_CALL
				185	popl %ebx
				186	',`dnl non-PIC
				187	ifelse(MCOUNT_NONPIC_REG,,,
				188	` movl `$'L(mcount_data_`'mcount_counter), MCOUNT_NONPIC_REG
				189	')dnl
				190	MCOUNT_NONPIC_CALL
				191	')dnl
				192	ifelse(WANT_PROFILING,`gprof',
				193	` popl %ebp
				194	')
				195	define(`mcount_counter',incr(mcount_counter))
				196	')
				197
				198	define(mcount_counter,1)
				199
				200
				201	dnl Usage: call_instrument(enter|exit)
				202	dnl
				203	dnl Call __cyg_profile_func_enter or __cyg_profile_func_exit.
				204	dnl
				205	dnl For PIC, most routines don't require _GLOBAL_OFFSET_TABLE_ themselves
				206	dnl so %ebx is just set up for these calls. It's a bit wasteful to repeat
				207	dnl the setup for the exit call having done it earlier for the enter, but
				208	dnl there's nowhere very convenient to hold %ebx through the length of a
				209	dnl routine, in general.
				210	dnl
				211	dnl For PIC, because instrument_current_function will be within the current
				212	dnl object file we can get it just as an offset from %eip, there's no need
				213	dnl to use the GOT.
				214	dnl
				215	dnl No attempt is made to maintain the stack alignment gcc generates with
				216	dnl -mpreferred-stack-boundary. This wouldn't be hard, but it seems highly
				217	dnl unlikely the instrumenting functions would be doing anything that'd
				218	dnl benefit from alignment, in particular they're unlikely to be using
				219	dnl doubles or long doubles on the stack.
				220	dnl
				221	dnl The FRAME scheme is used to conveniently account for the register saves
				222	dnl before accessing the return address. Any previous value is saved and
				223	dnl restored, since plenty of code keeps a value across a "ret" in the
				224	dnl middle of a routine.
				225
				226	define(call_instrument,
				227	m4_assert_numargs(1)
				228	` pushdef(`FRAME',0)
				229	ifelse($1,exit,
				230	` pushl %eax FRAME_pushl() C return value
				231	')
				232	ifdef(`PIC',
				233	` pushl %ebx FRAME_pushl()
				234	call_movl_eip_to_ebx
				235	L(instrument_here_`'instrument_count):
				236	movl %ebx, %ecx
				237	addl $_GLOBAL_OFFSET_TABLE_+[.-L(instrument_here_`'instrument_count)], %ebx
				238	C use addl rather than leal to avoid old gas bugs, see mpn/x86/README
				239	addl $instrument_current_function-L(instrument_here_`'instrument_count), %ecx
				240	pushl m4_empty_if_zero(FRAME)(%esp) FRAME_pushl() C return addr
				241	pushl %ecx FRAME_pushl() C this function
				242	call GSYM_PREFIX`'__cyg_profile_func_$1@PLT
				243	addl $`'8, %esp
				244	popl %ebx
				245	',
				246	` C non-PIC
				247	pushl m4_empty_if_zero(FRAME)(%esp) FRAME_pushl() C return addr
				248	pushl $instrument_current_function FRAME_pushl() C this function
				249	call GSYM_PREFIX`'__cyg_profile_func_$1
				250	addl $`'8, %esp
				251	')
				252	ifelse($1,exit,
				253	` popl %eax C return value
				254	')
				255	popdef(`FRAME')
				256	define(`instrument_count',incr(instrument_count))
				257	')
				258	define(instrument_count,1)
				259
				260
				261	dnl Usage: instrument_current_function
				262	dnl
				263	dnl Return the current function name for instrumenting purposes. This is
				264	dnl PROLOGUE_current_function, but it sticks at the first such name seen.
				265	dnl
				266	dnl Sticking to the first name seen ensures that multiple-entrypoint
				267	dnl functions like mpn_add_nc and mpn_add_n will make enter and exit calls
				268	dnl giving the same function address.
				269
				270	define(instrument_current_function,
				271	m4_assert_numargs(-1)
				272	`ifdef(`instrument_current_function_seen',
				273	`instrument_current_function_seen',
				274	`define(`instrument_current_function_seen',PROLOGUE_current_function)dnl
				275	PROLOGUE_current_function')')
				276
				277
				278	dnl Usage: call_movl_eip_to_ebx
				279	dnl
				280	dnl Generate a call to L(movl_eip_to_ebx), and record the need for that
				281	dnl routine.
				282
				283	define(call_movl_eip_to_ebx,
				284	m4_assert_numargs(-1)
				285	`call L(movl_eip_to_ebx)
				286	define(`movl_eip_to_ebx_needed',1)')
				287
				288	dnl Usage: generate_movl_eip_to_ebx
				289	dnl
				290	dnl Emit a L(movl_eip_to_ebx) routine, if needed and not already generated.
				291
				292	define(generate_movl_eip_to_ebx,
				293	m4_assert_numargs(-1)
				294	`ifelse(movl_eip_to_ebx_needed,1,
				295	`ifelse(movl_eip_to_ebx_done,1,,
				296	`L(movl_eip_to_ebx):
				297	movl (%esp), %ebx
				298	ret_internal
				299	define(`movl_eip_to_ebx_done',1)
				300	')')')
				301
				302
				303	dnl Usage: ret
				304	dnl
				305	dnl Generate a "ret", but if doing instrumented profiling then call
				306	dnl __cyg_profile_func_exit first.
				307
				308	define(ret,
				309	m4_assert_numargs(-1)
				310	m4_assert_defined(`WANT_PROFILING')
				311	`ifelse(WANT_PROFILING,instrument,
				312	`ret_instrument',
				313	`ret_internal')
				314	generate_movl_eip_to_ebx
				315	')
				316
				317
				318	dnl Usage: ret_internal
				319	dnl
				320	dnl A plain "ret", without any __cyg_profile_func_exit call. This can be
				321	dnl used for a return which is internal to some function, such as when
				322	dnl getting %eip for PIC.
				323
				324	define(ret_internal,
				325	m4_assert_numargs(-1)
				326	``ret'')
				327
				328
				329	dnl Usage: ret_instrument
				330	dnl
				331	dnl Generate call to __cyg_profile_func_exit and then a ret. If a ret has
				332	dnl already been seen from this function then jump to that chunk of code,
				333	dnl rather than emitting it again.
				334
				335	define(ret_instrument,
				336	m4_assert_numargs(-1)
				337	`ifelse(m4_unquote(ret_instrument_seen_`'instrument_current_function),1,
				338	`jmp L(instrument_exit_`'instrument_current_function)',
				339	`define(ret_instrument_seen_`'instrument_current_function,1)
				340	L(instrument_exit_`'instrument_current_function):
				341	call_instrument(exit)
				342	ret_internal')')
				343
				344
				345	dnl Usage: _GLOBAL_OFFSET_TABLE_
				346	dnl
				347	dnl Expand to _GLOBAL_OFFSET_TABLE_ plus any necessary underscore prefix.
				348	dnl This lets us write plain _GLOBAL_OFFSET_TABLE_ in SVR4 style, but still
				349	dnl work with systems requiring an extra underscore such as OpenBSD.
				350	dnl
				351	dnl deflit is used so "leal _GLOBAL_OFFSET_TABLE_(%eax), %ebx" will come
				352	dnl out right, though that form doesn't work properly in gas (see
				353	dnl mpn/x86/README).
				354
				355	deflit(_GLOBAL_OFFSET_TABLE_,
				356	m4_assert_defined(`GOT_GSYM_PREFIX')
				357	`GOT_GSYM_PREFIX`_GLOBAL_OFFSET_TABLE_'')
				358
				359
				360	dnl --------------------------------------------------------------------------
				361	dnl Various x86 macros.
				362	dnl
				363
				364
				365	dnl Usage: ALIGN_OFFSET(bytes,offset)
				366	dnl
				367	dnl Align to `offset' away from a multiple of `bytes'.
				368	dnl
				369	dnl This is useful for testing, for example align to something very strict
				370	dnl and see what effect offsets from it have, "ALIGN_OFFSET(256,32)".
				371	dnl
				372	dnl Generally you wouldn't execute across the padding, but it's done with
				373	dnl nop's so it'll work.
				374
				375	define(ALIGN_OFFSET,
				376	m4_assert_numargs(2)
				377	`ALIGN($1)
				378	forloop(`i',1,$2,` nop
				379	')')
				380
				381
				382	dnl Usage: defframe(name,offset)
				383	dnl
				384	dnl Make a definition like the following with which to access a parameter
				385	dnl or variable on the stack.
				386	dnl
				387	dnl define(name,`FRAME+offset(%esp)')
				388	dnl
				389	dnl Actually m4_empty_if_zero(FRAME+offset) is used, which will save one
				390	dnl byte if FRAME+offset is zero, by putting (%esp) rather than 0(%esp).
				391	dnl Use define(`defframe_empty_if_zero_disabled',1) if for some reason the
				392	dnl zero offset is wanted.
				393	dnl
				394	dnl The new macro also gets a check that when it's used FRAME is actually
				395	dnl defined, and that the final %esp offset isn't negative, which would
				396	dnl mean an attempt to access something below the current %esp.
				397	dnl
				398	dnl deflit() is used rather than a plain define(), so the new macro won't
				399	dnl delete any following parenthesized expression. name(%edi) will come
				400	dnl out say as 16(%esp)(%edi). This isn't valid assembler and should
				401	dnl provoke an error, which is better than silently giving just 16(%esp).
				402	dnl
				403	dnl See README for more on the suggested way to access the stack frame.
				404
				405	define(defframe,
				406	m4_assert_numargs(2)
				407	`deflit(`$1',
				408	m4_assert_defined(`FRAME')
				409	`defframe_check_notbelow(`$1',$2,FRAME)dnl
				410	defframe_empty_if_zero(FRAME+($2))(%esp)')')
				411
				412	dnl Called: defframe_empty_if_zero(expression)
				413	define(defframe_empty_if_zero,
				414	m4_assert_numargs(1)
				415	`ifelse(defframe_empty_if_zero_disabled,1,
				416	`eval($1)',
				417	`m4_empty_if_zero($1)')')
				418
				419	dnl Called: defframe_check_notbelow(`name',offset,FRAME)
				420	define(defframe_check_notbelow,
				421	m4_assert_numargs(3)
				422	`ifelse(eval(($3)+($2)<0),1,
				423	`m4_error(`$1 at frame offset $2 used when FRAME is only $3 bytes
				424	')')')
				425
				426
				427	dnl Usage: FRAME_pushl()
				428	dnl FRAME_popl()
				429	dnl FRAME_addl_esp(n)
				430	dnl FRAME_subl_esp(n)
				431	dnl
				432	dnl Adjust FRAME appropriately for a pushl or popl, or for an addl or subl
				433	dnl %esp of n bytes.
				434	dnl
				435	dnl Using these macros is completely optional. Sometimes it makes more
				436	dnl sense to put explicit deflit(`FRAME',N) forms, especially when there are
				437	dnl jumps and different sequences of FRAME values need to be used in
				438	dnl different places.
				439
				440	define(FRAME_pushl,
				441	m4_assert_numargs(0)
				442	m4_assert_defined(`FRAME')
				443	`deflit(`FRAME',eval(FRAME+4))')
				444
				445	define(FRAME_popl,
				446	m4_assert_numargs(0)
				447	m4_assert_defined(`FRAME')
				448	`deflit(`FRAME',eval(FRAME-4))')
				449
				450	define(FRAME_addl_esp,
				451	m4_assert_numargs(1)
				452	m4_assert_defined(`FRAME')
				453	`deflit(`FRAME',eval(FRAME-($1)))')
				454
				455	define(FRAME_subl_esp,
				456	m4_assert_numargs(1)
				457	m4_assert_defined(`FRAME')
				458	`deflit(`FRAME',eval(FRAME+($1)))')
				459
				460
				461	dnl Usage: defframe_pushl(name)
				462	dnl
				463	dnl Do a combination FRAME_pushl() and a defframe() to name the stack
				464	dnl location just pushed. This should come after a pushl instruction.
				465	dnl Putting it on the same line works and avoids lengthening the code. For
				466	dnl example,
				467	dnl
				468	dnl pushl %eax defframe_pushl(VAR_COUNTER)
				469	dnl
				470	dnl Notice the defframe() is done with an unquoted -FRAME thus giving its
				471	dnl current value without tracking future changes.
				472
				473	define(defframe_pushl,
				474	m4_assert_numargs(1)
				475	`FRAME_pushl()defframe(`$1',-FRAME)')
				476
				477
				478	dnl --------------------------------------------------------------------------
				479	dnl Assembler instruction macros.
				480	dnl
				481
				482
				483	dnl Usage: emms_or_femms
				484	dnl femms_available_p
				485	dnl
				486	dnl femms_available_p expands to 1 or 0 according to whether the AMD 3DNow
				487	dnl femms instruction is available. emms_or_femms expands to femms if
				488	dnl available, or emms if not.
				489	dnl
				490	dnl emms_or_femms is meant for use in the K6 directory where plain K6
				491	dnl (without femms) and K6-2 and K6-3 (with a slightly faster femms) are
				492	dnl supported together.
				493	dnl
				494	dnl On K7 femms is no longer faster and is just an alias for emms, so plain
				495	dnl emms may as well be used.
				496
				497	define(femms_available_p,
				498	m4_assert_numargs(-1)
				499	`m4_ifdef_anyof_p(
				500	`HAVE_HOST_CPU_k62',
				501	`HAVE_HOST_CPU_k63',
				502	`HAVE_HOST_CPU_athlon')')
				503
				504	define(emms_or_femms,
				505	m4_assert_numargs(-1)
				506	`ifelse(femms_available_p,1,`femms',`emms')')
				507
				508
				509	dnl Usage: femms
				510	dnl
				511	dnl Gas 2.9.1 which comes with FreeBSD 3.4 doesn't support femms, so the
				512	dnl following is a replacement using .byte.
				513
				514	define(femms,
				515	m4_assert_numargs(-1)
				516	`.byte 15,14 C AMD 3DNow femms')
				517
				518
				519	dnl Usage: jadcl0(op)
				520	dnl
				521	dnl Generate a jnc/incl as a substitute for adcl $0,op. Note this isn't an
				522	dnl exact replacement, since it doesn't set the flags like adcl does.
				523	dnl
				524	dnl This finds a use in K6 mpn_addmul_1, mpn_submul_1, mpn_mul_basecase and
				525	dnl mpn_sqr_basecase because on K6 an adcl is slow, the branch
				526	dnl misprediction penalty is small, and the multiply algorithm used leads
				527	dnl to a carry bit on average only 1/4 of the time.
				528	dnl
				529	dnl jadcl0_disabled can be set to 1 to instead generate an ordinary adcl
				530	dnl for comparison. For example,
				531	dnl
				532	dnl define(`jadcl0_disabled',1)
				533	dnl
				534	dnl When using a register operand, eg. "jadcl0(%edx)", the jnc/incl code is
				535	dnl the same size as an adcl. This makes it possible to use the exact same
				536	dnl computed jump code when testing the relative speed of the two.
				537
				538	define(jadcl0,
				539	m4_assert_numargs(1)
				540	`ifelse(jadcl0_disabled,1,
				541	`adcl $`'0, $1',
				542	`jnc L(jadcl0_`'jadcl0_counter)
				543	incl $1
				544	L(jadcl0_`'jadcl0_counter):
				545	define(`jadcl0_counter',incr(jadcl0_counter))')')
				546
				547	define(jadcl0_counter,1)
				548
				549
				550	dnl Usage: x86_lookup(target, key,value, key,value, ...)
				551	dnl x86_lookup_p(target, key,value, key,value, ...)
				552	dnl
				553	dnl Look for `target' among the `key' parameters.
				554	dnl
				555	dnl x86_lookup expands to the corresponding `value', or generates an error
				556	dnl if `target' isn't found.
				557	dnl
				558	dnl x86_lookup_p expands to 1 if `target' is found, or 0 if not.
				559
				560	define(x86_lookup,
				561	m4_assert_numargs_range(1,999)
				562	`ifelse(eval($#<3),1,
				563	`m4_error(`unrecognised part of x86 instruction: $1
				564	')',
				565	`ifelse(`$1',`$2', `$3',
				566	`x86_lookup(`$1',shift(shift(shift($@))))')')')
				567
				568	define(x86_lookup_p,
				569	m4_assert_numargs_range(1,999)
				570	`ifelse(eval($#<3),1, `0',
				571	`ifelse(`$1',`$2', `1',
				572	`x86_lookup_p(`$1',shift(shift(shift($@))))')')')
				573
				574
				575	dnl Usage: x86_opcode_reg32(reg)
				576	dnl x86_opcode_reg32_p(reg)
				577	dnl
				578	dnl x86_opcode_reg32 expands to the standard 3 bit encoding for the given
				579	dnl 32-bit register, eg. `%ebp' turns into 5.
				580	dnl
				581	dnl x86_opcode_reg32_p expands to 1 if reg is a valid 32-bit register, or 0
				582	dnl if not.
				583
				584	define(x86_opcode_reg32,
				585	m4_assert_numargs(1)
				586	`x86_lookup(`$1',x86_opcode_reg32_list)')
				587
				588	define(x86_opcode_reg32_p,
				589	m4_assert_onearg()
				590	`x86_lookup_p(`$1',x86_opcode_reg32_list)')
				591
				592	define(x86_opcode_reg32_list,
				593	``%eax',0,
				594	`%ecx',1,
				595	`%edx',2,
				596	`%ebx',3,
				597	`%esp',4,
				598	`%ebp',5,
				599	`%esi',6,
				600	`%edi',7')
				601
				602
				603	dnl Usage: x86_opcode_tttn(cond)
				604	dnl
				605	dnl Expand to the 4-bit "tttn" field value for the given x86 branch
				606	dnl condition (like `c', `ae', etc), as listed in x86_opcode_tttn_list.
				607
				608	define(x86_opcode_tttn,
				609	m4_assert_numargs(1)
				610	`x86_lookup(`$1',x86_opcode_tttn_list)')
				611
				612	define(x86_opcode_tttn_list,
				613	``o', 0,
				614	`no', 1,
				615	`b', 2, `c', 2, `nae',2,
				616	`nb', 3, `nc', 3, `ae', 3,
				617	`e', 4, `z', 4,
				618	`ne', 5, `nz', 5,
				619	`be', 6, `na', 6,
				620	`nbe', 7, `a', 7,
				621	`s', 8,
				622	`ns', 9,
				623	`p', 10, `pe', 10, `npo',10,
				624	`np', 11, `npe',11, `po', 11,
				625	`l', 12, `nge',12,
				626	`nl', 13, `ge', 13,
				627	`le', 14, `ng', 14,
				628	`nle',15, `g', 15')
				629
				630
				631	dnl Usage: cmovCC(%srcreg,%dstreg)
				632	dnl
				633	dnl Emit a cmov instruction, using a .byte sequence, since various past
				634	dnl versions of gas don't know cmov. For example,
				635	dnl
				636	dnl cmovz( %eax, %ebx)
				637	dnl
				638	dnl The source operand can only be a plain register. (m4 code implementing
				639	dnl full memory addressing modes exists, believe it or not, but isn't
				640	dnl currently needed and isn't included.)
				641	dnl
				642	dnl All the standard conditions are defined. Attempting to use one without
				643	dnl the macro parentheses, such as just "cmovbe %eax, %ebx", will provoke
				644	dnl an error. This protects against writing something old gas wouldn't
				645	dnl understand.
				646
				647	dnl Called: define_cmov_many(cond,tttn,cond,tttn,...)
				648	define(define_cmov_many,
				649	`ifelse(m4_length(`$1'),0,,
				650	`define_cmov(`$1',`$2')define_cmov_many(shift(shift($@)))')')
				651
				652	dnl Called: define_cmov(cond,tttn)
				653	dnl Emit basically define(cmov<cond>,`cmov_internal(<cond>,<tttn>,`$1',`$2')')
				654	define(define_cmov,
				655	m4_assert_numargs(2)
				656	`define(`cmov$1',
				657	m4_instruction_wrapper()
				658	m4_assert_numargs(2)
				659	`cmov_internal'(m4_doublequote($`'0),``$2'',dnl
				660	m4_doublequote($`'1),m4_doublequote($`'2)))')
				661
				662	define_cmov_many(x86_opcode_tttn_list)
				663
				664	dnl Called: cmov_internal(name,tttn,src,dst)
				665	define(cmov_internal,
				666	m4_assert_numargs(4)
				667	`.byte dnl
				668	15, dnl
				669	eval(64+$2), dnl
				670	eval(192+8*x86_opcode_reg32(`$4')+x86_opcode_reg32(`$3')) dnl
				671	C `$1 $3, $4'')
				672
				673
				674	dnl Usage: x86_opcode_regmmx(reg)
				675	dnl
				676	dnl Validate the given mmx register, and return its number, 0 to 7.
				677
				678	define(x86_opcode_regmmx,
				679	m4_assert_numargs(1)
				680	`x86_lookup(`$1',x86_opcode_regmmx_list)')
				681
				682	define(x86_opcode_regmmx_list,
				683	``%mm0',0,
				684	`%mm1',1,
				685	`%mm2',2,
				686	`%mm3',3,
				687	`%mm4',4,
				688	`%mm5',5,
				689	`%mm6',6,
				690	`%mm7',7')
				691
				692
				693	dnl Usage: psadbw(%srcreg,%dstreg)
				694	dnl
				695	dnl Oldish versions of gas don't know psadbw, in particular gas 2.9.1 on
				696	dnl FreeBSD 3.3 and 3.4 doesn't, so instead emit .byte sequences. For
				697	dnl example,
				698	dnl
				699	dnl psadbw( %mm1, %mm2)
				700	dnl
				701	dnl Only register->register forms are supported here, which suffices for
				702	dnl the current code.
				703
				704	define(psadbw,
				705	m4_instruction_wrapper()
				706	m4_assert_numargs(2)
				707	`.byte 0x0f,0xf6,dnl
				708	eval(192+x86_opcode_regmmx(`$2')*8+x86_opcode_regmmx(`$1')) dnl
				709	C `psadbw $1, $2'')
				710
				711
				712	dnl Usage: Zdisp(inst,op,op,op)
				713	dnl
				714	dnl Generate explicit .byte sequences if necessary to force a byte-sized
				715	dnl zero displacement on an instruction. For example,
				716	dnl
				717	dnl Zdisp( movl, 0,(%esi), %eax)
				718	dnl
				719	dnl expands to
				720	dnl
				721	dnl .byte 139,70,0 C movl 0(%esi), %eax
				722	dnl
				723	dnl If the displacement given isn't 0, then normal assembler code is
				724	dnl generated. For example,
				725	dnl
				726	dnl Zdisp( movl, 4,(%esi), %eax)
				727	dnl
				728	dnl expands to
				729	dnl
				730	dnl movl 4(%esi), %eax
				731	dnl
				732	dnl This means a single Zdisp() form can be used with an expression for the
				733	dnl displacement, and .byte will be used only if necessary. The
				734	dnl displacement argument is eval()ed.
				735	dnl
				736	dnl Because there aren't many places a 0(reg) form is wanted, Zdisp is
				737	dnl implemented with a table of instructions and encodings. A new entry is
				738	dnl needed for any different operation or registers. The table is split
				739	dnl into separate macros to avoid overflowing BSD m4 macro expansion space.
				740
				741	define(Zdisp,
				742	m4_assert_numargs(4)
				743	`define(`Zdisp_found',0)dnl
				744	Zdisp_1($@)dnl
				745	Zdisp_2($@)dnl
				746	Zdisp_3($@)dnl
				747	Zdisp_4($@)dnl
				748	ifelse(Zdisp_found,0,
				749	`m4_error(`unrecognised instruction in Zdisp: $1 $2 $3 $4
				750	')')')
				751
				752	define(Zdisp_1,`dnl
				753	Zdisp_match( adcl, 0,(%edx), %eax, `0x13,0x42,0x00', $@)`'dnl
				754	Zdisp_match( adcl, 0,(%edx), %ebx, `0x13,0x5a,0x00', $@)`'dnl
				755	Zdisp_match( adcl, 0,(%edx), %esi, `0x13,0x72,0x00', $@)`'dnl
				756	Zdisp_match( addl, %ebx, 0,(%edi), `0x01,0x5f,0x00', $@)`'dnl
				757	Zdisp_match( addl, %ecx, 0,(%edi), `0x01,0x4f,0x00', $@)`'dnl
				758	Zdisp_match( addl, %esi, 0,(%edi), `0x01,0x77,0x00', $@)`'dnl
				759	Zdisp_match( sbbl, 0,(%edx), %eax, `0x1b,0x42,0x00', $@)`'dnl
				760	Zdisp_match( sbbl, 0,(%edx), %esi, `0x1b,0x72,0x00', $@)`'dnl
				761	Zdisp_match( subl, %ecx, 0,(%edi), `0x29,0x4f,0x00', $@)`'dnl
				762	Zdisp_match( movzbl, 0,(%eax,%ebp), %eax, `0x0f,0xb6,0x44,0x28,0x00', $@)`'dnl
				763	Zdisp_match( movzbl, 0,(%ecx,%edi), %edi, `0x0f,0xb6,0x7c,0x39,0x00', $@)`'dnl
				764	Zdisp_match( adc, 0,(%ebx,%ecx,4), %eax, `0x13,0x44,0x8b,0x00', $@)`'dnl
				765	Zdisp_match( sbb, 0,(%ebx,%ecx,4), %eax, `0x1b,0x44,0x8b,0x00', $@)`'dnl
				766	')
				767	define(Zdisp_2,`dnl
				768	Zdisp_match( movl, %eax, 0,(%edi), `0x89,0x47,0x00', $@)`'dnl
				769	Zdisp_match( movl, %ebx, 0,(%edi), `0x89,0x5f,0x00', $@)`'dnl
				770	Zdisp_match( movl, %esi, 0,(%edi), `0x89,0x77,0x00', $@)`'dnl
				771	Zdisp_match( movl, 0,(%ebx), %eax, `0x8b,0x43,0x00', $@)`'dnl
				772	Zdisp_match( movl, 0,(%ebx), %esi, `0x8b,0x73,0x00', $@)`'dnl
				773	Zdisp_match( movl, 0,(%edx), %eax, `0x8b,0x42,0x00', $@)`'dnl
				774	Zdisp_match( movl, 0,(%esi), %eax, `0x8b,0x46,0x00', $@)`'dnl
				775	Zdisp_match( movl, 0,(%esi,%ecx,4), %eax, `0x8b,0x44,0x8e,0x00', $@)`'dnl
				776	Zdisp_match( mov, 0,(%esi,%ecx,4), %eax, `0x8b,0x44,0x8e,0x00', $@)`'dnl
				777	Zdisp_match( mov, %eax, 0,(%edi,%ecx,4), `0x89,0x44,0x8f,0x00', $@)`'dnl
				778	')
				779	define(Zdisp_3,`dnl
				780	Zdisp_match( movq, 0,(%eax,%ecx,8), %mm0, `0x0f,0x6f,0x44,0xc8,0x00', $@)`'dnl
				781	Zdisp_match( movq, 0,(%ebx,%eax,4), %mm0, `0x0f,0x6f,0x44,0x83,0x00', $@)`'dnl
				782	Zdisp_match( movq, 0,(%ebx,%eax,4), %mm2, `0x0f,0x6f,0x54,0x83,0x00', $@)`'dnl
				783	Zdisp_match( movq, 0,(%ebx,%ecx,4), %mm0, `0x0f,0x6f,0x44,0x8b,0x00', $@)`'dnl
				784	Zdisp_match( movq, 0,(%edx), %mm0, `0x0f,0x6f,0x42,0x00', $@)`'dnl
				785	Zdisp_match( movq, 0,(%esi), %mm0, `0x0f,0x6f,0x46,0x00', $@)`'dnl
				786	Zdisp_match( movq, %mm0, 0,(%edi), `0x0f,0x7f,0x47,0x00', $@)`'dnl
				787	Zdisp_match( movq, %mm2, 0,(%ecx,%eax,4), `0x0f,0x7f,0x54,0x81,0x00', $@)`'dnl
				788	Zdisp_match( movq, %mm2, 0,(%edx,%eax,4), `0x0f,0x7f,0x54,0x82,0x00', $@)`'dnl
				789	Zdisp_match( movq, %mm0, 0,(%edx,%ecx,8), `0x0f,0x7f,0x44,0xca,0x00', $@)`'dnl
				790	')
				791	define(Zdisp_4,`dnl
				792	Zdisp_match( movd, 0,(%eax,%ecx,4), %mm0, `0x0f,0x6e,0x44,0x88,0x00', $@)`'dnl
				793	Zdisp_match( movd, 0,(%eax,%ecx,8), %mm1, `0x0f,0x6e,0x4c,0xc8,0x00', $@)`'dnl
				794	Zdisp_match( movd, 0,(%edx,%ecx,8), %mm0, `0x0f,0x6e,0x44,0xca,0x00', $@)`'dnl
				795	Zdisp_match( movd, %mm0, 0,(%eax,%ecx,4), `0x0f,0x7e,0x44,0x88,0x00', $@)`'dnl
				796	Zdisp_match( movd, %mm0, 0,(%ecx,%eax,4), `0x0f,0x7e,0x44,0x81,0x00', $@)`'dnl
				797	Zdisp_match( movd, %mm2, 0,(%ecx,%eax,4), `0x0f,0x7e,0x54,0x81,0x00', $@)`'dnl
				798	Zdisp_match( movd, %mm0, 0,(%edx,%ecx,4), `0x0f,0x7e,0x44,0x8a,0x00', $@)`'dnl
				799	')
				800
				801	define(Zdisp_match,
				802	m4_assert_numargs(9)
				803	`ifelse(eval(m4_stringequal_p(`$1',`$6')
				804	&& m4_stringequal_p(`$2',0)
				805	&& m4_stringequal_p(`$3',`$8')
				806	&& m4_stringequal_p(`$4',`$9')),1,
				807	`define(`Zdisp_found',1)dnl
				808	ifelse(eval(`$7'),0,
				809	` .byte $5 C `$1 0$3, $4'',
				810	` $6 $7$8, $9')',
				811
				812	`ifelse(eval(m4_stringequal_p(`$1',`$6')
				813	&& m4_stringequal_p(`$2',`$7')
				814	&& m4_stringequal_p(`$3',0)
				815	&& m4_stringequal_p(`$4',`$9')),1,
				816	`define(`Zdisp_found',1)dnl
				817	ifelse(eval(`$8'),0,
				818	` .byte $5 C `$1 $2, 0$4'',
				819	` $6 $7, $8$9')')')')
				820
				821
				822	dnl Usage: shldl(count,src,dst)
				823	dnl shrdl(count,src,dst)
				824	dnl shldw(count,src,dst)
				825	dnl shrdw(count,src,dst)
				826	dnl
				827	dnl Generate a double-shift instruction, possibly omitting a %cl count
				828	dnl parameter if that's what the assembler requires, as indicated by
				829	dnl WANT_SHLDL_CL in config.m4. For example,
				830	dnl
				831	dnl shldl( %cl, %eax, %ebx)
				832	dnl
				833	dnl turns into either
				834	dnl
				835	dnl shldl %cl, %eax, %ebx
				836	dnl or
				837	dnl shldl %eax, %ebx
				838	dnl
				839	dnl Immediate counts are always passed through unchanged. For example,
				840	dnl
				841	dnl shrdl( $2, %esi, %edi)
				842	dnl becomes
				843	dnl shrdl $2, %esi, %edi
				844	dnl
				845	dnl
				846	dnl If you forget to use the macro form "shldl( ...)" and instead write
				847	dnl just a plain "shldl ...", an error results. This ensures the necessary
				848	dnl variant treatment of %cl isn't accidentally bypassed.
				849
					dnl Usage: define_shd_instruction(name)
					dnl
					dnl Define `name' as a three-argument macro which passes its own name and
					dnl its arguments on to shd_instruction. The $`'0 ... $`'3 spelling stops
					dnl the arguments being substituted while define_shd_instruction itself is
					dnl expanding, so they end up as $0 ... $3 of the macro being defined.
					dnl m4_instruction_wrapper() is put at the start of the generated definition;
					dnl presumably it produces the error mentioned above for a plain "shldl ..."
					dnl (it is defined outside this section -- confirm there).
				850	define(define_shd_instruction,
				851	m4_assert_numargs(1)
				852	`define($1,
				853	m4_instruction_wrapper()
				854	m4_assert_numargs(3)
				855	`shd_instruction'(m4_doublequote($`'0),m4_doublequote($`'1),dnl
				856	m4_doublequote($`'2),m4_doublequote($`'3)))')
				857
				858	dnl Effectively: define(shldl,`shd_instruction(`$0',`$1',`$2',`$3')') etc
				859	define_shd_instruction(shldl)
				860	define_shd_instruction(shrdl)
				861	define_shd_instruction(shldw)
				862	define_shd_instruction(shrdw)
				863
				864	dnl Called: shd_instruction(op,count,src,dst)
					dnl
					dnl Expands to "op src, dst" when count is the string %cl and WANT_SHLDL_CL
					dnl is zero, and to "op count, src, dst" in every other case (so any count
					dnl other than %cl is always kept). WANT_SHLDL_CL is used in an eval and is
					dnl checked with m4_assert_defined.
				865	define(shd_instruction,
				866	m4_assert_numargs(4)
				867	m4_assert_defined(`WANT_SHLDL_CL')
				868	`ifelse(eval(m4_stringequal_p(`$2',`%cl') && !WANT_SHLDL_CL),1,
				869	``$1' `$3', `$4'',
				870	``$1' `$2', `$3', `$4'')')
				871
				872
				873	dnl Usage: ASSERT([cond][,instructions])
				874	dnl
				875	dnl If WANT_ASSERT is 1, output the given instructions and expect the given
				876	dnl flags condition to then be satisfied. For example,
				877	dnl
				878	dnl ASSERT(ne, `cmpl %eax, %ebx')
				879	dnl
				880	dnl The instructions can be omitted to just assert a flags condition with
				881	dnl no extra calculation. For example,
				882	dnl
				883	dnl ASSERT(nc)
				884	dnl
				885	dnl When `instructions' is not empty, a pushf/popf is added to preserve the
				886	dnl flags, but the instructions themselves must preserve any registers that
				887	dnl matter. FRAME is adjusted for the push and pop, so the instructions
				888	dnl given can use defframe() stack variables.
				889	dnl
				890	dnl The condition can be omitted to just output the given instructions when
				891	dnl assertion checking is wanted. In this case the pushf/popf is omitted.
				892	dnl For example,
				893	dnl
				894	dnl ASSERT(, `movl %eax, VAR_KEEPVAL')
					dnl
					dnl When a condition is given, the generated code jumps over a ud2 if the
					dnl condition holds, so a failed assertion executes ud2. The jump target is
					dnl L(ASSERT_okN) with N taken from ASSERT_counter, which is incremented after
					dnl each such expansion so the labels stay distinct. The FRAME adjustment is
					dnl only made when FRAME is defined. If WANT_ASSERT is not 1 the expansion is
					dnl empty.
				895
				896	define(ASSERT,
				897	m4_assert_numargs_range(1,2)
				898	m4_assert_defined(`WANT_ASSERT')
				899	`ifelse(WANT_ASSERT,1,
				900	`ifelse(`$1',,
				901	`$2',
				902	`C ASSERT
				903	ifelse(`$2',,,` pushf ifdef(`FRAME',`FRAME_pushl()')')
				904	$2
				905	j`$1' L(ASSERT_ok`'ASSERT_counter)
				906	ud2 C assertion failed
				907	L(ASSERT_ok`'ASSERT_counter):
				908	ifelse(`$2',,,` popf ifdef(`FRAME',`FRAME_popl()')')
				909	define(`ASSERT_counter',incr(ASSERT_counter))')')')
				910
					dnl Number used in the label of the next ASSERT with a condition.
				911	define(ASSERT_counter,1)
				912
				913
				914	dnl Usage: movl_text_address(label,register)
				915	dnl
				916	dnl Get the address of a text segment label, using either a plain movl or a
				917	dnl position-independent calculation, as necessary. For example,
				918	dnl
				919	dnl movl_text_address(L(foo),%eax)
				920	dnl
				921	dnl This macro is only meant for use in ASSERT()s or when testing, since
				922	dnl the PIC sequence it generates will want to be done with a ret balancing
				923	dnl the call on CPUs with return address branch prediction.
				924	dnl
				925	dnl The addl generated here has a backward reference to the label, and so
				926	dnl won't suffer from the two forwards references bug in old gas (described
				927	dnl in mpn/x86/README).
					dnl
					dnl The PIC sequence (used when PIC is defined) calls a local label placed
					dnl immediately after the call, pops the return address into the register and
					dnl adds the difference between the wanted label and that local label. The
					dnl local label is numbered from movl_text_address_counter, which is
					dnl incremented after each PIC expansion.
				928
				929	define(movl_text_address,
				930	m4_assert_numargs(2)
				931	`ifdef(`PIC',
				932	`call L(movl_text_address_`'movl_text_address_counter)
				933	L(movl_text_address_`'movl_text_address_counter):
				934	popl $2 C %eip
				935	addl `$'$1-L(movl_text_address_`'movl_text_address_counter), $2
				936	define(`movl_text_address_counter',incr(movl_text_address_counter))',
				937	`movl `$'$1, $2')')
				938
					dnl Number used in the local label of the next PIC movl_text_address.
				939	define(movl_text_address_counter,1)
				940
				941
				942	dnl Usage: notl_or_xorl_GMP_NUMB_MASK(reg)
				943	dnl
				944	dnl Expand to either "notl `reg'" or "xorl $GMP_NUMB_MASK,`reg'" as
				945	dnl appropriate for nails in use or not. The notl form is chosen when
					dnl GMP_NAIL_BITS is 0, the xorl form otherwise.
				946
				947	define(notl_or_xorl_GMP_NUMB_MASK,
				948	m4_assert_numargs(1)
				949	`ifelse(GMP_NAIL_BITS,0,
				950	`notl `$1'',
				951	`xorl $GMP_NUMB_MASK, `$1'')')
				952
				953
				954	dnl Usage: LEA(symbol,reg)
				955	dnl Usage: LEAL(symbol_local_to_file,reg)
					dnl
					dnl Put the address of symbol in reg. When PIC is not defined both macros
					dnl expand to a plain "movl $symbol, reg".
					dnl
					dnl When PIC is defined, reg is first loaded with the return address by
					dnl calling a small routine L(movl_eip_REG), then $_GLOBAL_OFFSET_TABLE_ is
					dnl added to it. LEA then loads the address from symbol@GOT(reg), whereas
					dnl LEAL forms it with leal symbol@GOTOFF(reg). REG is reg with its first
					dnl character dropped by substr (the "%" of a name like %ebx).
					dnl
					dnl The L(movl_eip_REG) routine is not output where LEA is used. Instead it
					dnl is appended to load_eip, and only if the text of reg does not already
					dnl occur in load_eip, so there is one routine per register. load_eip is what
					dnl ASM_END expands to.
				956
				957	define(`LEA',
				958	m4_assert_numargs(2)
				959	`ifdef(`PIC',`dnl
				960	ifelse(index(defn(`load_eip'), `$2'),-1,
				961	`m4append(`load_eip',
				962	` TEXT
				963	ALIGN(16)
				964	L(movl_eip_`'substr($2,1)):
				965	movl (%esp), $2
				966	ret_internal
				967	')')dnl
				968	call L(movl_eip_`'substr($2,1))
				969	addl $_GLOBAL_OFFSET_TABLE_, $2
				970	movl $1@GOT($2), $2
				971	',`
				972	movl `$'$1, $2
				973	')')
				974
					dnl LEAL(symbol_local_to_file,reg): same code as LEA except for the last
					dnl PIC instruction, which is "leal symbol@GOTOFF(reg), reg" instead of a
					dnl load from symbol@GOT(reg). The L(movl_eip_REG) routine appended to
					dnl load_eip has the same text as the one LEA appends.
				975	define(`LEAL',
				976	m4_assert_numargs(2)
				977	`ifdef(`PIC',`dnl
				978	ifelse(index(defn(`load_eip'), `$2'),-1,
				979	`m4append(`load_eip',
				980	` TEXT
				981	ALIGN(16)
				982	L(movl_eip_`'substr($2,1)):
				983	movl (%esp), $2
				984	ret_internal
				985	')')dnl
				986	call L(movl_eip_`'substr($2,1))
				987	addl $_GLOBAL_OFFSET_TABLE_, $2
				988	leal $1@GOTOFF($2), $2
				989	',`
				990	movl `$'$1, $2
				991	')')
				992
				993	dnl Usage: ASM_END
					dnl
					dnl Expands to load_eip, which starts out empty and collects the
					dnl L(movl_eip_REG) routines appended by LEA and LEAL when PIC is defined.
				994
				995	define(`ASM_END',`load_eip')
				996
				997	define(`load_eip', `') dnl updated in LEA/LEAL
				998
				999
					dnl Usage: DEF_OBJECT(name[,align])
					dnl
					dnl Expands to RODATA, then ALIGN(align), then the label "name:". When only
					dnl one argument is given the ALIGN argument is 2.
				1000	define(`DEF_OBJECT',
				1001	m4_assert_numargs_range(1,2)
				1002	`RODATA
				1003	ALIGN(ifelse($#,1,2,$2))
				1004	$1:
				1005	')
				1006
					dnl Usage: END_OBJECT(name)
					dnl
					dnl Expands to SIZE(name,.-name), i.e. the distance from the label to the
					dnl current location. Presumably used to close a DEF_OBJECT(name) -- confirm
					dnl against callers.
				1007	define(`END_OBJECT',
				1008	m4_assert_numargs(1)
				1009	` SIZE(`$1',.-`$1')')
				1010
				1011	dnl Usage: CALL(funcname)
				1012	dnl
				1013	dnl Expands to a call of GSYM_PREFIX`'funcname. When PIC is defined the
				1014	dnl target gets an "@PLT" suffix; otherwise the name is used as it stands.
				1015
				1016	define(`CALL',
				1017	m4_assert_numargs(1)
				1018	`call GSYM_PREFIX`'$1`'ifdef(`PIC',`@PLT')')
				1019
					dnl PIC_WITH_EBX ends up defined (with an empty value) exactly when PIC is
					dnl defined; any existing definition is removed otherwise.
				1020	ifdef(`PIC',
				1021	`define(`PIC_WITH_EBX')',
				1022	`undefine(`PIC_WITH_EBX')')
				1023
					dnl End of the definitions: leave the divert(-1) that opens this file.
				1024	divert`'dnl