From e98f84f068f8661264db0b182ec87544a6833268 Mon Sep 17 00:00:00 2001
From: Taylor R Campbell
Date: Fri, 30 Oct 2009 18:14:32 -0400
Subject: [PATCH] Implement microcode auxiliaries for AMD x86-64 compiled code.
---
 src/microcode/cmpauxmd/x86-64.m4       | 1114 ++++++++++++++++++++++++
 src/microcode/cmpintmd/x86-64-config.h |   32 +
 src/microcode/cmpintmd/x86-64.c        |  372 ++++
 src/microcode/cmpintmd/x86-64.h        |  351 ++++
 src/microcode/confshared.h             |    5 +-
 src/microcode/utabmd.c                 |    1 +
 6 files changed, 1874 insertions(+), 1 deletion(-)
 create mode 100644 src/microcode/cmpauxmd/x86-64.m4
 create mode 100644 src/microcode/cmpintmd/x86-64-config.h
 create mode 100644 src/microcode/cmpintmd/x86-64.c
 create mode 100644 src/microcode/cmpintmd/x86-64.h

diff --git a/src/microcode/cmpauxmd/x86-64.m4 b/src/microcode/cmpauxmd/x86-64.m4
new file mode 100644
index 000000000..fbf7417e3
--- /dev/null
+++ b/src/microcode/cmpauxmd/x86-64.m4
@@ -0,0 +1,1114 @@
+### -*-Midas-*-
+###
+### Copyright (C) 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993,
+### 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
+### 2004, 2005, 2006, 2007, 2008, 2009 Massachusetts Institute of
+### Technology
+###
+### This file is part of MIT/GNU Scheme.
+###
+### MIT/GNU Scheme is free software; you can redistribute it and/or
+### modify it under the terms of the GNU General Public License as
+### published by the Free Software Foundation; either version 2 of the
+### License, or (at your option) any later version.
+###
+### MIT/GNU Scheme is distributed in the hope that it will be useful,
+### but WITHOUT ANY WARRANTY; without even the implied warranty of
+### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+### General Public License for more details.
+###
+### You should have received a copy of the GNU General Public License
+### along with MIT/GNU Scheme; if not, write to the Free Software
+### Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+### 02110-1301, USA.
+
+### AMD x86-64 assembly language part of the compiled code interface.
+### See cmpint.txt, cmpint.c, cmpint-mc68k.h, and cmpgc.h for more
+### documentation.
+###
+### This m4 source expands into either Unix (gas) source or PC
+### (masm/wasm) source.
+###
+### NOTE:
+### Assumptions:
+###
+### 0) Segment registers and paging are set up for 64-bit "flat"
+### operation.
+###
+### 1) All registers and stack locations hold a C long object.
+###
+### 2) The C compiler divides registers into three groups:
+### - Linkage registers, used for procedure calls and global
+### references. On AMD64 Unix ABI: %rbp, %rsp.
+### - super temporaries, not preserved across procedure calls and
+### always usable. On AMD64 Unix ABI: everything but what is
+### listed below.
+### - preserved registers saved by the callee if they are written.
+### On AMD64 Unix ABI: %rbp, %rbx, %r12-%r15, MXCSR, x87 control
+### word.
+###
+### 3) Arguments, if passed on a stack, are popped by the caller
+### or by the procedure return instruction (as on the VAX). Thus
+### most "leaf" procedures need not worry about them. On x86-64,
+### arguments beyond the sixth are passed on the stack; the first
+### through sixth are passed in %rdi, %rsi, %rdx, %rcx, %r8, and
+### %r9. (Non-integer arguments are passed in other ways.)
+###
+### 4) There is a hardware or software maintained stack for
+### control. The procedure calling sequence may leave return
+### addresses in registers, but they must be saved somewhere for
+### nested calls and recursive procedures. On x86-64: saved on
+### the stack by the CALL instruction.
+
+### 5) C procedures return long values in a super temporary
+### register. Two-word structures are returned in super temporary
+### registers as well in the AMD64 Unix ABI: %rax and %rdx.
+###
+### 6) Floating point registers are not preserved by this
+### interface. The interface is only called from the Scheme
+### interpreter, which does not use floating point data. Thus
+### although the calling convention would require us to preserve
+### them, they contain garbage.
+###
+### Compiled Scheme code uses the following register convention:
+### - %rsp contains the Scheme stack pointer, not the C stack
+### pointer.
+### - %rsi contains a pointer to the Scheme interpreter's "register"
+### block. This block contains the compiler's copy of MemTop,
+### the interpreter's registers (val, env, exp, etc.),
+### temporary locations for compiled code, and the addresses
+### of various hooks defined in this file.
+### - %rdi contains the Scheme free pointer.
+### - %rbp contains the Scheme datum mask.
+### The dynamic link (when needed) is in Registers[REGBLOCK_COMPILER_TEMP].
+### Values are returned in Registers[REGBLOCK_VAL].
+### [TRC 20091025: Later, we ought to use machine registers for
+### these.]
+###
+### All other registers are available to the compiler. A
+### caller-saves convention is used, so the registers need not be
+### preserved by subprocedures.
+
+### The following m4 macros can be defined to change how this file is
+### expanded.
+###
+### DASM
+### If defined, expand to Intel assembly-language syntax, used by
+### Microsoft assembler (MASM) and Watcom assembler (WASM).
+### Otherwise, expand to AT&T syntax, used by GAS. [TRC 20091025:
+### The Intel syntax probably won't work here.]
+###
+### WIN32
+### If defined, expand to run under Win32; implies DASM.
+###
+### SUPPRESS_LEADING_UNDERSCORE
+### If defined, external symbol names are generated as written;
+### otherwise, they have an underscore prepended to them.
+### WCC386
+### Should be defined when using Watcom assembler.
+### WCC386R
+### Should be defined when using Watcom assembler and generating
+### code to use the Watcom register-based argument conventions.
+### TYPE_CODE_LENGTH
+### Normally defined to be 6. Don't change this unless you know
+### what you're doing.
+### DISABLE_387
+### If defined, do not generate 387 floating-point instructions.
+### VALGRIND_MODE
+### If defined, modify code to make it work with valgrind.
+
+#### Utility macros and definitions
+
+ifdef(`WIN32',
+ `define(IF_WIN32,`$1')',
+ `define(IF_WIN32,`')')
+
+ifdef(`DISABLE_387',
+ `define(IF387,`')',
+ `define(IF387,`$1')')
+
+ifdef(`DISABLE_387',
+ `define(IFN387,`$1')',
+ `define(IFN387,`')')
+
+IF_WIN32(`define(DASM,1)')
+ifdef(`WCC386R',`define(WCC386,1)')
+
+ifdef(`DASM',
+ `define(IFDASM,`$1')',
+ `define(IFDASM,`')')
+
+ifdef(`DASM',
+ `define(IFNDASM,`')',
+ `define(IFNDASM,`$1')')
+
+ifdef(`DASM',
+ `define(use_external_data,` extrn $1':dword)',
+ `define(use_external_data,`')')
+
+ifdef(`DASM',
+ `define(use_external_code,` extrn $1':near)',
+ `define(use_external_code,`')')
+
+ifdef(`DASM',
+ `define(export_label,` public $1')',
+ `define(export_label,` .globl $1')')
+
+IFNDASM(` .file "cmpaux-x86-64.s"')
+
+# GAS doesn't implement the pushad/popad/pushfd/popfd mnemonics, for
+# no obvious reason, so they are defined below in terms of the
+# mnemonics it does accept.
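A concrete reading of assumption 5 above: a structure of two word-sized
members is returned in %rax (first word) and %rdx (second word). A
minimal C sketch of the ABI rule, with hypothetical names; this is an
illustration, not code from cmpint.h:

    #include <stdint.h>

    /* Both eightbytes are INTEGER class, so the SysV AMD64 ABI
       returns the struct in registers: r.first in RAX, r.second
       in RDX.  */
    struct two_words { uint64_t first; uint64_t second; };

    static struct two_words
    example_utility (void)
    {
      struct two_words r = { 1, 2 };
      return (r);
    }

    int
    main (void)
    {
      struct two_words r = (example_utility ());
      return ((int) ((r.first + r.second) - 3));	/* exits 0 */
    }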
+IFNDASM(`define(pushad,`pusha')') +IFNDASM(`define(popad,`popa')') +IFNDASM(`define(pushfd,`pushf')') +IFNDASM(`define(popfd,`popf')') + +ifdef(`SUPPRESS_LEADING_UNDERSCORE', + `define(EVR,`$1')', + `define(EVR,`_$1')') + +# When using the Watcom C compiler with register-based calling +# conventions, source-code function names normally expand to `FOO_', +# but functions that are compiled with prefix keywords such as +# `__cdecl' or `__syscall' expand differently. References to the +# former type of name are marked with `EFR', while references to the +# latter are marked with `EPFR'. + +ifdef(`SUPPRESS_LEADING_UNDERSCORE', + `define(EPFR,`$1')', + `define(EPFR,`_$1')') + +ifdef(`WCC386R', + `define(EFR,`$1_')', + `define(EFR,`EPFR($1)')') + +define(hook_reference,`EFR(asm_$1)') + +define(define_data,`export_label(EVR($1))') + +define(define_code_label,` +export_label($1) +$1:') + +define(define_c_label,`define_code_label(EPFR($1))') +define(define_debugging_label,`define_code_label($1)') +define(define_hook_label,`define_code_label(hook_reference($1))') + +ifdef(`DASM', + `define(DECLARE_DATA_SEGMENT,` .data')', + `define(DECLARE_DATA_SEGMENT,` .data')') + +ifdef(`DASM', + `define(DECLARE_CODE_SEGMENT,` .code')', + `define(DECLARE_CODE_SEGMENT,` .text')') + +ifdef(`DASM', + `define(declare_alignment,` align $1')', + `define(declare_alignment,` .align $1')') + +ifdef(`DASM', + `define(allocate_word,`EVR($1) dw 0')', + `define(allocate_word,` .comm EVR($1),2')') + +ifdef(`DASM', + `define(allocate_longword,`EVR($1) dd 0')', + `define(allocate_longword,` .comm EVR($1),4')') + +ifdef(`DASM', + `define(allocate_quadword,`EVR($1) dq 0')', + `define(allocate_quadword,` .comm EVR($1),8')') + +ifdef(`DASM', + `define(allocate_space,`EVR($1) db $2 dup (0)')', + `define(allocate_space,`EVR($1): + .space $2')') + +ifdef(`DASM', + `define(HEX, `0$1H')', + `define(HEX, `0x$1')') + +ifdef(`DASM', + `define(OP,`$1$3')', + `define(OP,`$1$2')') + +ifdef(`DASM', + `define(TW,`$2,$1')', + `define(TW,`$1,$2')') + +ifdef(`DASM', + `define(ABS, `dword ptr $1')', + `define(ABS, `$1(%rip)')') + +ifdef(`DASM', + `define(IMM, `$1')', + `define(IMM, `$$1')') + +ifdef(`DASM', + `define(REG,`$1')', + `define(REG,`%$1')') + +ifdef(`DASM', + `define(ST,`st($1)')', + `define(ST,`%st ($1)')') + +ifdef(`DASM', + `define(IND,`dword ptr [$1]')', + `define(IND,`($1)')') + +ifdef(`DASM', + `define(BOF,`byte ptr $1[$2]')', + `define(BOF,`$1($2)')') + +ifdef(`DASM', + `define(WOF,`word ptr $1[$2]')', + `define(WOF,`$1($2)')') + +ifdef(`DASM', + `define(LOF,`dword ptr $1[$2]')', + `define(LOF,`$1($2)')') + +ifdef(`DASM', + `define(DOF,`qword ptr $1[$2]')', + `define(DOF,`$1($2)')') + +ifdef(`DASM', + `define(IDX,`dword ptr [$1] [$2]')', + `define(IDX,`($1,$2)')') + +ifdef(`DASM', + `define(SDX,`dword ptr $1[$2*$3]')', + `define(SDX,`$1(,$2,$3)')') + +ifdef(`DASM', + `define(IJMP,`$1')', + `define(IJMP,`*$1')') + +define(TC_LENGTH, ifdef(`TYPE_CODE_LENGTH', TYPE_CODE_LENGTH, 6)) +define(DATUM_LENGTH, eval(64 - TC_LENGTH)) +define(DATUM_SHIFT, eval(1 << DATUM_LENGTH)) +# This doesn't work because m4 is !@#&$*%^!#!$(%!&*@#^(. +#define(ADDRESS_MASK, eval(DATUM_SHIFT - 1)) +define(ADDRESS_MASK, HEX(3ffffffffffffff)) +# TAG doesn't work for the same reason. +#define(TAG, ($2 + ($1 * DATUM_SHIFT))) + +define(TC_FALSE,0) +define(TC_FLONUM,6) +define(TC_TRUE,8) +define(TC_FIXNUM,26) +define(TC_MANIFEST_NM_VECTOR,39) +define(TC_COMPILED_ENTRY,40) + +# TAG doesn't work due to m4 stupidity, so define these magic +# constants here. 
These are computed in terms of the parameters +# above. + +define(IMM_MANIFEST_NM_VECTOR_1, `IMM(HEX(9c00000000000001))') +define(IMM_TRUE, `IMM(HEX(2000000000000000))') +define(IMM_FALSE, `IMM(HEX(0000000000000000))') + +define(REGBLOCK_VAL,16) +define(REGBLOCK_COMPILER_TEMP,32) +define(REGBLOCK_LEXPR_ACTUALS,56) +define(REGBLOCK_PRIMITIVE,64) +define(REGBLOCK_CLOSURE_FREE,72) + +define(REGBLOCK_DLINK,REGBLOCK_COMPILER_TEMP) +define(REGBLOCK_UTILITY_ARG4,REGBLOCK_CLOSURE_FREE) + +define(COMPILER_REGBLOCK_N_FIXED,16) +define(COMPILER_REGBLOCK_N_HOOKS,80) +define(COMPILER_REGBLOCK_N_TEMPS,256) +define(COMPILER_FIXED_SIZE,1) +define(COMPILER_HOOK_SIZE,1) +define(COMPILER_TEMP_SIZE,2) +define(REGBLOCK_SIZE_IN_OBJECTS, + eval((COMPILER_REGBLOCK_N_FIXED*COMPILER_FIXED_SIZE) + +(COMPILER_REGBLOCK_N_HOOKS*COMPILER_HOOK_SIZE) + +(COMPILER_REGBLOCK_N_TEMPS*COMPILER_TEMP_SIZE))) + +# Define the floating-point processor control word. Always set +# round-to-even and double precision. Under Win32, mask all +# exceptions. Under unix and OS/2, mask only the inexact result +# exception. +ifdef(`WIN32', + `define(FP_CONTROL_WORD,HEX(023f))', + `define(FP_CONTROL_WORD,HEX(0220))') + +define(regs,REG(rsi)) +define(rfree,REG(rdi)) +define(rmask,REG(rbp)) + +IFDASM(`.586p +.model flat') + +DECLARE_DATA_SEGMENT() +declare_alignment(2) + +use_external_data(EVR(Free)) +use_external_data(EVR(stack_pointer)) +use_external_data(EVR(utility_table)) + +ifdef(`WIN32',` +use_external_data(EVR(RegistersPtr)) +',` +define_data(Regstart) +allocate_space(Regstart,256) + +define_data(Registers) +allocate_space(Registers,eval(REGBLOCK_SIZE_IN_OBJECTS*8)) +') + +define_data(i387_presence) +allocate_quadword(i387_presence) + +define_data(C_Stack_Pointer) +allocate_quadword(C_Stack_Pointer) + +define_data(C_Frame_Pointer) +allocate_quadword(C_Frame_Pointer) + +# [TRC 20091025: CPUID is always supported.] +# define_data(x86_64_cpuid_supported) +# allocate_quadword(x86_64_cpuid_supported) + +# [TRC 20091025: The cache synchronization bug does not occur in any +# x86-64 machines of which I am aware.] +# define_data(x86_64_cpuid_needed) +# allocate_quadword(x86_64_cpuid_needed) + +DECLARE_CODE_SEGMENT() +declare_alignment(2) + +# [TRC 20091025: We need to check for MMX/SSEn instructions too.] + +define_c_label(x86_64_interface_initialize) + OP(push,q) REG(rbp) + OP(mov,q) TW(REG(rsp),REG(rbp)) + OP(xor,q) TW(REG(rax),REG(rax)) # No 387 available + +# [TRC 20091025: The AMD64 reference manual suggests using the CPUID +# instruction to detect instruction subsets instead.] + +# Unfortunately, the `movl cr0,ecx' instruction is privileged. +# Use the deprecated `smsw cx' instruction instead. + +IF387(` +# OP(mov,q) TW(REG(cr0),REG(rcx)) # Test for 387 presence +ifdef(`VALGRIND_MODE',`',` + smsw REG(cx) + OP(mov,q) TW(IMM(HEX(12)),REG(rdx)) + OP(and,q) TW(REG(rdx),REG(rcx)) + OP(cmp,q) TW(REG(rdx),REG(rcx)) + jne x86_64_initialize_no_fp +') + OP(inc,q) REG(rax) # 387 available + OP(sub,q) TW(IMM(8),REG(rsp)) + fclex + fnstcw WOF(-2,REG(rbp)) + OP(and,w) TW(IMM(HEX(f0e0)),WOF(-2,REG(rbp))) + OP(or,w) TW(IMM(FP_CONTROL_WORD),WOF(-2,REG(rbp))) + fldcw WOF(-2,REG(rbp)) +x86_64_initialize_no_fp: +') + OP(mov,q) TW(REG(rax),ABS(EVR(i387_presence))) + +# [TRC 20091025: CPUID is always supported.] + +# Do a bunch of hair to determine if we need to do cache synchronization. +# See if the CPUID instruction is supported. 
+ +# OP(xor,q) TW(REG(rax),REG(rax)) +# OP(mov,q) TW(REG(rax),ABS(EVR(x86_64_cpuid_supported))) +# OP(mov,q) TW(REG(rax),ABS(EVR(x86_64_cpuid_needed))) + +# First test: can we toggle the AC bit? + +# pushfd +# OP(pop,l) REG(eax) +# OP(mov,l) TW(REG(eax),REG(ecx)) +# OP(xor,l) TW(IMM(HEX(00040000)),REG(eax)) +# OP(push,l) REG(eax) +# popfd +# pushfd +# OP(pop,l) REG(eax) + +# if AC bit can't be toggled, this is a 386 (and doesn't support CPUID). + +# OP(xor,l) TW(REG(ecx),REG(eax)) +# jz no_cpuid_instr +# OP(push,l) REG(ecx) # restore EFLAGS +# popfd + +# Now test to see if the ID bit can be toggled. + +# OP(mov,l) TW(REG(ecx),REG(eax)) +# OP(xor,l) TW(IMM(HEX(00200000)),REG(eax)) +# OP(push,l) REG(eax) +# popfd +# pushfd +# OP(pop,l) REG(eax) + +# if ID bit can't be toggled, this is a 486 that doesn't support CPUID. + +# OP(xor,l) TW(REG(ecx),REG(eax)) +# jz no_cpuid_instr +# OP(push,l) REG(ecx) # restore EFLAGS +# popfd + +# Now we know that cpuid is supported. + +# OP(mov,q) TW(IMM(HEX(00000001)),ABS(EVR(x86_64_cpuid_supported))) + +# Next, use the CPUID instruction to determine the processor type. + +# OP(push,l) REG(ebx) +# OP(xor,l) TW(REG(eax),REG(eax)) +# cpuid + +# Check that CPUID accepts argument 1. + +# OP(cmp,l) TW(IMM(HEX(00000001)),REG(eax)) +# jl done_setting_up_cpuid + +# Detect "GenuineIntel". + +# OP(cmp,l) TW(IMM(HEX(756e6547)),REG(ebx)) +# jne not_intel_cpu +# OP(cmp,l) TW(IMM(HEX(49656e69)),REG(edx)) +# jne not_intel_cpu +# OP(cmp,l) TW(IMM(HEX(6c65746e)),REG(ecx)) +# jne not_intel_cpu + +# For CPU families 4 (486), 5 (Pentium), or 6 (Pentium Pro, Pentium +# II, Pentium III), don't use CPUID synchronization. + +# OP(mov,l) TW(IMM(HEX(01)),REG(eax)) +# cpuid +# OP(shr,l) TW(IMM(HEX(08)),REG(eax)) +# OP(and,l) TW(IMM(HEX(0000000F)),REG(eax)) +# OP(cmp,l) TW(IMM(HEX(4)),REG(eax)) +# jl done_setting_up_cpuid +# OP(cmp,l) TW(IMM(HEX(6)),REG(eax)) +# jg done_setting_up_cpuid +# +# jmp cpuid_not_needed +# +#not_intel_cpu: + +# Detect "AuthenticAMD". + +# OP(cmp,l) TW(IMM(HEX(68747541)),REG(ebx)) +# jne not_amd_cpu +# OP(cmp,l) TW(IMM(HEX(69746e65)),REG(edx)) +# jne not_amd_cpu +# OP(cmp,l) TW(IMM(HEX(444d4163)),REG(ecx)) +# jne not_amd_cpu + +# Problem appears to exist only on Athlon models 1, 3, and 4. 
+ +# OP(mov,l) TW(IMM(HEX(01)),REG(eax)) +# cpuid + +# OP(mov,l) TW(REG(eax),REG(ecx)) +# OP(shr,l) TW(IMM(HEX(08)),REG(eax)) +# OP(and,l) TW(IMM(HEX(0000000F)),REG(eax)) +# OP(cmp,l) TW(IMM(HEX(6)),REG(eax)) # family 6 = Athlon +# jne done_setting_up_cpuid + +# OP(mov,l) TW(REG(ecx),REG(eax)) +# OP(shr,l) TW(IMM(HEX(04)),REG(eax)) +# OP(and,l) TW(IMM(HEX(0000000F)),REG(eax)) +# OP(cmp,l) TW(IMM(HEX(6)),REG(eax)) # model 6 and up OK +# jge done_setting_up_cpuid +# OP(cmp,l) TW(IMM(HEX(2)),REG(eax)) # model 2 OK +# je done_setting_up_cpuid + +# OP(mov,l) TW(IMM(HEX(00000001)),ABS(EVR(x86_64_cpuid_needed))) + +#not_amd_cpu: +#done_setting_up_cpuid: +# OP(pop,l) REG(ebx) +#no_cpuid_instr: + leave + ret + +define_c_label(C_to_interface) + OP(push,q) REG(rbp) # Link according + OP(mov,q) TW(REG(rsp),REG(rbp)) # to C's conventions + OP(push,q) REG(rbx) # Save callee-saves + OP(push,q) REG(r12) # registers + OP(push,q) REG(r13) + OP(push,q) REG(r14) + OP(push,q) REG(r15) + OP(mov,q) TW(REG(rdi),REG(rdx)) # Entry point + # Preserve frame ptr + OP(mov,q) TW(REG(rbp),ABS(EVR(C_Frame_Pointer))) + # Preserve stack ptr + OP(mov,q) TW(REG(rsp),ABS(EVR(C_Stack_Pointer))) + jmp EPFR(interface_to_scheme) + +define_hook_label(trampoline_to_interface) +define_debugging_label(trampoline_to_interface) + OP(pop,q) REG(rcx) # trampoline storage + jmp scheme_to_interface + +define_hook_label(scheme_to_interface_call) +define_debugging_label(scheme_to_interface_call) + OP(pop,q) REG(rcx) # arg1 = ret. add + OP(add,q) TW(IMM(4),REG(rcx)) # Skip format info +# jmp scheme_to_interface + +define_hook_label(scheme_to_interface) +define_debugging_label(scheme_to_interface) + +# These two moves must happen _before_ the ffree instructions below. +# Otherwise recovery from SIGFPE there will fail. + OP(mov,q) TW(REG(rsp),ABS(EVR(stack_pointer))) + OP(mov,q) TW(rfree,ABS(EVR(Free))) + +# [TRC 20091025: I think this should be excised.] + +IF387(` + OP(cmp,q) TW(IMM(0),ABS(EVR(i387_presence))) + je scheme_to_interface_proceed + ffree ST(0) # Free floating "regs" + ffree ST(1) + ffree ST(2) + ffree ST(3) + ffree ST(4) + ffree ST(5) + ffree ST(6) + ffree ST(7) +scheme_to_interface_proceed: +') + + OP(mov,q) TW(ABS(EVR(C_Stack_Pointer)),REG(rsp)) + OP(mov,q) TW(ABS(EVR(C_Frame_Pointer)),REG(rbp)) + + OP(sub,q) TW(IMM(16),REG(rsp)) # alloc struct return + + # Shuffle Scheme -> AMD64 calling conventions: + # struct pointer -> rdi + # rcx -> rsi + # rdx -> rdx + # rbx -> rcx + # arg4 -> r8 + # Parallel assignment problems: + # arg4 depends on rsi: do arg4->r8 first + # target depends on rcx (why?): use r11 as a temporary + # [TRC 20091025: Perhaps we can rearrange LIAR to generate + # arguments in the registers we want, to avoid this + # shuffling.] 
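In C terms, the shuffle performed by the instructions that follow
treats the utility as a function of this shape (a sketch under the
conventions described in the comment above; the struct and typedef
names are hypothetical, the real declarations live in cmpint.h):

    #include <stdint.h>

    /* Two-word result area, filled in through the pointer passed in
       RDI; scheme_to_interface_return pops both words afterward.  */
    struct utility_result { void * handler; uint64_t extra; };

    /* The first integer argument (RDI) is the result pointer; the
       four word-sized utility arguments then land in RSI, RDX, RCX,
       and R8 -- exactly the permutation performed below.  */
    typedef void utility_fn (struct utility_result *,
                             uint64_t, uint64_t, uint64_t, uint64_t);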
+ + OP(mov,q) TW(REG(rcx),REG(r11)) + + OP(xor,q) TW(REG(rcx),REG(rcx)) + OP(mov,b) TW(REG(al),REG(cl)) + OP(mov,q) TW(SDX(EVR(utility_table),REG(rcx),8),REG(rax)) + + OP(mov,q) TW(REG(rsp),REG(rdi)) + OP(mov,q) TW(DOF(REGBLOCK_UTILITY_ARG4(),regs),REG(r8)) + OP(mov,q) TW(REG(r11),REG(rsi)) + OP(mov,q) TW(REG(rbx),REG(rcx)) + + call IJMP(REG(rax)) + +define_debugging_label(scheme_to_interface_return) + OP(pop,q) REG(rax) # pop struct return + OP(pop,q) REG(rdx) + jmp IJMP(REG(rax)) # Invoke handler + +define_c_label(interface_to_scheme) +IF387(` + OP(cmp,q) TW(IMM(0),ABS(EVR(i387_presence))) + je interface_to_scheme_proceed + ffree ST(0) # Free floating "regs" + ffree ST(1) + ffree ST(2) + ffree ST(3) + ffree ST(4) + ffree ST(5) + ffree ST(6) + ffree ST(7) +interface_to_scheme_proceed: +') + # Register block = %rsi + # Scheme offset in NT +ifdef(`WIN32', +` OP(mov,q) TW(ABS(EVR(RegistersPtr)),regs)', +` OP(lea,q) TW(ABS(EVR(Registers)),regs)') + + OP(mov,q) TW(ABS(EVR(Free)),rfree) # Free pointer = %rdi + OP(mov,q) TW(DOF(REGBLOCK_VAL(),regs),REG(rax)) # Value/dynamic link + OP(mov,q) TW(IMM(ADDRESS_MASK),rmask) # = %rbp + + OP(mov,q) TW(ABS(EVR(stack_pointer)),REG(rsp)) + OP(mov,q) TW(REG(rax),REG(rcx)) # Preserve if used + OP(and,q) TW(rmask,REG(rcx)) # Restore potential dynamic link + OP(mov,q) TW(REG(rcx),DOF(REGBLOCK_DLINK(),regs)) + jmp IJMP(REG(rdx)) + +IF_WIN32(` +use_external_code(EFR(WinntExceptionTransferHook)) +define_code_label(EFR(callWinntExceptionTransferHook)) + call EFR(WinntExceptionTransferHook) + mov rdx,rax +') + +define_c_label(interface_to_C) +IF387(` + OP(cmp,q) TW(IMM(0),ABS(EVR(i387_presence))) + je interface_to_C_proceed + ffree ST(0) # Free floating "regs" + ffree ST(1) + ffree ST(2) + ffree ST(3) + ffree ST(4) + ffree ST(5) + ffree ST(6) + ffree ST(7) +interface_to_C_proceed:') + + OP(mov,q) TW(REG(rdx),REG(rax)) # Set up result + OP(pop,q) REG(r15) # Restore callee-saves + OP(pop,q) REG(r14) # registers + OP(pop,q) REG(r13) + OP(pop,q) REG(r12) + OP(pop,q) REG(rbx) + leave + ret + +# [TRC 20091025: The cache synchronization bug does not occur in any +# x86-64 machines of which I am aware.] + +#define_code_label(EFR(x86_64_cache_synchronize)) +# OP(push,q) REG(rbp) +# OP(mov,q) TW(REG(rsp),REG(rbp)) +# OP(push,q) REG(rbx) +# OP(xor,q) TW(REG(rax),REG(rax)) +# cpuid +# OP(pop,q) REG(rbx) +# leave +# ret + +### Run the CPUID instruction for serialization. + +#define_hook_label(serialize_cache) +# pushad +# OP(xor,q) TW(REG(rax),REG(rax)) +# cpuid +# popad +# ret + +### Stub to be used in place of above on machines that don't need it. + +#define_hook_label(dont_serialize_cache) +# ret + +### Assembly language hooks used to reduce code size. +### There is no time advantage to using these over using +### scheme_to_interface (or scheme_to_interface_call), but the +### code generated by the compiler can be somewhat smaller. 
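Compiled code reaches each hook below with a short CALL or JMP through
a fixed displacement from the register block in %rsi;
x86_64_reset_hook in cmpintmd/x86-64.c fills in the slots. A sketch of
the displacement arithmetic, using the layout constants from this
patch (hook_offset is a hypothetical helper):

    #include <stdio.h>

    #define SIZEOF_SCHEME_OBJECT 8
    #define COMPILER_REGBLOCK_N_FIXED 16
    #define COMPILER_HOOK_SIZE 1

    /* Hook n sits just past the fixed registers in the block.  */
    static long
    hook_offset (int n)
    {
      return ((COMPILER_REGBLOCK_N_FIXED + (n * COMPILER_HOOK_SIZE))
              * SIZEOF_SCHEME_OBJECT);
    }

    int
    main (void)
    {
      /* Hook 2 is trampoline_to_interface: (16 + 2) * 8 = 144, which
         matches RSI_TRAMPOLINE_TO_INTERFACE_OFFSET in x86-64.c.  */
      printf ("%ld\n", (hook_offset (2)));
      return (0);
    }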
+ +define(define_jump_indirection, +`define_hook_label($1) + OP(mov,b) TW(IMM(HEX($2)),REG(al)) + jmp scheme_to_interface') + +define(define_call_indirection, +`define_hook_label($1) + OP(mov,b) TW(IMM(HEX($2)),REG(al)) + jmp scheme_to_interface_call') + +define_call_indirection(interrupt_procedure,1a) +define_call_indirection(interrupt_continuation,1b) +define_jump_indirection(interrupt_closure,18) +define_jump_indirection(interrupt_continuation_2,3b) + +define_hook_label(interrupt_dlink) + OP(mov,q) TW(DOF(REGBLOCK_DLINK(),regs),REG(rdx)) + OP(mov,b) TW(IMM(HEX(19)),REG(al)) + jmp scheme_to_interface_call + +### +### This saves even more instructions than primitive_apply +### When the PC is not available. Instead of jumping here, +### a call instruction is used, and the longword offset to +### the primitive object follows the call instruction. +### This code loads the primitive object and merges with +### apply_primitive +### +### [TRC 20091025: But on the x86-64, we have RIP-relative +### addressing, so we don't need this.] +### + +#declare_alignment(2) +#define_hook_label(short_primitive_apply) +# OP(pop,l) REG(edx) # offset pointer +# OP(mov,l) TW(IND(REG(edx)),REG(ecx)) # offset +# # Primitive object +# OP(mov,l) TW(IDX(REG(edx),REG(ecx)),REG(ecx)) +# # Merge +# jmp hook_reference(primitive_apply) + +declare_alignment(2) +define_jump_indirection(primitive_apply,12) + +define_jump_indirection(primitive_lexpr_apply,13) +define_jump_indirection(error,15) +define_call_indirection(link,17) +define_call_indirection(assignment_trap,1d) +define_call_indirection(reference_trap,1f) +define_call_indirection(safe_reference_trap,20) +define_call_indirection(primitive_error,36) + +### Assembly language hooks used to increase speed. + +# define_jump_indirection(sc_apply,14) +# +# define(define_apply_fixed_size, +# `define_hook_label(sc_apply_size_$1) +# OP(mov,q) TW(IMM($1),REG(rdx)) +# OP(mov,b) TW(IMM(HEX(14)),REG(al)) +# jmp scheme_to_interface') + +declare_alignment(2) +define_hook_label(sc_apply) + OP(mov,q) TW(REG(rcx),REG(rax)) # Copy for type code + OP(mov,q) TW(REG(rcx),REG(rbx)) # Copy for address + OP(shr,q) TW(IMM(DATUM_LENGTH),REG(rax)) # Select type code + OP(and,q) TW(rmask,REG(rbx)) # Select datum + OP(cmp,b) TW(IMM(TC_COMPILED_ENTRY),REG(al)) + jne asm_sc_apply_generic + # [TRC 20091025: How big are the frame sizes?] + OP(movs,bq,x) TW(BOF(-4,REG(rbx)),REG(rax)) # Extract frame size + OP(cmp,q) TW(REG(rax),REG(rdx)) # Compare to nargs+1 + jne asm_sc_apply_generic + jmp IJMP(REG(rbx)) # Invoke + +define_debugging_label(asm_sc_apply_generic) + OP(mov,q) TW(IMM(HEX(14)),REG(rax)) + jmp scheme_to_interface + +define(define_apply_fixed_size, +`declare_alignment(2) +define_hook_label(sc_apply_size_$1) + OP(mov,q) TW(REG(rcx),REG(rax)) # Copy for type code + OP(mov,q) TW(REG(rcx),REG(rbx)) # Copy for address + OP(shr,q) TW(IMM(DATUM_LENGTH),REG(rax)) # Select type code + OP(and,q) TW(rmask,REG(rbx)) # Select datum + OP(cmp,b) TW(IMM(TC_COMPILED_ENTRY),REG(al)) + jne asm_sc_apply_generic_$1 + # [TRC 20091025: How big are the frame sizes?] 
+ OP(cmp,b) TW(IMM($1),BOF(-4,REG(rbx))) # Compare frame size + jne asm_sc_apply_generic_$1 # to nargs+1 + jmp IJMP(REG(rbx)) + +asm_sc_apply_generic_$1: + OP(mov,q) TW(IMM($1),REG(rdx)) + OP(mov,b) TW(IMM(HEX(14)),REG(al)) + jmp scheme_to_interface') + +define_apply_fixed_size(1) +define_apply_fixed_size(2) +define_apply_fixed_size(3) +define_apply_fixed_size(4) +define_apply_fixed_size(5) +define_apply_fixed_size(6) +define_apply_fixed_size(7) +define_apply_fixed_size(8) + +### The following code is used by generic arithmetic +### whether the fixnum case is open-coded in line or not. +### This takes care of fixnums and flonums so that the common +### numeric types are much faster than the rare ones +### (bignums, ratnums, recnums) + +IF387(`declare_alignment(2) +asm_generic_flonum_result: + # The MOV instruction can take a 64-bit immediate operand only + # if the target is a register, so we store the manifest in rax + # before moving it to memory. + OP(mov,q) TW(IMM_MANIFEST_NM_VECTOR_1,REG(rax)) + OP(mov,q) TW(REG(rax), IND(rfree)) + # The OR instruction cannot take a 64-bit immediate either, so + # we need to store the tag in rax first, shift it up, and then + # OR the datum into it. + OP(mov,q) TW(IMM(TC_FLONUM),REG(rax)) + OP(shl,q) TW(IMM(DATUM_LENGTH),REG(rax)) + OP(or,q) TW(rfree,REG(rax)) + OP(fstp,l) DOF(8,rfree) # fstpd + OP(and,q) TW(rmask,IND(REG(rsp))) + OP(add,q) TW(IMM(16),rfree) + OP(mov,q) TW(REG(rax),DOF(REGBLOCK_VAL(),regs)) + ret + +declare_alignment(2) +asm_generic_fixnum_result: + OP(and,q) TW(rmask,IND(REG(rsp))) + OP(or,b) TW(IMM(TC_FIXNUM),REG(al)) + OP(ror,q) TW(IMM(TC_LENGTH),REG(rax)) + OP(mov,q) TW(REG(rax),LOF(REGBLOCK_VAL(),regs)) + ret + +declare_alignment(2) +asm_generic_return_sharp_t: + OP(and,q) TW(rmask,IND(REG(rsp))) + OP(mov,q) TW(IMM_TRUE,REG(rax)) + OP(mov,q) TW(REG(rax),LOF(REGBLOCK_VAL(),regs)) + ret + +declare_alignment(2) +asm_generic_return_sharp_f: + OP(and,q) TW(rmask,IND(REG(rsp))) + OP(mov,q) TW(IMM_FALSE,REG(rax)) + OP(mov,q) TW(REG(rax),LOF(REGBLOCK_VAL(),regs)) + ret') + +define(define_unary_operation, +`declare_alignment(2) +define_hook_label(generic_$1) + OP(pop,q) REG(rdx) + OP(mov,q) TW(REG(rdx),REG(rax)) + OP(shr,q) TW(IMM(DATUM_LENGTH),REG(rax)) + OP(cmp,b) TW(IMM(TC_FIXNUM),REG(al)) + je asm_generic_$1_fix + OP(cmp,b) TW(IMM(TC_FLONUM),REG(al)) + jne asm_generic_$1_fail + OP(and,q) TW(rmask,REG(rdx)) + fld1 + OP($4,l) DOF(8,REG(rdx)) + jmp asm_generic_flonum_result + +asm_generic_$1_fix: + OP(mov,q) TW(REG(rdx),REG(rax)) + OP(shl,q) TW(IMM(TC_LENGTH),REG(rax)) + OP($3,q) TW(IMM(eval(1 << TC_LENGTH)),REG(rax)) + jno asm_generic_fixnum_result + +asm_generic_$1_fail: + OP(push,q) REG(rdx) + OP(mov,b) TW(IMM(HEX($2)),REG(al)) + jmp scheme_to_interface') + +define(define_unary_predicate, +`declare_alignment(2) +define_hook_label(generic_$1) + OP(pop,q) REG(rdx) + OP(mov,q) TW(REG(rdx),REG(rax)) + OP(shr,q) TW(IMM(DATUM_LENGTH),REG(rax)) + OP(cmp,b) TW(IMM(TC_FIXNUM),REG(al)) + je asm_generic_$1_fix + OP(cmp,b) TW(IMM(TC_FLONUM),REG(al)) + jne asm_generic_$1_fail + OP(and,q) TW(rmask,REG(rdx)) + OP(fld,l) DOF(8,REG(rdx)) + ftst + fstsw REG(ax) + fstp ST(0) + sahf + $4 asm_generic_return_sharp_t + jmp asm_generic_return_sharp_f + +asm_generic_$1_fix: + OP(mov,q) TW(REG(rdx),REG(rax)) + OP(shl,q) TW(IMM(TC_LENGTH),REG(rax)) + OP(cmp,q) TW(IMM(0),REG(rax)) + $3 asm_generic_return_sharp_t + jmp asm_generic_return_sharp_f + +asm_generic_$1_fail: + OP(push,q) REG(rdx) + OP(mov,b) TW(IMM(HEX($2)),REG(al)) + jmp scheme_to_interface') + 
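The hand-computed immediates near the top of this file and the OR/ROR
sequence in asm_generic_fixnum_result above both follow from the
6-bit-tag, 58-bit-datum object layout. A C sketch that checks the
arithmetic m4 could not do for us (MAKE_OBJECT is a hypothetical
stand-in for the microcode's object constructor):

    #include <stdint.h>
    #include <assert.h>

    #define TC_LENGTH 6
    #define DATUM_LENGTH (64 - TC_LENGTH)
    #define TC_TRUE 8
    #define TC_FIXNUM 26
    #define TC_MANIFEST_NM_VECTOR 39

    #define MAKE_OBJECT(tc, datum) \
      ((((uint64_t) (tc)) << DATUM_LENGTH) | ((uint64_t) (datum)))

    int
    main (void)
    {
      /* The magic constants defined by hand near the top of the file.  */
      assert ((MAKE_OBJECT (TC_MANIFEST_NM_VECTOR, 1))
              == 0x9c00000000000001ULL);
      assert ((MAKE_OBJECT (TC_TRUE, 0)) == 0x2000000000000000ULL);

      /* asm_generic_fixnum_result: the value arrives shifted left by
         TC_LENGTH, the tag is ORed into the low bits, and ROR by
         TC_LENGTH rotates the tag into the top six bits.  */
      uint64_t shifted = (((uint64_t) 42) << TC_LENGTH);
      uint64_t tagged = (shifted | TC_FIXNUM);
      uint64_t object = ((tagged >> TC_LENGTH) | (tagged << DATUM_LENGTH));
      assert (object == (MAKE_OBJECT (TC_FIXNUM, 42)));
      return (0);
    }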
+define(define_binary_operation, +`declare_alignment(2) +define_hook_label(generic_$1) + OP(pop,q) REG(rdx) + OP(pop,q) REG(rbx) + OP(mov,q) TW(REG(rdx),REG(rax)) + OP(mov,q) TW(REG(rbx),REG(rcx)) + OP(shr,q) TW(IMM(DATUM_LENGTH),REG(rax)) + OP(shr,q) TW(IMM(DATUM_LENGTH),REG(rcx)) + OP(cmp,b) TW(REG(al),REG(cl)) + jne asm_generic_$1_fail + OP(cmp,b) TW(IMM(TC_FIXNUM),REG(al)) + je asm_generic_$1_fix + OP(cmp,b) TW(IMM(TC_FLONUM),REG(al)) + je asm_generic_$1_flo + +asm_generic_$1_fail: + OP(push,q) REG(rbx) + OP(push,q) REG(rdx) + OP(mov,b) TW(IMM(HEX($2)),REG(al)) + jmp scheme_to_interface + +asm_generic_$1_fix: + OP(mov,q) TW(REG(rdx),REG(rax)) + OP(mov,q) TW(REG(rbx),REG(rcx)) + OP(shl,q) TW(IMM(TC_LENGTH),REG(rax)) + OP(shl,q) TW(IMM(TC_LENGTH),REG(rcx)) + $5 + OP($3,q) TW(REG(rcx),REG(rax)) # subq + jo asm_generic_$1_fail + jmp asm_generic_fixnum_result + +asm_generic_$1_flo: + OP(and,q) TW(rmask,REG(rdx)) + OP(and,q) TW(rmask,REG(rbx)) + OP(fld,l) DOF(8,REG(rdx)) # fldd + OP($4,l) DOF(8,REG(rbx)) # fsubl + jmp asm_generic_flonum_result') + +IF387(`declare_alignment(2) +define_hook_label(generic_divide) + OP(pop,q) REG(rdx) + OP(pop,q) REG(rbx) + OP(mov,q) TW(REG(rdx),REG(rax)) + OP(mov,q) TW(REG(rbx),REG(rcx)) + OP(shr,q) TW(IMM(DATUM_LENGTH),REG(rax)) + OP(shr,q) TW(IMM(DATUM_LENGTH),REG(rcx)) + OP(cmp,b) TW(IMM(TC_FIXNUM),REG(al)) + je asm_generic_divide_fix + OP(cmp,b) TW(IMM(TC_FLONUM),REG(al)) + jne asm_generic_divide_fail + OP(cmp,b) TW(IMM(TC_FLONUM),REG(cl)) + je asm_generic_divide_flo_flo + OP(cmp,b) TW(IMM(TC_FIXNUM),REG(cl)) + jne asm_generic_divide_fail + OP(mov,q) TW(REG(rbx),REG(rcx)) + OP(shl,q) TW(IMM(TC_LENGTH),REG(rcx)) + je asm_generic_divide_fail + OP(and,q) TW(rmask,REG(rdx)) + OP(sar,q) TW(IMM(TC_LENGTH),REG(rcx)) + OP(fld,l) DOF(8,REG(rdx)) # fldd + OP(mov,q) TW(REG(rcx),IND(rfree)) + OP(fidiv,l) IND(rfree) + jmp asm_generic_flonum_result + +asm_generic_divide_fix: + OP(cmp,b) TW(IMM(TC_FLONUM),REG(cl)) + jne asm_generic_divide_fail + OP(mov,q) TW(REG(rdx),REG(rcx)) + OP(shl,q) TW(IMM(TC_LENGTH),REG(rcx)) + je asm_generic_divide_fail + OP(and,q) TW(rmask,REG(rbx)) + OP(sar,q) TW(IMM(TC_LENGTH),REG(rcx)) + OP(fld,l) DOF(8,REG(rbx)) # fldd + OP(mov,q) TW(REG(rcx),IND(rfree)) + OP(fidivr,l) IND(rfree) + jmp asm_generic_flonum_result + +asm_generic_divide_flo_flo: + OP(mov,q) TW(REG(rbx),REG(rcx)) + OP(and,q) TW(rmask,REG(rcx)) + OP(fld,l) DOF(8,REG(rcx)) # fldd + ftst + fstsw REG(ax) + sahf + je asm_generic_divide_by_zero + OP(and,q) TW(rmask,REG(rdx)) + OP(fdivr,l) DOF(8,REG(rdx)) + jmp asm_generic_flonum_result + +asm_generic_divide_by_zero: + fstp ST(0) # Pop second arg + +asm_generic_divide_fail: + OP(push,q) REG(rbx) + OP(push,q) REG(rdx) + OP(mov,b) TW(IMM(HEX(23)),REG(al)) + jmp scheme_to_interface') + +define(define_binary_predicate, +`declare_alignment(2) +define_hook_label(generic_$1) + OP(pop,q) REG(rdx) + OP(pop,q) REG(rbx) + OP(mov,q) TW(REG(rdx),REG(rax)) + OP(mov,q) TW(REG(rbx),REG(rcx)) + OP(shr,q) TW(IMM(DATUM_LENGTH),REG(rax)) + OP(shr,q) TW(IMM(DATUM_LENGTH),REG(rcx)) + OP(cmp,b) TW(REG(al),REG(cl)) + jne asm_generic_$1_fail + OP(cmp,b) TW(IMM(TC_FIXNUM),REG(al)) + jne asm_generic_$1_fail + + OP(shl,q) TW(IMM(TC_LENGTH),REG(rdx)) + OP(shl,q) TW(IMM(TC_LENGTH),REG(rbx)) + OP(cmp,q) TW(REG(rbx),REG(rdx)) + $3 asm_generic_return_sharp_t + jmp asm_generic_return_sharp_f + +asm_generic_$1_fail: + OP(push,q) REG(rbx) + OP(push,q) REG(rdx) + OP(mov,b) TW(IMM(HEX($2)),REG(al)) + jmp scheme_to_interface') + 
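In C terms, each binary hook generated above dispatches like this:
equal type tags only, fixnums inline with a hardware overflow check
(the JO instruction), everything else punted to the C utility. A
sketch of the fixnum fast path only (the flonum path unboxes and uses
the 387); generic_add_fast_path is hypothetical, and the overflow test
uses a GCC/Clang builtin:

    #include <stdint.h>

    #define TC_LENGTH 6
    #define DATUM_LENGTH (64 - TC_LENGTH)
    #define TC_FIXNUM 26

    /* Returns 1 and stores the tagged sum on the fast path; returns 0
       where the assembly would jump to asm_generic_add_fail.  */
    static int
    generic_add_fast_path (uint64_t x, uint64_t y, uint64_t * result)
    {
      if (((x >> DATUM_LENGTH) != TC_FIXNUM)
          || ((y >> DATUM_LENGTH) != TC_FIXNUM))
        return (0);
      /* SHL strips the tags; the add overflows exactly when JO fires.  */
      int64_t a = ((int64_t) (x << TC_LENGTH));
      int64_t b = ((int64_t) (y << TC_LENGTH));
      int64_t sum;
      if (__builtin_add_overflow (a, b, (&sum)))
        return (0);
      uint64_t tagged = (((uint64_t) sum) | TC_FIXNUM);
      (*result) = ((tagged >> TC_LENGTH) | (tagged << DATUM_LENGTH));
      return (1);
    }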
+IF387(`define_unary_operation(decrement,22,sub,fsubr) +define_unary_operation(increment,26,add,fadd) + +define_unary_predicate(negative,2a,jl,jb) +define_unary_predicate(positive,2c,jg,ja) +define_unary_predicate(zero,2d,je,je) + +# define_binary_operation(name,index,fix*fix,flo*flo, fixup) +# define_binary_operation( $1, $2, $3, $4, $5) +# The fixup is optional; only multiplication needs it to shift the +# result back down by six bits. +define_binary_operation(add,2b,add,fadd) +define_binary_operation(subtract,28,sub,fsub) +define_binary_operation(multiply,29,imul,fmul, + `OP(shr,q) TW(IMM(6),REG(rax))') +# Divide needs to check for 0, so we cant really use the following +# define_binary_operation(divide,23,NONE,fdiv) + +# define_binary_predicate(name,index,fix*fix,flo*flo) +define_binary_predicate(equal,24,je,je) +define_binary_predicate(greater,25,jg,ja) +define_binary_predicate(less,27,jl,jb)') + +IFN387(`define_jump_indirection(generic_decrement,22) +define_jump_indirection(generic_divide,23) +define_jump_indirection(generic_equal,24) +define_jump_indirection(generic_greater,25) +define_jump_indirection(generic_increment,26) +define_jump_indirection(generic_less,27) +define_jump_indirection(generic_subtract,28) +define_jump_indirection(generic_multiply,29) +define_jump_indirection(generic_negative,2a) +define_jump_indirection(generic_add,2b) +define_jump_indirection(generic_positive,2c) +define_jump_indirection(generic_zero,2d)') + +# These don't currently differ according to whether there +# is a 387 or not. + +define_jump_indirection(generic_quotient,37) +define_jump_indirection(generic_remainder,38) +define_jump_indirection(generic_modulo,39) + +define_jump_indirection(nofp_decrement,22) +define_jump_indirection(nofp_divide,23) +define_jump_indirection(nofp_equal,24) +define_jump_indirection(nofp_greater,25) +define_jump_indirection(nofp_increment,26) +define_jump_indirection(nofp_less,27) +define_jump_indirection(nofp_subtract,28) +define_jump_indirection(nofp_multiply,29) +define_jump_indirection(nofp_negative,2a) +define_jump_indirection(nofp_add,2b) +define_jump_indirection(nofp_positive,2c) +define_jump_indirection(nofp_zero,2d) +define_jump_indirection(nofp_quotient,37) +define_jump_indirection(nofp_remainder,38) +define_jump_indirection(nofp_modulo,39) + +IFDASM(`end') + +### Edwin Variables: +### comment-column: 56 +### comment-start: "#" +### End: diff --git a/src/microcode/cmpintmd/x86-64-config.h b/src/microcode/cmpintmd/x86-64-config.h new file mode 100644 index 000000000..7678d4646 --- /dev/null +++ b/src/microcode/cmpintmd/x86-64-config.h @@ -0,0 +1,32 @@ +/* -*-C-*- + +Copyright (C) 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, + 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, + 2006, 2007, 2008, 2009 Massachusetts Institute of Technology + +This file is part of MIT/GNU Scheme. + +MIT/GNU Scheme is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at +your option) any later version. + +MIT/GNU Scheme is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with MIT/GNU Scheme; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, +USA. + +*/ + +#ifndef SCM_CMPINTMD_CONFIG_H_INCLUDED +#define SCM_CMPINTMD_CONFIG_H_INCLUDED 1 + +#define COMPILER_PROCESSOR_TYPE COMPILER_X86_64_TYPE +#define CC_IS_NATIVE 1 + +#endif /* !SCM_CMPINTMD_CONFIG_H_INCLUDED */ diff --git a/src/microcode/cmpintmd/x86-64.c b/src/microcode/cmpintmd/x86-64.c new file mode 100644 index 000000000..a7cc7f0d4 --- /dev/null +++ b/src/microcode/cmpintmd/x86-64.c @@ -0,0 +1,372 @@ +/* -*-C-*- + +Copyright (C) 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, + 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, + 2006, 2007, 2008, 2009 Massachusetts Institute of Technology + +This file is part of MIT/GNU Scheme. + +MIT/GNU Scheme is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at +your option) any later version. + +MIT/GNU Scheme is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with MIT/GNU Scheme; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, +USA. + +*/ + +/* Compiled code interface for AMD x86-64. */ + +#include "cmpint.h" +#include "extern.h" +#include "outf.h" +#include "errors.h" + +extern void * tospace_to_newspace (void *); + +bool +read_cc_entry_type (cc_entry_type_t * cet, insn_t * address) +{ + return (decode_old_style_format_word (cet, (((uint16_t *) address) [-2]))); +} + +bool +write_cc_entry_type (cc_entry_type_t * cet, insn_t * address) +{ + return (encode_old_style_format_word (cet, ((uint16_t *) address) - 2)); +} + +bool +read_cc_entry_offset (cc_entry_offset_t * ceo, insn_t * address) +{ + uint16_t n = (((uint16_t *) address) [-1]); + (ceo->offset) = (n >> 1); + (ceo->continued_p) = ((n & 1) != 0); + return (false); +} + +bool +write_cc_entry_offset (cc_entry_offset_t * ceo, insn_t * address) +{ + if (! ((ceo->offset) < 0x4000)) + return (true); + (((uint16_t *) address) [-1]) + = (((ceo->offset) << 1) | ((ceo->continued_p) ? 1 : 0)); + return (false); +} + +/* Compiled closures */ + +/* MOV RAX,imm64 has two bytes of opcode cruft before the imm64. */ + +insn_t * +read_compiled_closure_target (insn_t * start) +{ + return (* ((insn_t **) (start + CC_ENTRY_HEADER_SIZE + 2))); +} + +void +write_compiled_closure_target (insn_t * target, insn_t * start) +{ + (* ((insn_t **) (start + CC_ENTRY_HEADER_SIZE + 2))) = target; +} + +unsigned long +compiled_closure_count (SCHEME_OBJECT * block) +{ + /* `block' is a pointer to the first object after the manifest. The + first object following it is the entry count. */ + return ((unsigned long) (* ((uint32_t *) block))); +} + +insn_t * +compiled_closure_start (SCHEME_OBJECT * block) +{ + /* Skip the 32-bit entry count. 
*/ + return (((insn_t *) block) + 4); +} + +insn_t * +compiled_closure_entry (insn_t * start) +{ + return (start + CC_ENTRY_HEADER_SIZE); +} + +insn_t * +compiled_closure_next (insn_t * start) +{ + return (start + CC_ENTRY_HEADER_SIZE + 12); +} + +SCHEME_OBJECT * +skip_compiled_closure_padding (insn_t * start) +{ + /* The padding is the same size as the entry header (format word). */ + return ((SCHEME_OBJECT *) (start + CC_ENTRY_HEADER_SIZE)); +} + +SCHEME_OBJECT +compiled_closure_entry_to_target (insn_t * entry) +{ + /* `entry' points to the start of the MOV RAX,imm64 instruction, + which has two bytes of opcode cruft before the imm64. */ + return (MAKE_CC_ENTRY (* ((long *) (entry + 2)))); +} + +/* Execution caches (UUO links) + + An execution cache is a region of memory that lives in the + constants section of a compiled-code block. It is an indirection + for calling external procedures that allows the linker to control + the calling process without having to find and change all the + places in the compiled code that refer to it. + + Prior to linking, the execution cache has two pieces of + information: (1) the name of the procedure being called (a symbol), + and (2) the number of arguments that will be passed to the + procedure. `saddr' points to the arity at the beginning of the + execution cache. */ + +SCHEME_OBJECT +read_uuo_symbol (SCHEME_OBJECT * saddr) +{ + return (saddr[1]); +} + +unsigned int +read_uuo_frame_size (SCHEME_OBJECT * saddr) +{ + return (* ((uint16_t *) saddr)); +} + +insn_t * +read_uuo_target (SCHEME_OBJECT * saddr) +{ + insn_t * mov_addr = ((insn_t *) (saddr + 1)); + return (* ((insn_t **) (mov_addr + 2))); +} + +insn_t * +read_uuo_target_no_reloc (SCHEME_OBJECT * saddr) +{ + return (read_uuo_target (saddr)); +} + +void +write_uuo_target (insn_t * target, SCHEME_OBJECT * saddr) +{ + /* Skip the arity. 
*/ + insn_t * addr = ((insn_t *) (saddr + 1)); + (*addr++) = 0x48; /* REX.W (64-bit operand size prefix) */ + (*addr++) = 0xB8; /* MOV RAX,imm64 */ + (* ((insn_t **) addr)) = target; + addr += 8; + (*addr++) = 0xFF; /* JMP reg/mem64 */ + (*addr++) = 0xE0; /* ModR/M for RAX */ +} + +#define BYTES_PER_TRAMPOLINE_ENTRY_PADDING 4 +#define OBJECTS_PER_TRAMPOLINE_ENTRY 2 + +#define RSI_TRAMPOLINE_TO_INTERFACE_OFFSET \ + ((COMPILER_REGBLOCK_N_FIXED + (2 * COMPILER_HOOK_SIZE)) \ + * SIZEOF_SCHEME_OBJECT) + +unsigned long +trampoline_entry_size (unsigned long n_entries) +{ + return (n_entries * OBJECTS_PER_TRAMPOLINE_ENTRY); +} + +insn_t * +trampoline_entry_addr (SCHEME_OBJECT * block, unsigned long index) +{ + return (((insn_t *) (block + 2 + (index * OBJECTS_PER_TRAMPOLINE_ENTRY))) + + BYTES_PER_TRAMPOLINE_ENTRY_PADDING + CC_ENTRY_HEADER_SIZE); +} + +bool +store_trampoline_insns (insn_t * entry, byte_t code) +{ + (*entry++) = 0xB0; /* MOV AL,code */ + (*entry++) = code; + (*entry++) = 0xFF; /* CALL /2 disp32(ESI) */ + (*entry++) = 0x96; + (* ((uint32_t *) entry)) = RSI_TRAMPOLINE_TO_INTERFACE_OFFSET; + X86_64_CACHE_SYNCHRONIZE (); + return (false); +} + +#ifdef _MACH_UNIX +# include +# define VM_PROT_SCHEME (VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE) +#endif + +#define SETUP_REGISTER(hook) do \ +{ \ + (* ((unsigned long *) (rsi_value + offset))) \ + = ((unsigned long) (hook)); \ + offset += (COMPILER_HOOK_SIZE * (sizeof (SCHEME_OBJECT))); \ + declare_builtin (((unsigned long) hook), #hook); \ +} while (0) + +void +x86_64_reset_hook (void) +{ + int offset = (COMPILER_REGBLOCK_N_FIXED * (sizeof (SCHEME_OBJECT))); + unsigned char * rsi_value = ((unsigned char *) Registers); + bool fp_support_present = (x86_64_interface_initialize ()); + + /* These must match machines/x86-64/lapgen.scm */ + + SETUP_REGISTER (asm_scheme_to_interface); /* 0 */ + SETUP_REGISTER (asm_scheme_to_interface_call); /* 1 */ + + if (offset != RSI_TRAMPOLINE_TO_INTERFACE_OFFSET) + { + outf_fatal ("\nx86_64_reset_hook: RSI_TRAMPOLINE_TO_INTERFACE_OFFSET\n"); + Microcode_Termination (TERM_EXIT); + } + SETUP_REGISTER (asm_trampoline_to_interface); /* 2 */ + + SETUP_REGISTER (asm_interrupt_procedure); /* 3 */ + SETUP_REGISTER (asm_interrupt_continuation); /* 4 */ + SETUP_REGISTER (asm_interrupt_closure); /* 5 */ + SETUP_REGISTER (asm_interrupt_dlink); /* 6 */ + SETUP_REGISTER (asm_primitive_apply); /* 7 */ + SETUP_REGISTER (asm_primitive_lexpr_apply); /* 8 */ + SETUP_REGISTER (asm_assignment_trap); /* 9 */ + SETUP_REGISTER (asm_reference_trap); /* 10 */ + SETUP_REGISTER (asm_safe_reference_trap); /* 11 */ + SETUP_REGISTER (asm_link); /* 12 */ + SETUP_REGISTER (asm_error); /* 13 */ + SETUP_REGISTER (asm_primitive_error); /* 14 */ + /* [TRC 20091025: This was an i386 hack for when the PC is not + available, which on x86-64 it always is. */ + /* SETUP_REGISTER (asm_short_primitive_apply); */ /* 15 */ + + /* No more room for positive offsets without going to 32-bit + offsets! */ + + /* This is a hack to make all the hooks be addressable with byte + offsets (instead of longword offsets). The register block + extends to negative offsets as well, so all the following hooks + are accessed with negative offsets, and all fit in a byte. */ + + /* [TRC 20091029: This hack doesn't work any longer; this code + should be cleaned up, since we must use longword offsets anyway.] 
+ */ + + offset = -256; + if (fp_support_present) + { + SETUP_REGISTER (asm_generic_add); /* -32 */ + SETUP_REGISTER (asm_generic_subtract); /* -31 */ + SETUP_REGISTER (asm_generic_multiply); /* -30 */ + SETUP_REGISTER (asm_generic_divide); /* -29 */ + SETUP_REGISTER (asm_generic_equal); /* -28 */ + SETUP_REGISTER (asm_generic_less); /* -27 */ + SETUP_REGISTER (asm_generic_greater); /* -26 */ + SETUP_REGISTER (asm_generic_increment); /* -25 */ + SETUP_REGISTER (asm_generic_decrement); /* -24 */ + SETUP_REGISTER (asm_generic_zero); /* -23 */ + SETUP_REGISTER (asm_generic_positive); /* -22 */ + SETUP_REGISTER (asm_generic_negative); /* -21 */ + SETUP_REGISTER (asm_generic_quotient); /* -20 */ + SETUP_REGISTER (asm_generic_remainder); /* -19 */ + SETUP_REGISTER (asm_generic_modulo); /* -18 */ + } + else + { + SETUP_REGISTER (asm_nofp_add); /* -32 */ + SETUP_REGISTER (asm_nofp_subtract); /* -31 */ + SETUP_REGISTER (asm_nofp_multiply); /* -30 */ + SETUP_REGISTER (asm_nofp_divide); /* -29 */ + SETUP_REGISTER (asm_nofp_equal); /* -28 */ + SETUP_REGISTER (asm_nofp_less); /* -27 */ + SETUP_REGISTER (asm_nofp_greater); /* -26 */ + SETUP_REGISTER (asm_nofp_increment); /* -25 */ + SETUP_REGISTER (asm_nofp_decrement); /* -24 */ + SETUP_REGISTER (asm_nofp_zero); /* -23 */ + SETUP_REGISTER (asm_nofp_positive); /* -22 */ + SETUP_REGISTER (asm_nofp_negative); /* -21 */ + SETUP_REGISTER (asm_nofp_quotient); /* -20 */ + SETUP_REGISTER (asm_nofp_remainder); /* -19 */ + SETUP_REGISTER (asm_nofp_modulo); /* -18 */ + } + + SETUP_REGISTER (asm_sc_apply); /* -17 */ + SETUP_REGISTER (asm_sc_apply_size_1); /* -16 */ + SETUP_REGISTER (asm_sc_apply_size_2); /* -15 */ + SETUP_REGISTER (asm_sc_apply_size_3); /* -14 */ + SETUP_REGISTER (asm_sc_apply_size_4); /* -13 */ + SETUP_REGISTER (asm_sc_apply_size_5); /* -12 */ + SETUP_REGISTER (asm_sc_apply_size_6); /* -11 */ + SETUP_REGISTER (asm_sc_apply_size_7); /* -10 */ + SETUP_REGISTER (asm_sc_apply_size_8); /* -9 */ + SETUP_REGISTER (asm_interrupt_continuation_2); /* -8 */ + /* [TRC 20091025: The cache synchronization bug does not occur in any + x86-64 machines of which I am aware.] 
+ + if (x86_64_cpuid_needed) + SETUP_REGISTER (asm_serialize_cache); /\* -7 *\/ + else + SETUP_REGISTER (asm_dont_serialize_cache); /\* -7 *\/ + */ + +#ifdef _MACH_UNIX + { + vm_address_t addr; + vm_size_t size; + vm_prot_t prot; + vm_prot_t max_prot; + vm_inherit_t inheritance; + boolean_t shared; + port_t object; + vm_offset_t offset; + + addr = ((vm_address_t) Heap); + if ((vm_region ((task_self ()), &addr, &size, &prot, &max_prot, + &inheritance, &shared, &object, &offset)) + != KERN_SUCCESS) + { + outf_fatal ( "compiler_reset: vm_region() failed.\n"); + Microcode_Termination (TERM_EXIT); + /*NOTREACHED*/ + } + if ((prot & VM_PROT_SCHEME) != VM_PROT_SCHEME) + { + if ((max_prot & VM_PROT_SCHEME) != VM_PROT_SCHEME) + { + outf_fatal ( + "compiler_reset: inadequate protection for Heap.\n"); + outf_fatal ( "maximum = 0x%lx; desired = 0x%lx\n", + ((unsigned long) (max_prot & VM_PROT_SCHEME)), + ((unsigned long) VM_PROT_SCHEME)); + Microcode_Termination (TERM_EXIT); + /*NOTREACHED*/ + } + if ((vm_protect ((task_self ()), ((vm_address_t) Heap), + (((char *) constant_end) - ((char *) Heap)), + 0, VM_PROT_SCHEME)) + != KERN_SUCCESS) + { + outf_fatal ("Unable to change protection for Heap.\n"); + outf_fatal ("actual = 0x%lx; desired = 0x%lx\n", + ((unsigned long) (prot & VM_PROT_SCHEME)), + ((unsigned long) VM_PROT_SCHEME)); + Microcode_Termination (TERM_EXIT); + /*NOTREACHED*/ + } + } + } +#endif /* _MACH_UNIX */ +} diff --git a/src/microcode/cmpintmd/x86-64.h b/src/microcode/cmpintmd/x86-64.h new file mode 100644 index 000000000..49d0f81d0 --- /dev/null +++ b/src/microcode/cmpintmd/x86-64.h @@ -0,0 +1,351 @@ +/* -*-C-*- + +Copyright (C) 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, + 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, + 2006, 2007, 2008, 2009 Massachusetts Institute of Technology + +This file is part of MIT/GNU Scheme. + +MIT/GNU Scheme is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at +your option) any later version. + +MIT/GNU Scheme is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with MIT/GNU Scheme; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, +USA. + +*/ + +/* Compiled code interface macros for AMD x86-64. */ + +#ifndef SCM_CMPINTMD_H_INCLUDED +#define SCM_CMPINTMD_H_INCLUDED 1 + +/* + +Problems with the AMD x86-64 instruction set architecture +==================================================== + +1. Jumps are PC-relative. There are absolute jumps, assuming the PC + is in a data location, or with immediate destinations that include + a segment descriptor (16 bits). The short forms have a PC-relative + offset defined with respect to the immediately following + instruction. + +Problem: Closures and execute caches need their address in old space + in order to be relocated correctly. + +Fix: + +For execute caches we can define a new linker field, called +load-relocation-address which on every GC/relocation stores the new +address and the old contents into global variables and stores the new +address in the field. 
Alternatively the difference between the new +address and the old contents can be stored into a single global +variable, and this can be used, together with the new address of each +cache, to find the old code. + +For closures the code that reads the header (manifest closure) can do +the same. + + +2. The CALL and JMP instructions do not accept 64-bit displacements. + +Problem: We want heaps bigger than 4 GB. + +Fix: + +Assemble more than one instruction for closure entry points, expanding +them even more. Yech. + + +3. The stack pointer register (RSP) cannot be used as the base in + (base + displacement) addressing mode. + +Problem: Common operation in the compiler, which assumes direct access + to the stack. + +Fix: Use base + indexed mode, which allows specification of RSP as + base and nullification of the index (by using RSP again). This is + one byte longer than otherwise, but... + + +Register assignments +==================== + +RAX (0) Unassigned +RCX (1) Unassigned +RDX (2) Unassigned +RBX (3) Unassigned + +RSP (4) Stack Pointer +RBP (5) Register Mask +RSI (6) Pointer to register block, etc. +RDI (7) Free Pointer + +R8-R15 Unassigned + +The dynamic link and value "registers" are not processor registers. +Slots in the register array must be reserved for them. +[TRC 20091025: Later, we ought to use machine registers for these.] + +The Free Pointer is RDI because RDI is the implicit base register for +the memory-to-memory move instructions, and the string store +instruction. Perhaps we can make use of it. + +The pointer to register block is not held in RBP (the processor's +"frame" register is typically used) because its most common use, (RBP) +(address syllable for memory memtop) takes more bytes than (RSI). + +Encodings and layout of various control features +================================================ + +Assumptions: + +The processor will be in 64-bit address and operand mode. Thus +instructions use 64-bit operands, and displacements for addressing +modes and jump instructions are all 64 bits by default. + + Offset Contents Encoding + + +- Execute cache entry encoding: + + Before linking + + 0 16-bit arity \ + 2 zero [TC_FIXNUM | arity] + 7 0x1A / +entry 8 symbol + 16 + 24 + + After linking + + 0 16-bit arity + 2 zero + 7 0x1A +entry 8 MOV RAX,imm64 0x48 0xB8 + 10
+ 18 JMP (RAX) 0xFF 0xE0
+ 19-23 <padding>
+ 24
+
+
+- Closures:
+
+The only reason for a 32-bit entry count is to align everything
+nicely.
+
+ 0 <manifest closure header>
+ 8 <entry count>
+ 12 \__ format word
+ 14 /
+entry0 16 MOV RAX,imm64 0x48 0xB8
+ 18 <address>
+ 26 CALL (RAX) 0xFF 0xD0 + 28 + ... + 16*(n+1) + + +- Trampoline encoding: + + -8 + -4 + -2 +entry 0 MOV AL,code 0xB0, code-byte + 2 CALL n(RSI) 0xFF 0x96 n-longword + 8 + + +[TRC 20091027: The next two are wrong; need to update.] + +- GC & interrupt check at procedure/continuation entry: + +gc_lab -7 CALL n(RSI) 0xFF 0x56 n-byte + -4 + -2 +entry 0 CMP RDI,(RSI) 0x48 0x39 0x3e + 3 JAE gc_lab 0x73 -12 + 5 + + +- GC & interrupt check at closure entry: + +gc_lab -11 ADD (RSP),&offset 0x83 0x04 0x24 offset-byte + -7 JMP n(RSI) 0xFF 0x66 n-byte + -4 + -2 +entry 0 ADD (RSP),&magic 0x81 0x04 0x24 magic-longword + 7 CMP RDI,(RSI) 0x39 0x3e + 9 JAE gc_lab 0x73 0xea (= -22) + 11 + +The magic value depends on the closure because of canonicalization. + +The ADD instruction at offset -11 is not present for the 0th closure +entry, since it is the canonical entry point. Its format depends on +the value of offset, since the sign-extending forms often suffice. + +offset = entry_number * entry_size +magic = ([TC_COMPILED_ENTRY | 0] - (offset + length_of_CALL_instruction)) + +*/ + +#define ASM_RESET_HOOK x86_64_reset_hook +#define FPE_RESET_TRAPS x86_64_interface_initialize + +#define CMPINT_USE_STRUCS 1 + +/* These next definitions must agree with "cmpauxmd/x86-64.m4", which is + where the register block is allocated. */ +#define COMPILER_REGBLOCK_N_FIXED 16 +/* Big enough to hold 80-bit floating-point value: */ +#define COMPILER_TEMP_SIZE 2 +#define COMPILER_REGBLOCK_N_TEMPS 256 +#define COMPILER_REGBLOCK_N_HOOKS 80 +#define COMPILER_HOOK_SIZE 1 + +#define COMPILER_REGBLOCK_EXTRA_SIZE \ + (COMPILER_REGBLOCK_N_HOOKS * COMPILER_HOOK_SIZE) + +#define REGBLOCK_ALLOCATED_BY_INTERFACE true + +typedef byte_t insn_t; + +/* Number of insn_t units preceding entry address in which header + (type and offset info) is stored. */ +#define CC_ENTRY_HEADER_SIZE (CC_ENTRY_TYPE_SIZE + CC_ENTRY_OFFSET_SIZE) +#define CC_ENTRY_TYPE_SIZE 2 +#define CC_ENTRY_OFFSET_SIZE 2 + +/* Number of insn_t units preceding entry header in which GC trap + instructions are stored. */ +#define CC_ENTRY_GC_TRAP_SIZE 3 + +#define EMBEDDED_CLOSURE_ADDRS_P 1 + +#define DECLARE_RELOCATION_REFERENCE(name) + +#define START_CLOSURE_RELOCATION(scan, ref) do {} while (0) +#define START_OPERATOR_RELOCATION(scan, ref) do {} while (0) + +#define OPERATOR_RELOCATION_OFFSET 0 + +#define READ_COMPILED_CLOSURE_TARGET(a, r) \ + read_compiled_closure_target (a) + +/* Size of execution cache in SCHEME_OBJECTS. */ +#define UUO_LINK_SIZE 3 + +#define UUO_WORDS_TO_COUNT(nw) ((nw) / UUO_LINK_SIZE) +#define UUO_COUNT_TO_WORDS(nc) ((nc) * UUO_LINK_SIZE) + +#define READ_UUO_TARGET(a, r) read_uuo_target (a) + +#define FLUSH_I_CACHE() X86_64_CACHE_SYNCHRONIZE () +#define FLUSH_I_CACHE_REGION(address, nwords) X86_64_CACHE_SYNCHRONIZE () +#define PUSH_D_CACHE_REGION(address, nwords) X86_64_CACHE_SYNCHRONIZE () + +/* [TRC 20091025: The cache synchronization bug does not occur in any + x86-64 machines of which I am aware.] 
*/ + +#define X86_64_CACHE_SYNCHRONIZE() do {} while (0) + +/* +#define X86_64_CACHE_SYNCHRONIZE() do \ +{ \ + if (x86_64_cpuid_needed) \ + x86_64_cache_synchronize (); \ +} while (false) +*/ + +#if defined(__OS2__) && (defined(__IBMC__) || defined(__WATCOMC__)) +# define ASM_ENTRY_POINT(name) (_System name) +#elif defined(__WIN32__) && defined(__WATCOMC__) +# define ASM_ENTRY_POINT(name) (__cdecl name) +#else +# define ASM_ENTRY_POINT(name) name +#endif + +extern int ASM_ENTRY_POINT (x86_64_interface_initialize) (void); + +extern void asm_assignment_trap (void); +extern void asm_dont_serialize_cache (void); +extern void asm_error (void); +extern void asm_generic_add (void); +extern void asm_generic_decrement (void); +extern void asm_generic_divide (void); +extern void asm_generic_equal (void); +extern void asm_generic_greater (void); +extern void asm_generic_increment (void); +extern void asm_generic_less (void); +extern void asm_generic_modulo (void); +extern void asm_generic_multiply (void); +extern void asm_generic_negative (void); +extern void asm_generic_positive (void); +extern void asm_generic_quotient (void); +extern void asm_generic_remainder (void); +extern void asm_generic_subtract (void); +extern void asm_generic_zero (void); +extern void asm_interrupt_closure (void); +extern void asm_interrupt_continuation (void); +extern void asm_interrupt_continuation_2 (void); +extern void asm_interrupt_dlink (void); +extern void asm_interrupt_procedure (void); +extern void asm_link (void); +extern void asm_nofp_add (void); +extern void asm_nofp_decrement (void); +extern void asm_nofp_divide (void); +extern void asm_nofp_equal (void); +extern void asm_nofp_greater (void); +extern void asm_nofp_increment (void); +extern void asm_nofp_less (void); +extern void asm_nofp_modulo (void); +extern void asm_nofp_multiply (void); +extern void asm_nofp_negative (void); +extern void asm_nofp_positive (void); +extern void asm_nofp_quotient (void); +extern void asm_nofp_remainder (void); +extern void asm_nofp_subtract (void); +extern void asm_nofp_zero (void); +extern void asm_primitive_apply (void); +extern void asm_primitive_error (void); +extern void asm_primitive_lexpr_apply (void); +extern void asm_reference_trap (void); +extern void asm_safe_reference_trap (void); +extern void asm_sc_apply (void); +extern void asm_sc_apply_size_1 (void); +extern void asm_sc_apply_size_2 (void); +extern void asm_sc_apply_size_3 (void); +extern void asm_sc_apply_size_4 (void); +extern void asm_sc_apply_size_5 (void); +extern void asm_sc_apply_size_6 (void); +extern void asm_sc_apply_size_7 (void); +extern void asm_sc_apply_size_8 (void); +extern void asm_scheme_to_interface (void); +extern void asm_scheme_to_interface_call (void); +extern void asm_serialize_cache (void); +/* [TRC 20091025: This was an i386 hack for when the PC is not + available, which on x86-64 it always is. 
*/ +/* extern void asm_short_primitive_apply (void); */ +extern void asm_trampoline_to_interface (void); + +/* extern void x86_64_cache_synchronize (void); */ +/* extern void start_closure_relocation (SCHEME_OBJECT *, reloc_ref_t *); */ +extern insn_t * read_compiled_closure_target (insn_t *); +/* extern void start_operator_relocation (SCHEME_OBJECT *, reloc_ref_t *); */ +extern insn_t * read_uuo_target (SCHEME_OBJECT *); +extern void x86_64_reset_hook (void); + +extern int x86_64_cpuid_needed; + +#endif /* !SCM_CMPINTMD_H_INCLUDED */ diff --git a/src/microcode/confshared.h b/src/microcode/confshared.h index eaf38be51..be6837643 100644 --- a/src/microcode/confshared.h +++ b/src/microcode/confshared.h @@ -225,7 +225,8 @@ typedef enum COMPILER_ALPHA_TYPE, COMPILER_MIPS_TYPE, COMPILER_C_TYPE, - COMPILER_SVM_TYPE + COMPILER_SVM_TYPE, + COMPILER_X86_64_TYPE, } cc_arch_t; #include "cmpintmd-config.h" @@ -609,6 +610,8 @@ extern void win32_stack_reset (void); #ifdef __x86_64__ # define MACHINE_TYPE "x86-64" # define CURRENT_FASL_ARCH FASL_X86_64 +# define PC_ZERO_BITS 0 +# define HEAP_IN_LOW_MEMORY 1 #endif #ifdef __ia64__ diff --git a/src/microcode/utabmd.c b/src/microcode/utabmd.c index 91fcb0d74..bc89addf0 100644 --- a/src/microcode/utabmd.c +++ b/src/microcode/utabmd.c @@ -117,6 +117,7 @@ cc_arch_name (void) case COMPILER_MIPS_TYPE: return ("mips"); case COMPILER_C_TYPE: return ("c"); case COMPILER_SVM_TYPE: return ("svm1"); + case COMPILER_X86_64_TYPE: return ("x86-64"); default: return (0); } } -- 2.25.1
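A final cross-check of the execute-cache encoding described in
cmpintmd/x86-64.h: the instruction sequence that write_uuo_target
stores is MOV RAX,imm64 followed by JMP (RAX) -- twelve bytes, which
is why a linked cache occupies three SCHEME_OBJECTs (UUO_LINK_SIZE)
and why compiled_closure_next advances by CC_ENTRY_HEADER_SIZE + 12.
A standalone sketch that prints the emitted bytes (the buffer, names,
and sample address are illustrative only):

    #include <stdio.h>
    #include <stdint.h>
    #include <string.h>

    typedef uint8_t insn_t;

    /* Same bytes as write_uuo_target: REX.W + MOV RAX,imm64, then a
       JMP through RAX.  */
    static void
    write_jump_via_rax (insn_t * addr, uint64_t target)
    {
      (*addr++) = 0x48;		/* REX.W */
      (*addr++) = 0xB8;		/* MOV RAX,imm64 */
      memcpy (addr, (&target), 8);
      addr += 8;
      (*addr++) = 0xFF;		/* JMP reg/mem64 */
      (*addr++) = 0xE0;		/* ModR/M for RAX */
    }

    int
    main (void)
    {
      insn_t buffer[12];
      write_jump_via_rax (buffer, 0x123456789abcdef0ULL);
      for (unsigned i = 0; (i < (sizeof buffer)); i += 1)
        printf ("%02x ", (buffer[i]));
      printf ("\n");		/* 48 b8 f0 de bc 9a 78 56 34 12 ff e0 */
      return (0);
    }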