--- /dev/null
+### -*-Midas-*-
+###
+### Copyright (C) 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993,
+### 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
+### 2004, 2005, 2006, 2007, 2008, 2009 Massachusetts Institute of
+### Technology
+###
+### This file is part of MIT/GNU Scheme.
+###
+### MIT/GNU Scheme is free software; you can redistribute it and/or
+### modify it under the terms of the GNU General Public License as
+### published by the Free Software Foundation; either version 2 of the
+### License, or (at your option) any later version.
+###
+### MIT/GNU Scheme is distributed in the hope that it will be useful,
+### but WITHOUT ANY WARRANTY; without even the implied warranty of
+### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+### General Public License for more details.
+###
+### You should have received a copy of the GNU General Public License
+### along with MIT/GNU Scheme; if not, write to the Free Software
+### Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+### 02110-1301, USA.
+\f
+### AMD x86-64 assembly language part of the compiled code interface.
+### See cmpint.txt, cmpint.c, cmpint-mc68k.h, and cmpgc.h for more
+### documentation.
+###
+### This m4 source expands into either Unix (gas) source or PC
+### (masm/wasm) source.
+###
+### NOTE:
+### Assumptions:
+###
+### 0) Segment registers and paging are set up for 64-bit "flat"
+### operation.
+###
+### 1) All registers and stack locations hold a C long object.
+###
+### 2) The C compiler divides registers into three groups:
+### - Linkage registers, used for procedure calls and global
+### references. On AMD64 Unix ABI: %rbp, %rsp.
+### - super temporaries, not preserved across procedure calls and
+### always usable. On AMD64 Unix ABI: everything but what is
+### listed below.
+### - preserved registers saved by the callee if they are written.
+### On AMD64 Unix ABI: %rbp, %rbx, %r12-%r15, MXCSR, x87 control
+### word.
+###
+### 3) Arguments, if passed on a stack, are popped by the caller
+### or by the procedure return instruction (as on the VAX). Thus
+### most "leaf" procedures need not worry about them. On x86-64,
+### arguments beyond the sixth are passed on the stack; the first
+### through sixth are passed in %rdi, %rsi, %rdx, %rcx, %r8, and
+### %r9. (Non-integer arguments are passed in other ways.)
+###
+### 4) There is a hardware or software maintained stack for
+### control. The procedure calling sequence may leave return
+### addresses in registers, but they must be saved somewhere for
+### nested calls and recursive procedures. On x86-64: saved on
+### the stack by the CALL instruction.
+###
+### 5) C procedures return long values in a super temporary
+### register. Two-word structures are returned in super temporary
+### registers as well in the AMD64 Unix ABI: %rax and %rdx.
+###
+### 6) Floating point registers are not preserved by this
+### interface. The interface is only called from the Scheme
+### interpreter, which does not use floating point data. Thus
+### although the calling convention would require us to preserve
+### them, they contain garbage.
+###
+### Compiled Scheme code uses the following register convention:
+### - %rsp contains the Scheme stack pointer, not the C stack
+### pointer.
+### - %rsi contains a pointer to the Scheme interpreter's "register"
+### block. This block contains the compiler's copy of MemTop,
+### the interpreter's registers (val, env, exp, etc.),
+### temporary locations for compiled code, and the addresses
+### of various hooks defined in this file.
+### - %rdi contains the Scheme free pointer.
+### - %rbp contains the Scheme datum mask.
+### The dynamic link (when needed) is in Registers[REGBLOCK_COMPILER_TEMP].
+### Values are returned in Registers[REGBLOCK_VAL].
+### [TRC 20091025: Later, we ought to use machine registers for
+### these.]
+###
+### All other registers are available to the compiler. A
+### caller-saves convention is used, so the registers need not be
+### preserved by subprocedures.
+\f
+### The following m4 macros can be defined to change how this file is
+### expanded.
+###
+### DASM
+### If defined, expand to Intel assembly-language syntax, used by
+### Microsoft assembler (MASM) and Watcom assembler (WASM).
+### Otherwise, expand to AT&T syntax, used by GAS. [TRC 20091025:
+### The Intel syntax probably won't work here.]
+###
+### WIN32
+### If defined, expand to run under Win32; implies DASM.
+###
+### SUPPRESS_LEADING_UNDERSCORE
+### If defined, external symbol names are generated as written;
+### otherwise, they have an underscore prepended to them.
+### WCC386
+### Should be defined when using Watcom assembler.
+### WCC386R
+### Should be defined when using Watcom assembler and generating
+### code to use the Watcom register-based argument conventions.
+### TYPE_CODE_LENGTH
+### Normally defined to be 6. Don't change this unless you know
+### what you're doing.
+### DISABLE_387
+### If defined, do not generate 387 floating-point instructions.
+### VALGRIND_MODE
+### If defined, modify code to make it work with valgrind.
+\f
+#### Utility macros and definitions
+
+ifdef(`WIN32',
+ `define(IF_WIN32,`$1')',
+ `define(IF_WIN32,`')')
+
+ifdef(`DISABLE_387',
+ `define(IF387,`')',
+ `define(IF387,`$1')')
+
+ifdef(`DISABLE_387',
+ `define(IFN387,`$1')',
+ `define(IFN387,`')')
+
+IF_WIN32(`define(DASM,1)')
+ifdef(`WCC386R',`define(WCC386,1)')
+
+ifdef(`DASM',
+ `define(IFDASM,`$1')',
+ `define(IFDASM,`')')
+
+ifdef(`DASM',
+ `define(IFNDASM,`')',
+ `define(IFNDASM,`$1')')
+
+ifdef(`DASM',
+ `define(use_external_data,` extrn $1:dword')',
+ `define(use_external_data,`')')
+
+ifdef(`DASM',
+ `define(use_external_code,` extrn $1:near')',
+ `define(use_external_code,`')')
+
+ifdef(`DASM',
+ `define(export_label,` public $1')',
+ `define(export_label,` .globl $1')')
+
+IFNDASM(` .file "cmpaux-x86-64.s"')
+
+# GAS doesn't implement these, for no obvious reason.
+IFNDASM(`define(pushad,`pusha')')
+IFNDASM(`define(popad,`popa')')
+IFNDASM(`define(pushfd,`pushf')')
+IFNDASM(`define(popfd,`popf')')
+
+ifdef(`SUPPRESS_LEADING_UNDERSCORE',
+ `define(EVR,`$1')',
+ `define(EVR,`_$1')')
+
+# When using the Watcom C compiler with register-based calling
+# conventions, source-code function names normally expand to `FOO_',
+# but functions that are compiled with prefix keywords such as
+# `__cdecl' or `__syscall' expand differently. References to the
+# former type of name are marked with `EFR', while references to the
+# latter are marked with `EPFR'.
+
+ifdef(`SUPPRESS_LEADING_UNDERSCORE',
+ `define(EPFR,`$1')',
+ `define(EPFR,`_$1')')
+
+ifdef(`WCC386R',
+ `define(EFR,`$1_')',
+ `define(EFR,`EPFR($1)')')
+
+define(hook_reference,`EFR(asm_$1)')
+
+define(define_data,`export_label(EVR($1))')
+
+define(define_code_label,`
+export_label($1)
+$1:')
+
+define(define_c_label,`define_code_label(EPFR($1))')
+define(define_debugging_label,`define_code_label($1)')
+define(define_hook_label,`define_code_label(hook_reference($1))')
+
+ifdef(`DASM',
+ `define(DECLARE_DATA_SEGMENT,` .data')',
+ `define(DECLARE_DATA_SEGMENT,` .data')')
+
+ifdef(`DASM',
+ `define(DECLARE_CODE_SEGMENT,` .code')',
+ `define(DECLARE_CODE_SEGMENT,` .text')')
+
+ifdef(`DASM',
+ `define(declare_alignment,` align $1')',
+ `define(declare_alignment,` .align $1')')
+
+ifdef(`DASM',
+ `define(allocate_word,`EVR($1) dw 0')',
+ `define(allocate_word,` .comm EVR($1),2')')
+
+ifdef(`DASM',
+ `define(allocate_longword,`EVR($1) dd 0')',
+ `define(allocate_longword,` .comm EVR($1),4')')
+
+ifdef(`DASM',
+ `define(allocate_quadword,`EVR($1) dq 0')',
+ `define(allocate_quadword,` .comm EVR($1),8')')
+
+ifdef(`DASM',
+ `define(allocate_space,`EVR($1) db $2 dup (0)')',
+ `define(allocate_space,`EVR($1):
+ .space $2')')
+\f
+ifdef(`DASM',
+ `define(HEX, `0$1H')',
+ `define(HEX, `0x$1')')
+
+ifdef(`DASM',
+ `define(OP,`$1$3')',
+ `define(OP,`$1$2')')
+
+ifdef(`DASM',
+ `define(TW,`$2,$1')',
+ `define(TW,`$1,$2')')
+
+ifdef(`DASM',
+ `define(ABS, `dword ptr $1')',
+ `define(ABS, `$1(%rip)')')
+
+ifdef(`DASM',
+ `define(IMM, `$1')',
+ `define(IMM, `$$1')')
+
+ifdef(`DASM',
+ `define(REG,`$1')',
+ `define(REG,`%$1')')
+
+ifdef(`DASM',
+ `define(ST,`st($1)')',
+ `define(ST,`%st ($1)')')
+
+ifdef(`DASM',
+ `define(IND,`dword ptr [$1]')',
+ `define(IND,`($1)')')
+
+ifdef(`DASM',
+ `define(BOF,`byte ptr $1[$2]')',
+ `define(BOF,`$1($2)')')
+
+ifdef(`DASM',
+ `define(WOF,`word ptr $1[$2]')',
+ `define(WOF,`$1($2)')')
+
+ifdef(`DASM',
+ `define(LOF,`dword ptr $1[$2]')',
+ `define(LOF,`$1($2)')')
+
+ifdef(`DASM',
+ `define(DOF,`qword ptr $1[$2]')',
+ `define(DOF,`$1($2)')')
+
+ifdef(`DASM',
+ `define(IDX,`dword ptr [$1] [$2]')',
+ `define(IDX,`($1,$2)')')
+
+ifdef(`DASM',
+ `define(SDX,`dword ptr $1[$2*$3]')',
+ `define(SDX,`$1(,$2,$3)')')
+
+ifdef(`DASM',
+ `define(IJMP,`$1')',
+ `define(IJMP,`*$1')')
+\f
+define(TC_LENGTH, ifdef(`TYPE_CODE_LENGTH', TYPE_CODE_LENGTH, 6))
+define(DATUM_LENGTH, eval(64 - TC_LENGTH))
+define(DATUM_SHIFT, eval(1 << DATUM_LENGTH))
+# This doesn't work because m4 is !@#&$*%^!#!$(%!&*@#^(.
+#define(ADDRESS_MASK, eval(DATUM_SHIFT - 1))
+define(ADDRESS_MASK, HEX(3ffffffffffffff))
+# TAG doesn't work for the same reason.
+#define(TAG, ($2 + ($1 * DATUM_SHIFT)))
+
+define(TC_FALSE,0)
+define(TC_FLONUM,6)
+define(TC_TRUE,8)
+define(TC_FIXNUM,26)
+define(TC_MANIFEST_NM_VECTOR,39)
+define(TC_COMPILED_ENTRY,40)
+
+# TAG doesn't work due to m4 stupidity, so define these magic
+# constants here. These are computed in terms of the parameters
+# above.
+
+define(IMM_MANIFEST_NM_VECTOR_1, `IMM(HEX(9c00000000000001))')
+define(IMM_TRUE, `IMM(HEX(2000000000000000))')
+define(IMM_FALSE, `IMM(HEX(0000000000000000))')
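+# (Derived from the parameters above: IMM_TRUE = TC_TRUE << DATUM_LENGTH
+# = 8 << 58 = HEX(2000000000000000), and IMM_MANIFEST_NM_VECTOR_1
+# = (TC_MANIFEST_NM_VECTOR << DATUM_LENGTH) + 1 = (39 << 58) + 1
+# = HEX(9c00000000000001).)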
+
+define(REGBLOCK_VAL,16)
+define(REGBLOCK_COMPILER_TEMP,32)
+define(REGBLOCK_LEXPR_ACTUALS,56)
+define(REGBLOCK_PRIMITIVE,64)
+define(REGBLOCK_CLOSURE_FREE,72)
+
+define(REGBLOCK_DLINK,REGBLOCK_COMPILER_TEMP)
+define(REGBLOCK_UTILITY_ARG4,REGBLOCK_CLOSURE_FREE)
+
+define(COMPILER_REGBLOCK_N_FIXED,16)
+define(COMPILER_REGBLOCK_N_HOOKS,80)
+define(COMPILER_REGBLOCK_N_TEMPS,256)
+define(COMPILER_FIXED_SIZE,1)
+define(COMPILER_HOOK_SIZE,1)
+define(COMPILER_TEMP_SIZE,2)
+define(REGBLOCK_SIZE_IN_OBJECTS,
+ eval((COMPILER_REGBLOCK_N_FIXED*COMPILER_FIXED_SIZE)
+ +(COMPILER_REGBLOCK_N_HOOKS*COMPILER_HOOK_SIZE)
+ +(COMPILER_REGBLOCK_N_TEMPS*COMPILER_TEMP_SIZE)))
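+
+# With the parameters above, the register block comprises
+# (16*1) + (80*1) + (256*2) = 608 objects, i.e. 4864 bytes.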
+
+# Define the floating-point processor control word. Always set
+# round-to-even and double precision. Under Win32, mask all
+# exceptions. Under Unix and OS/2, mask only the inexact result
+# exception.
+ifdef(`WIN32',
+ `define(FP_CONTROL_WORD,HEX(023f))',
+ `define(FP_CONTROL_WORD,HEX(0220))')
+
+define(regs,REG(rsi))
+define(rfree,REG(rdi))
+define(rmask,REG(rbp))
+
+IFDASM(`.586p
+.model flat')
+
+DECLARE_DATA_SEGMENT()
+declare_alignment(2)
+
+use_external_data(EVR(Free))
+use_external_data(EVR(stack_pointer))
+use_external_data(EVR(utility_table))
+
+ifdef(`WIN32',`
+use_external_data(EVR(RegistersPtr))
+',`
+define_data(Regstart)
+allocate_space(Regstart,256)
+
+define_data(Registers)
+allocate_space(Registers,eval(REGBLOCK_SIZE_IN_OBJECTS*8))
+')
+
+define_data(i387_presence)
+allocate_quadword(i387_presence)
+
+define_data(C_Stack_Pointer)
+allocate_quadword(C_Stack_Pointer)
+
+define_data(C_Frame_Pointer)
+allocate_quadword(C_Frame_Pointer)
+
+# [TRC 20091025: CPUID is always supported.]
+# define_data(x86_64_cpuid_supported)
+# allocate_quadword(x86_64_cpuid_supported)
+
+# [TRC 20091025: The cache synchronization bug does not occur in any
+# x86-64 machines of which I am aware.]
+# define_data(x86_64_cpuid_needed)
+# allocate_quadword(x86_64_cpuid_needed)
+\f
+DECLARE_CODE_SEGMENT()
+declare_alignment(2)
+
+# [TRC 20091025: We need to check for MMX/SSEn instructions too.]
+
+define_c_label(x86_64_interface_initialize)
+ OP(push,q) REG(rbp)
+ OP(mov,q) TW(REG(rsp),REG(rbp))
+ OP(xor,q) TW(REG(rax),REG(rax)) # No 387 available
+
+# [TRC 20091025: The AMD64 reference manual suggests using the CPUID
+# instruction to detect instruction subsets instead.]
+
+# Unfortunately, the `mov cr0,rcx' instruction is privileged.
+# Use the deprecated `smsw cx' instruction instead.
+
+IF387(`
+# OP(mov,q) TW(REG(cr0),REG(rcx)) # Test for 387 presence
+ifdef(`VALGRIND_MODE',`',`
+ smsw REG(cx)
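+# HEX(12) selects the MP (bit 1) and ET (bit 4) bits of the machine
+# status word; both set indicates a 387-compatible FPU.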
+ OP(mov,q) TW(IMM(HEX(12)),REG(rdx))
+ OP(and,q) TW(REG(rdx),REG(rcx))
+ OP(cmp,q) TW(REG(rdx),REG(rcx))
+ jne x86_64_initialize_no_fp
+')
+ OP(inc,q) REG(rax) # 387 available
+ OP(sub,q) TW(IMM(8),REG(rsp))
+ fclex
+ fnstcw WOF(-2,REG(rbp))
+ OP(and,w) TW(IMM(HEX(f0e0)),WOF(-2,REG(rbp)))
+ OP(or,w) TW(IMM(FP_CONTROL_WORD),WOF(-2,REG(rbp)))
+ fldcw WOF(-2,REG(rbp))
+x86_64_initialize_no_fp:
+')
+ OP(mov,q) TW(REG(rax),ABS(EVR(i387_presence)))
+
+# [TRC 20091025: CPUID is always supported.]
+
+# Do a bunch of hair to determine if we need to do cache synchronization.
+# See if the CPUID instruction is supported.
+
+# OP(xor,q) TW(REG(rax),REG(rax))
+# OP(mov,q) TW(REG(rax),ABS(EVR(x86_64_cpuid_supported)))
+# OP(mov,q) TW(REG(rax),ABS(EVR(x86_64_cpuid_needed)))
+
+# First test: can we toggle the AC bit?
+
+# pushfd
+# OP(pop,l) REG(eax)
+# OP(mov,l) TW(REG(eax),REG(ecx))
+# OP(xor,l) TW(IMM(HEX(00040000)),REG(eax))
+# OP(push,l) REG(eax)
+# popfd
+# pushfd
+# OP(pop,l) REG(eax)
+
+# if AC bit can't be toggled, this is a 386 (and doesn't support CPUID).
+
+# OP(xor,l) TW(REG(ecx),REG(eax))
+# jz no_cpuid_instr
+# OP(push,l) REG(ecx) # restore EFLAGS
+# popfd
+
+# Now test to see if the ID bit can be toggled.
+
+# OP(mov,l) TW(REG(ecx),REG(eax))
+# OP(xor,l) TW(IMM(HEX(00200000)),REG(eax))
+# OP(push,l) REG(eax)
+# popfd
+# pushfd
+# OP(pop,l) REG(eax)
+
+# if ID bit can't be toggled, this is a 486 that doesn't support CPUID.
+
+# OP(xor,l) TW(REG(ecx),REG(eax))
+# jz no_cpuid_instr
+# OP(push,l) REG(ecx) # restore EFLAGS
+# popfd
+
+# Now we know that cpuid is supported.
+
+# OP(mov,q) TW(IMM(HEX(00000001)),ABS(EVR(x86_64_cpuid_supported)))
+
+# Next, use the CPUID instruction to determine the processor type.
+
+# OP(push,l) REG(ebx)
+# OP(xor,l) TW(REG(eax),REG(eax))
+# cpuid
+
+# Check that CPUID accepts argument 1.
+
+# OP(cmp,l) TW(IMM(HEX(00000001)),REG(eax))
+# jl done_setting_up_cpuid
+
+# Detect "GenuineIntel".
+
+# OP(cmp,l) TW(IMM(HEX(756e6547)),REG(ebx))
+# jne not_intel_cpu
+# OP(cmp,l) TW(IMM(HEX(49656e69)),REG(edx))
+# jne not_intel_cpu
+# OP(cmp,l) TW(IMM(HEX(6c65746e)),REG(ecx))
+# jne not_intel_cpu
+
+# For CPU families 4 (486), 5 (Pentium), or 6 (Pentium Pro, Pentium
+# II, Pentium III), don't use CPUID synchronization.
+
+# OP(mov,l) TW(IMM(HEX(01)),REG(eax))
+# cpuid
+# OP(shr,l) TW(IMM(HEX(08)),REG(eax))
+# OP(and,l) TW(IMM(HEX(0000000F)),REG(eax))
+# OP(cmp,l) TW(IMM(HEX(4)),REG(eax))
+# jl done_setting_up_cpuid
+# OP(cmp,l) TW(IMM(HEX(6)),REG(eax))
+# jg done_setting_up_cpuid
+#
+# jmp cpuid_not_needed
+#
+#not_intel_cpu:
+
+# Detect "AuthenticAMD".
+
+# OP(cmp,l) TW(IMM(HEX(68747541)),REG(ebx))
+# jne not_amd_cpu
+# OP(cmp,l) TW(IMM(HEX(69746e65)),REG(edx))
+# jne not_amd_cpu
+# OP(cmp,l) TW(IMM(HEX(444d4163)),REG(ecx))
+# jne not_amd_cpu
+
+# Problem appears to exist only on Athlon models 1, 3, and 4.
+
+# OP(mov,l) TW(IMM(HEX(01)),REG(eax))
+# cpuid
+
+# OP(mov,l) TW(REG(eax),REG(ecx))
+# OP(shr,l) TW(IMM(HEX(08)),REG(eax))
+# OP(and,l) TW(IMM(HEX(0000000F)),REG(eax))
+# OP(cmp,l) TW(IMM(HEX(6)),REG(eax)) # family 6 = Athlon
+# jne done_setting_up_cpuid
+
+# OP(mov,l) TW(REG(ecx),REG(eax))
+# OP(shr,l) TW(IMM(HEX(04)),REG(eax))
+# OP(and,l) TW(IMM(HEX(0000000F)),REG(eax))
+# OP(cmp,l) TW(IMM(HEX(6)),REG(eax)) # model 6 and up OK
+# jge done_setting_up_cpuid
+# OP(cmp,l) TW(IMM(HEX(2)),REG(eax)) # model 2 OK
+# je done_setting_up_cpuid
+
+# OP(mov,l) TW(IMM(HEX(00000001)),ABS(EVR(x86_64_cpuid_needed)))
+
+#not_amd_cpu:
+#done_setting_up_cpuid:
+# OP(pop,l) REG(ebx)
+#no_cpuid_instr:
+ leave
+ ret
+
+define_c_label(C_to_interface)
+ OP(push,q) REG(rbp) # Link according
+ OP(mov,q) TW(REG(rsp),REG(rbp)) # to C's conventions
+ OP(push,q) REG(rbx) # Save callee-saves
+ OP(push,q) REG(r12) # registers
+ OP(push,q) REG(r13)
+ OP(push,q) REG(r14)
+ OP(push,q) REG(r15)
+ OP(mov,q) TW(REG(rdi),REG(rdx)) # Entry point
+ # Preserve frame ptr
+ OP(mov,q) TW(REG(rbp),ABS(EVR(C_Frame_Pointer)))
+ # Preserve stack ptr
+ OP(mov,q) TW(REG(rsp),ABS(EVR(C_Stack_Pointer)))
+ jmp EPFR(interface_to_scheme)
+
+define_hook_label(trampoline_to_interface)
+define_debugging_label(trampoline_to_interface)
+ OP(pop,q) REG(rcx) # trampoline storage
+ jmp scheme_to_interface
+
+define_hook_label(scheme_to_interface_call)
+define_debugging_label(scheme_to_interface_call)
+ OP(pop,q) REG(rcx) # arg1 = ret. addr.
+ OP(add,q) TW(IMM(4),REG(rcx)) # Skip format info
+# jmp scheme_to_interface
+\f
+define_hook_label(scheme_to_interface)
+define_debugging_label(scheme_to_interface)
+
+# These two moves must happen _before_ the ffree instructions below.
+# Otherwise recovery from SIGFPE there will fail.
+ OP(mov,q) TW(REG(rsp),ABS(EVR(stack_pointer)))
+ OP(mov,q) TW(rfree,ABS(EVR(Free)))
+
+# [TRC 20091025: I think this should be excised.]
+
+IF387(`
+ OP(cmp,q) TW(IMM(0),ABS(EVR(i387_presence)))
+ je scheme_to_interface_proceed
+ ffree ST(0) # Free floating "regs"
+ ffree ST(1)
+ ffree ST(2)
+ ffree ST(3)
+ ffree ST(4)
+ ffree ST(5)
+ ffree ST(6)
+ ffree ST(7)
+scheme_to_interface_proceed:
+')
+
+ OP(mov,q) TW(ABS(EVR(C_Stack_Pointer)),REG(rsp))
+ OP(mov,q) TW(ABS(EVR(C_Frame_Pointer)),REG(rbp))
+
+ OP(sub,q) TW(IMM(16),REG(rsp)) # alloc struct return
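+ # The utility receives the address of this two-word buffer as
+ # its first argument and fills it with a handler address and the
+ # handler's argument; see scheme_to_interface_return below.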
+
+ # Shuffle Scheme -> AMD64 calling conventions:
+ # struct pointer -> rdi
+ # rcx -> rsi
+ # rdx -> rdx
+ # rbx -> rcx
+ # arg4 -> r8
+ # Parallel assignment problems:
+ # arg4 depends on rsi: do arg4->r8 first
+ # target depends on rcx (why?): use r11 as a temporary
+ # [TRC 20091025: Perhaps we can rearrange LIAR to generate
+ # arguments in the registers we want, to avoid this
+ # shuffling.]
+
+ OP(mov,q) TW(REG(rcx),REG(r11))
+
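+ # %al holds the utility index; clear the rest of %rcx and scale
+ # by 8 to fetch the utility's entry from utility_table.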
+ OP(xor,q) TW(REG(rcx),REG(rcx))
+ OP(mov,b) TW(REG(al),REG(cl))
+ OP(mov,q) TW(SDX(EVR(utility_table),REG(rcx),8),REG(rax))
+
+ OP(mov,q) TW(REG(rsp),REG(rdi))
+ OP(mov,q) TW(DOF(REGBLOCK_UTILITY_ARG4(),regs),REG(r8))
+ OP(mov,q) TW(REG(r11),REG(rsi))
+ OP(mov,q) TW(REG(rbx),REG(rcx))
+
+ call IJMP(REG(rax))
+
+define_debugging_label(scheme_to_interface_return)
+ OP(pop,q) REG(rax) # pop struct return
+ OP(pop,q) REG(rdx)
+ jmp IJMP(REG(rax)) # Invoke handler
+
+define_c_label(interface_to_scheme)
+IF387(`
+ OP(cmp,q) TW(IMM(0),ABS(EVR(i387_presence)))
+ je interface_to_scheme_proceed
+ ffree ST(0) # Free floating "regs"
+ ffree ST(1)
+ ffree ST(2)
+ ffree ST(3)
+ ffree ST(4)
+ ffree ST(5)
+ ffree ST(6)
+ ffree ST(7)
+interface_to_scheme_proceed:
+')
+ # Register block = %rsi
+ # Scheme offset in NT
+ifdef(`WIN32',
+` OP(mov,q) TW(ABS(EVR(RegistersPtr)),regs)',
+` OP(lea,q) TW(ABS(EVR(Registers)),regs)')
+
+ OP(mov,q) TW(ABS(EVR(Free)),rfree) # Free pointer = %rdi
+ OP(mov,q) TW(DOF(REGBLOCK_VAL(),regs),REG(rax)) # Value/dynamic link
+ OP(mov,q) TW(IMM(ADDRESS_MASK),rmask) # = %rbp
+
+ OP(mov,q) TW(ABS(EVR(stack_pointer)),REG(rsp))
+ OP(mov,q) TW(REG(rax),REG(rcx)) # Preserve if used
+ OP(and,q) TW(rmask,REG(rcx)) # Restore potential dynamic link
+ OP(mov,q) TW(REG(rcx),DOF(REGBLOCK_DLINK(),regs))
+ jmp IJMP(REG(rdx))
+
+IF_WIN32(`
+use_external_code(EFR(WinntExceptionTransferHook))
+define_code_label(EFR(callWinntExceptionTransferHook))
+ call EFR(WinntExceptionTransferHook)
+ mov rdx,rax
+')
+
+define_c_label(interface_to_C)
+IF387(`
+ OP(cmp,q) TW(IMM(0),ABS(EVR(i387_presence)))
+ je interface_to_C_proceed
+ ffree ST(0) # Free floating "regs"
+ ffree ST(1)
+ ffree ST(2)
+ ffree ST(3)
+ ffree ST(4)
+ ffree ST(5)
+ ffree ST(6)
+ ffree ST(7)
+interface_to_C_proceed:')
+
+ OP(mov,q) TW(REG(rdx),REG(rax)) # Set up result
+ OP(pop,q) REG(r15) # Restore callee-saves
+ OP(pop,q) REG(r14) # registers
+ OP(pop,q) REG(r13)
+ OP(pop,q) REG(r12)
+ OP(pop,q) REG(rbx)
+ leave
+ ret
+\f
+# [TRC 20091025: The cache synchronization bug does not occur in any
+# x86-64 machines of which I am aware.]
+
+#define_code_label(EFR(x86_64_cache_synchronize))
+# OP(push,q) REG(rbp)
+# OP(mov,q) TW(REG(rsp),REG(rbp))
+# OP(push,q) REG(rbx)
+# OP(xor,q) TW(REG(rax),REG(rax))
+# cpuid
+# OP(pop,q) REG(rbx)
+# leave
+# ret
+
+### Run the CPUID instruction for serialization.
+
+#define_hook_label(serialize_cache)
+# pushad
+# OP(xor,q) TW(REG(rax),REG(rax))
+# cpuid
+# popad
+# ret
+
+### Stub to be used in place of above on machines that don't need it.
+
+#define_hook_label(dont_serialize_cache)
+# ret
+\f
+### Assembly language hooks used to reduce code size.
+### There is no time advantage to using these over using
+### scheme_to_interface (or scheme_to_interface_call), but the
+### code generated by the compiler can be somewhat smaller.
+
+define(define_jump_indirection,
+`define_hook_label($1)
+ OP(mov,b) TW(IMM(HEX($2)),REG(al))
+ jmp scheme_to_interface')
+
+define(define_call_indirection,
+`define_hook_label($1)
+ OP(mov,b) TW(IMM(HEX($2)),REG(al))
+ jmp scheme_to_interface_call')
+
+define_call_indirection(interrupt_procedure,1a)
+define_call_indirection(interrupt_continuation,1b)
+define_jump_indirection(interrupt_closure,18)
+define_jump_indirection(interrupt_continuation_2,3b)
+
+define_hook_label(interrupt_dlink)
+ OP(mov,q) TW(DOF(REGBLOCK_DLINK(),regs),REG(rdx))
+ OP(mov,b) TW(IMM(HEX(19)),REG(al))
+ jmp scheme_to_interface_call
+
+###
+### This saves even more instructions than primitive_apply when
+### the PC is not available. Instead of jumping here, a call
+### instruction is used, and the longword offset to the primitive
+### object follows the call instruction. This code loads the
+### primitive object and merges with primitive_apply.
+###
+### [TRC 20091025: But on the x86-64, we have RIP-relative
+### addressing, so we don't need this.]
+###
+
+#declare_alignment(2)
+#define_hook_label(short_primitive_apply)
+# OP(pop,l) REG(edx) # offset pointer
+# OP(mov,l) TW(IND(REG(edx)),REG(ecx)) # offset
+# # Primitive object
+# OP(mov,l) TW(IDX(REG(edx),REG(ecx)),REG(ecx))
+# # Merge
+# jmp hook_reference(primitive_apply)
+
+declare_alignment(2)
+define_jump_indirection(primitive_apply,12)
+
+define_jump_indirection(primitive_lexpr_apply,13)
+define_jump_indirection(error,15)
+define_call_indirection(link,17)
+define_call_indirection(assignment_trap,1d)
+define_call_indirection(reference_trap,1f)
+define_call_indirection(safe_reference_trap,20)
+define_call_indirection(primitive_error,36)
+\f
+### Assembly language hooks used to increase speed.
+
+# define_jump_indirection(sc_apply,14)
+#
+# define(define_apply_fixed_size,
+# `define_hook_label(sc_apply_size_$1)
+# OP(mov,q) TW(IMM($1),REG(rdx))
+# OP(mov,b) TW(IMM(HEX(14)),REG(al))
+# jmp scheme_to_interface')
+
+declare_alignment(2)
+define_hook_label(sc_apply)
+ OP(mov,q) TW(REG(rcx),REG(rax)) # Copy for type code
+ OP(mov,q) TW(REG(rcx),REG(rbx)) # Copy for address
+ OP(shr,q) TW(IMM(DATUM_LENGTH),REG(rax)) # Select type code
+ OP(and,q) TW(rmask,REG(rbx)) # Select datum
+ OP(cmp,b) TW(IMM(TC_COMPILED_ENTRY),REG(al))
+ jne asm_sc_apply_generic
+ # [TRC 20091025: How big are the frame sizes?]
+ OP(movs,bq,x) TW(BOF(-4,REG(rbx)),REG(rax)) # Extract frame size
+ OP(cmp,q) TW(REG(rax),REG(rdx)) # Compare to nargs+1
+ jne asm_sc_apply_generic
+ jmp IJMP(REG(rbx)) # Invoke
+
+define_debugging_label(asm_sc_apply_generic)
+ OP(mov,q) TW(IMM(HEX(14)),REG(rax))
+ jmp scheme_to_interface
+
+define(define_apply_fixed_size,
+`declare_alignment(2)
+define_hook_label(sc_apply_size_$1)
+ OP(mov,q) TW(REG(rcx),REG(rax)) # Copy for type code
+ OP(mov,q) TW(REG(rcx),REG(rbx)) # Copy for address
+ OP(shr,q) TW(IMM(DATUM_LENGTH),REG(rax)) # Select type code
+ OP(and,q) TW(rmask,REG(rbx)) # Select datum
+ OP(cmp,b) TW(IMM(TC_COMPILED_ENTRY),REG(al))
+ jne asm_sc_apply_generic_$1
+ # [TRC 20091025: How big are the frame sizes?]
+ OP(cmp,b) TW(IMM($1),BOF(-4,REG(rbx))) # Compare frame size
+ jne asm_sc_apply_generic_$1 # to nargs+1
+ jmp IJMP(REG(rbx))
+
+asm_sc_apply_generic_$1:
+ OP(mov,q) TW(IMM($1),REG(rdx))
+ OP(mov,b) TW(IMM(HEX(14)),REG(al))
+ jmp scheme_to_interface')
+
+define_apply_fixed_size(1)
+define_apply_fixed_size(2)
+define_apply_fixed_size(3)
+define_apply_fixed_size(4)
+define_apply_fixed_size(5)
+define_apply_fixed_size(6)
+define_apply_fixed_size(7)
+define_apply_fixed_size(8)
+\f
+### The following code is used by generic arithmetic
+### whether the fixnum case is open-coded in line or not.
+### This takes care of fixnums and flonums so that the common
+### numeric types are much faster than the rare ones
+### (bignums, ratnums, recnums)
+
+IF387(`declare_alignment(2)
+asm_generic_flonum_result:
+ # The MOV instruction can take a 64-bit immediate operand only
+ # if the target is a register, so we store the manifest in rax
+ # before moving it to memory.
+ OP(mov,q) TW(IMM_MANIFEST_NM_VECTOR_1,REG(rax))
+ OP(mov,q) TW(REG(rax), IND(rfree))
+ # The OR instruction cannot take a 64-bit immediate either, so
+ # we need to store the tag in rax first, shift it up, and then
+ # OR the datum into it.
+ OP(mov,q) TW(IMM(TC_FLONUM),REG(rax))
+ OP(shl,q) TW(IMM(DATUM_LENGTH),REG(rax))
+ OP(or,q) TW(rfree,REG(rax))
+ OP(fstp,l) DOF(8,rfree) # fstpd
+ OP(and,q) TW(rmask,IND(REG(rsp)))
+ OP(add,q) TW(IMM(16),rfree)
+ OP(mov,q) TW(REG(rax),DOF(REGBLOCK_VAL(),regs))
+ ret
+
+declare_alignment(2)
+asm_generic_fixnum_result:
+ OP(and,q) TW(rmask,IND(REG(rsp)))
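+ # Here %rax holds the result shifted up by TC_LENGTH, so its low
+ # six bits are zero: OR the tag into them, then rotate right to
+ # leave [TC_FIXNUM | datum].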
+ OP(or,b) TW(IMM(TC_FIXNUM),REG(al))
+ OP(ror,q) TW(IMM(TC_LENGTH),REG(rax))
+ OP(mov,q) TW(REG(rax),DOF(REGBLOCK_VAL(),regs))
+ ret
+
+declare_alignment(2)
+asm_generic_return_sharp_t:
+ OP(and,q) TW(rmask,IND(REG(rsp)))
+ OP(mov,q) TW(IMM_TRUE,REG(rax))
+ OP(mov,q) TW(REG(rax),DOF(REGBLOCK_VAL(),regs))
+ ret
+
+declare_alignment(2)
+asm_generic_return_sharp_f:
+ OP(and,q) TW(rmask,IND(REG(rsp)))
+ OP(mov,q) TW(IMM_FALSE,REG(rax))
+ OP(mov,q) TW(REG(rax),DOF(REGBLOCK_VAL(),regs))
+ ret')
+\f
+define(define_unary_operation,
+`declare_alignment(2)
+define_hook_label(generic_$1)
+ OP(pop,q) REG(rdx)
+ OP(mov,q) TW(REG(rdx),REG(rax))
+ OP(shr,q) TW(IMM(DATUM_LENGTH),REG(rax))
+ OP(cmp,b) TW(IMM(TC_FIXNUM),REG(al))
+ je asm_generic_$1_fix
+ OP(cmp,b) TW(IMM(TC_FLONUM),REG(al))
+ jne asm_generic_$1_fail
+ OP(and,q) TW(rmask,REG(rdx))
+ fld1
+ OP($4,l) DOF(8,REG(rdx))
+ jmp asm_generic_flonum_result
+
+asm_generic_$1_fix:
+ OP(mov,q) TW(REG(rdx),REG(rax))
+ OP(shl,q) TW(IMM(TC_LENGTH),REG(rax))
+ OP($3,q) TW(IMM(eval(1 << TC_LENGTH)),REG(rax))
+ jno asm_generic_fixnum_result
+
+asm_generic_$1_fail:
+ OP(push,q) REG(rdx)
+ OP(mov,b) TW(IMM(HEX($2)),REG(al))
+ jmp scheme_to_interface')
+
+define(define_unary_predicate,
+`declare_alignment(2)
+define_hook_label(generic_$1)
+ OP(pop,q) REG(rdx)
+ OP(mov,q) TW(REG(rdx),REG(rax))
+ OP(shr,q) TW(IMM(DATUM_LENGTH),REG(rax))
+ OP(cmp,b) TW(IMM(TC_FIXNUM),REG(al))
+ je asm_generic_$1_fix
+ OP(cmp,b) TW(IMM(TC_FLONUM),REG(al))
+ jne asm_generic_$1_fail
+ OP(and,q) TW(rmask,REG(rdx))
+ OP(fld,l) DOF(8,REG(rdx))
+ ftst
+ fstsw REG(ax)
+ fstp ST(0)
+ sahf
+ $4 asm_generic_return_sharp_t
+ jmp asm_generic_return_sharp_f
+
+asm_generic_$1_fix:
+ OP(mov,q) TW(REG(rdx),REG(rax))
+ OP(shl,q) TW(IMM(TC_LENGTH),REG(rax))
+ OP(cmp,q) TW(IMM(0),REG(rax))
+ $3 asm_generic_return_sharp_t
+ jmp asm_generic_return_sharp_f
+
+asm_generic_$1_fail:
+ OP(push,q) REG(rdx)
+ OP(mov,b) TW(IMM(HEX($2)),REG(al))
+ jmp scheme_to_interface')
+\f
+define(define_binary_operation,
+`declare_alignment(2)
+define_hook_label(generic_$1)
+ OP(pop,q) REG(rdx)
+ OP(pop,q) REG(rbx)
+ OP(mov,q) TW(REG(rdx),REG(rax))
+ OP(mov,q) TW(REG(rbx),REG(rcx))
+ OP(shr,q) TW(IMM(DATUM_LENGTH),REG(rax))
+ OP(shr,q) TW(IMM(DATUM_LENGTH),REG(rcx))
+ OP(cmp,b) TW(REG(al),REG(cl))
+ jne asm_generic_$1_fail
+ OP(cmp,b) TW(IMM(TC_FIXNUM),REG(al))
+ je asm_generic_$1_fix
+ OP(cmp,b) TW(IMM(TC_FLONUM),REG(al))
+ je asm_generic_$1_flo
+
+asm_generic_$1_fail:
+ OP(push,q) REG(rbx)
+ OP(push,q) REG(rdx)
+ OP(mov,b) TW(IMM(HEX($2)),REG(al))
+ jmp scheme_to_interface
+
+asm_generic_$1_fix:
+ OP(mov,q) TW(REG(rdx),REG(rax))
+ OP(mov,q) TW(REG(rbx),REG(rcx))
+ OP(shl,q) TW(IMM(TC_LENGTH),REG(rax))
+ OP(shl,q) TW(IMM(TC_LENGTH),REG(rcx))
+ $5
+ OP($3,q) TW(REG(rcx),REG(rax)) # subq
+ jo asm_generic_$1_fail
+ jmp asm_generic_fixnum_result
+
+asm_generic_$1_flo:
+ OP(and,q) TW(rmask,REG(rdx))
+ OP(and,q) TW(rmask,REG(rbx))
+ OP(fld,l) DOF(8,REG(rdx)) # fldd
+ OP($4,l) DOF(8,REG(rbx)) # fsubl
+ jmp asm_generic_flonum_result')
+\f
+IF387(`declare_alignment(2)
+define_hook_label(generic_divide)
+ OP(pop,q) REG(rdx)
+ OP(pop,q) REG(rbx)
+ OP(mov,q) TW(REG(rdx),REG(rax))
+ OP(mov,q) TW(REG(rbx),REG(rcx))
+ OP(shr,q) TW(IMM(DATUM_LENGTH),REG(rax))
+ OP(shr,q) TW(IMM(DATUM_LENGTH),REG(rcx))
+ OP(cmp,b) TW(IMM(TC_FIXNUM),REG(al))
+ je asm_generic_divide_fix
+ OP(cmp,b) TW(IMM(TC_FLONUM),REG(al))
+ jne asm_generic_divide_fail
+ OP(cmp,b) TW(IMM(TC_FLONUM),REG(cl))
+ je asm_generic_divide_flo_flo
+ OP(cmp,b) TW(IMM(TC_FIXNUM),REG(cl))
+ jne asm_generic_divide_fail
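+ # The SHL below sets ZF, so a zero divisor (zero datum) takes
+ # the JE to the failure path; asm_generic_divide_fix uses the
+ # same trick.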
+ OP(mov,q) TW(REG(rbx),REG(rcx))
+ OP(shl,q) TW(IMM(TC_LENGTH),REG(rcx))
+ je asm_generic_divide_fail
+ OP(and,q) TW(rmask,REG(rdx))
+ OP(sar,q) TW(IMM(TC_LENGTH),REG(rcx))
+ OP(fld,l) DOF(8,REG(rdx)) # fldd
+ OP(mov,q) TW(REG(rcx),IND(rfree))
+ OP(fidiv,l) IND(rfree)
+ jmp asm_generic_flonum_result
+
+asm_generic_divide_fix:
+ OP(cmp,b) TW(IMM(TC_FLONUM),REG(cl))
+ jne asm_generic_divide_fail
+ OP(mov,q) TW(REG(rdx),REG(rcx))
+ OP(shl,q) TW(IMM(TC_LENGTH),REG(rcx))
+ je asm_generic_divide_fail
+ OP(and,q) TW(rmask,REG(rbx))
+ OP(sar,q) TW(IMM(TC_LENGTH),REG(rcx))
+ OP(fld,l) DOF(8,REG(rbx)) # fldd
+ OP(mov,q) TW(REG(rcx),IND(rfree))
+ OP(fidivr,l) IND(rfree)
+ jmp asm_generic_flonum_result
+
+asm_generic_divide_flo_flo:
+ OP(mov,q) TW(REG(rbx),REG(rcx))
+ OP(and,q) TW(rmask,REG(rcx))
+ OP(fld,l) DOF(8,REG(rcx)) # fldd
+ ftst
+ fstsw REG(ax)
+ sahf
+ je asm_generic_divide_by_zero
+ OP(and,q) TW(rmask,REG(rdx))
+ OP(fdivr,l) DOF(8,REG(rdx))
+ jmp asm_generic_flonum_result
+
+asm_generic_divide_by_zero:
+ fstp ST(0) # Pop second arg
+
+asm_generic_divide_fail:
+ OP(push,q) REG(rbx)
+ OP(push,q) REG(rdx)
+ OP(mov,b) TW(IMM(HEX(23)),REG(al))
+ jmp scheme_to_interface')
+\f
+define(define_binary_predicate,
+`declare_alignment(2)
+define_hook_label(generic_$1)
+ OP(pop,q) REG(rdx)
+ OP(pop,q) REG(rbx)
+ OP(mov,q) TW(REG(rdx),REG(rax))
+ OP(mov,q) TW(REG(rbx),REG(rcx))
+ OP(shr,q) TW(IMM(DATUM_LENGTH),REG(rax))
+ OP(shr,q) TW(IMM(DATUM_LENGTH),REG(rcx))
+ OP(cmp,b) TW(REG(al),REG(cl))
+ jne asm_generic_$1_fail
+ OP(cmp,b) TW(IMM(TC_FIXNUM),REG(al))
+ jne asm_generic_$1_fail
+
+ OP(shl,q) TW(IMM(TC_LENGTH),REG(rdx))
+ OP(shl,q) TW(IMM(TC_LENGTH),REG(rbx))
+ OP(cmp,q) TW(REG(rbx),REG(rdx))
+ $3 asm_generic_return_sharp_t
+ jmp asm_generic_return_sharp_f
+
+asm_generic_$1_fail:
+ OP(push,q) REG(rbx)
+ OP(push,q) REG(rdx)
+ OP(mov,b) TW(IMM(HEX($2)),REG(al))
+ jmp scheme_to_interface')
+\f
+IF387(`define_unary_operation(decrement,22,sub,fsubr)
+define_unary_operation(increment,26,add,fadd)
+
+define_unary_predicate(negative,2a,jl,jb)
+define_unary_predicate(positive,2c,jg,ja)
+define_unary_predicate(zero,2d,je,je)
+
+# define_binary_operation(name,index,fix*fix,flo*flo, fixup)
+# define_binary_operation( $1, $2, $3, $4, $5)
+# The fixup is optional; only multiplication needs it, to shift one
+# operand back down by six bits so that the product carries a single
+# tag shift.
+define_binary_operation(add,2b,add,fadd)
+define_binary_operation(subtract,28,sub,fsub)
+define_binary_operation(multiply,29,imul,fmul,
+ `OP(shr,q) TW(IMM(6),REG(rax))')
+# Divide needs to check for 0, so we can't really use the following
+# define_binary_operation(divide,23,NONE,fdiv)
+
+# define_binary_predicate(name,index,fix*fix,flo*flo)
+define_binary_predicate(equal,24,je,je)
+define_binary_predicate(greater,25,jg,ja)
+define_binary_predicate(less,27,jl,jb)')
+
+IFN387(`define_jump_indirection(generic_decrement,22)
+define_jump_indirection(generic_divide,23)
+define_jump_indirection(generic_equal,24)
+define_jump_indirection(generic_greater,25)
+define_jump_indirection(generic_increment,26)
+define_jump_indirection(generic_less,27)
+define_jump_indirection(generic_subtract,28)
+define_jump_indirection(generic_multiply,29)
+define_jump_indirection(generic_negative,2a)
+define_jump_indirection(generic_add,2b)
+define_jump_indirection(generic_positive,2c)
+define_jump_indirection(generic_zero,2d)')
+
+# These don't currently differ according to whether there
+# is a 387 or not.
+
+define_jump_indirection(generic_quotient,37)
+define_jump_indirection(generic_remainder,38)
+define_jump_indirection(generic_modulo,39)
+
+define_jump_indirection(nofp_decrement,22)
+define_jump_indirection(nofp_divide,23)
+define_jump_indirection(nofp_equal,24)
+define_jump_indirection(nofp_greater,25)
+define_jump_indirection(nofp_increment,26)
+define_jump_indirection(nofp_less,27)
+define_jump_indirection(nofp_subtract,28)
+define_jump_indirection(nofp_multiply,29)
+define_jump_indirection(nofp_negative,2a)
+define_jump_indirection(nofp_add,2b)
+define_jump_indirection(nofp_positive,2c)
+define_jump_indirection(nofp_zero,2d)
+define_jump_indirection(nofp_quotient,37)
+define_jump_indirection(nofp_remainder,38)
+define_jump_indirection(nofp_modulo,39)
+\f
+IFDASM(`end')
+
+### Edwin Variables:
+### comment-column: 56
+### comment-start: "#"
+### End:
--- /dev/null
+/* -*-C-*-
+
+Copyright (C) 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994,
+ 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005,
+ 2006, 2007, 2008, 2009 Massachusetts Institute of Technology
+
+This file is part of MIT/GNU Scheme.
+
+MIT/GNU Scheme is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at
+your option) any later version.
+
+MIT/GNU Scheme is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with MIT/GNU Scheme; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301,
+USA.
+
+*/
+
+/* Compiled code interface for AMD x86-64. */
+
+#include "cmpint.h"
+#include "extern.h"
+#include "outf.h"
+#include "errors.h"
+
+extern void * tospace_to_newspace (void *);
+\f
+bool
+read_cc_entry_type (cc_entry_type_t * cet, insn_t * address)
+{
+ return (decode_old_style_format_word (cet, (((uint16_t *) address) [-2])));
+}
+
+bool
+write_cc_entry_type (cc_entry_type_t * cet, insn_t * address)
+{
+ return (encode_old_style_format_word (cet, ((uint16_t *) address) - 2));
+}
+
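+/* The entry's 16-bit offset field packs the offset into bits 1-15
+   and a `continued' flag into bit 0, so e.g. an offset of 10 with
+   the flag set is stored as ((10 << 1) | 1) = 21. */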
+bool
+read_cc_entry_offset (cc_entry_offset_t * ceo, insn_t * address)
+{
+ uint16_t n = (((uint16_t *) address) [-1]);
+ (ceo->offset) = (n >> 1);
+ (ceo->continued_p) = ((n & 1) != 0);
+ return (false);
+}
+
+bool
+write_cc_entry_offset (cc_entry_offset_t * ceo, insn_t * address)
+{
+ if (! ((ceo->offset) < 0x4000))
+ return (true);
+ (((uint16_t *) address) [-1])
+ = (((ceo->offset) << 1) | ((ceo->continued_p) ? 1 : 0));
+ return (false);
+}
+\f
+/* Compiled closures */
+
+/* MOV RAX,imm64 has two bytes of opcode cruft before the imm64. */
+
+insn_t *
+read_compiled_closure_target (insn_t * start)
+{
+ return (* ((insn_t **) (start + CC_ENTRY_HEADER_SIZE + 2)));
+}
+
+void
+write_compiled_closure_target (insn_t * target, insn_t * start)
+{
+ (* ((insn_t **) (start + CC_ENTRY_HEADER_SIZE + 2))) = target;
+}
+
+unsigned long
+compiled_closure_count (SCHEME_OBJECT * block)
+{
+ /* `block' is a pointer to the first object after the manifest;
+ that object begins with the 32-bit entry count. */
+ return ((unsigned long) (* ((uint32_t *) block)));
+}
+
+insn_t *
+compiled_closure_start (SCHEME_OBJECT * block)
+{
+ /* Skip the 32-bit entry count. */
+ return (((insn_t *) block) + 4);
+}
+
+insn_t *
+compiled_closure_entry (insn_t * start)
+{
+ return (start + CC_ENTRY_HEADER_SIZE);
+}
+
+insn_t *
+compiled_closure_next (insn_t * start)
+{
+ return (start + CC_ENTRY_HEADER_SIZE + 12);
+}
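+
+/* Each closure entry is thus 16 bytes: a 4-byte format word, the
+   2-byte MOV RAX,imm64 opcode, the 8-byte target address, and the
+   2-byte CALL (RAX). */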
+
+SCHEME_OBJECT *
+skip_compiled_closure_padding (insn_t * start)
+{
+ /* The padding is the same size as the entry header (format word). */
+ return ((SCHEME_OBJECT *) (start + CC_ENTRY_HEADER_SIZE));
+}
+
+SCHEME_OBJECT
+compiled_closure_entry_to_target (insn_t * entry)
+{
+ /* `entry' points to the start of the MOV RAX,imm64 instruction,
+ which has two bytes of opcode cruft before the imm64. */
+ return (MAKE_CC_ENTRY (* ((long *) (entry + 2))));
+}
+\f
+/* Execution caches (UUO links)
+
+ An execution cache is a region of memory that lives in the
+ constants section of a compiled-code block. It is an indirection
+ for calling external procedures that allows the linker to control
+ the calling process without having to find and change all the
+ places in the compiled code that refer to it.
+
+ Prior to linking, the execution cache has two pieces of
+ information: (1) the name of the procedure being called (a symbol),
+ and (2) the number of arguments that will be passed to the
+ procedure. `saddr' points to the arity at the beginning of the
+ execution cache. */
+
+SCHEME_OBJECT
+read_uuo_symbol (SCHEME_OBJECT * saddr)
+{
+ return (saddr[1]);
+}
+
+unsigned int
+read_uuo_frame_size (SCHEME_OBJECT * saddr)
+{
+ return (* ((uint16_t *) saddr));
+}
+
+insn_t *
+read_uuo_target (SCHEME_OBJECT * saddr)
+{
+ insn_t * mov_addr = ((insn_t *) (saddr + 1));
+ return (* ((insn_t **) (mov_addr + 2)));
+}
+
+insn_t *
+read_uuo_target_no_reloc (SCHEME_OBJECT * saddr)
+{
+ return (read_uuo_target (saddr));
+}
+
+void
+write_uuo_target (insn_t * target, SCHEME_OBJECT * saddr)
+{
+ /* Skip the arity. */
+ insn_t * addr = ((insn_t *) (saddr + 1));
+ (*addr++) = 0x48; /* REX.W (64-bit operand size prefix) */
+ (*addr++) = 0xB8; /* MOV RAX,imm64 */
+ (* ((insn_t **) addr)) = target;
+ addr += 8;
+ (*addr++) = 0xFF; /* JMP reg/mem64 */
+ (*addr++) = 0xE0; /* ModR/M for RAX */
+}
+\f
+#define BYTES_PER_TRAMPOLINE_ENTRY_PADDING 4
+#define OBJECTS_PER_TRAMPOLINE_ENTRY 2
+
+#define RSI_TRAMPOLINE_TO_INTERFACE_OFFSET \
+ ((COMPILER_REGBLOCK_N_FIXED + (2 * COMPILER_HOOK_SIZE)) \
+ * SIZEOF_SCHEME_OBJECT)
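+/* = (16 + (2 * 1)) * 8 = 144 bytes: the trampoline hook is the third
+   hook, immediately after the sixteen fixed registers and the two
+   hooks asm_scheme_to_interface and asm_scheme_to_interface_call. */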
+
+unsigned long
+trampoline_entry_size (unsigned long n_entries)
+{
+ return (n_entries * OBJECTS_PER_TRAMPOLINE_ENTRY);
+}
+
+insn_t *
+trampoline_entry_addr (SCHEME_OBJECT * block, unsigned long index)
+{
+ return (((insn_t *) (block + 2 + (index * OBJECTS_PER_TRAMPOLINE_ENTRY)))
+ + BYTES_PER_TRAMPOLINE_ENTRY_PADDING + CC_ENTRY_HEADER_SIZE);
+}
+
+bool
+store_trampoline_insns (insn_t * entry, byte_t code)
+{
+ (*entry++) = 0xB0; /* MOV AL,code */
+ (*entry++) = code;
+ (*entry++) = 0xFF; /* CALL /2 disp32(RSI) */
+ (*entry++) = 0x96;
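+ /* ModR/M 0x96: mod 10, reg /2 (CALL near indirect), r/m RSI,
+    i.e. call through [RSI + disp32]. */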
+ (* ((uint32_t *) entry)) = RSI_TRAMPOLINE_TO_INTERFACE_OFFSET;
+ X86_64_CACHE_SYNCHRONIZE ();
+ return (false);
+}
+\f
+#ifdef _MACH_UNIX
+# include <mach.h>
+# define VM_PROT_SCHEME (VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE)
+#endif
+
+#define SETUP_REGISTER(hook) do \
+{ \
+ (* ((unsigned long *) (rsi_value + offset))) \
+ = ((unsigned long) (hook)); \
+ offset += (COMPILER_HOOK_SIZE * (sizeof (SCHEME_OBJECT))); \
+ declare_builtin (((unsigned long) hook), #hook); \
+} while (0)
+
+void
+x86_64_reset_hook (void)
+{
+ int offset = (COMPILER_REGBLOCK_N_FIXED * (sizeof (SCHEME_OBJECT)));
+ unsigned char * rsi_value = ((unsigned char *) Registers);
+ bool fp_support_present = (x86_64_interface_initialize ());
+
+ /* These must match machines/x86-64/lapgen.scm */
+
+ SETUP_REGISTER (asm_scheme_to_interface); /* 0 */
+ SETUP_REGISTER (asm_scheme_to_interface_call); /* 1 */
+
+ if (offset != RSI_TRAMPOLINE_TO_INTERFACE_OFFSET)
+ {
+ outf_fatal ("\nx86_64_reset_hook: RSI_TRAMPOLINE_TO_INTERFACE_OFFSET\n");
+ Microcode_Termination (TERM_EXIT);
+ }
+ SETUP_REGISTER (asm_trampoline_to_interface); /* 2 */
+
+ SETUP_REGISTER (asm_interrupt_procedure); /* 3 */
+ SETUP_REGISTER (asm_interrupt_continuation); /* 4 */
+ SETUP_REGISTER (asm_interrupt_closure); /* 5 */
+ SETUP_REGISTER (asm_interrupt_dlink); /* 6 */
+ SETUP_REGISTER (asm_primitive_apply); /* 7 */
+ SETUP_REGISTER (asm_primitive_lexpr_apply); /* 8 */
+ SETUP_REGISTER (asm_assignment_trap); /* 9 */
+ SETUP_REGISTER (asm_reference_trap); /* 10 */
+ SETUP_REGISTER (asm_safe_reference_trap); /* 11 */
+ SETUP_REGISTER (asm_link); /* 12 */
+ SETUP_REGISTER (asm_error); /* 13 */
+ SETUP_REGISTER (asm_primitive_error); /* 14 */
+ /* [TRC 20091025: This was an i386 hack for when the PC is not
+ available, which on x86-64 it always is.] */
+ /* SETUP_REGISTER (asm_short_primitive_apply); */ /* 15 */
+
+ /* No more room for positive offsets without going to 32-bit
+ offsets! */
+
+ /* This is a hack to make all the hooks be addressable with byte
+ offsets (instead of longword offsets). The register block
+ extends to negative offsets as well, so all the following hooks
+ are accessed with negative offsets, and all fit in a byte. */
+
+ /* [TRC 20091029: This hack doesn't work any longer; this code
+ should be cleaned up, since we must use longword offsets anyway.]
+ */
+
+ offset = -256;
+ if (fp_support_present)
+ {
+ SETUP_REGISTER (asm_generic_add); /* -32 */
+ SETUP_REGISTER (asm_generic_subtract); /* -31 */
+ SETUP_REGISTER (asm_generic_multiply); /* -30 */
+ SETUP_REGISTER (asm_generic_divide); /* -29 */
+ SETUP_REGISTER (asm_generic_equal); /* -28 */
+ SETUP_REGISTER (asm_generic_less); /* -27 */
+ SETUP_REGISTER (asm_generic_greater); /* -26 */
+ SETUP_REGISTER (asm_generic_increment); /* -25 */
+ SETUP_REGISTER (asm_generic_decrement); /* -24 */
+ SETUP_REGISTER (asm_generic_zero); /* -23 */
+ SETUP_REGISTER (asm_generic_positive); /* -22 */
+ SETUP_REGISTER (asm_generic_negative); /* -21 */
+ SETUP_REGISTER (asm_generic_quotient); /* -20 */
+ SETUP_REGISTER (asm_generic_remainder); /* -19 */
+ SETUP_REGISTER (asm_generic_modulo); /* -18 */
+ }
+ else
+ {
+ SETUP_REGISTER (asm_nofp_add); /* -32 */
+ SETUP_REGISTER (asm_nofp_subtract); /* -31 */
+ SETUP_REGISTER (asm_nofp_multiply); /* -30 */
+ SETUP_REGISTER (asm_nofp_divide); /* -29 */
+ SETUP_REGISTER (asm_nofp_equal); /* -28 */
+ SETUP_REGISTER (asm_nofp_less); /* -27 */
+ SETUP_REGISTER (asm_nofp_greater); /* -26 */
+ SETUP_REGISTER (asm_nofp_increment); /* -25 */
+ SETUP_REGISTER (asm_nofp_decrement); /* -24 */
+ SETUP_REGISTER (asm_nofp_zero); /* -23 */
+ SETUP_REGISTER (asm_nofp_positive); /* -22 */
+ SETUP_REGISTER (asm_nofp_negative); /* -21 */
+ SETUP_REGISTER (asm_nofp_quotient); /* -20 */
+ SETUP_REGISTER (asm_nofp_remainder); /* -19 */
+ SETUP_REGISTER (asm_nofp_modulo); /* -18 */
+ }
+
+ SETUP_REGISTER (asm_sc_apply); /* -17 */
+ SETUP_REGISTER (asm_sc_apply_size_1); /* -16 */
+ SETUP_REGISTER (asm_sc_apply_size_2); /* -15 */
+ SETUP_REGISTER (asm_sc_apply_size_3); /* -14 */
+ SETUP_REGISTER (asm_sc_apply_size_4); /* -13 */
+ SETUP_REGISTER (asm_sc_apply_size_5); /* -12 */
+ SETUP_REGISTER (asm_sc_apply_size_6); /* -11 */
+ SETUP_REGISTER (asm_sc_apply_size_7); /* -10 */
+ SETUP_REGISTER (asm_sc_apply_size_8); /* -9 */
+ SETUP_REGISTER (asm_interrupt_continuation_2); /* -8 */
+ /* [TRC 20091025: The cache synchronization bug does not occur in any
+ x86-64 machines of which I am aware.]
+
+ if (x86_64_cpuid_needed)
+ SETUP_REGISTER (asm_serialize_cache); /\* -7 *\/
+ else
+ SETUP_REGISTER (asm_dont_serialize_cache); /\* -7 *\/
+ */
+
+#ifdef _MACH_UNIX
+ {
+ vm_address_t addr;
+ vm_size_t size;
+ vm_prot_t prot;
+ vm_prot_t max_prot;
+ vm_inherit_t inheritance;
+ boolean_t shared;
+ port_t object;
+ vm_offset_t offset;
+
+ addr = ((vm_address_t) Heap);
+ if ((vm_region ((task_self ()), &addr, &size, &prot, &max_prot,
+ &inheritance, &shared, &object, &offset))
+ != KERN_SUCCESS)
+ {
+ outf_fatal ( "compiler_reset: vm_region() failed.\n");
+ Microcode_Termination (TERM_EXIT);
+ /*NOTREACHED*/
+ }
+ if ((prot & VM_PROT_SCHEME) != VM_PROT_SCHEME)
+ {
+ if ((max_prot & VM_PROT_SCHEME) != VM_PROT_SCHEME)
+ {
+ outf_fatal (
+ "compiler_reset: inadequate protection for Heap.\n");
+ outf_fatal ( "maximum = 0x%lx; desired = 0x%lx\n",
+ ((unsigned long) (max_prot & VM_PROT_SCHEME)),
+ ((unsigned long) VM_PROT_SCHEME));
+ Microcode_Termination (TERM_EXIT);
+ /*NOTREACHED*/
+ }
+ if ((vm_protect ((task_self ()), ((vm_address_t) Heap),
+ (((char *) constant_end) - ((char *) Heap)),
+ 0, VM_PROT_SCHEME))
+ != KERN_SUCCESS)
+ {
+ outf_fatal ("Unable to change protection for Heap.\n");
+ outf_fatal ("actual = 0x%lx; desired = 0x%lx\n",
+ ((unsigned long) (prot & VM_PROT_SCHEME)),
+ ((unsigned long) VM_PROT_SCHEME));
+ Microcode_Termination (TERM_EXIT);
+ /*NOTREACHED*/
+ }
+ }
+ }
+#endif /* _MACH_UNIX */
+}
--- /dev/null
+/* -*-C-*-
+
+Copyright (C) 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994,
+ 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005,
+ 2006, 2007, 2008, 2009 Massachusetts Institute of Technology
+
+This file is part of MIT/GNU Scheme.
+
+MIT/GNU Scheme is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at
+your option) any later version.
+
+MIT/GNU Scheme is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with MIT/GNU Scheme; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301,
+USA.
+
+*/
+
+/* Compiled code interface macros for AMD x86-64. */
+
+#ifndef SCM_CMPINTMD_H_INCLUDED
+#define SCM_CMPINTMD_H_INCLUDED 1
+\f
+/*
+
+Problems with the AMD x86-64 instruction set architecture
+=========================================================
+
+1. Jumps are PC-relative. There are absolute jumps, assuming the PC
+ is in a data location, or with immediate destinations that include
+ a segment descriptor (16 bits). The short forms have a PC-relative
+ offset defined with respect to the immediately following
+ instruction.
+
+Problem: Closures and execute caches need their address in old space
+ in order to be relocated correctly.
+
+Fix:
+
+For execute caches we can define a new linker field, called
+load-relocation-address which on every GC/relocation stores the new
+address and the old contents into global variables and stores the new
+address in the field. Alternatively the difference between the new
+address and the old contents can be stored into a single global
+variable, and this can be used, together with the new address of each
+cache, to find the old code.
+
+For closures the code that reads the header (manifest closure) can do
+the same.
+
+
+2. The CALL and JMP instructions do not accept 64-bit displacements.
+
+Problem: We want heaps bigger than 4 GB.
+
+Fix:
+
+Assemble more than one instruction for closure entry points, expanding
+them even more. Yech.
+
+
+3. The stack pointer register (RSP) cannot be used as the base in
+ (base + displacement) addressing mode.
+
+Problem: Common operation in the compiler, which assumes direct access
+ to the stack.
+
+Fix: Use base + indexed mode, which allows specification of RSP as
+ base and nullification of the index (by using RSP again). This is
+ one byte longer than otherwise, but...
+
+
+Register assignments
+====================
+
+RAX (0) Unassigned
+RCX (1) Unassigned
+RDX (2) Unassigned
+RBX (3) Unassigned
+
+RSP (4) Stack Pointer
+RBP (5) Register Mask
+RSI (6) Pointer to register block, etc.
+RDI (7) Free Pointer
+
+R8-R15 Unassigned
+
+The dynamic link and value "registers" are not processor registers.
+Slots in the register array must be reserved for them.
+[TRC 20091025: Later, we ought to use machine registers for these.]
+
+The Free Pointer is RDI because RDI is the implicit base register for
+the memory-to-memory move instructions, and the string store
+instruction. Perhaps we can make use of it.
+
+The pointer to register block is not held in RBP (the processor's
+"frame" register) because its most common use, (RBP) (the address
+syllable for memory memtop), takes more bytes than (RSI).
+\f
+Encodings and layout of various control features
+================================================
+
+Assumptions:
+
+The processor will be in 64-bit address and operand mode. Addresses
+are 64 bits wide; general-purpose operands are 64 bits wide under the
+REX.W prefix, while displacements for addressing modes and jump
+instructions remain at most 32 bits.
+
+ Offset Contents Encoding
+
+
+- Execute cache entry encoding:
+
+ Before linking
+
+ 0 16-bit arity \
+ 2 zero [TC_FIXNUM | arity]
+ 7 0x1A /
+entry 8 symbol
+ 16 <eight bytes of padding>
+ 24 <next cache>
+
+ After linking
+
+ 0 16-bit arity
+ 2 zero
+ 7 0x1A
+entry 8 MOV RAX,imm64 0x48 0xB8
+ 10 <address>
+ 18 JMP (RAX) 0xFF 0xE0
+ 20-23 <four bytes of padding>
+ 24 <next cache>
+
+
+- Closures:
+
+The only reason for a 32-bit entry count is to align everything
+nicely.
+
+ 0 <closure manifest>
+ 8 <entry count>
+ 12 <type/arity info> \__ format word
+ 14 <gc offset> /
+entry0 16 MOV RAX,imm64 0x48 0xB8
+ 18 <address>
+ 26 CALL (RAX) 0xFF 0xD0
+ 28 <four bytes of padding or next format word>
+ ...
+ 16*(n+1) <variables>
+
+
+- Trampoline encoding:
+
+ -8 <padding>
+ -4 <type/arity info>
+ -2 <gc offset>
+entry 0 MOV AL,code 0xB0, code-byte
+ 2 CALL n(RSI) 0xFF 0x96 n-longword
+ 8 <trampoline dependent storage>
+
+
+[TRC 20091027: The next two are wrong; need to update.]
+
+- GC & interrupt check at procedure/continuation entry:
+
+gc_lab -7 CALL n(RSI) 0xFF 0x56 n-byte
+ -4 <type/arity info>
+ -2 <gc offset>
+entry 0 CMP RDI,(RSI) 0x48 0x39 0x3e
+ 3 JAE gc_lab 0x73 -12
+ 5 <real code>
+
+
+- GC & interrupt check at closure entry:
+
+gc_lab -11 ADD (RSP),&offset 0x83 0x04 0x24 offset-byte
+ -7 JMP n(RSI) 0xFF 0x66 n-byte
+ -4 <type/arity info>
+ -2 <gc offset>
+entry 0 ADD (RSP),&magic 0x81 0x04 0x24 magic-longword
+ 7 CMP RDI,(RSI) 0x39 0x3e
+ 9 JAE gc_lab 0x73 0xea (= -22)
+ 11 <real code>
+
+The magic value depends on the closure because of canonicalization.
+
+The ADD instruction at offset -11 is not present for the 0th closure
+entry, since it is the canonical entry point. Its format depends on
+the value of offset, since the sign-extending forms often suffice.
+
+offset = entry_number * entry_size
+magic = ([TC_COMPILED_ENTRY | 0] - (offset + length_of_CALL_instruction))
+
+*/
+\f
+#define ASM_RESET_HOOK x86_64_reset_hook
+#define FPE_RESET_TRAPS x86_64_interface_initialize
+
+#define CMPINT_USE_STRUCS 1
+
+/* These next definitions must agree with "cmpauxmd/x86-64.m4", which is
+ where the register block is allocated. */
+#define COMPILER_REGBLOCK_N_FIXED 16
+/* Big enough to hold 80-bit floating-point value: */
+#define COMPILER_TEMP_SIZE 2
+#define COMPILER_REGBLOCK_N_TEMPS 256
+#define COMPILER_REGBLOCK_N_HOOKS 80
+#define COMPILER_HOOK_SIZE 1
+
+#define COMPILER_REGBLOCK_EXTRA_SIZE \
+ (COMPILER_REGBLOCK_N_HOOKS * COMPILER_HOOK_SIZE)
+
+#define REGBLOCK_ALLOCATED_BY_INTERFACE true
+
+typedef byte_t insn_t;
+
+/* Number of insn_t units preceding entry address in which header
+ (type and offset info) is stored. */
+#define CC_ENTRY_HEADER_SIZE (CC_ENTRY_TYPE_SIZE + CC_ENTRY_OFFSET_SIZE)
+#define CC_ENTRY_TYPE_SIZE 2
+#define CC_ENTRY_OFFSET_SIZE 2
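+/* The header is thus four bytes: a 16-bit format (type/arity) word
+   followed by a 16-bit offset word, matching the uint16_t accesses
+   in read_cc_entry_type and read_cc_entry_offset. */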
+
+/* Number of insn_t units preceding entry header in which GC trap
+ instructions are stored. */
+#define CC_ENTRY_GC_TRAP_SIZE 3
+\f
+#define EMBEDDED_CLOSURE_ADDRS_P 1
+
+#define DECLARE_RELOCATION_REFERENCE(name)
+
+#define START_CLOSURE_RELOCATION(scan, ref) do {} while (0)
+#define START_OPERATOR_RELOCATION(scan, ref) do {} while (0)
+
+#define OPERATOR_RELOCATION_OFFSET 0
+
+#define READ_COMPILED_CLOSURE_TARGET(a, r) \
+ read_compiled_closure_target (a)
+
+/* Size of execution cache in SCHEME_OBJECTS. */
+#define UUO_LINK_SIZE 3
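+/* Three objects = 24 bytes, matching the execute-cache layout above:
+   arity/format word, instruction words, and <next cache> at 24. */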
+
+#define UUO_WORDS_TO_COUNT(nw) ((nw) / UUO_LINK_SIZE)
+#define UUO_COUNT_TO_WORDS(nc) ((nc) * UUO_LINK_SIZE)
+
+#define READ_UUO_TARGET(a, r) read_uuo_target (a)
+
+#define FLUSH_I_CACHE() X86_64_CACHE_SYNCHRONIZE ()
+#define FLUSH_I_CACHE_REGION(address, nwords) X86_64_CACHE_SYNCHRONIZE ()
+#define PUSH_D_CACHE_REGION(address, nwords) X86_64_CACHE_SYNCHRONIZE ()
+
+/* [TRC 20091025: The cache synchronization bug does not occur in any
+ x86-64 machines of which I am aware.] */
+
+#define X86_64_CACHE_SYNCHRONIZE() do {} while (0)
+
+/*
+#define X86_64_CACHE_SYNCHRONIZE() do \
+{ \
+ if (x86_64_cpuid_needed) \
+ x86_64_cache_synchronize (); \
+} while (false)
+*/
+\f
+#if defined(__OS2__) && (defined(__IBMC__) || defined(__WATCOMC__))
+# define ASM_ENTRY_POINT(name) (_System name)
+#elif defined(__WIN32__) && defined(__WATCOMC__)
+# define ASM_ENTRY_POINT(name) (__cdecl name)
+#else
+# define ASM_ENTRY_POINT(name) name
+#endif
+
+extern int ASM_ENTRY_POINT (x86_64_interface_initialize) (void);
+
+extern void asm_assignment_trap (void);
+extern void asm_dont_serialize_cache (void);
+extern void asm_error (void);
+extern void asm_generic_add (void);
+extern void asm_generic_decrement (void);
+extern void asm_generic_divide (void);
+extern void asm_generic_equal (void);
+extern void asm_generic_greater (void);
+extern void asm_generic_increment (void);
+extern void asm_generic_less (void);
+extern void asm_generic_modulo (void);
+extern void asm_generic_multiply (void);
+extern void asm_generic_negative (void);
+extern void asm_generic_positive (void);
+extern void asm_generic_quotient (void);
+extern void asm_generic_remainder (void);
+extern void asm_generic_subtract (void);
+extern void asm_generic_zero (void);
+extern void asm_interrupt_closure (void);
+extern void asm_interrupt_continuation (void);
+extern void asm_interrupt_continuation_2 (void);
+extern void asm_interrupt_dlink (void);
+extern void asm_interrupt_procedure (void);
+extern void asm_link (void);
+extern void asm_nofp_add (void);
+extern void asm_nofp_decrement (void);
+extern void asm_nofp_divide (void);
+extern void asm_nofp_equal (void);
+extern void asm_nofp_greater (void);
+extern void asm_nofp_increment (void);
+extern void asm_nofp_less (void);
+extern void asm_nofp_modulo (void);
+extern void asm_nofp_multiply (void);
+extern void asm_nofp_negative (void);
+extern void asm_nofp_positive (void);
+extern void asm_nofp_quotient (void);
+extern void asm_nofp_remainder (void);
+extern void asm_nofp_subtract (void);
+extern void asm_nofp_zero (void);
+extern void asm_primitive_apply (void);
+extern void asm_primitive_error (void);
+extern void asm_primitive_lexpr_apply (void);
+extern void asm_reference_trap (void);
+extern void asm_safe_reference_trap (void);
+extern void asm_sc_apply (void);
+extern void asm_sc_apply_size_1 (void);
+extern void asm_sc_apply_size_2 (void);
+extern void asm_sc_apply_size_3 (void);
+extern void asm_sc_apply_size_4 (void);
+extern void asm_sc_apply_size_5 (void);
+extern void asm_sc_apply_size_6 (void);
+extern void asm_sc_apply_size_7 (void);
+extern void asm_sc_apply_size_8 (void);
+extern void asm_scheme_to_interface (void);
+extern void asm_scheme_to_interface_call (void);
+extern void asm_serialize_cache (void);
+/* [TRC 20091025: This was an i386 hack for when the PC is not
+   available, which on x86-64 it always is.] */
+/* extern void asm_short_primitive_apply (void); */
+extern void asm_trampoline_to_interface (void);
+
+/* extern void x86_64_cache_synchronize (void); */
+/* extern void start_closure_relocation (SCHEME_OBJECT *, reloc_ref_t *); */
+extern insn_t * read_compiled_closure_target (insn_t *);
+/* extern void start_operator_relocation (SCHEME_OBJECT *, reloc_ref_t *); */
+extern insn_t * read_uuo_target (SCHEME_OBJECT *);
+extern void x86_64_reset_hook (void);
+
+extern int x86_64_cpuid_needed;
+
+#endif /* !SCM_CMPINTMD_H_INCLUDED */