From e98f84f068f8661264db0b182ec87544a6833268 Mon Sep 17 00:00:00 2001
From: Taylor R Campbell
Date: Fri, 30 Oct 2009 18:14:32 -0400
Subject: [PATCH] Implement microcode auxiliaries for AMD x86-64 compiled code.
---
 src/microcode/cmpauxmd/x86-64.m4       | 1114 ++++++++++++++++++++++++
 src/microcode/cmpintmd/x86-64-config.h |   32 +
 src/microcode/cmpintmd/x86-64.c        |  372 ++++
 src/microcode/cmpintmd/x86-64.h        |  351 ++++
 src/microcode/confshared.h             |    5 +-
 src/microcode/utabmd.c                 |    1 +
 6 files changed, 1874 insertions(+), 1 deletion(-)
 create mode 100644 src/microcode/cmpauxmd/x86-64.m4
 create mode 100644 src/microcode/cmpintmd/x86-64-config.h
 create mode 100644 src/microcode/cmpintmd/x86-64.c
 create mode 100644 src/microcode/cmpintmd/x86-64.h

diff --git a/src/microcode/cmpauxmd/x86-64.m4 b/src/microcode/cmpauxmd/x86-64.m4
new file mode 100644
index 000000000..fbf7417e3
--- /dev/null
+++ b/src/microcode/cmpauxmd/x86-64.m4
@@ -0,0 +1,1114 @@
+### -*-Midas-*-
+###
+### Copyright (C) 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993,
+### 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
+### 2004, 2005, 2006, 2007, 2008, 2009 Massachusetts Institute of
+### Technology
+###
+### This file is part of MIT/GNU Scheme.
+###
+### MIT/GNU Scheme is free software; you can redistribute it and/or
+### modify it under the terms of the GNU General Public License as
+### published by the Free Software Foundation; either version 2 of the
+### License, or (at your option) any later version.
+###
+### MIT/GNU Scheme is distributed in the hope that it will be useful,
+### but WITHOUT ANY WARRANTY; without even the implied warranty of
+### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+### General Public License for more details.
+###
+### You should have received a copy of the GNU General Public License
+### along with MIT/GNU Scheme; if not, write to the Free Software
+### Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+### 02110-1301, USA.
+
+### AMD x86-64 assembly language part of the compiled code interface.
+### See cmpint.txt, cmpint.c, cmpint-mc68k.h, and cmpgc.h for more
+### documentation.
+###
+### This m4 source expands into either Unix (gas) source or PC
+### (masm/wasm) source.
+###
+### NOTE:
+### Assumptions:
+###
+### 0) Segment registers and paging are set up for 64-bit "flat"
+### operation.
+###
+### 1) All registers and stack locations hold a C long object.
+###
+### 2) The C compiler divides registers into three groups:
+### - Linkage registers, used for procedure calls and global
+### references. On AMD64 Unix ABI: %rbp, %rsp.
+### - super temporaries, not preserved across procedure calls and
+### always usable. On AMD64 Unix ABI: everything but what is
+### listed below.
+### - preserved registers saved by the callee if they are written.
+### On AMD64 Unix ABI: %rbp, %rbx, %r12-%r15, MXCSR, x87 control
+### word.
+###
+### 3) Arguments, if passed on a stack, are popped by the caller
+### or by the procedure return instruction (as on the VAX). Thus
+### most "leaf" procedures need not worry about them. On x86-64,
+### arguments beyond the sixth are passed on the stack; the first
+### through sixth are passed in %rdi, %rsi, %rdx, %rcx, %r8, and
+### %r9. (Non-integer arguments are passed in other ways.)
+###
+### 4) There is a hardware or software maintained stack for
+### control. The procedure calling sequence may leave return
+### addresses in registers, but they must be saved somewhere for
+### nested calls and recursive procedures. On x86-64: saved on
+### the stack by the CALL instruction.
+
+### 5) C procedures return long values in a super temporary
+### register. Two-word structures are returned in super temporary
+### registers as well in the AMD64 Unix ABI: %rax and %rdx.
+###
+### 6) Floating point registers are not preserved by this
+### interface. The interface is only called from the Scheme
+### interpreter, which does not use floating point data. Thus
+### although the calling convention would require us to preserve
+### them, they contain garbage.
+###
+### Compiled Scheme code uses the following register convention:
+### - %rsp contains the Scheme stack pointer, not the C stack
+### pointer.
+### - %rsi contains a pointer to the Scheme interpreter's "register"
+### block. This block contains the compiler's copy of MemTop,
+### the interpreter's registers (val, env, exp, etc.),
+### temporary locations for compiled code, and the addresses
+### of various hooks defined in this file.
+### - %rdi contains the Scheme free pointer.
+### - %rbp contains the Scheme datum mask.
+### The dynamic link (when needed) is in Registers[REGBLOCK_COMPILER_TEMP].
+### Values are returned in Registers[REGBLOCK_VAL].
+### [TRC 20091025: Later, we ought to use machine registers for
+### these.]
+###
+### All other registers are available to the compiler. A
+### caller-saves convention is used, so the registers need not be
+### preserved by subprocedures.
+
+### The following m4 macros can be defined to change how this file is
+### expanded.
+###
+### DASM
+### If defined, expand to Intel assembly-language syntax, used by
+### Microsoft assembler (MASM) and Watcom assembler (WASM).
+### Otherwise, expand to AT&T syntax, used by GAS. [TRC 20091025:
+### The Intel syntax probably won't work here.]
+###
+### WIN32
+### If defined, expand to run under Win32; implies DASM.
+###
+### SUPPRESS_LEADING_UNDERSCORE
+### If defined, external symbol names are generated as written;
+### otherwise, they have an underscore prepended to them.
+### WCC386
+### Should be defined when using Watcom assembler.
+### WCC386R
+### Should be defined when using Watcom assembler and generating
+### code to use the Watcom register-based argument conventions.
+### TYPE_CODE_LENGTH
+### Normally defined to be 6. Don't change this unless you know
+### what you're doing.
+### DISABLE_387
+### If defined, do not generate 387 floating-point instructions.
+### VALGRIND_MODE
+### If defined, modify code to make it work with valgrind.
+
+#### Utility macros and definitions
+
+ifdef(`WIN32',
+ `define(IF_WIN32,`$1')',
+ `define(IF_WIN32,`')')
+
+ifdef(`DISABLE_387',
+ `define(IF387,`')',
+ `define(IF387,`$1')')
+
+ifdef(`DISABLE_387',
+ `define(IFN387,`$1')',
+ `define(IFN387,`')')
+
+IF_WIN32(`define(DASM,1)')
+ifdef(`WCC386R',`define(WCC386,1)')
+
+ifdef(`DASM',
+ `define(IFDASM,`$1')',
+ `define(IFDASM,`')')
+
+ifdef(`DASM',
+ `define(IFNDASM,`')',
+ `define(IFNDASM,`$1')')
+
+ifdef(`DASM',
+ `define(use_external_data,` extrn $1':dword)',
+ `define(use_external_data,`')')
+
+ifdef(`DASM',
+ `define(use_external_code,` extrn $1':near)',
+ `define(use_external_code,`')')
+
+ifdef(`DASM',
+ `define(export_label,` public $1')',
+ `define(export_label,` .globl $1')')
+
+IFNDASM(` .file "cmpaux-x86-64.s"')
+
+# GAS doesn't implement the pushad/popad/pushfd/popfd mnemonics, for
+# no obvious reason, so they are defined below in terms of the
+# mnemonics it does accept.
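A concrete reading of assumption 5 above: a structure of two word-sized
members is returned in %rax (first word) and %rdx (second word). A
minimal C sketch of the ABI rule, with hypothetical names; this is an
illustration, not code from cmpint.h:

    #include <stdint.h>

    /* Both eightbytes are INTEGER class, so the SysV AMD64 ABI
       returns the struct in registers: r.first in RAX, r.second
       in RDX.  */
    struct two_words { uint64_t first; uint64_t second; };

    static struct two_words
    example_utility (void)
    {
      struct two_words r = { 1, 2 };
      return (r);
    }

    int
    main (void)
    {
      struct two_words r = (example_utility ());
      return ((int) ((r.first + r.second) - 3));	/* exits 0 */
    }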
+IFNDASM(`define(pushad,`pusha')') +IFNDASM(`define(popad,`popa')') +IFNDASM(`define(pushfd,`pushf')') +IFNDASM(`define(popfd,`popf')') + +ifdef(`SUPPRESS_LEADING_UNDERSCORE', + `define(EVR,`$1')', + `define(EVR,`_$1')') + +# When using the Watcom C compiler with register-based calling +# conventions, source-code function names normally expand to `FOO_', +# but functions that are compiled with prefix keywords such as +# `__cdecl' or `__syscall' expand differently. References to the +# former type of name are marked with `EFR', while references to the +# latter are marked with `EPFR'. + +ifdef(`SUPPRESS_LEADING_UNDERSCORE', + `define(EPFR,`$1')', + `define(EPFR,`_$1')') + +ifdef(`WCC386R', + `define(EFR,`$1_')', + `define(EFR,`EPFR($1)')') + +define(hook_reference,`EFR(asm_$1)') + +define(define_data,`export_label(EVR($1))') + +define(define_code_label,` +export_label($1) +$1:') + +define(define_c_label,`define_code_label(EPFR($1))') +define(define_debugging_label,`define_code_label($1)') +define(define_hook_label,`define_code_label(hook_reference($1))') + +ifdef(`DASM', + `define(DECLARE_DATA_SEGMENT,` .data')', + `define(DECLARE_DATA_SEGMENT,` .data')') + +ifdef(`DASM', + `define(DECLARE_CODE_SEGMENT,` .code')', + `define(DECLARE_CODE_SEGMENT,` .text')') + +ifdef(`DASM', + `define(declare_alignment,` align $1')', + `define(declare_alignment,` .align $1')') + +ifdef(`DASM', + `define(allocate_word,`EVR($1) dw 0')', + `define(allocate_word,` .comm EVR($1),2')') + +ifdef(`DASM', + `define(allocate_longword,`EVR($1) dd 0')', + `define(allocate_longword,` .comm EVR($1),4')') + +ifdef(`DASM', + `define(allocate_quadword,`EVR($1) dq 0')', + `define(allocate_quadword,` .comm EVR($1),8')') + +ifdef(`DASM', + `define(allocate_space,`EVR($1) db $2 dup (0)')', + `define(allocate_space,`EVR($1): + .space $2')') + +ifdef(`DASM', + `define(HEX, `0$1H')', + `define(HEX, `0x$1')') + +ifdef(`DASM', + `define(OP,`$1$3')', + `define(OP,`$1$2')') + +ifdef(`DASM', + `define(TW,`$2,$1')', + `define(TW,`$1,$2')') + +ifdef(`DASM', + `define(ABS, `dword ptr $1')', + `define(ABS, `$1(%rip)')') + +ifdef(`DASM', + `define(IMM, `$1')', + `define(IMM, `$$1')') + +ifdef(`DASM', + `define(REG,`$1')', + `define(REG,`%$1')') + +ifdef(`DASM', + `define(ST,`st($1)')', + `define(ST,`%st ($1)')') + +ifdef(`DASM', + `define(IND,`dword ptr [$1]')', + `define(IND,`($1)')') + +ifdef(`DASM', + `define(BOF,`byte ptr $1[$2]')', + `define(BOF,`$1($2)')') + +ifdef(`DASM', + `define(WOF,`word ptr $1[$2]')', + `define(WOF,`$1($2)')') + +ifdef(`DASM', + `define(LOF,`dword ptr $1[$2]')', + `define(LOF,`$1($2)')') + +ifdef(`DASM', + `define(DOF,`qword ptr $1[$2]')', + `define(DOF,`$1($2)')') + +ifdef(`DASM', + `define(IDX,`dword ptr [$1] [$2]')', + `define(IDX,`($1,$2)')') + +ifdef(`DASM', + `define(SDX,`dword ptr $1[$2*$3]')', + `define(SDX,`$1(,$2,$3)')') + +ifdef(`DASM', + `define(IJMP,`$1')', + `define(IJMP,`*$1')') + +define(TC_LENGTH, ifdef(`TYPE_CODE_LENGTH', TYPE_CODE_LENGTH, 6)) +define(DATUM_LENGTH, eval(64 - TC_LENGTH)) +define(DATUM_SHIFT, eval(1 << DATUM_LENGTH)) +# This doesn't work because m4 is !@#&$*%^!#!$(%!&*@#^(. +#define(ADDRESS_MASK, eval(DATUM_SHIFT - 1)) +define(ADDRESS_MASK, HEX(3ffffffffffffff)) +# TAG doesn't work for the same reason. +#define(TAG, ($2 + ($1 * DATUM_SHIFT))) + +define(TC_FALSE,0) +define(TC_FLONUM,6) +define(TC_TRUE,8) +define(TC_FIXNUM,26) +define(TC_MANIFEST_NM_VECTOR,39) +define(TC_COMPILED_ENTRY,40) + +# TAG doesn't work due to m4 stupidity, so define these magic +# constants here. 
These are computed in terms of the parameters +# above. + +define(IMM_MANIFEST_NM_VECTOR_1, `IMM(HEX(9c00000000000001))') +define(IMM_TRUE, `IMM(HEX(2000000000000000))') +define(IMM_FALSE, `IMM(HEX(0000000000000000))') + +define(REGBLOCK_VAL,16) +define(REGBLOCK_COMPILER_TEMP,32) +define(REGBLOCK_LEXPR_ACTUALS,56) +define(REGBLOCK_PRIMITIVE,64) +define(REGBLOCK_CLOSURE_FREE,72) + +define(REGBLOCK_DLINK,REGBLOCK_COMPILER_TEMP) +define(REGBLOCK_UTILITY_ARG4,REGBLOCK_CLOSURE_FREE) + +define(COMPILER_REGBLOCK_N_FIXED,16) +define(COMPILER_REGBLOCK_N_HOOKS,80) +define(COMPILER_REGBLOCK_N_TEMPS,256) +define(COMPILER_FIXED_SIZE,1) +define(COMPILER_HOOK_SIZE,1) +define(COMPILER_TEMP_SIZE,2) +define(REGBLOCK_SIZE_IN_OBJECTS, + eval((COMPILER_REGBLOCK_N_FIXED*COMPILER_FIXED_SIZE) + +(COMPILER_REGBLOCK_N_HOOKS*COMPILER_HOOK_SIZE) + +(COMPILER_REGBLOCK_N_TEMPS*COMPILER_TEMP_SIZE))) + +# Define the floating-point processor control word. Always set +# round-to-even and double precision. Under Win32, mask all +# exceptions. Under unix and OS/2, mask only the inexact result +# exception. +ifdef(`WIN32', + `define(FP_CONTROL_WORD,HEX(023f))', + `define(FP_CONTROL_WORD,HEX(0220))') + +define(regs,REG(rsi)) +define(rfree,REG(rdi)) +define(rmask,REG(rbp)) + +IFDASM(`.586p +.model flat') + +DECLARE_DATA_SEGMENT() +declare_alignment(2) + +use_external_data(EVR(Free)) +use_external_data(EVR(stack_pointer)) +use_external_data(EVR(utility_table)) + +ifdef(`WIN32',` +use_external_data(EVR(RegistersPtr)) +',` +define_data(Regstart) +allocate_space(Regstart,256) + +define_data(Registers) +allocate_space(Registers,eval(REGBLOCK_SIZE_IN_OBJECTS*8)) +') + +define_data(i387_presence) +allocate_quadword(i387_presence) + +define_data(C_Stack_Pointer) +allocate_quadword(C_Stack_Pointer) + +define_data(C_Frame_Pointer) +allocate_quadword(C_Frame_Pointer) + +# [TRC 20091025: CPUID is always supported.] +# define_data(x86_64_cpuid_supported) +# allocate_quadword(x86_64_cpuid_supported) + +# [TRC 20091025: The cache synchronization bug does not occur in any +# x86-64 machines of which I am aware.] +# define_data(x86_64_cpuid_needed) +# allocate_quadword(x86_64_cpuid_needed) + +DECLARE_CODE_SEGMENT() +declare_alignment(2) + +# [TRC 20091025: We need to check for MMX/SSEn instructions too.] + +define_c_label(x86_64_interface_initialize) + OP(push,q) REG(rbp) + OP(mov,q) TW(REG(rsp),REG(rbp)) + OP(xor,q) TW(REG(rax),REG(rax)) # No 387 available + +# [TRC 20091025: The AMD64 reference manual suggests using the CPUID +# instruction to detect instruction subsets instead.] + +# Unfortunately, the `movl cr0,ecx' instruction is privileged. +# Use the deprecated `smsw cx' instruction instead. + +IF387(` +# OP(mov,q) TW(REG(cr0),REG(rcx)) # Test for 387 presence +ifdef(`VALGRIND_MODE',`',` + smsw REG(cx) + OP(mov,q) TW(IMM(HEX(12)),REG(rdx)) + OP(and,q) TW(REG(rdx),REG(rcx)) + OP(cmp,q) TW(REG(rdx),REG(rcx)) + jne x86_64_initialize_no_fp +') + OP(inc,q) REG(rax) # 387 available + OP(sub,q) TW(IMM(8),REG(rsp)) + fclex + fnstcw WOF(-2,REG(rbp)) + OP(and,w) TW(IMM(HEX(f0e0)),WOF(-2,REG(rbp))) + OP(or,w) TW(IMM(FP_CONTROL_WORD),WOF(-2,REG(rbp))) + fldcw WOF(-2,REG(rbp)) +x86_64_initialize_no_fp: +') + OP(mov,q) TW(REG(rax),ABS(EVR(i387_presence))) + +# [TRC 20091025: CPUID is always supported.] + +# Do a bunch of hair to determine if we need to do cache synchronization. +# See if the CPUID instruction is supported. 
+ +# OP(xor,q) TW(REG(rax),REG(rax)) +# OP(mov,q) TW(REG(rax),ABS(EVR(x86_64_cpuid_supported))) +# OP(mov,q) TW(REG(rax),ABS(EVR(x86_64_cpuid_needed))) + +# First test: can we toggle the AC bit? + +# pushfd +# OP(pop,l) REG(eax) +# OP(mov,l) TW(REG(eax),REG(ecx)) +# OP(xor,l) TW(IMM(HEX(00040000)),REG(eax)) +# OP(push,l) REG(eax) +# popfd +# pushfd +# OP(pop,l) REG(eax) + +# if AC bit can't be toggled, this is a 386 (and doesn't support CPUID). + +# OP(xor,l) TW(REG(ecx),REG(eax)) +# jz no_cpuid_instr +# OP(push,l) REG(ecx) # restore EFLAGS +# popfd + +# Now test to see if the ID bit can be toggled. + +# OP(mov,l) TW(REG(ecx),REG(eax)) +# OP(xor,l) TW(IMM(HEX(00200000)),REG(eax)) +# OP(push,l) REG(eax) +# popfd +# pushfd +# OP(pop,l) REG(eax) + +# if ID bit can't be toggled, this is a 486 that doesn't support CPUID. + +# OP(xor,l) TW(REG(ecx),REG(eax)) +# jz no_cpuid_instr +# OP(push,l) REG(ecx) # restore EFLAGS +# popfd + +# Now we know that cpuid is supported. + +# OP(mov,q) TW(IMM(HEX(00000001)),ABS(EVR(x86_64_cpuid_supported))) + +# Next, use the CPUID instruction to determine the processor type. + +# OP(push,l) REG(ebx) +# OP(xor,l) TW(REG(eax),REG(eax)) +# cpuid + +# Check that CPUID accepts argument 1. + +# OP(cmp,l) TW(IMM(HEX(00000001)),REG(eax)) +# jl done_setting_up_cpuid + +# Detect "GenuineIntel". + +# OP(cmp,l) TW(IMM(HEX(756e6547)),REG(ebx)) +# jne not_intel_cpu +# OP(cmp,l) TW(IMM(HEX(49656e69)),REG(edx)) +# jne not_intel_cpu +# OP(cmp,l) TW(IMM(HEX(6c65746e)),REG(ecx)) +# jne not_intel_cpu + +# For CPU families 4 (486), 5 (Pentium), or 6 (Pentium Pro, Pentium +# II, Pentium III), don't use CPUID synchronization. + +# OP(mov,l) TW(IMM(HEX(01)),REG(eax)) +# cpuid +# OP(shr,l) TW(IMM(HEX(08)),REG(eax)) +# OP(and,l) TW(IMM(HEX(0000000F)),REG(eax)) +# OP(cmp,l) TW(IMM(HEX(4)),REG(eax)) +# jl done_setting_up_cpuid +# OP(cmp,l) TW(IMM(HEX(6)),REG(eax)) +# jg done_setting_up_cpuid +# +# jmp cpuid_not_needed +# +#not_intel_cpu: + +# Detect "AuthenticAMD". + +# OP(cmp,l) TW(IMM(HEX(68747541)),REG(ebx)) +# jne not_amd_cpu +# OP(cmp,l) TW(IMM(HEX(69746e65)),REG(edx)) +# jne not_amd_cpu +# OP(cmp,l) TW(IMM(HEX(444d4163)),REG(ecx)) +# jne not_amd_cpu + +# Problem appears to exist only on Athlon models 1, 3, and 4. 
+ +# OP(mov,l) TW(IMM(HEX(01)),REG(eax)) +# cpuid + +# OP(mov,l) TW(REG(eax),REG(ecx)) +# OP(shr,l) TW(IMM(HEX(08)),REG(eax)) +# OP(and,l) TW(IMM(HEX(0000000F)),REG(eax)) +# OP(cmp,l) TW(IMM(HEX(6)),REG(eax)) # family 6 = Athlon +# jne done_setting_up_cpuid + +# OP(mov,l) TW(REG(ecx),REG(eax)) +# OP(shr,l) TW(IMM(HEX(04)),REG(eax)) +# OP(and,l) TW(IMM(HEX(0000000F)),REG(eax)) +# OP(cmp,l) TW(IMM(HEX(6)),REG(eax)) # model 6 and up OK +# jge done_setting_up_cpuid +# OP(cmp,l) TW(IMM(HEX(2)),REG(eax)) # model 2 OK +# je done_setting_up_cpuid + +# OP(mov,l) TW(IMM(HEX(00000001)),ABS(EVR(x86_64_cpuid_needed))) + +#not_amd_cpu: +#done_setting_up_cpuid: +# OP(pop,l) REG(ebx) +#no_cpuid_instr: + leave + ret + +define_c_label(C_to_interface) + OP(push,q) REG(rbp) # Link according + OP(mov,q) TW(REG(rsp),REG(rbp)) # to C's conventions + OP(push,q) REG(rbx) # Save callee-saves + OP(push,q) REG(r12) # registers + OP(push,q) REG(r13) + OP(push,q) REG(r14) + OP(push,q) REG(r15) + OP(mov,q) TW(REG(rdi),REG(rdx)) # Entry point + # Preserve frame ptr + OP(mov,q) TW(REG(rbp),ABS(EVR(C_Frame_Pointer))) + # Preserve stack ptr + OP(mov,q) TW(REG(rsp),ABS(EVR(C_Stack_Pointer))) + jmp EPFR(interface_to_scheme) + +define_hook_label(trampoline_to_interface) +define_debugging_label(trampoline_to_interface) + OP(pop,q) REG(rcx) # trampoline storage + jmp scheme_to_interface + +define_hook_label(scheme_to_interface_call) +define_debugging_label(scheme_to_interface_call) + OP(pop,q) REG(rcx) # arg1 = ret. add + OP(add,q) TW(IMM(4),REG(rcx)) # Skip format info +# jmp scheme_to_interface + +define_hook_label(scheme_to_interface) +define_debugging_label(scheme_to_interface) + +# These two moves must happen _before_ the ffree instructions below. +# Otherwise recovery from SIGFPE there will fail. + OP(mov,q) TW(REG(rsp),ABS(EVR(stack_pointer))) + OP(mov,q) TW(rfree,ABS(EVR(Free))) + +# [TRC 20091025: I think this should be excised.] + +IF387(` + OP(cmp,q) TW(IMM(0),ABS(EVR(i387_presence))) + je scheme_to_interface_proceed + ffree ST(0) # Free floating "regs" + ffree ST(1) + ffree ST(2) + ffree ST(3) + ffree ST(4) + ffree ST(5) + ffree ST(6) + ffree ST(7) +scheme_to_interface_proceed: +') + + OP(mov,q) TW(ABS(EVR(C_Stack_Pointer)),REG(rsp)) + OP(mov,q) TW(ABS(EVR(C_Frame_Pointer)),REG(rbp)) + + OP(sub,q) TW(IMM(16),REG(rsp)) # alloc struct return + + # Shuffle Scheme -> AMD64 calling conventions: + # struct pointer -> rdi + # rcx -> rsi + # rdx -> rdx + # rbx -> rcx + # arg4 -> r8 + # Parallel assignment problems: + # arg4 depends on rsi: do arg4->r8 first + # target depends on rcx (why?): use r11 as a temporary + # [TRC 20091025: Perhaps we can rearrange LIAR to generate + # arguments in the registers we want, to avoid this + # shuffling.] 
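In C terms, the shuffle performed by the instructions that follow
treats the utility as a function of this shape (a sketch under the
conventions described in the comment above; the struct and typedef
names are hypothetical, the real declarations live in cmpint.h):

    #include <stdint.h>

    /* Two-word result area, filled in through the pointer passed in
       RDI; scheme_to_interface_return pops both words afterward.  */
    struct utility_result { void * handler; uint64_t extra; };

    /* The first integer argument (RDI) is the result pointer; the
       four word-sized utility arguments then land in RSI, RDX, RCX,
       and R8 -- exactly the permutation performed below.  */
    typedef void utility_fn (struct utility_result *,
                             uint64_t, uint64_t, uint64_t, uint64_t);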
+ + OP(mov,q) TW(REG(rcx),REG(r11)) + + OP(xor,q) TW(REG(rcx),REG(rcx)) + OP(mov,b) TW(REG(al),REG(cl)) + OP(mov,q) TW(SDX(EVR(utility_table),REG(rcx),8),REG(rax)) + + OP(mov,q) TW(REG(rsp),REG(rdi)) + OP(mov,q) TW(DOF(REGBLOCK_UTILITY_ARG4(),regs),REG(r8)) + OP(mov,q) TW(REG(r11),REG(rsi)) + OP(mov,q) TW(REG(rbx),REG(rcx)) + + call IJMP(REG(rax)) + +define_debugging_label(scheme_to_interface_return) + OP(pop,q) REG(rax) # pop struct return + OP(pop,q) REG(rdx) + jmp IJMP(REG(rax)) # Invoke handler + +define_c_label(interface_to_scheme) +IF387(` + OP(cmp,q) TW(IMM(0),ABS(EVR(i387_presence))) + je interface_to_scheme_proceed + ffree ST(0) # Free floating "regs" + ffree ST(1) + ffree ST(2) + ffree ST(3) + ffree ST(4) + ffree ST(5) + ffree ST(6) + ffree ST(7) +interface_to_scheme_proceed: +') + # Register block = %rsi + # Scheme offset in NT +ifdef(`WIN32', +` OP(mov,q) TW(ABS(EVR(RegistersPtr)),regs)', +` OP(lea,q) TW(ABS(EVR(Registers)),regs)') + + OP(mov,q) TW(ABS(EVR(Free)),rfree) # Free pointer = %rdi + OP(mov,q) TW(DOF(REGBLOCK_VAL(),regs),REG(rax)) # Value/dynamic link + OP(mov,q) TW(IMM(ADDRESS_MASK),rmask) # = %rbp + + OP(mov,q) TW(ABS(EVR(stack_pointer)),REG(rsp)) + OP(mov,q) TW(REG(rax),REG(rcx)) # Preserve if used + OP(and,q) TW(rmask,REG(rcx)) # Restore potential dynamic link + OP(mov,q) TW(REG(rcx),DOF(REGBLOCK_DLINK(),regs)) + jmp IJMP(REG(rdx)) + +IF_WIN32(` +use_external_code(EFR(WinntExceptionTransferHook)) +define_code_label(EFR(callWinntExceptionTransferHook)) + call EFR(WinntExceptionTransferHook) + mov rdx,rax +') + +define_c_label(interface_to_C) +IF387(` + OP(cmp,q) TW(IMM(0),ABS(EVR(i387_presence))) + je interface_to_C_proceed + ffree ST(0) # Free floating "regs" + ffree ST(1) + ffree ST(2) + ffree ST(3) + ffree ST(4) + ffree ST(5) + ffree ST(6) + ffree ST(7) +interface_to_C_proceed:') + + OP(mov,q) TW(REG(rdx),REG(rax)) # Set up result + OP(pop,q) REG(r15) # Restore callee-saves + OP(pop,q) REG(r14) # registers + OP(pop,q) REG(r13) + OP(pop,q) REG(r12) + OP(pop,q) REG(rbx) + leave + ret + +# [TRC 20091025: The cache synchronization bug does not occur in any +# x86-64 machines of which I am aware.] + +#define_code_label(EFR(x86_64_cache_synchronize)) +# OP(push,q) REG(rbp) +# OP(mov,q) TW(REG(rsp),REG(rbp)) +# OP(push,q) REG(rbx) +# OP(xor,q) TW(REG(rax),REG(rax)) +# cpuid +# OP(pop,q) REG(rbx) +# leave +# ret + +### Run the CPUID instruction for serialization. + +#define_hook_label(serialize_cache) +# pushad +# OP(xor,q) TW(REG(rax),REG(rax)) +# cpuid +# popad +# ret + +### Stub to be used in place of above on machines that don't need it. + +#define_hook_label(dont_serialize_cache) +# ret + +### Assembly language hooks used to reduce code size. +### There is no time advantage to using these over using +### scheme_to_interface (or scheme_to_interface_call), but the +### code generated by the compiler can be somewhat smaller. 
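Compiled code reaches each hook below with a short CALL or JMP through
a fixed displacement from the register block in %rsi;
x86_64_reset_hook in cmpintmd/x86-64.c fills in the slots. A sketch of
the displacement arithmetic, using the layout constants from this
patch (hook_offset is a hypothetical helper):

    #include <stdio.h>

    #define SIZEOF_SCHEME_OBJECT 8
    #define COMPILER_REGBLOCK_N_FIXED 16
    #define COMPILER_HOOK_SIZE 1

    /* Hook n sits just past the fixed registers in the block.  */
    static long
    hook_offset (int n)
    {
      return ((COMPILER_REGBLOCK_N_FIXED + (n * COMPILER_HOOK_SIZE))
              * SIZEOF_SCHEME_OBJECT);
    }

    int
    main (void)
    {
      /* Hook 2 is trampoline_to_interface: (16 + 2) * 8 = 144, which
         matches RSI_TRAMPOLINE_TO_INTERFACE_OFFSET in x86-64.c.  */
      printf ("%ld\n", (hook_offset (2)));
      return (0);
    }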
+ +define(define_jump_indirection, +`define_hook_label($1) + OP(mov,b) TW(IMM(HEX($2)),REG(al)) + jmp scheme_to_interface') + +define(define_call_indirection, +`define_hook_label($1) + OP(mov,b) TW(IMM(HEX($2)),REG(al)) + jmp scheme_to_interface_call') + +define_call_indirection(interrupt_procedure,1a) +define_call_indirection(interrupt_continuation,1b) +define_jump_indirection(interrupt_closure,18) +define_jump_indirection(interrupt_continuation_2,3b) + +define_hook_label(interrupt_dlink) + OP(mov,q) TW(DOF(REGBLOCK_DLINK(),regs),REG(rdx)) + OP(mov,b) TW(IMM(HEX(19)),REG(al)) + jmp scheme_to_interface_call + +### +### This saves even more instructions than primitive_apply +### When the PC is not available. Instead of jumping here, +### a call instruction is used, and the longword offset to +### the primitive object follows the call instruction. +### This code loads the primitive object and merges with +### apply_primitive +### +### [TRC 20091025: But on the x86-64, we have RIP-relative +### addressing, so we don't need this.] +### + +#declare_alignment(2) +#define_hook_label(short_primitive_apply) +# OP(pop,l) REG(edx) # offset pointer +# OP(mov,l) TW(IND(REG(edx)),REG(ecx)) # offset +# # Primitive object +# OP(mov,l) TW(IDX(REG(edx),REG(ecx)),REG(ecx)) +# # Merge +# jmp hook_reference(primitive_apply) + +declare_alignment(2) +define_jump_indirection(primitive_apply,12) + +define_jump_indirection(primitive_lexpr_apply,13) +define_jump_indirection(error,15) +define_call_indirection(link,17) +define_call_indirection(assignment_trap,1d) +define_call_indirection(reference_trap,1f) +define_call_indirection(safe_reference_trap,20) +define_call_indirection(primitive_error,36) + +### Assembly language hooks used to increase speed. + +# define_jump_indirection(sc_apply,14) +# +# define(define_apply_fixed_size, +# `define_hook_label(sc_apply_size_$1) +# OP(mov,q) TW(IMM($1),REG(rdx)) +# OP(mov,b) TW(IMM(HEX(14)),REG(al)) +# jmp scheme_to_interface') + +declare_alignment(2) +define_hook_label(sc_apply) + OP(mov,q) TW(REG(rcx),REG(rax)) # Copy for type code + OP(mov,q) TW(REG(rcx),REG(rbx)) # Copy for address + OP(shr,q) TW(IMM(DATUM_LENGTH),REG(rax)) # Select type code + OP(and,q) TW(rmask,REG(rbx)) # Select datum + OP(cmp,b) TW(IMM(TC_COMPILED_ENTRY),REG(al)) + jne asm_sc_apply_generic + # [TRC 20091025: How big are the frame sizes?] + OP(movs,bq,x) TW(BOF(-4,REG(rbx)),REG(rax)) # Extract frame size + OP(cmp,q) TW(REG(rax),REG(rdx)) # Compare to nargs+1 + jne asm_sc_apply_generic + jmp IJMP(REG(rbx)) # Invoke + +define_debugging_label(asm_sc_apply_generic) + OP(mov,q) TW(IMM(HEX(14)),REG(rax)) + jmp scheme_to_interface + +define(define_apply_fixed_size, +`declare_alignment(2) +define_hook_label(sc_apply_size_$1) + OP(mov,q) TW(REG(rcx),REG(rax)) # Copy for type code + OP(mov,q) TW(REG(rcx),REG(rbx)) # Copy for address + OP(shr,q) TW(IMM(DATUM_LENGTH),REG(rax)) # Select type code + OP(and,q) TW(rmask,REG(rbx)) # Select datum + OP(cmp,b) TW(IMM(TC_COMPILED_ENTRY),REG(al)) + jne asm_sc_apply_generic_$1 + # [TRC 20091025: How big are the frame sizes?] 
+ OP(cmp,b) TW(IMM($1),BOF(-4,REG(rbx))) # Compare frame size + jne asm_sc_apply_generic_$1 # to nargs+1 + jmp IJMP(REG(rbx)) + +asm_sc_apply_generic_$1: + OP(mov,q) TW(IMM($1),REG(rdx)) + OP(mov,b) TW(IMM(HEX(14)),REG(al)) + jmp scheme_to_interface') + +define_apply_fixed_size(1) +define_apply_fixed_size(2) +define_apply_fixed_size(3) +define_apply_fixed_size(4) +define_apply_fixed_size(5) +define_apply_fixed_size(6) +define_apply_fixed_size(7) +define_apply_fixed_size(8) + +### The following code is used by generic arithmetic +### whether the fixnum case is open-coded in line or not. +### This takes care of fixnums and flonums so that the common +### numeric types are much faster than the rare ones +### (bignums, ratnums, recnums) + +IF387(`declare_alignment(2) +asm_generic_flonum_result: + # The MOV instruction can take a 64-bit immediate operand only + # if the target is a register, so we store the manifest in rax + # before moving it to memory. + OP(mov,q) TW(IMM_MANIFEST_NM_VECTOR_1,REG(rax)) + OP(mov,q) TW(REG(rax), IND(rfree)) + # The OR instruction cannot take a 64-bit immediate either, so + # we need to store the tag in rax first, shift it up, and then + # OR the datum into it. + OP(mov,q) TW(IMM(TC_FLONUM),REG(rax)) + OP(shl,q) TW(IMM(DATUM_LENGTH),REG(rax)) + OP(or,q) TW(rfree,REG(rax)) + OP(fstp,l) DOF(8,rfree) # fstpd + OP(and,q) TW(rmask,IND(REG(rsp))) + OP(add,q) TW(IMM(16),rfree) + OP(mov,q) TW(REG(rax),DOF(REGBLOCK_VAL(),regs)) + ret + +declare_alignment(2) +asm_generic_fixnum_result: + OP(and,q) TW(rmask,IND(REG(rsp))) + OP(or,b) TW(IMM(TC_FIXNUM),REG(al)) + OP(ror,q) TW(IMM(TC_LENGTH),REG(rax)) + OP(mov,q) TW(REG(rax),LOF(REGBLOCK_VAL(),regs)) + ret + +declare_alignment(2) +asm_generic_return_sharp_t: + OP(and,q) TW(rmask,IND(REG(rsp))) + OP(mov,q) TW(IMM_TRUE,REG(rax)) + OP(mov,q) TW(REG(rax),LOF(REGBLOCK_VAL(),regs)) + ret + +declare_alignment(2) +asm_generic_return_sharp_f: + OP(and,q) TW(rmask,IND(REG(rsp))) + OP(mov,q) TW(IMM_FALSE,REG(rax)) + OP(mov,q) TW(REG(rax),LOF(REGBLOCK_VAL(),regs)) + ret') + +define(define_unary_operation, +`declare_alignment(2) +define_hook_label(generic_$1) + OP(pop,q) REG(rdx) + OP(mov,q) TW(REG(rdx),REG(rax)) + OP(shr,q) TW(IMM(DATUM_LENGTH),REG(rax)) + OP(cmp,b) TW(IMM(TC_FIXNUM),REG(al)) + je asm_generic_$1_fix + OP(cmp,b) TW(IMM(TC_FLONUM),REG(al)) + jne asm_generic_$1_fail + OP(and,q) TW(rmask,REG(rdx)) + fld1 + OP($4,l) DOF(8,REG(rdx)) + jmp asm_generic_flonum_result + +asm_generic_$1_fix: + OP(mov,q) TW(REG(rdx),REG(rax)) + OP(shl,q) TW(IMM(TC_LENGTH),REG(rax)) + OP($3,q) TW(IMM(eval(1 << TC_LENGTH)),REG(rax)) + jno asm_generic_fixnum_result + +asm_generic_$1_fail: + OP(push,q) REG(rdx) + OP(mov,b) TW(IMM(HEX($2)),REG(al)) + jmp scheme_to_interface') + +define(define_unary_predicate, +`declare_alignment(2) +define_hook_label(generic_$1) + OP(pop,q) REG(rdx) + OP(mov,q) TW(REG(rdx),REG(rax)) + OP(shr,q) TW(IMM(DATUM_LENGTH),REG(rax)) + OP(cmp,b) TW(IMM(TC_FIXNUM),REG(al)) + je asm_generic_$1_fix + OP(cmp,b) TW(IMM(TC_FLONUM),REG(al)) + jne asm_generic_$1_fail + OP(and,q) TW(rmask,REG(rdx)) + OP(fld,l) DOF(8,REG(rdx)) + ftst + fstsw REG(ax) + fstp ST(0) + sahf + $4 asm_generic_return_sharp_t + jmp asm_generic_return_sharp_f + +asm_generic_$1_fix: + OP(mov,q) TW(REG(rdx),REG(rax)) + OP(shl,q) TW(IMM(TC_LENGTH),REG(rax)) + OP(cmp,q) TW(IMM(0),REG(rax)) + $3 asm_generic_return_sharp_t + jmp asm_generic_return_sharp_f + +asm_generic_$1_fail: + OP(push,q) REG(rdx) + OP(mov,b) TW(IMM(HEX($2)),REG(al)) + jmp scheme_to_interface') + 
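The hand-computed immediates near the top of this file and the OR/ROR
sequence in asm_generic_fixnum_result above both follow from the
6-bit-tag, 58-bit-datum object layout. A C sketch that checks the
arithmetic m4 could not do for us (MAKE_OBJECT is a hypothetical
stand-in for the microcode's object constructor):

    #include <stdint.h>
    #include <assert.h>

    #define TC_LENGTH 6
    #define DATUM_LENGTH (64 - TC_LENGTH)
    #define TC_TRUE 8
    #define TC_FIXNUM 26
    #define TC_MANIFEST_NM_VECTOR 39

    #define MAKE_OBJECT(tc, datum) \
      ((((uint64_t) (tc)) << DATUM_LENGTH) | ((uint64_t) (datum)))

    int
    main (void)
    {
      /* The magic constants defined by hand near the top of the file.  */
      assert ((MAKE_OBJECT (TC_MANIFEST_NM_VECTOR, 1))
              == 0x9c00000000000001ULL);
      assert ((MAKE_OBJECT (TC_TRUE, 0)) == 0x2000000000000000ULL);

      /* asm_generic_fixnum_result: the value arrives shifted left by
         TC_LENGTH, the tag is ORed into the low bits, and ROR by
         TC_LENGTH rotates the tag into the top six bits.  */
      uint64_t shifted = (((uint64_t) 42) << TC_LENGTH);
      uint64_t tagged = (shifted | TC_FIXNUM);
      uint64_t object = ((tagged >> TC_LENGTH) | (tagged << DATUM_LENGTH));
      assert (object == (MAKE_OBJECT (TC_FIXNUM, 42)));
      return (0);
    }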
+define(define_binary_operation, +`declare_alignment(2) +define_hook_label(generic_$1) + OP(pop,q) REG(rdx) + OP(pop,q) REG(rbx) + OP(mov,q) TW(REG(rdx),REG(rax)) + OP(mov,q) TW(REG(rbx),REG(rcx)) + OP(shr,q) TW(IMM(DATUM_LENGTH),REG(rax)) + OP(shr,q) TW(IMM(DATUM_LENGTH),REG(rcx)) + OP(cmp,b) TW(REG(al),REG(cl)) + jne asm_generic_$1_fail + OP(cmp,b) TW(IMM(TC_FIXNUM),REG(al)) + je asm_generic_$1_fix + OP(cmp,b) TW(IMM(TC_FLONUM),REG(al)) + je asm_generic_$1_flo + +asm_generic_$1_fail: + OP(push,q) REG(rbx) + OP(push,q) REG(rdx) + OP(mov,b) TW(IMM(HEX($2)),REG(al)) + jmp scheme_to_interface + +asm_generic_$1_fix: + OP(mov,q) TW(REG(rdx),REG(rax)) + OP(mov,q) TW(REG(rbx),REG(rcx)) + OP(shl,q) TW(IMM(TC_LENGTH),REG(rax)) + OP(shl,q) TW(IMM(TC_LENGTH),REG(rcx)) + $5 + OP($3,q) TW(REG(rcx),REG(rax)) # subq + jo asm_generic_$1_fail + jmp asm_generic_fixnum_result + +asm_generic_$1_flo: + OP(and,q) TW(rmask,REG(rdx)) + OP(and,q) TW(rmask,REG(rbx)) + OP(fld,l) DOF(8,REG(rdx)) # fldd + OP($4,l) DOF(8,REG(rbx)) # fsubl + jmp asm_generic_flonum_result') + +IF387(`declare_alignment(2) +define_hook_label(generic_divide) + OP(pop,q) REG(rdx) + OP(pop,q) REG(rbx) + OP(mov,q) TW(REG(rdx),REG(rax)) + OP(mov,q) TW(REG(rbx),REG(rcx)) + OP(shr,q) TW(IMM(DATUM_LENGTH),REG(rax)) + OP(shr,q) TW(IMM(DATUM_LENGTH),REG(rcx)) + OP(cmp,b) TW(IMM(TC_FIXNUM),REG(al)) + je asm_generic_divide_fix + OP(cmp,b) TW(IMM(TC_FLONUM),REG(al)) + jne asm_generic_divide_fail + OP(cmp,b) TW(IMM(TC_FLONUM),REG(cl)) + je asm_generic_divide_flo_flo + OP(cmp,b) TW(IMM(TC_FIXNUM),REG(cl)) + jne asm_generic_divide_fail + OP(mov,q) TW(REG(rbx),REG(rcx)) + OP(shl,q) TW(IMM(TC_LENGTH),REG(rcx)) + je asm_generic_divide_fail + OP(and,q) TW(rmask,REG(rdx)) + OP(sar,q) TW(IMM(TC_LENGTH),REG(rcx)) + OP(fld,l) DOF(8,REG(rdx)) # fldd + OP(mov,q) TW(REG(rcx),IND(rfree)) + OP(fidiv,l) IND(rfree) + jmp asm_generic_flonum_result + +asm_generic_divide_fix: + OP(cmp,b) TW(IMM(TC_FLONUM),REG(cl)) + jne asm_generic_divide_fail + OP(mov,q) TW(REG(rdx),REG(rcx)) + OP(shl,q) TW(IMM(TC_LENGTH),REG(rcx)) + je asm_generic_divide_fail + OP(and,q) TW(rmask,REG(rbx)) + OP(sar,q) TW(IMM(TC_LENGTH),REG(rcx)) + OP(fld,l) DOF(8,REG(rbx)) # fldd + OP(mov,q) TW(REG(rcx),IND(rfree)) + OP(fidivr,l) IND(rfree) + jmp asm_generic_flonum_result + +asm_generic_divide_flo_flo: + OP(mov,q) TW(REG(rbx),REG(rcx)) + OP(and,q) TW(rmask,REG(rcx)) + OP(fld,l) DOF(8,REG(rcx)) # fldd + ftst + fstsw REG(ax) + sahf + je asm_generic_divide_by_zero + OP(and,q) TW(rmask,REG(rdx)) + OP(fdivr,l) DOF(8,REG(rdx)) + jmp asm_generic_flonum_result + +asm_generic_divide_by_zero: + fstp ST(0) # Pop second arg + +asm_generic_divide_fail: + OP(push,q) REG(rbx) + OP(push,q) REG(rdx) + OP(mov,b) TW(IMM(HEX(23)),REG(al)) + jmp scheme_to_interface') + +define(define_binary_predicate, +`declare_alignment(2) +define_hook_label(generic_$1) + OP(pop,q) REG(rdx) + OP(pop,q) REG(rbx) + OP(mov,q) TW(REG(rdx),REG(rax)) + OP(mov,q) TW(REG(rbx),REG(rcx)) + OP(shr,q) TW(IMM(DATUM_LENGTH),REG(rax)) + OP(shr,q) TW(IMM(DATUM_LENGTH),REG(rcx)) + OP(cmp,b) TW(REG(al),REG(cl)) + jne asm_generic_$1_fail + OP(cmp,b) TW(IMM(TC_FIXNUM),REG(al)) + jne asm_generic_$1_fail + + OP(shl,q) TW(IMM(TC_LENGTH),REG(rdx)) + OP(shl,q) TW(IMM(TC_LENGTH),REG(rbx)) + OP(cmp,q) TW(REG(rbx),REG(rdx)) + $3 asm_generic_return_sharp_t + jmp asm_generic_return_sharp_f + +asm_generic_$1_fail: + OP(push,q) REG(rbx) + OP(push,q) REG(rdx) + OP(mov,b) TW(IMM(HEX($2)),REG(al)) + jmp scheme_to_interface') + 
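In C terms, each binary hook generated above dispatches like this:
equal type tags only, fixnums inline with a hardware overflow check
(the JO instruction), everything else punted to the C utility. A
sketch of the fixnum fast path only (the flonum path unboxes and uses
the 387); generic_add_fast_path is hypothetical, and the overflow test
uses a GCC/Clang builtin:

    #include <stdint.h>

    #define TC_LENGTH 6
    #define DATUM_LENGTH (64 - TC_LENGTH)
    #define TC_FIXNUM 26

    /* Returns 1 and stores the tagged sum on the fast path; returns 0
       where the assembly would jump to asm_generic_add_fail.  */
    static int
    generic_add_fast_path (uint64_t x, uint64_t y, uint64_t * result)
    {
      if (((x >> DATUM_LENGTH) != TC_FIXNUM)
          || ((y >> DATUM_LENGTH) != TC_FIXNUM))
        return (0);
      /* SHL strips the tags; the add overflows exactly when JO fires.  */
      int64_t a = ((int64_t) (x << TC_LENGTH));
      int64_t b = ((int64_t) (y << TC_LENGTH));
      int64_t sum;
      if (__builtin_add_overflow (a, b, (&sum)))
        return (0);
      uint64_t tagged = (((uint64_t) sum) | TC_FIXNUM);
      (*result) = ((tagged >> TC_LENGTH) | (tagged << DATUM_LENGTH));
      return (1);
    }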
+IF387(`define_unary_operation(decrement,22,sub,fsubr) +define_unary_operation(increment,26,add,fadd) + +define_unary_predicate(negative,2a,jl,jb) +define_unary_predicate(positive,2c,jg,ja) +define_unary_predicate(zero,2d,je,je) + +# define_binary_operation(name,index,fix*fix,flo*flo, fixup) +# define_binary_operation( $1, $2, $3, $4, $5) +# The fixup is optional; only multiplication needs it to shift the +# result back down by six bits. +define_binary_operation(add,2b,add,fadd) +define_binary_operation(subtract,28,sub,fsub) +define_binary_operation(multiply,29,imul,fmul, + `OP(shr,q) TW(IMM(6),REG(rax))') +# Divide needs to check for 0, so we cant really use the following +# define_binary_operation(divide,23,NONE,fdiv) + +# define_binary_predicate(name,index,fix*fix,flo*flo) +define_binary_predicate(equal,24,je,je) +define_binary_predicate(greater,25,jg,ja) +define_binary_predicate(less,27,jl,jb)') + +IFN387(`define_jump_indirection(generic_decrement,22) +define_jump_indirection(generic_divide,23) +define_jump_indirection(generic_equal,24) +define_jump_indirection(generic_greater,25) +define_jump_indirection(generic_increment,26) +define_jump_indirection(generic_less,27) +define_jump_indirection(generic_subtract,28) +define_jump_indirection(generic_multiply,29) +define_jump_indirection(generic_negative,2a) +define_jump_indirection(generic_add,2b) +define_jump_indirection(generic_positive,2c) +define_jump_indirection(generic_zero,2d)') + +# These don't currently differ according to whether there +# is a 387 or not. + +define_jump_indirection(generic_quotient,37) +define_jump_indirection(generic_remainder,38) +define_jump_indirection(generic_modulo,39) + +define_jump_indirection(nofp_decrement,22) +define_jump_indirection(nofp_divide,23) +define_jump_indirection(nofp_equal,24) +define_jump_indirection(nofp_greater,25) +define_jump_indirection(nofp_increment,26) +define_jump_indirection(nofp_less,27) +define_jump_indirection(nofp_subtract,28) +define_jump_indirection(nofp_multiply,29) +define_jump_indirection(nofp_negative,2a) +define_jump_indirection(nofp_add,2b) +define_jump_indirection(nofp_positive,2c) +define_jump_indirection(nofp_zero,2d) +define_jump_indirection(nofp_quotient,37) +define_jump_indirection(nofp_remainder,38) +define_jump_indirection(nofp_modulo,39) + +IFDASM(`end') + +### Edwin Variables: +### comment-column: 56 +### comment-start: "#" +### End: diff --git a/src/microcode/cmpintmd/x86-64-config.h b/src/microcode/cmpintmd/x86-64-config.h new file mode 100644 index 000000000..7678d4646 --- /dev/null +++ b/src/microcode/cmpintmd/x86-64-config.h @@ -0,0 +1,32 @@ +/* -*-C-*- + +Copyright (C) 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, + 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, + 2006, 2007, 2008, 2009 Massachusetts Institute of Technology + +This file is part of MIT/GNU Scheme. + +MIT/GNU Scheme is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at +your option) any later version. + +MIT/GNU Scheme is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with MIT/GNU Scheme; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, +USA. + +*/ + +#ifndef SCM_CMPINTMD_CONFIG_H_INCLUDED +#define SCM_CMPINTMD_CONFIG_H_INCLUDED 1 + +#define COMPILER_PROCESSOR_TYPE COMPILER_X86_64_TYPE +#define CC_IS_NATIVE 1 + +#endif /* !SCM_CMPINTMD_CONFIG_H_INCLUDED */ diff --git a/src/microcode/cmpintmd/x86-64.c b/src/microcode/cmpintmd/x86-64.c new file mode 100644 index 000000000..a7cc7f0d4 --- /dev/null +++ b/src/microcode/cmpintmd/x86-64.c @@ -0,0 +1,372 @@ +/* -*-C-*- + +Copyright (C) 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, + 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, + 2006, 2007, 2008, 2009 Massachusetts Institute of Technology + +This file is part of MIT/GNU Scheme. + +MIT/GNU Scheme is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at +your option) any later version. + +MIT/GNU Scheme is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with MIT/GNU Scheme; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, +USA. + +*/ + +/* Compiled code interface for AMD x86-64. */ + +#include "cmpint.h" +#include "extern.h" +#include "outf.h" +#include "errors.h" + +extern void * tospace_to_newspace (void *); + +bool +read_cc_entry_type (cc_entry_type_t * cet, insn_t * address) +{ + return (decode_old_style_format_word (cet, (((uint16_t *) address) [-2]))); +} + +bool +write_cc_entry_type (cc_entry_type_t * cet, insn_t * address) +{ + return (encode_old_style_format_word (cet, ((uint16_t *) address) - 2)); +} + +bool +read_cc_entry_offset (cc_entry_offset_t * ceo, insn_t * address) +{ + uint16_t n = (((uint16_t *) address) [-1]); + (ceo->offset) = (n >> 1); + (ceo->continued_p) = ((n & 1) != 0); + return (false); +} + +bool +write_cc_entry_offset (cc_entry_offset_t * ceo, insn_t * address) +{ + if (! ((ceo->offset) < 0x4000)) + return (true); + (((uint16_t *) address) [-1]) + = (((ceo->offset) << 1) | ((ceo->continued_p) ? 1 : 0)); + return (false); +} + +/* Compiled closures */ + +/* MOV RAX,imm64 has two bytes of opcode cruft before the imm64. */ + +insn_t * +read_compiled_closure_target (insn_t * start) +{ + return (* ((insn_t **) (start + CC_ENTRY_HEADER_SIZE + 2))); +} + +void +write_compiled_closure_target (insn_t * target, insn_t * start) +{ + (* ((insn_t **) (start + CC_ENTRY_HEADER_SIZE + 2))) = target; +} + +unsigned long +compiled_closure_count (SCHEME_OBJECT * block) +{ + /* `block' is a pointer to the first object after the manifest. The + first object following it is the entry count. */ + return ((unsigned long) (* ((uint32_t *) block))); +} + +insn_t * +compiled_closure_start (SCHEME_OBJECT * block) +{ + /* Skip the 32-bit entry count. 
*/ + return (((insn_t *) block) + 4); +} + +insn_t * +compiled_closure_entry (insn_t * start) +{ + return (start + CC_ENTRY_HEADER_SIZE); +} + +insn_t * +compiled_closure_next (insn_t * start) +{ + return (start + CC_ENTRY_HEADER_SIZE + 12); +} + +SCHEME_OBJECT * +skip_compiled_closure_padding (insn_t * start) +{ + /* The padding is the same size as the entry header (format word). */ + return ((SCHEME_OBJECT *) (start + CC_ENTRY_HEADER_SIZE)); +} + +SCHEME_OBJECT +compiled_closure_entry_to_target (insn_t * entry) +{ + /* `entry' points to the start of the MOV RAX,imm64 instruction, + which has two bytes of opcode cruft before the imm64. */ + return (MAKE_CC_ENTRY (* ((long *) (entry + 2)))); +} + +/* Execution caches (UUO links) + + An execution cache is a region of memory that lives in the + constants section of a compiled-code block. It is an indirection + for calling external procedures that allows the linker to control + the calling process without having to find and change all the + places in the compiled code that refer to it. + + Prior to linking, the execution cache has two pieces of + information: (1) the name of the procedure being called (a symbol), + and (2) the number of arguments that will be passed to the + procedure. `saddr' points to the arity at the beginning of the + execution cache. */ + +SCHEME_OBJECT +read_uuo_symbol (SCHEME_OBJECT * saddr) +{ + return (saddr[1]); +} + +unsigned int +read_uuo_frame_size (SCHEME_OBJECT * saddr) +{ + return (* ((uint16_t *) saddr)); +} + +insn_t * +read_uuo_target (SCHEME_OBJECT * saddr) +{ + insn_t * mov_addr = ((insn_t *) (saddr + 1)); + return (* ((insn_t **) (mov_addr + 2))); +} + +insn_t * +read_uuo_target_no_reloc (SCHEME_OBJECT * saddr) +{ + return (read_uuo_target (saddr)); +} + +void +write_uuo_target (insn_t * target, SCHEME_OBJECT * saddr) +{ + /* Skip the arity. 
*/ + insn_t * addr = ((insn_t *) (saddr + 1)); + (*addr++) = 0x48; /* REX.W (64-bit operand size prefix) */ + (*addr++) = 0xB8; /* MOV RAX,imm64 */ + (* ((insn_t **) addr)) = target; + addr += 8; + (*addr++) = 0xFF; /* JMP reg/mem64 */ + (*addr++) = 0xE0; /* ModR/M for RAX */ +} + +#define BYTES_PER_TRAMPOLINE_ENTRY_PADDING 4 +#define OBJECTS_PER_TRAMPOLINE_ENTRY 2 + +#define RSI_TRAMPOLINE_TO_INTERFACE_OFFSET \ + ((COMPILER_REGBLOCK_N_FIXED + (2 * COMPILER_HOOK_SIZE)) \ + * SIZEOF_SCHEME_OBJECT) + +unsigned long +trampoline_entry_size (unsigned long n_entries) +{ + return (n_entries * OBJECTS_PER_TRAMPOLINE_ENTRY); +} + +insn_t * +trampoline_entry_addr (SCHEME_OBJECT * block, unsigned long index) +{ + return (((insn_t *) (block + 2 + (index * OBJECTS_PER_TRAMPOLINE_ENTRY))) + + BYTES_PER_TRAMPOLINE_ENTRY_PADDING + CC_ENTRY_HEADER_SIZE); +} + +bool +store_trampoline_insns (insn_t * entry, byte_t code) +{ + (*entry++) = 0xB0; /* MOV AL,code */ + (*entry++) = code; + (*entry++) = 0xFF; /* CALL /2 disp32(ESI) */ + (*entry++) = 0x96; + (* ((uint32_t *) entry)) = RSI_TRAMPOLINE_TO_INTERFACE_OFFSET; + X86_64_CACHE_SYNCHRONIZE (); + return (false); +} + +#ifdef _MACH_UNIX +# include +# define VM_PROT_SCHEME (VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE) +#endif + +#define SETUP_REGISTER(hook) do \ +{ \ + (* ((unsigned long *) (rsi_value + offset))) \ + = ((unsigned long) (hook)); \ + offset += (COMPILER_HOOK_SIZE * (sizeof (SCHEME_OBJECT))); \ + declare_builtin (((unsigned long) hook), #hook); \ +} while (0) + +void +x86_64_reset_hook (void) +{ + int offset = (COMPILER_REGBLOCK_N_FIXED * (sizeof (SCHEME_OBJECT))); + unsigned char * rsi_value = ((unsigned char *) Registers); + bool fp_support_present = (x86_64_interface_initialize ()); + + /* These must match machines/x86-64/lapgen.scm */ + + SETUP_REGISTER (asm_scheme_to_interface); /* 0 */ + SETUP_REGISTER (asm_scheme_to_interface_call); /* 1 */ + + if (offset != RSI_TRAMPOLINE_TO_INTERFACE_OFFSET) + { + outf_fatal ("\nx86_64_reset_hook: RSI_TRAMPOLINE_TO_INTERFACE_OFFSET\n"); + Microcode_Termination (TERM_EXIT); + } + SETUP_REGISTER (asm_trampoline_to_interface); /* 2 */ + + SETUP_REGISTER (asm_interrupt_procedure); /* 3 */ + SETUP_REGISTER (asm_interrupt_continuation); /* 4 */ + SETUP_REGISTER (asm_interrupt_closure); /* 5 */ + SETUP_REGISTER (asm_interrupt_dlink); /* 6 */ + SETUP_REGISTER (asm_primitive_apply); /* 7 */ + SETUP_REGISTER (asm_primitive_lexpr_apply); /* 8 */ + SETUP_REGISTER (asm_assignment_trap); /* 9 */ + SETUP_REGISTER (asm_reference_trap); /* 10 */ + SETUP_REGISTER (asm_safe_reference_trap); /* 11 */ + SETUP_REGISTER (asm_link); /* 12 */ + SETUP_REGISTER (asm_error); /* 13 */ + SETUP_REGISTER (asm_primitive_error); /* 14 */ + /* [TRC 20091025: This was an i386 hack for when the PC is not + available, which on x86-64 it always is. */ + /* SETUP_REGISTER (asm_short_primitive_apply); */ /* 15 */ + + /* No more room for positive offsets without going to 32-bit + offsets! */ + + /* This is a hack to make all the hooks be addressable with byte + offsets (instead of longword offsets). The register block + extends to negative offsets as well, so all the following hooks + are accessed with negative offsets, and all fit in a byte. */ + + /* [TRC 20091029: This hack doesn't work any longer; this code + should be cleaned up, since we must use longword offsets anyway.] 
+ */ + + offset = -256; + if (fp_support_present) + { + SETUP_REGISTER (asm_generic_add); /* -32 */ + SETUP_REGISTER (asm_generic_subtract); /* -31 */ + SETUP_REGISTER (asm_generic_multiply); /* -30 */ + SETUP_REGISTER (asm_generic_divide); /* -29 */ + SETUP_REGISTER (asm_generic_equal); /* -28 */ + SETUP_REGISTER (asm_generic_less); /* -27 */ + SETUP_REGISTER (asm_generic_greater); /* -26 */ + SETUP_REGISTER (asm_generic_increment); /* -25 */ + SETUP_REGISTER (asm_generic_decrement); /* -24 */ + SETUP_REGISTER (asm_generic_zero); /* -23 */ + SETUP_REGISTER (asm_generic_positive); /* -22 */ + SETUP_REGISTER (asm_generic_negative); /* -21 */ + SETUP_REGISTER (asm_generic_quotient); /* -20 */ + SETUP_REGISTER (asm_generic_remainder); /* -19 */ + SETUP_REGISTER (asm_generic_modulo); /* -18 */ + } + else + { + SETUP_REGISTER (asm_nofp_add); /* -32 */ + SETUP_REGISTER (asm_nofp_subtract); /* -31 */ + SETUP_REGISTER (asm_nofp_multiply); /* -30 */ + SETUP_REGISTER (asm_nofp_divide); /* -29 */ + SETUP_REGISTER (asm_nofp_equal); /* -28 */ + SETUP_REGISTER (asm_nofp_less); /* -27 */ + SETUP_REGISTER (asm_nofp_greater); /* -26 */ + SETUP_REGISTER (asm_nofp_increment); /* -25 */ + SETUP_REGISTER (asm_nofp_decrement); /* -24 */ + SETUP_REGISTER (asm_nofp_zero); /* -23 */ + SETUP_REGISTER (asm_nofp_positive); /* -22 */ + SETUP_REGISTER (asm_nofp_negative); /* -21 */ + SETUP_REGISTER (asm_nofp_quotient); /* -20 */ + SETUP_REGISTER (asm_nofp_remainder); /* -19 */ + SETUP_REGISTER (asm_nofp_modulo); /* -18 */ + } + + SETUP_REGISTER (asm_sc_apply); /* -17 */ + SETUP_REGISTER (asm_sc_apply_size_1); /* -16 */ + SETUP_REGISTER (asm_sc_apply_size_2); /* -15 */ + SETUP_REGISTER (asm_sc_apply_size_3); /* -14 */ + SETUP_REGISTER (asm_sc_apply_size_4); /* -13 */ + SETUP_REGISTER (asm_sc_apply_size_5); /* -12 */ + SETUP_REGISTER (asm_sc_apply_size_6); /* -11 */ + SETUP_REGISTER (asm_sc_apply_size_7); /* -10 */ + SETUP_REGISTER (asm_sc_apply_size_8); /* -9 */ + SETUP_REGISTER (asm_interrupt_continuation_2); /* -8 */ + /* [TRC 20091025: The cache synchronization bug does not occur in any + x86-64 machines of which I am aware.] 
+ + if (x86_64_cpuid_needed) + SETUP_REGISTER (asm_serialize_cache); /\* -7 *\/ + else + SETUP_REGISTER (asm_dont_serialize_cache); /\* -7 *\/ + */ + +#ifdef _MACH_UNIX + { + vm_address_t addr; + vm_size_t size; + vm_prot_t prot; + vm_prot_t max_prot; + vm_inherit_t inheritance; + boolean_t shared; + port_t object; + vm_offset_t offset; + + addr = ((vm_address_t) Heap); + if ((vm_region ((task_self ()), &addr, &size, &prot, &max_prot, + &inheritance, &shared, &object, &offset)) + != KERN_SUCCESS) + { + outf_fatal ( "compiler_reset: vm_region() failed.\n"); + Microcode_Termination (TERM_EXIT); + /*NOTREACHED*/ + } + if ((prot & VM_PROT_SCHEME) != VM_PROT_SCHEME) + { + if ((max_prot & VM_PROT_SCHEME) != VM_PROT_SCHEME) + { + outf_fatal ( + "compiler_reset: inadequate protection for Heap.\n"); + outf_fatal ( "maximum = 0x%lx; desired = 0x%lx\n", + ((unsigned long) (max_prot & VM_PROT_SCHEME)), + ((unsigned long) VM_PROT_SCHEME)); + Microcode_Termination (TERM_EXIT); + /*NOTREACHED*/ + } + if ((vm_protect ((task_self ()), ((vm_address_t) Heap), + (((char *) constant_end) - ((char *) Heap)), + 0, VM_PROT_SCHEME)) + != KERN_SUCCESS) + { + outf_fatal ("Unable to change protection for Heap.\n"); + outf_fatal ("actual = 0x%lx; desired = 0x%lx\n", + ((unsigned long) (prot & VM_PROT_SCHEME)), + ((unsigned long) VM_PROT_SCHEME)); + Microcode_Termination (TERM_EXIT); + /*NOTREACHED*/ + } + } + } +#endif /* _MACH_UNIX */ +} diff --git a/src/microcode/cmpintmd/x86-64.h b/src/microcode/cmpintmd/x86-64.h new file mode 100644 index 000000000..49d0f81d0 --- /dev/null +++ b/src/microcode/cmpintmd/x86-64.h @@ -0,0 +1,351 @@ +/* -*-C-*- + +Copyright (C) 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, + 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, + 2006, 2007, 2008, 2009 Massachusetts Institute of Technology + +This file is part of MIT/GNU Scheme. + +MIT/GNU Scheme is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at +your option) any later version. + +MIT/GNU Scheme is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with MIT/GNU Scheme; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, +USA. + +*/ + +/* Compiled code interface macros for AMD x86-64. */ + +#ifndef SCM_CMPINTMD_H_INCLUDED +#define SCM_CMPINTMD_H_INCLUDED 1 + +/* + +Problems with the AMD x86-64 instruction set architecture +==================================================== + +1. Jumps are PC-relative. There are absolute jumps, assuming the PC + is in a data location, or with immediate destinations that include + a segment descriptor (16 bits). The short forms have a PC-relative + offset defined with respect to the immediately following + instruction. + +Problem: Closures and execute caches need their address in old space + in order to be relocated correctly. + +Fix: + +For execute caches we can define a new linker field, called +load-relocation-address which on every GC/relocation stores the new +address and the old contents into global variables and stores the new +address in the field. 
Alternatively the difference between the new +address and the old contents can be stored into a single global +variable, and this can be used, together with the new address of each +cache, to find the old code. + +For closures the code that reads the header (manifest closure) can do +the same. + + +2. The CALL and JMP instructions do not accept 64-bit displacements. + +Problem: We want heaps bigger than 4 GB. + +Fix: + +Assemble more than one instruction for closure entry points, expanding +them even more. Yech. + + +3. The stack pointer register (RSP) cannot be used as the base in + (base + displacement) addressing mode. + +Problem: Common operation in the compiler, which assumes direct access + to the stack. + +Fix: Use base + indexed mode, which allows specification of RSP as + base and nullification of the index (by using RSP again). This is + one byte longer than otherwise, but... + + +Register assignments +==================== + +RAX (0) Unassigned +RCX (1) Unassigned +RDX (2) Unassigned +RBX (3) Unassigned + +RSP (4) Stack Pointer +RBP (5) Register Mask +RSI (6) Pointer to register block, etc. +RDI (7) Free Pointer + +R8-R15 Unassigned + +The dynamic link and value "registers" are not processor registers. +Slots in the register array must be reserved for them. +[TRC 20091025: Later, we ought to use machine registers for these.] + +The Free Pointer is RDI because RDI is the implicit base register for +the memory-to-memory move instructions, and the string store +instruction. Perhaps we can make use of it. + +The pointer to register block is not held in RBP (the processor's +"frame" register is typically used) because its most common use, (RBP) +(address syllable for memory memtop) takes more bytes than (RSI). + +Encodings and layout of various control features +================================================ + +Assumptions: + +The processor will be in 64-bit address and operand mode. Thus +instructions use 64-bit operands, and displacements for addressing +modes and jump instructions are all 64 bits by default. + + Offset Contents Encoding + + +- Execute cache entry encoding: + + Before linking + + 0 16-bit arity \ + 2 zero [TC_FIXNUM | arity] + 7 0x1A / +entry 8 symbol + 16 + 24 + + After linking + + 0 16-bit arity + 2 zero + 7 0x1A +entry 8 MOV RAX,imm64 0x48 0xB8 + 10
+ 18 JMP (RAX) 0xFF 0xE0
+ 19-23 <padding>
+ 24
+
+
+- Closures:
+
+The only reason for a 32-bit entry count is to align everything
+nicely.
+
+ 0 <manifest closure header>
+ 8 <entry count>
+ 12 \__ format word
+ 14 /
+entry0 16 MOV RAX,imm64 0x48 0xB8
+ 18 <address>
+ 26 CALL (RAX) 0xFF 0xD0 + 28 + ... + 16*(n+1) + + +- Trampoline encoding: + + -8 + -4 + -2 +entry 0 MOV AL,code 0xB0, code-byte + 2 CALL n(RSI) 0xFF 0x96 n-longword + 8 + + +[TRC 20091027: The next two are wrong; need to update.] + +- GC & interrupt check at procedure/continuation entry: + +gc_lab -7 CALL n(RSI) 0xFF 0x56 n-byte + -4 + -2 +entry 0 CMP RDI,(RSI) 0x48 0x39 0x3e + 3 JAE gc_lab 0x73 -12 + 5 + + +- GC & interrupt check at closure entry: + +gc_lab -11 ADD (RSP),&offset 0x83 0x04 0x24 offset-byte + -7 JMP n(RSI) 0xFF 0x66 n-byte + -4 + -2 +entry 0 ADD (RSP),&magic 0x81 0x04 0x24 magic-longword + 7 CMP RDI,(RSI) 0x39 0x3e + 9 JAE gc_lab 0x73 0xea (= -22) + 11 + +The magic value depends on the closure because of canonicalization. + +The ADD instruction at offset -11 is not present for the 0th closure +entry, since it is the canonical entry point. Its format depends on +the value of offset, since the sign-extending forms often suffice. + +offset = entry_number * entry_size +magic = ([TC_COMPILED_ENTRY | 0] - (offset + length_of_CALL_instruction)) + +*/ + +#define ASM_RESET_HOOK x86_64_reset_hook +#define FPE_RESET_TRAPS x86_64_interface_initialize + +#define CMPINT_USE_STRUCS 1 + +/* These next definitions must agree with "cmpauxmd/x86-64.m4", which is + where the register block is allocated. */ +#define COMPILER_REGBLOCK_N_FIXED 16 +/* Big enough to hold 80-bit floating-point value: */ +#define COMPILER_TEMP_SIZE 2 +#define COMPILER_REGBLOCK_N_TEMPS 256 +#define COMPILER_REGBLOCK_N_HOOKS 80 +#define COMPILER_HOOK_SIZE 1 + +#define COMPILER_REGBLOCK_EXTRA_SIZE \ + (COMPILER_REGBLOCK_N_HOOKS * COMPILER_HOOK_SIZE) + +#define REGBLOCK_ALLOCATED_BY_INTERFACE true + +typedef byte_t insn_t; + +/* Number of insn_t units preceding entry address in which header + (type and offset info) is stored. */ +#define CC_ENTRY_HEADER_SIZE (CC_ENTRY_TYPE_SIZE + CC_ENTRY_OFFSET_SIZE) +#define CC_ENTRY_TYPE_SIZE 2 +#define CC_ENTRY_OFFSET_SIZE 2 + +/* Number of insn_t units preceding entry header in which GC trap + instructions are stored. */ +#define CC_ENTRY_GC_TRAP_SIZE 3 + +#define EMBEDDED_CLOSURE_ADDRS_P 1 + +#define DECLARE_RELOCATION_REFERENCE(name) + +#define START_CLOSURE_RELOCATION(scan, ref) do {} while (0) +#define START_OPERATOR_RELOCATION(scan, ref) do {} while (0) + +#define OPERATOR_RELOCATION_OFFSET 0 + +#define READ_COMPILED_CLOSURE_TARGET(a, r) \ + read_compiled_closure_target (a) + +/* Size of execution cache in SCHEME_OBJECTS. */ +#define UUO_LINK_SIZE 3 + +#define UUO_WORDS_TO_COUNT(nw) ((nw) / UUO_LINK_SIZE) +#define UUO_COUNT_TO_WORDS(nc) ((nc) * UUO_LINK_SIZE) + +#define READ_UUO_TARGET(a, r) read_uuo_target (a) + +#define FLUSH_I_CACHE() X86_64_CACHE_SYNCHRONIZE () +#define FLUSH_I_CACHE_REGION(address, nwords) X86_64_CACHE_SYNCHRONIZE () +#define PUSH_D_CACHE_REGION(address, nwords) X86_64_CACHE_SYNCHRONIZE () + +/* [TRC 20091025: The cache synchronization bug does not occur in any + x86-64 machines of which I am aware.] 
*/ + +#define X86_64_CACHE_SYNCHRONIZE() do {} while (0) + +/* +#define X86_64_CACHE_SYNCHRONIZE() do \ +{ \ + if (x86_64_cpuid_needed) \ + x86_64_cache_synchronize (); \ +} while (false) +*/ + +#if defined(__OS2__) && (defined(__IBMC__) || defined(__WATCOMC__)) +# define ASM_ENTRY_POINT(name) (_System name) +#elif defined(__WIN32__) && defined(__WATCOMC__) +# define ASM_ENTRY_POINT(name) (__cdecl name) +#else +# define ASM_ENTRY_POINT(name) name +#endif + +extern int ASM_ENTRY_POINT (x86_64_interface_initialize) (void); + +extern void asm_assignment_trap (void); +extern void asm_dont_serialize_cache (void); +extern void asm_error (void); +extern void asm_generic_add (void); +extern void asm_generic_decrement (void); +extern void asm_generic_divide (void); +extern void asm_generic_equal (void); +extern void asm_generic_greater (void); +extern void asm_generic_increment (void); +extern void asm_generic_less (void); +extern void asm_generic_modulo (void); +extern void asm_generic_multiply (void); +extern void asm_generic_negative (void); +extern void asm_generic_positive (void); +extern void asm_generic_quotient (void); +extern void asm_generic_remainder (void); +extern void asm_generic_subtract (void); +extern void asm_generic_zero (void); +extern void asm_interrupt_closure (void); +extern void asm_interrupt_continuation (void); +extern void asm_interrupt_continuation_2 (void); +extern void asm_interrupt_dlink (void); +extern void asm_interrupt_procedure (void); +extern void asm_link (void); +extern void asm_nofp_add (void); +extern void asm_nofp_decrement (void); +extern void asm_nofp_divide (void); +extern void asm_nofp_equal (void); +extern void asm_nofp_greater (void); +extern void asm_nofp_increment (void); +extern void asm_nofp_less (void); +extern void asm_nofp_modulo (void); +extern void asm_nofp_multiply (void); +extern void asm_nofp_negative (void); +extern void asm_nofp_positive (void); +extern void asm_nofp_quotient (void); +extern void asm_nofp_remainder (void); +extern void asm_nofp_subtract (void); +extern void asm_nofp_zero (void); +extern void asm_primitive_apply (void); +extern void asm_primitive_error (void); +extern void asm_primitive_lexpr_apply (void); +extern void asm_reference_trap (void); +extern void asm_safe_reference_trap (void); +extern void asm_sc_apply (void); +extern void asm_sc_apply_size_1 (void); +extern void asm_sc_apply_size_2 (void); +extern void asm_sc_apply_size_3 (void); +extern void asm_sc_apply_size_4 (void); +extern void asm_sc_apply_size_5 (void); +extern void asm_sc_apply_size_6 (void); +extern void asm_sc_apply_size_7 (void); +extern void asm_sc_apply_size_8 (void); +extern void asm_scheme_to_interface (void); +extern void asm_scheme_to_interface_call (void); +extern void asm_serialize_cache (void); +/* [TRC 20091025: This was an i386 hack for when the PC is not + available, which on x86-64 it always is. 
*/ +/* extern void asm_short_primitive_apply (void); */ +extern void asm_trampoline_to_interface (void); + +/* extern void x86_64_cache_synchronize (void); */ +/* extern void start_closure_relocation (SCHEME_OBJECT *, reloc_ref_t *); */ +extern insn_t * read_compiled_closure_target (insn_t *); +/* extern void start_operator_relocation (SCHEME_OBJECT *, reloc_ref_t *); */ +extern insn_t * read_uuo_target (SCHEME_OBJECT *); +extern void x86_64_reset_hook (void); + +extern int x86_64_cpuid_needed; + +#endif /* !SCM_CMPINTMD_H_INCLUDED */ diff --git a/src/microcode/confshared.h b/src/microcode/confshared.h index eaf38be51..be6837643 100644 --- a/src/microcode/confshared.h +++ b/src/microcode/confshared.h @@ -225,7 +225,8 @@ typedef enum COMPILER_ALPHA_TYPE, COMPILER_MIPS_TYPE, COMPILER_C_TYPE, - COMPILER_SVM_TYPE + COMPILER_SVM_TYPE, + COMPILER_X86_64_TYPE, } cc_arch_t; #include "cmpintmd-config.h" @@ -609,6 +610,8 @@ extern void win32_stack_reset (void); #ifdef __x86_64__ # define MACHINE_TYPE "x86-64" # define CURRENT_FASL_ARCH FASL_X86_64 +# define PC_ZERO_BITS 0 +# define HEAP_IN_LOW_MEMORY 1 #endif #ifdef __ia64__ diff --git a/src/microcode/utabmd.c b/src/microcode/utabmd.c index 91fcb0d74..bc89addf0 100644 --- a/src/microcode/utabmd.c +++ b/src/microcode/utabmd.c @@ -117,6 +117,7 @@ cc_arch_name (void) case COMPILER_MIPS_TYPE: return ("mips"); case COMPILER_C_TYPE: return ("c"); case COMPILER_SVM_TYPE: return ("svm1"); + case COMPILER_X86_64_TYPE: return ("x86-64"); default: return (0); } } -- 2.25.1
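A final cross-check of the execute-cache encoding described in
cmpintmd/x86-64.h: the instruction sequence that write_uuo_target
stores is MOV RAX,imm64 followed by JMP (RAX) -- twelve bytes, which
is why a linked cache occupies three SCHEME_OBJECTs (UUO_LINK_SIZE)
and why compiled_closure_next advances by CC_ENTRY_HEADER_SIZE + 12.
A standalone sketch that prints the emitted bytes (the buffer, names,
and sample address are illustrative only):

    #include <stdio.h>
    #include <stdint.h>
    #include <string.h>

    typedef uint8_t insn_t;

    /* Same bytes as write_uuo_target: REX.W + MOV RAX,imm64, then a
       JMP through RAX.  */
    static void
    write_jump_via_rax (insn_t * addr, uint64_t target)
    {
      (*addr++) = 0x48;		/* REX.W */
      (*addr++) = 0xB8;		/* MOV RAX,imm64 */
      memcpy (addr, (&target), 8);
      addr += 8;
      (*addr++) = 0xFF;		/* JMP reg/mem64 */
      (*addr++) = 0xE0;		/* ModR/M for RAX */
    }

    int
    main (void)
    {
      insn_t buffer[12];
      write_jump_via_rax (buffer, 0x123456789abcdef0ULL);
      for (unsigned i = 0; (i < (sizeof buffer)); i += 1)
        printf ("%02x ", (buffer[i]));
      printf ("\n");		/* 48 b8 f0 de bc 9a 78 56 34 12 ff e0 */
      return (0);
    }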