continuation
(expect-no-exit-interrupt-checks)
(LAP ,@(clear-map!)
- (POP Q (R ,rcx))
+ (POP Q (R ,rbx))
#|
(MOV Q (R ,rdx) (&U ,frame-size))
,@(invoke-interface code:compiler-apply)
continuation
(expect-no-exit-interrupt-checks)
(LAP ,@(clear-map!)
- (LEA Q (R ,rcx) (@PCR ,label))
+ (LEA Q (R ,rbx) (@PCR ,label))
(MOV Q (R ,rdx) (&U ,number-pushed))
,@(invoke-interface code:compiler-lexpr-apply)))
;; It expects the procedure at the top of the stack
(expect-no-exit-interrupt-checks)
(LAP ,@(clear-map!)
- (POP Q (R ,rcx))
- (AND Q (R ,rcx) (R ,regnum:datum-mask)) ; clear type code
+ (POP Q (R ,rbx))
+ (AND Q (R ,rbx) (R ,regnum:datum-mask)) ; clear type code
(MOV Q (R ,rdx) (&U ,number-pushed))
,@(invoke-interface code:compiler-lexpr-apply)))
continuation
(expect-no-exit-interrupt-checks)
(let* ((set-extension
- (interpreter-call-argument->machine-register! extension rcx))
+ (interpreter-call-argument->machine-register! extension rbx))
(set-address
(begin (require-register! rdx)
(load-pc-relative-address (INST-EA (R ,rdx))
(LAP ,@set-extension
,@set-address
,@(clear-map!)
- (MOV Q (R ,rbx) (&U ,frame-size))
+ (MOV Q (R ,rcx) (&U ,frame-size))
,@(invoke-interface code:compiler-cache-reference-apply))))
(define-rule statement
continuation
(expect-no-entry-interrupt-checks)
(let* ((set-environment
- (interpreter-call-argument->machine-register! environment rcx))
+ (interpreter-call-argument->machine-register! environment rbx))
(set-name (object->machine-register! name rdx)))
(delete-dead-registers!)
(LAP ,@set-environment
,@set-name
,@(clear-map!)
- (MOV Q (R ,rbx) (&U ,frame-size))
+ (MOV Q (R ,rcx) (&U ,frame-size))
,@(invoke-interface code:compiler-lookup-apply))))
\f
(define-rule statement
continuation ; ignored
(if (eq? primitive compiled-error-procedure)
(LAP ,@(clear-map!)
- (MOV Q (R ,rcx) (&U ,frame-size))
+ (MOV Q (R ,rbx) (&U ,frame-size))
,@(invoke-hook entry:compiler-error))
- (LAP ,@(object->machine-register! primitive rcx)
+ (LAP ,@(object->machine-register! primitive rbx)
,@(clear-map!)
,@(let ((arity (primitive-procedure-arity primitive)))
(cond ((not (negative? arity))
;;; This is invoked by the top level of the LAP generator.
(define (generate/quotation-header environment-label free-ref-label n-sections)
- (LAP (MOV Q (R ,rcx) ,reg:environment)
- (MOV Q (@PCR ,environment-label) (R ,rcx))
+ (LAP (MOV Q (R ,rax) ,reg:environment)
+ (MOV Q (@PCR ,environment-label) (R ,rax))
(LEA Q (R ,rdx) (@PCR ,*block-label*))
- (LEA Q (R ,rbx) (@PCR ,free-ref-label))
- (MOV Q ,reg:utility-arg-4 (&U ,n-sections))
+ (LEA Q (R ,rcx) (@PCR ,free-ref-label))
+ (MOV Q (R ,r8) (&U ,n-sections))
#|
,@(invoke-interface/call code:compiler-link)
|#
n-sections)
(LAP (MOV Q (R ,rdx) (@PCR ,code-block-label))
(AND Q (R ,rdx) (R ,regnum:datum-mask))
- (LEA Q (R ,rbx) (@RO ,rdx ,free-ref-offset))
- (MOV Q (R ,rcx) ,reg:environment)
- (MOV Q (@RO ,rdx ,environment-offset) (R ,rcx))
- (MOV Q ,reg:utility-arg-4 (&U ,n-sections))
+ (LEA Q (R ,rcx) (@RO ,rdx ,free-ref-offset))
+ (MOV Q (R ,rax) ,reg:environment)
+ (MOV Q (@RO ,rdx ,environment-offset) (R ,rax))
+ (MOV Q (R ,r8) (&U ,n-sections))
#|
,@(invoke-interface/call code:compiler-link)
|#
(PUSH Q (& 0))
(LABEL ,loop)
;; Get index
- (MOV Q (R ,rcx) (@R ,rsp))
+ (MOV Q (R ,rax) (@R ,rsp))
;; Get vector
(MOV Q (R ,rdx) (@PCR ,vector-label))
;; Get n-sections for this cc-block
- (XOR Q (R ,rbx) (R ,rbx))
- (LEA Q (R ,rax) (@PCR ,bytes))
- (MOV B (R ,rbx) (@RI ,rax ,rcx 1))
+ (XOR Q (R ,r8) (R ,r8))
+ (LEA Q (R ,rcx) (@PCR ,bytes))
+ (MOV B (R ,r8) (@RI ,rcx ,rax 1))
;; address of vector
(AND Q (R ,rdx) (R ,regnum:datum-mask))
- ;; Store n-sections in arg
- (MOV Q ,reg:utility-arg-4 (R ,rbx))
;; vector-ref -> cc block
(MOV Q
(R ,rdx)
(@ROI ,rdx ,address-units-per-object
- ,rcx ,address-units-per-object))
+ ,rax ,address-units-per-object))
;; address of cc-block
(AND Q (R ,rdx) (R ,regnum:datum-mask))
;; cc-block length
- (MOV Q (R ,rbx) (@R ,rdx))
+ (MOV Q (R ,rcx) (@R ,rdx))
;; Get environment
- (MOV Q (R ,rcx) ,reg:environment)
+ (MOV Q (R ,rax) ,reg:environment)
;; Eliminate length tags
- (AND Q (R ,rbx) (R ,regnum:datum-mask))
+ (AND Q (R ,rcx) (R ,regnum:datum-mask))
;; Store environment
- (MOV Q (@RI ,rdx ,rbx ,address-units-per-object) (R ,rcx))
+ (MOV Q (@RI ,rdx ,rcx ,address-units-per-object) (R ,rax))
;; Get NMV header
- (MOV Q (R ,rcx) (@RO ,rdx ,address-units-per-object))
+ (MOV Q (R ,rax) (@RO ,rdx ,address-units-per-object))
;; Eliminate NMV tag
- (AND Q (R ,rcx) (R ,regnum:datum-mask))
+ (AND Q (R ,rax) (R ,regnum:datum-mask))
;; Address of first free reference
(LEA Q
- (R ,rbx)
+ (R ,rcx)
(@ROI ,rdx ,(* 2 address-units-per-object)
- ,rcx ,address-units-per-object))
+ ,rax ,address-units-per-object))
;; Invoke linker
,@(invoke-hook/call entry:compiler-link)
,@(make-external-label (continuation-code-word false)
### them, they contain garbage.
###
### Compiled Scheme code uses the following register convention:
-### - %rsp containts the Scheme stack pointer, not the C stack
+### - %rsp contains the Scheme stack pointer, not the C stack
### pointer.
### - %rsi contains a pointer to the Scheme interpreter's "register"
### block. This block contains the compiler's copy of MemTop,
### TYPE_CODE_LENGTH
### Normally defined to be 6. Don't change this unless you know
### what you're doing.
-### DISABLE_387
-### If defined, do not generate 387 floating-point instructions.
### VALGRIND_MODE
### If defined, modify code to make it work with valgrind.
\f
`define(IF_WIN32,`$1')',
`define(IF_WIN32,`')')
-ifdef(`DISABLE_387',
- `define(IF387,`')',
- `define(IF387,`$1')')
-
-ifdef(`DISABLE_387',
- `define(IFN387,`$1')',
- `define(IFN387,`')')
-
IF_WIN32(`define(DASM,1)')
ifdef(`WCC386R',`define(WCC386,1)')
define(REGBLOCK_VAL,16)
define(REGBLOCK_COMPILER_TEMP,32)
-define(REGBLOCK_LEXPR_ACTUALS,56)
-define(REGBLOCK_PRIMITIVE,64)
-define(REGBLOCK_CLOSURE_FREE,72)
-
define(REGBLOCK_DLINK,REGBLOCK_COMPILER_TEMP)
-define(REGBLOCK_UTILITY_ARG4,REGBLOCK_CLOSURE_FREE)
-
-define(COMPILER_REGBLOCK_N_FIXED,16)
-define(COMPILER_REGBLOCK_N_HOOKS,80)
-define(COMPILER_REGBLOCK_N_TEMPS,256)
-define(COMPILER_FIXED_SIZE,1)
-define(COMPILER_HOOK_SIZE,1)
-define(COMPILER_TEMP_SIZE,2)
-define(REGBLOCK_SIZE_IN_OBJECTS,
- eval((COMPILER_REGBLOCK_N_FIXED*COMPILER_FIXED_SIZE)
- +(COMPILER_REGBLOCK_N_HOOKS*COMPILER_HOOK_SIZE)
- +(COMPILER_REGBLOCK_N_TEMPS*COMPILER_TEMP_SIZE)))
# Define the floating-point processor control word. Always set
# round-to-even and double precision. Under Win32, mask all
ifdef(`WIN32',`
use_external_data(EVR(RegistersPtr))
',`
-define_data(Regstart)
-allocate_space(Regstart,256)
-
-define_data(Registers)
-allocate_space(Registers,eval(REGBLOCK_SIZE_IN_OBJECTS*8))
+use_external_data(EVR(Registers))
')
-define_data(i387_presence)
-allocate_quadword(i387_presence)
-
define_data(C_Stack_Pointer)
allocate_quadword(C_Stack_Pointer)
define_data(C_Frame_Pointer)
allocate_quadword(C_Frame_Pointer)
-
-# [TRC 20091025: CPUID is always supported.]
-# define_data(x86_64_cpuid_supported)
-# allocate_quadword(x86_64_cpuid_supported)
-
-# [TRC 20091025: The cache synchronization bug does not occur in any
-# x86-64 machines of which I am aware.]
-# define_data(x86_64_cpuid_needed)
-# allocate_quadword(x86_64_cpuid_needed)
\f
DECLARE_CODE_SEGMENT()
declare_alignment(2)
-# [TRC 20091025: We need to check for MMX/SSEn instructions too.]
-
-define_c_label(x86_64_interface_initialize)
- OP(push,q) REG(rbp)
- OP(mov,q) TW(REG(rsp),REG(rbp))
- OP(xor,q) TW(REG(rax),REG(rax)) # No 387 available
-
-# [TRC 20091025: The AMD64 reference manual suggests using the CPUID
-# instruction to detect instruction subsets instead.]
-
-# Unfortunately, the `movl cr0,ecx' instruction is privileged.
-# Use the deprecated `smsw cx' instruction instead.
-
-IF387(`
-# OP(mov,q) TW(REG(cr0),REG(rcx)) # Test for 387 presence
-ifdef(`VALGRIND_MODE',`',`
- smsw REG(cx)
- OP(mov,q) TW(IMM(HEX(12)),REG(rdx))
- OP(and,q) TW(REG(rdx),REG(rcx))
- OP(cmp,q) TW(REG(rdx),REG(rcx))
- jne x86_64_initialize_no_fp
-')
- OP(inc,q) REG(rax) # 387 available
- OP(sub,q) TW(IMM(8),REG(rsp))
- fclex
- fnstcw WOF(-2,REG(rbp))
- OP(and,w) TW(IMM(HEX(f0e0)),WOF(-2,REG(rbp)))
- OP(or,w) TW(IMM(FP_CONTROL_WORD),WOF(-2,REG(rbp)))
- fldcw WOF(-2,REG(rbp))
-x86_64_initialize_no_fp:
-')
- OP(mov,q) TW(REG(rax),ABS(EVR(i387_presence)))
-
-# [TRC 20091025: CPUID is always supported.]
-
-# Do a bunch of hair to determine if we need to do cache synchronization.
-# See if the CPUID instruction is supported.
-
-# OP(xor,q) TW(REG(rax),REG(rax))
-# OP(mov,q) TW(REG(rax),ABS(EVR(x86_64_cpuid_supported)))
-# OP(mov,q) TW(REG(rax),ABS(EVR(x86_64_cpuid_needed)))
-
-# First test: can we toggle the AC bit?
-
-# pushfd
-# OP(pop,l) REG(eax)
-# OP(mov,l) TW(REG(eax),REG(ecx))
-# OP(xor,l) TW(IMM(HEX(00040000)),REG(eax))
-# OP(push,l) REG(eax)
-# popfd
-# pushfd
-# OP(pop,l) REG(eax)
-
-# if AC bit can't be toggled, this is a 386 (and doesn't support CPUID).
-
-# OP(xor,l) TW(REG(ecx),REG(eax))
-# jz no_cpuid_instr
-# OP(push,l) REG(ecx) # restore EFLAGS
-# popfd
-
-# Now test to see if the ID bit can be toggled.
-
-# OP(mov,l) TW(REG(ecx),REG(eax))
-# OP(xor,l) TW(IMM(HEX(00200000)),REG(eax))
-# OP(push,l) REG(eax)
-# popfd
-# pushfd
-# OP(pop,l) REG(eax)
-
-# if ID bit can't be toggled, this is a 486 that doesn't support CPUID.
-
-# OP(xor,l) TW(REG(ecx),REG(eax))
-# jz no_cpuid_instr
-# OP(push,l) REG(ecx) # restore EFLAGS
-# popfd
-
-# Now we know that cpuid is supported.
-
-# OP(mov,q) TW(IMM(HEX(00000001)),ABS(EVR(x86_64_cpuid_supported)))
-
-# Next, use the CPUID instruction to determine the processor type.
-
-# OP(push,l) REG(ebx)
-# OP(xor,l) TW(REG(eax),REG(eax))
-# cpuid
-
-# Check that CPUID accepts argument 1.
-
-# OP(cmp,l) TW(IMM(HEX(00000001)),REG(eax))
-# jl done_setting_up_cpuid
-
-# Detect "GenuineIntel".
-
-# OP(cmp,l) TW(IMM(HEX(756e6547)),REG(ebx))
-# jne not_intel_cpu
-# OP(cmp,l) TW(IMM(HEX(49656e69)),REG(edx))
-# jne not_intel_cpu
-# OP(cmp,l) TW(IMM(HEX(6c65746e)),REG(ecx))
-# jne not_intel_cpu
-
-# For CPU families 4 (486), 5 (Pentium), or 6 (Pentium Pro, Pentium
-# II, Pentium III), don't use CPUID synchronization.
-
-# OP(mov,l) TW(IMM(HEX(01)),REG(eax))
-# cpuid
-# OP(shr,l) TW(IMM(HEX(08)),REG(eax))
-# OP(and,l) TW(IMM(HEX(0000000F)),REG(eax))
-# OP(cmp,l) TW(IMM(HEX(4)),REG(eax))
-# jl done_setting_up_cpuid
-# OP(cmp,l) TW(IMM(HEX(6)),REG(eax))
-# jg done_setting_up_cpuid
+# C_to_interface passes control from C into Scheme. To C it is a
+# unary procedure; its one argument is passed in rdi. It saves the
+# state of the C world (the C frame pointer and stack pointer) and
+# then passes control to interface_to_scheme to set up the state of
+# the Scheme world.
#
-# jmp cpuid_not_needed
-#
-#not_intel_cpu:
-
-# Detect "AuthenticAMD".
-
-# OP(cmp,l) TW(IMM(HEX(68747541)),REG(ebx))
-# jne not_amd_cpu
-# OP(cmp,l) TW(IMM(HEX(69746e65)),REG(edx))
-# jne not_amd_cpu
-# OP(cmp,l) TW(IMM(HEX(444d4163)),REG(ecx))
-# jne not_amd_cpu
-
-# Problem appears to exist only on Athlon models 1, 3, and 4.
-
-# OP(mov,l) TW(IMM(HEX(01)),REG(eax))
-# cpuid
-
-# OP(mov,l) TW(REG(eax),REG(ecx))
-# OP(shr,l) TW(IMM(HEX(08)),REG(eax))
-# OP(and,l) TW(IMM(HEX(0000000F)),REG(eax))
-# OP(cmp,l) TW(IMM(HEX(6)),REG(eax)) # family 6 = Athlon
-# jne done_setting_up_cpuid
-
-# OP(mov,l) TW(REG(ecx),REG(eax))
-# OP(shr,l) TW(IMM(HEX(04)),REG(eax))
-# OP(and,l) TW(IMM(HEX(0000000F)),REG(eax))
-# OP(cmp,l) TW(IMM(HEX(6)),REG(eax)) # model 6 and up OK
-# jge done_setting_up_cpuid
-# OP(cmp,l) TW(IMM(HEX(2)),REG(eax)) # model 2 OK
-# je done_setting_up_cpuid
-
-# OP(mov,l) TW(IMM(HEX(00000001)),ABS(EVR(x86_64_cpuid_needed)))
-
-#not_amd_cpu:
-#done_setting_up_cpuid:
-# OP(pop,l) REG(ebx)
-#no_cpuid_instr:
- leave
- ret
-
# Note: The AMD64 ABI mandates that on entry to a function, RSP - 8
# must be a multiple of 0x10; that is, the stack must be 128-bit
# aligned. We push six quadwords onto the stack, but there is already
define_hook_label(trampoline_to_interface)
define_debugging_label(trampoline_to_interface)
- OP(pop,q) REG(rcx) # trampoline storage
+ OP(pop,q) REG(rbx) # trampoline storage
jmp scheme_to_interface
define_hook_label(scheme_to_interface_call)
define_debugging_label(scheme_to_interface_call)
- OP(pop,q) REG(rcx) # arg1 = ret. add
- OP(add,q) TW(IMM(4),REG(rcx)) # Skip format info
+ OP(pop,q) REG(rbx) # arg1 = ret. add
+ OP(add,q) TW(IMM(4),REG(rbx)) # Skip format info
# jmp scheme_to_interface
\f
+# scheme_to_interface passes control from compiled Scheme code to a
+# microcode utility. The arguments for the utility go respectively in
+# rbx, rdx, rcx, and r8. This loosely matches the AMD64 calling
+# convention, where arguments go respectively in rdi, rsi, rdx, rcx,
+# and r8. The differences are that scheme_to_interface uses rdi as an
+# implicit first argument to the utility, and rsi is used in compiled
+# code for the registers block, since the compiler can't allocate it
+# as a general-purpose register because it doesn't admit byte-size
+# operations. Moreover, Scheme uses rdi as the free pointer register,
+# which we have to save here in a location unknown to Scheme (the C
+# `Free' variable), so it can't be set by compiled code.
+
define_hook_label(scheme_to_interface)
define_debugging_label(scheme_to_interface)
-
-# These two moves must happen _before_ the ffree instructions below.
-# Otherwise recovery from SIGFPE there will fail.
OP(mov,q) TW(REG(rsp),ABS(EVR(stack_pointer)))
OP(mov,q) TW(rfree,ABS(EVR(Free)))
-
-# [TRC 20091025: I think this should be excised.]
-
-IF387(`
- OP(cmp,q) TW(IMM(0),ABS(EVR(i387_presence)))
- je scheme_to_interface_proceed
- ffree ST(0) # Free floating "regs"
- ffree ST(1)
- ffree ST(2)
- ffree ST(3)
- ffree ST(4)
- ffree ST(5)
- ffree ST(6)
- ffree ST(7)
-scheme_to_interface_proceed:
-')
-
OP(mov,q) TW(ABS(EVR(C_Stack_Pointer)),REG(rsp))
OP(mov,q) TW(ABS(EVR(C_Frame_Pointer)),REG(rbp))
OP(sub,q) TW(IMM(16),REG(rsp)) # alloc struct return
-
- # Shuffle Scheme -> AMD64 calling conventions:
- # struct pointer -> rdi
- # rcx -> rsi
- # rdx -> rdx
- # rbx -> rcx
- # arg4 -> r8
- # Parallel assignment problems:
- # arg4 depends on rsi: do arg4->r8 first
- # target depends on rcx (why?): use r11 as a temporary
- # [TRC 20091025: Perhaps we can rearrange LIAR to generate
- # arguments in the registers we want, to avoid this
- # shuffling.]
-
- OP(mov,q) TW(REG(rcx),REG(r11))
-
- OP(xor,q) TW(REG(rcx),REG(rcx))
- OP(mov,b) TW(REG(al),REG(cl))
- OP(lea,q) TW(ABS(EVR(utility_table)),REG(rax))
- OP(mov,q) TW(SDX(,REG(rax),REG(rcx),8),REG(rax))
-
- OP(mov,q) TW(REG(rsp),REG(rdi))
- OP(mov,q) TW(DOF(REGBLOCK_UTILITY_ARG4(),regs),REG(r8))
- OP(mov,q) TW(REG(r11),REG(rsi))
- OP(mov,q) TW(REG(rbx),REG(rcx))
+ OP(mov,q) TW(REG(rsp),REG(rdi)) # Structure is first argument.
+ OP(mov,q) TW(REG(rbx),REG(rsi)) # rbx -> second argument.
+
+ # Find the utility. rbx is now free as a temporary register
+ # to hold the utility table. rax initially stores the utility
+ # number in its low eight bits and possibly garbage in the
+ # rest; mask it off and then use it as an index into the
+ # utility table, scaled by 8 (bytes per pointer).
+ OP(lea,q) TW(ABS(EVR(utility_table)),REG(rbx))
+ OP(and,q) TW(IMM(HEX(ff)),REG(rax))
+ OP(mov,q) TW(SDX(,REG(rbx),REG(rax),8),REG(rax))
call IJMP(REG(rax))
jmp IJMP(REG(rax)) # Invoke handler
define_c_label(interface_to_scheme)
-IF387(`
- OP(cmp,q) TW(IMM(0),ABS(EVR(i387_presence)))
- je interface_to_scheme_proceed
- ffree ST(0) # Free floating "regs"
- ffree ST(1)
- ffree ST(2)
- ffree ST(3)
- ffree ST(4)
- ffree ST(5)
- ffree ST(6)
- ffree ST(7)
-interface_to_scheme_proceed:
-')
- # Register block = %rsi
- # Scheme offset in NT
-ifdef(`WIN32',
+ifdef(`WIN32', # Register block = %rsi
` OP(mov,q) TW(ABS(EVR(RegistersPtr)),regs)',
` OP(lea,q) TW(ABS(EVR(Registers)),regs)')
-
OP(mov,q) TW(ABS(EVR(Free)),rfree) # Free pointer = %rdi
OP(mov,q) TW(DOF(REGBLOCK_VAL(),regs),REG(rax)) # Value/dynamic link
OP(mov,q) TW(IMM(ADDRESS_MASK),rmask) # = %rbp
-
OP(mov,q) TW(ABS(EVR(stack_pointer)),REG(rsp))
OP(mov,q) TW(REG(rax),REG(rcx)) # Preserve if used
OP(and,q) TW(rmask,REG(rcx)) # Restore potential dynamic link
')
define_c_label(interface_to_C)
-IF387(`
- OP(cmp,q) TW(IMM(0),ABS(EVR(i387_presence)))
- je interface_to_C_proceed
- ffree ST(0) # Free floating "regs"
- ffree ST(1)
- ffree ST(2)
- ffree ST(3)
- ffree ST(4)
- ffree ST(5)
- ffree ST(6)
- ffree ST(7)
-interface_to_C_proceed:')
-
OP(mov,q) TW(REG(rdx),REG(rax)) # Set up result
# We need a dummy register for the POP (which is three bytes
# shorter than ADD $8,RSP); since we're about to pop into r15
leave
ret
\f
-# [TRC 20091025: The cache synchronization bug does not occur in any
-# x86-64 machines of which I am aware.]
-
-#define_code_label(EFR(x86_64_cache_synchronize))
-# OP(push,q) REG(rbp)
-# OP(mov,q) TW(REG(rsp),REG(rbp))
-# OP(push,q) REG(rbx)
-# OP(xor,q) TW(REG(rax),REG(rax))
-# cpuid
-# OP(pop,q) REG(rbx)
-# leave
-# ret
-
-### Run the CPUID instruction for serialization.
-
-#define_hook_label(serialize_cache)
-# pushad
-# OP(xor,q) TW(REG(rax),REG(rax))
-# cpuid
-# popad
-# ret
-
-### Stub to be used in place of above on machines that don't need it.
-
-#define_hook_label(dont_serialize_cache)
-# ret
-\f
### Assembly language hooks used to reduce code size.
### There is no time advantage to using these over using
### scheme_to_interface (or scheme_to_interface_call), but the
OP(mov,b) TW(IMM(HEX(19)),REG(al))
jmp scheme_to_interface_call
-###
-### This saves even more instructions than primitive_apply
-### When the PC is not available. Instead of jumping here,
-### a call instruction is used, and the longword offset to
-### the primitive object follows the call instruction.
-### This code loads the primitive object and merges with
-### apply_primitive
-###
-### [TRC 20091025: But on the x86-64, we have RIP-relative
-### addressing, so we don't need this.]
-###
-
-#declare_alignment(2)
-#define_hook_label(short_primitive_apply)
-# OP(pop,l) REG(edx) # offset pointer
-# OP(mov,l) TW(IND(REG(edx)),REG(ecx)) # offset
-# # Primitive object
-# OP(mov,l) TW(IDX(REG(edx),REG(ecx)),REG(ecx))
-# # Merge
-# jmp hook_reference(primitive_apply)
-
declare_alignment(2)
define_jump_indirection(primitive_apply,12)
declare_alignment(2)
define_hook_label(sc_apply)
- OP(mov,q) TW(REG(rcx),REG(rax)) # Copy for type code
- OP(mov,q) TW(REG(rcx),REG(rbx)) # Copy for address
+ OP(mov,q) TW(REG(rbx),REG(rax)) # Copy for type code
+ OP(mov,q) TW(REG(rbx),REG(rcx)) # Copy for address
OP(shr,q) TW(IMM(DATUM_LENGTH),REG(rax)) # Select type code
- OP(and,q) TW(rmask,REG(rbx)) # Select datum
+ OP(and,q) TW(rmask,REG(rcx)) # Select datum
OP(cmp,b) TW(IMM(TC_COMPILED_ENTRY),REG(al))
jne asm_sc_apply_generic
- # [TRC 20091025: How big are the frame sizes?]
- OP(movs,bq,x) TW(BOF(-4,REG(rbx)),REG(rax)) # Extract frame size
+ OP(movs,bq,x) TW(BOF(-4,REG(rcx)),REG(rax)) # Extract frame size
OP(cmp,q) TW(REG(rax),REG(rdx)) # Compare to nargs+1
jne asm_sc_apply_generic
- jmp IJMP(REG(rbx)) # Invoke
+ jmp IJMP(REG(rcx)) # Invoke
define_debugging_label(asm_sc_apply_generic)
OP(mov,q) TW(IMM(HEX(14)),REG(rax))
define(define_apply_fixed_size,
`declare_alignment(2)
define_hook_label(sc_apply_size_$1)
- OP(mov,q) TW(REG(rcx),REG(rax)) # Copy for type code
- OP(mov,q) TW(REG(rcx),REG(rbx)) # Copy for address
+ OP(mov,q) TW(REG(rbx),REG(rax)) # Copy for type code
+ OP(mov,q) TW(REG(rbx),REG(rcx)) # Copy for address
OP(shr,q) TW(IMM(DATUM_LENGTH),REG(rax)) # Select type code
- OP(and,q) TW(rmask,REG(rbx)) # Select datum
+ OP(and,q) TW(rmask,REG(rcx)) # Select datum
OP(cmp,b) TW(IMM(TC_COMPILED_ENTRY),REG(al))
jne asm_sc_apply_generic_$1
- # [TRC 20091025: How big are the frame sizes?]
- OP(cmp,b) TW(IMM($1),BOF(-4,REG(rbx))) # Compare frame size
- jne asm_sc_apply_generic_$1 # to nargs+1
- jmp IJMP(REG(rbx))
+ OP(cmp,b) TW(IMM($1),BOF(-4,REG(rcx))) # Compare frame size
+ jne asm_sc_apply_generic_$1 # to nargs+1
+ jmp IJMP(REG(rcx))
asm_sc_apply_generic_$1:
OP(mov,q) TW(IMM($1),REG(rdx))
### numeric types are much faster than the rare ones
### (bignums, ratnums, recnums)
-IF387(`declare_alignment(2)
-asm_generic_flonum_result:
- # The MOV instruction can take a 64-bit immediate operand only
- # if the target is a register, so we store the manifest in rax
- # before moving it to memory.
- OP(mov,q) TW(IMM_MANIFEST_NM_VECTOR_1,REG(rax))
- OP(mov,q) TW(REG(rax), IND(rfree))
- # The OR instruction cannot take a 64-bit immediate either, so
- # we need to store the tag in rax first, shift it up, and then
- # OR the datum into it.
- OP(mov,q) TW(IMM(TC_FLONUM),REG(rax))
- OP(shl,q) TW(IMM(DATUM_LENGTH),REG(rax))
- OP(or,q) TW(rfree,REG(rax))
- OP(fstp,l) DOF(8,rfree) # fstpd
- OP(and,q) TW(rmask,IND(REG(rsp)))
- OP(add,q) TW(IMM(16),rfree)
- OP(mov,q) TW(REG(rax),DOF(REGBLOCK_VAL(),regs))
- ret
-
declare_alignment(2)
asm_generic_fixnum_result:
OP(and,q) TW(rmask,IND(REG(rsp)))
OP(and,q) TW(rmask,IND(REG(rsp)))
OP(mov,q) TW(IMM_FALSE,REG(rax))
OP(mov,q) TW(REG(rax),LOF(REGBLOCK_VAL(),regs))
- ret')
+ ret
\f
define(define_unary_operation,
`declare_alignment(2)
OP(mov,q) TW(REG(rdx),REG(rax))
OP(shr,q) TW(IMM(DATUM_LENGTH),REG(rax))
OP(cmp,b) TW(IMM(TC_FIXNUM),REG(al))
- je asm_generic_$1_fix
- OP(cmp,b) TW(IMM(TC_FLONUM),REG(al))
jne asm_generic_$1_fail
- OP(and,q) TW(rmask,REG(rdx))
- fld1
- OP($4,l) DOF(8,REG(rdx))
- jmp asm_generic_flonum_result
asm_generic_$1_fix:
OP(mov,q) TW(REG(rdx),REG(rax))
OP(mov,q) TW(REG(rdx),REG(rax))
OP(shr,q) TW(IMM(DATUM_LENGTH),REG(rax))
OP(cmp,b) TW(IMM(TC_FIXNUM),REG(al))
- je asm_generic_$1_fix
- OP(cmp,b) TW(IMM(TC_FLONUM),REG(al))
jne asm_generic_$1_fail
- OP(and,q) TW(rmask,REG(rdx))
- OP(fld,l) DOF(8,REG(rdx))
- ftst
- fstsw REG(ax)
- fstp ST(0)
- sahf
- $4 asm_generic_return_sharp_t
- jmp asm_generic_return_sharp_f
asm_generic_$1_fix:
OP(mov,q) TW(REG(rdx),REG(rax))
jmp scheme_to_interface')
\f
define(define_binary_operation,
+`define_binary_operation_with_fixup($1,$2,$3,
+ `OP(shl,q) TW(IMM(TC_LENGTH),REG(rax))')')
+
+define(define_binary_operation_with_fixup,
`declare_alignment(2)
define_hook_label(generic_$1)
OP(pop,q) REG(rdx)
OP(cmp,b) TW(REG(al),REG(cl))
jne asm_generic_$1_fail
OP(cmp,b) TW(IMM(TC_FIXNUM),REG(al))
- je asm_generic_$1_fix
- OP(cmp,b) TW(IMM(TC_FLONUM),REG(al))
- je asm_generic_$1_flo
-
-asm_generic_$1_fail:
- OP(push,q) REG(rbx)
- OP(push,q) REG(rdx)
- OP(mov,b) TW(IMM(HEX($2)),REG(al))
- jmp scheme_to_interface
+ jne asm_generic_$1_fail
asm_generic_$1_fix:
OP(mov,q) TW(REG(rdx),REG(rax))
OP(mov,q) TW(REG(rbx),REG(rcx))
- OP(shl,q) TW(IMM(TC_LENGTH),REG(rax))
+ $4 # Set up rax.
OP(shl,q) TW(IMM(TC_LENGTH),REG(rcx))
- $5
OP($3,q) TW(REG(rcx),REG(rax)) # subq
- jo asm_generic_$1_fail
- jmp asm_generic_fixnum_result
-
-asm_generic_$1_flo:
- OP(and,q) TW(rmask,REG(rdx))
- OP(and,q) TW(rmask,REG(rbx))
- OP(fld,l) DOF(8,REG(rdx)) # fldd
- OP($4,l) DOF(8,REG(rbx)) # fsubl
- jmp asm_generic_flonum_result')
-\f
-IF387(`declare_alignment(2)
-define_hook_label(generic_divide)
- OP(pop,q) REG(rdx)
- OP(pop,q) REG(rbx)
- OP(mov,q) TW(REG(rdx),REG(rax))
- OP(mov,q) TW(REG(rbx),REG(rcx))
- OP(shr,q) TW(IMM(DATUM_LENGTH),REG(rax))
- OP(shr,q) TW(IMM(DATUM_LENGTH),REG(rcx))
- OP(cmp,b) TW(IMM(TC_FIXNUM),REG(al))
- je asm_generic_divide_fix
- OP(cmp,b) TW(IMM(TC_FLONUM),REG(al))
- jne asm_generic_divide_fail
- OP(cmp,b) TW(IMM(TC_FLONUM),REG(cl))
- je asm_generic_divide_flo_flo
- OP(cmp,b) TW(IMM(TC_FIXNUM),REG(cl))
- jne asm_generic_divide_fail
- OP(mov,q) TW(REG(rbx),REG(rcx))
- OP(shl,q) TW(IMM(TC_LENGTH),REG(rcx))
- je asm_generic_divide_fail
- OP(and,q) TW(rmask,REG(rdx))
- OP(sar,q) TW(IMM(TC_LENGTH),REG(rcx))
- OP(fld,l) DOF(8,REG(rdx)) # fldd
- OP(mov,q) TW(REG(rcx),IND(rfree))
- OP(fidiv,l) IND(rfree)
- jmp asm_generic_flonum_result
-
-asm_generic_divide_fix:
- OP(cmp,b) TW(IMM(TC_FLONUM),REG(cl))
- jne asm_generic_divide_fail
- OP(mov,q) TW(REG(rdx),REG(rcx))
- OP(shl,q) TW(IMM(TC_LENGTH),REG(rcx))
- je asm_generic_divide_fail
- OP(and,q) TW(rmask,REG(rbx))
- OP(sar,q) TW(IMM(TC_LENGTH),REG(rcx))
- OP(fld,l) DOF(8,REG(rbx)) # fldd
- OP(mov,q) TW(REG(rcx),IND(rfree))
- OP(fidivr,l) IND(rfree)
- jmp asm_generic_flonum_result
-
-asm_generic_divide_flo_flo:
- OP(mov,q) TW(REG(rbx),REG(rcx))
- OP(and,q) TW(rmask,REG(rcx))
- OP(fld,l) DOF(8,REG(rcx)) # fldd
- ftst
- fstsw REG(ax)
- sahf
- je asm_generic_divide_by_zero
- OP(and,q) TW(rmask,REG(rdx))
- OP(fdivr,l) DOF(8,REG(rdx))
- jmp asm_generic_flonum_result
-
-asm_generic_divide_by_zero:
- fstp ST(0) # Pop second arg
-
-asm_generic_divide_fail:
+ jno asm_generic_fixnum_result
+
+asm_generic_$1_fail:
OP(push,q) REG(rbx)
OP(push,q) REG(rdx)
- OP(mov,b) TW(IMM(HEX(23)),REG(al))
+ OP(mov,b) TW(IMM(HEX($2)),REG(al))
jmp scheme_to_interface')
-\f
+
define(define_binary_predicate,
`declare_alignment(2)
define_hook_label(generic_$1)
OP(cmp,b) TW(IMM(TC_FIXNUM),REG(al))
jne asm_generic_$1_fail
+asm_generic_$1_fix:
OP(shl,q) TW(IMM(TC_LENGTH),REG(rdx))
OP(shl,q) TW(IMM(TC_LENGTH),REG(rbx))
OP(cmp,q) TW(REG(rbx),REG(rdx))
OP(mov,b) TW(IMM(HEX($2)),REG(al))
jmp scheme_to_interface')
\f
-IF387(`define_unary_operation(decrement,22,sub,fsubr)
-define_unary_operation(increment,26,add,fadd)
-
-define_unary_predicate(negative,2a,jl,jb)
-define_unary_predicate(positive,2c,jg,ja)
-define_unary_predicate(zero,2d,je,je)
-
-# define_binary_operation(name,index,fix*fix,flo*flo, fixup)
-# define_binary_operation( $1, $2, $3, $4, $5)
-# The fixup is optional; only multiplication needs it to shift the
-# result back down by six bits.
-define_binary_operation(add,2b,add,fadd)
-define_binary_operation(subtract,28,sub,fsub)
-define_binary_operation(multiply,29,imul,fmul,
- `OP(shr,q) TW(IMM(6),REG(rax))')
-# Divide needs to check for 0, so we cant really use the following
-# define_binary_operation(divide,23,NONE,fdiv)
-
-# define_binary_predicate(name,index,fix*fix,flo*flo)
-define_binary_predicate(equal,24,je,je)
-define_binary_predicate(greater,25,jg,ja)
-define_binary_predicate(less,27,jl,jb)')
-
-IFN387(`define_jump_indirection(generic_decrement,22)
+#define_unary_operation(decrement,22,sub)
+#define_unary_operation(increment,26,add)
+
+#define_unary_predicate(negative,2a,jl)
+#define_unary_predicate(positive,2c,jg)
+#define_unary_predicate(zero,2d,je)
+
+# define_binary_operation(name,index,op)
+# define_binary_operation( $1, $2,$3)
+#define_binary_operation(add,2b,add)
+#define_binary_operation(subtract,28,sub)
+
+# No fixup -- leave it unshifted.
+#define_binary_operation_with_fixup(multiply,29,imul)
+
+# define_binary_predicate(name,index,jcc)
+#define_binary_predicate(equal,24,je)
+#define_binary_predicate(greater,25,jg)
+#define_binary_predicate(less,27,jl)
+
+# At the moment, there is no advantage to using the above code, and in
+# fact using it is a waste, since the compiler open-codes the fixnum
+# case already. Later, the above code will also handle floating-point
+# arguments, which the compiler does not open-code.
+
+define_jump_indirection(generic_decrement,22)
define_jump_indirection(generic_divide,23)
define_jump_indirection(generic_equal,24)
define_jump_indirection(generic_greater,25)
define_jump_indirection(generic_negative,2a)
define_jump_indirection(generic_add,2b)
define_jump_indirection(generic_positive,2c)
-define_jump_indirection(generic_zero,2d)')
-
-# These don't currently differ according to whether there
-# is a 387 or not.
-
+define_jump_indirection(generic_zero,2d)
define_jump_indirection(generic_quotient,37)
define_jump_indirection(generic_remainder,38)
define_jump_indirection(generic_modulo,39)
-
-define_jump_indirection(nofp_decrement,22)
-define_jump_indirection(nofp_divide,23)
-define_jump_indirection(nofp_equal,24)
-define_jump_indirection(nofp_greater,25)
-define_jump_indirection(nofp_increment,26)
-define_jump_indirection(nofp_less,27)
-define_jump_indirection(nofp_subtract,28)
-define_jump_indirection(nofp_multiply,29)
-define_jump_indirection(nofp_negative,2a)
-define_jump_indirection(nofp_add,2b)
-define_jump_indirection(nofp_positive,2c)
-define_jump_indirection(nofp_zero,2d)
-define_jump_indirection(nofp_quotient,37)
-define_jump_indirection(nofp_remainder,38)
-define_jump_indirection(nofp_modulo,39)
\f
IFDASM(`end')