From 5036bb89f29a985b0341777afdf17497c3b46f0f Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Sun, 30 Dec 2018 21:28:19 +0000 Subject: [PATCH] Convert x86-64 to use rax as value register. WARNING: This changes the amd64 compiled code interface so that new compiled code requires a new microcode and vice versa. --- src/compiler/machines/x86-64/lapgen.scm | 4 +- src/compiler/machines/x86-64/machin.scm | 14 ++--- src/microcode/cmpauxmd/x86-64.m4 | 72 ++++++++++++++----------- src/microcode/cmpintmd/x86-64.c | 13 ++--- src/microcode/cmpintmd/x86-64.h | 9 ++-- 5 files changed, 64 insertions(+), 48 deletions(-) diff --git a/src/compiler/machines/x86-64/lapgen.scm b/src/compiler/machines/x86-64/lapgen.scm index 9d24fe7da..653721a87 100644 --- a/src/compiler/machines/x86-64/lapgen.scm +++ b/src/compiler/machines/x86-64/lapgen.scm @@ -727,11 +727,11 @@ USA. (LABEL ,hook-context)))) (define-integrable (invoke-interface code) - (LAP (MOV B (R ,rax) (& ,code)) + (LAP (MOV B (R ,r9) (& ,code)) ,@(invoke-hook entry:compiler-scheme-to-interface))) (define-integrable (invoke-interface/call code) - (LAP (MOV B (R ,rax) (& ,code)) + (LAP (MOV B (R ,r9) (& ,code)) ,@(invoke-hook/call entry:compiler-scheme-to-interface/call))) (define-syntax define-entries diff --git a/src/compiler/machines/x86-64/machin.scm b/src/compiler/machines/x86-64/machin.scm index 08c3bf1b9..35e9bfbfd 100644 --- a/src/compiler/machines/x86-64/machin.scm +++ b/src/compiler/machines/x86-64/machin.scm @@ -197,6 +197,7 @@ USA. (define-integrable regnum:datum-mask rbp) (define-integrable regnum:regs-pointer rsi) (define-integrable regnum:free-pointer rdi) +(define-integrable regnum:value rax) (define-integrable (machine-register-known-value register) register ; ignored @@ -281,12 +282,13 @@ USA. (and (rtl:machine-constant? offset) (= (rtl:machine-constant-value offset) offset-value))))) - -(define-integrable (interpreter-value-register) - (interpreter-block-register register-block/value-offset)) + +(define (interpreter-value-register) + (rtl:make-machine-register regnum:value)) (define (interpreter-value-register? expression) - (interpreter-block-register? expression register-block/value-offset)) + (and (rtl:register? expression) + (= (rtl:register-number expression) regnum:value))) (define (interpreter-environment-register) (interpreter-block-register register-block/environment-offset)) @@ -325,10 +327,8 @@ USA. (case rtl-register ((STACK-POINTER) (interpreter-stack-pointer)) - #| ((VALUE) (interpreter-value-register)) - |# ((FREE) (interpreter-free-pointer)) ((INTERPRETER-CALL-RESULT:ACCESS) @@ -354,8 +354,10 @@ USA. register-block/int-mask-offset) ((STACK-GUARD) register-block/stack-guard-offset) + #| ((VALUE) register-block/value-offset) + |# ((ENVIRONMENT) register-block/environment-offset) ((DYNAMIC-LINK TEMPORARY) diff --git a/src/microcode/cmpauxmd/x86-64.m4 b/src/microcode/cmpauxmd/x86-64.m4 index ce5a808b1..27e8ed1c9 100644 --- a/src/microcode/cmpauxmd/x86-64.m4 +++ b/src/microcode/cmpauxmd/x86-64.m4 @@ -422,7 +422,7 @@ define_c_label(C_to_interface) define_hook_label(trampoline_to_interface) define_debugging_label(trampoline_to_interface) - OP(add,q) TW(IMM(16),REG(rcx)) # trampoline storage + OP(add,q) TW(IMM(24),REG(rcx)) # trampoline storage OP(mov,q) TW(REG(rcx),REG(rbx)) # argument in rbx jmp scheme_to_interface @@ -433,19 +433,31 @@ define_debugging_label(scheme_to_interface_call) # jmp scheme_to_interface # scheme_to_interface passes control from compiled Scheme code to a -# microcode utility. The arguments for the utility go respectively in -# rbx, rdx, rcx, and r8. This loosely matches the AMD64 calling -# convention, where arguments go respectively in rdi, rsi, rdx, rcx, -# and r8. The differences are that scheme_to_interface uses rdi as an -# implicit first argument to the utility, and rsi is used in compiled -# code for the registers block, since the compiler can't allocate it -# as a general-purpose register because it doesn't admit byte-size -# operations. Moreover, Scheme uses rdi as the free pointer register, -# which we have to save here in a location unknown to Scheme (the C -# `Free' variable), so it can't be set by compiled code. +# microcode utility. +# +# Arguments: +# +# rax return value +# r9b utility number (upper 56 bits of r9 are garbage) +# rbx first argument +# rdx second argument +# rcx third argument +# r8 fourth argument +# +# The microcode utility, a C function following the C amd64 calling +# convention, takes arguments in rdi (utility_result_t pointer), rsi +# (passed in as rbx), rdx, rcx, and r8. rsi is used by compiled code +# for the registers block, and rdi is used by compiled code for the +# free pointer register, which we have to save here to a location +# unknown to Scheme (the C `Free' variable). +# +# XXX Consider swapping the utility number and first argument, to save +# a byte in the instruction when selecting the utility number in +# trampolines, which saves a word per trampoline entry. define_hook_label(scheme_to_interface) define_debugging_label(scheme_to_interface) + OP(mov,q) TW(REG(rax),QOF(REGBLOCK_VAL(),regs)) # Save value OP(mov,q) TW(REG(rsp),ABS(EVR(stack_pointer))) OP(mov,q) TW(rfree,ABS(EVR(Free))) OP(mov,q) TW(ABS(EVR(C_Stack_Pointer)),REG(rsp)) @@ -460,13 +472,13 @@ define_debugging_label(scheme_to_interface) OP(mov,q) TW(REG(rbx),REG(rsi)) # rbx -> second argument. # Find the utility. rbx is now free as a temporary register - # to hold the utility table. rax initially stores the utility + # to hold the utility table. r9 initially stores the utility # number in its low eight bits and possibly garbage in the # rest; mask it off and then use it as an index into the # utility table, scaled by 8 (bytes per pointer). OP(lea,q) TW(ABS(EVR(utility_table)),REG(rbx)) - OP(and,q) TW(IMM(HEX(ff)),REG(rax)) - OP(mov,q) TW(SDX(,REG(rbx),REG(rax),8),REG(rax)) + OP(and,q) TW(IMM(HEX(ff)),REG(r9)) + OP(mov,q) TW(SDX(,REG(rbx),REG(r9),8),REG(rax)) call IJMP(REG(rax)) @@ -531,12 +543,12 @@ define_c_label(interface_to_C) define(define_jump_indirection, `define_hook_label($1) - OP(mov,b) TW(IMM(HEX($2)),REG(al)) + OP(mov,b) TW(IMM(HEX($2)),REG(r9b)) jmp scheme_to_interface') define(define_call_indirection, `define_hook_label($1) - OP(mov,b) TW(IMM(HEX($2)),REG(al)) + OP(mov,b) TW(IMM(HEX($2)),REG(r9b)) jmp scheme_to_interface_call') define_call_indirection(interrupt_procedure,1a) @@ -546,7 +558,7 @@ define_jump_indirection(interrupt_continuation_2,3b) define_hook_label(interrupt_dlink) OP(mov,q) TW(QOF(REGBLOCK_DLINK(),regs),REG(rdx)) - OP(mov,b) TW(IMM(HEX(19)),REG(al)) + OP(mov,b) TW(IMM(HEX(19)),REG(r9b)) jmp scheme_to_interface_call declare_alignment(2) @@ -567,7 +579,7 @@ define_call_indirection(primitive_error,36) # define(define_apply_fixed_size, # `define_hook_label(sc_apply_size_$1) # OP(mov,q) TW(IMM($1),REG(rdx)) -# OP(mov,b) TW(IMM(HEX(14)),REG(al)) +# OP(mov,b) TW(IMM(HEX(14)),REG(r9b)) # jmp scheme_to_interface') # Stack has untagged return address, then tagged entry, rdx has @@ -575,7 +587,8 @@ define_call_indirection(primitive_error,36) # arity, set condition codes for equal, store the untagged entry # address in rcx, and store the PC in rax. Otherwise, set condition # codes for not-equal, and leave the stack alone. Either way, pop and -# return. +# return. Uses r9 as a temporary register. (rbx, rax, rcx, and rdx +# are all in use already.) declare_alignment(2) define_hook_label(apply_setup) OP(mov,q) TW(REG(rbx),REG(rax)) # Copy for type code @@ -591,8 +604,8 @@ define_hook_label(apply_setup) OP(add,q) TW(REG(rcx),REG(rax)) # rax := PC # Now check the frame size. The caller will test the flags # again for another conditional jump. - OP(movs,bq,x) TW(BOF(-4,REG(rcx)),REG(rax)) # Extract frame size - OP(cmp,q) TW(REG(rax),REG(rdx)) # Compare to nargs+1 + OP(movs,bq,x) TW(BOF(-4,REG(rcx)),REG(r9)) # Extract frame size + OP(cmp,q) TW(REG(r9),REG(rdx)) # Compare to nargs+1 jne asm_apply_setup_fail ret @@ -643,8 +656,8 @@ define_hook_label(sc_apply) jmp IJMP(REG(rax)) # Invoke entry define_debugging_label(asm_sc_apply_generic) - OP(mov,q) TW(IMM(HEX(14)),REG(rax)) - jmp scheme_to_interface + OP(mov,b) TW(IMM(HEX(14)),REG(r9b)) + jmp scheme_to_interface define(define_apply_fixed_size, `declare_alignment(2) @@ -663,7 +676,7 @@ define_hook_label(sc_apply_size_$1) asm_sc_apply_generic_$1: OP(mov,q) TW(IMM($1),REG(rdx)) - OP(mov,b) TW(IMM(HEX(14)),REG(al)) + OP(mov,b) TW(IMM(HEX(14)),REG(r9b)) jmp scheme_to_interface') define_apply_fixed_size(1) @@ -683,7 +696,6 @@ define_apply_fixed_size(8) declare_alignment(2) asm_generic_return_rax: - OP(mov,q) TW(REG(rax),QOF(REGBLOCK_VAL(),regs)) OP(pop,q) REG(rcx) OP(and,q) TW(rmask,REG(rcx)) jmp IJMP(REG(rcx)) # Invoke return @@ -739,7 +751,7 @@ asm_generic_$1_fix: asm_generic_$1_fail: OP(push,q) REG(rdx) - OP(mov,b) TW(IMM(HEX($2)),REG(al)) + OP(mov,b) TW(IMM(HEX($2)),REG(r9b)) jmp scheme_to_interface') define(define_unary_predicate, @@ -769,7 +781,7 @@ asm_generic_$1_fix: asm_generic_$1_fail: OP(push,q) REG(rdx) - OP(mov,b) TW(IMM(HEX($2)),REG(al)) + OP(mov,b) TW(IMM(HEX($2)),REG(r9b)) jmp scheme_to_interface') define(define_binary_operation, @@ -810,7 +822,7 @@ asm_generic_$1_fix: asm_generic_$1_fail: OP(push,q) REG(rbx) OP(push,q) REG(rdx) - OP(mov,b) TW(IMM(HEX($2)),REG(al)) + OP(mov,b) TW(IMM(HEX($2)),REG(r9b)) jmp scheme_to_interface') define(define_binary_predicate, @@ -851,7 +863,7 @@ asm_generic_$1_fix: asm_generic_$1_fail: OP(push,q) REG(rbx) OP(push,q) REG(rdx) - OP(mov,b) TW(IMM(HEX($2)),REG(al)) + OP(mov,b) TW(IMM(HEX($2)),REG(r9b)) jmp scheme_to_interface') # Division is hairy. I'm not sure whether this will do the right @@ -933,7 +945,7 @@ asm_generic_divide_flo_by_flo: asm_generic_divide_fail: OP(push,q) REG(rbx) OP(push,q) REG(rdx) - OP(mov,b) TW(IMM(HEX(23)),REG(al)) + OP(mov,b) TW(IMM(HEX(23)),REG(r9b)) jmp scheme_to_interface define_unary_operation(decrement,22,sub,subsd) diff --git a/src/microcode/cmpintmd/x86-64.c b/src/microcode/cmpintmd/x86-64.c index 5c8563572..26b3cd9e0 100644 --- a/src/microcode/cmpintmd/x86-64.c +++ b/src/microcode/cmpintmd/x86-64.c @@ -256,7 +256,7 @@ write_uuo_target (insn_t * target, SCHEME_OBJECT * saddr) } #define BYTES_PER_TRAMPOLINE_ENTRY_PADDING 4 -#define OBJECTS_PER_TRAMPOLINE_ENTRY 3 +#define OBJECTS_PER_TRAMPOLINE_ENTRY 4 #define RSI_TRAMPOLINE_TO_INTERFACE_OFFSET \ ((COMPILER_REGBLOCK_N_FIXED + (2 * COMPILER_HOOK_SIZE)) \ @@ -285,11 +285,12 @@ bool store_trampoline_insns (insn_t * entry, uint8_t code) { (* ((int64_t *) (&entry[0]))) = 8; - (entry[8]) = 0xb0; /* MOVB RAX,imm8 */ - (entry[9]) = code; - (entry[10]) = 0xff; /* JMP r/m64 */ - (entry[11]) = 0xa6; /* disp32(RSI) */ - (* ((uint32_t *) (&entry[12]))) = RSI_TRAMPOLINE_TO_INTERFACE_OFFSET; + (entry[8]) = 0x41; /* MOVB R9,imm8 */ + (entry[9]) = 0xb1; + (entry[10]) = code; + (entry[11]) = 0xff; /* JMP r/m64 */ + (entry[12]) = 0xa6; /* disp32(RSI) */ + (* ((uint32_t *) (&entry[13]))) = RSI_TRAMPOLINE_TO_INTERFACE_OFFSET; return (false); } diff --git a/src/microcode/cmpintmd/x86-64.h b/src/microcode/cmpintmd/x86-64.h index 6189ab156..579832eae 100644 --- a/src/microcode/cmpintmd/x86-64.h +++ b/src/microcode/cmpintmd/x86-64.h @@ -114,11 +114,12 @@ entry1 32 ... -4 -2 entry 0 08 00 00 00 00 00 00 00 - 8 MOV AL,code b0 - 10 JMP n(RSI) ff a6 - 16 + 8 MOVB R9,code 41 b1 + 11 JMP n(RSI) ff a6 + 17 + 24 - Distance from address in rcx to storage: 16. + Distance from address in rcx to storage: 24. */ -- 2.25.1