From 987d9624b221bac31f6413ff3a6fe17b5a90a2fb Mon Sep 17 00:00:00 2001 From: Taylor R Campbell <campbell@mumble.net> Date: Mon, 31 Dec 2018 21:08:22 +0000 Subject: [PATCH] Make entries point to _after_ the PC offset. This saves a jump in closure headers, and makes non-closure entries have a nice PC offset of 0 rather than an awkward PC offset of 8. However, this causes all indirect calls to have an additional offset of -8 in the addressing mode -- not clear yet how much this hurts. WARNING: This changes the amd64 compiled code interface so that new compiled code requires a new microcode and vice versa. Further, you must set compiler:cross-compiling? to #t to compile the system, because compiled code block offsets are now in a different place relative to compiled entries, so the native fasdumper of an old microcode can't handle compiled entries produced by a new compiler. --- src/compiler/machines/x86-64/lapgen.scm | 14 +-- src/compiler/machines/x86-64/machin.scm | 18 +-- src/compiler/machines/x86-64/rules1.scm | 22 ++-- src/compiler/machines/x86-64/rules3.scm | 149 +++++++++++------------- src/microcode/cmpauxmd/x86-64.m4 | 18 +-- src/microcode/cmpintmd/x86-64.c | 66 +++++------ src/microcode/cmpintmd/x86-64.h | 46 ++++---- 7 files changed, 163 insertions(+), 170 deletions(-) diff --git a/src/compiler/machines/x86-64/lapgen.scm b/src/compiler/machines/x86-64/lapgen.scm index df1e76c6f..44241a11a 100644 --- a/src/compiler/machines/x86-64/lapgen.scm +++ b/src/compiler/machines/x86-64/lapgen.scm @@ -114,8 +114,8 @@ USA. (set! *external-labels* (cons label *external-labels*)) (LAP (WORD U ,code) (BLOCK-OFFSET ,label) - (LABEL ,label) - (QUAD U 8))) + (QUAD U 0) + (LABEL ,label))) (define-integrable (make-code-word min max) (+ (* #x100 min) max)) @@ -218,11 +218,11 @@ USA. (move-to-alias-register! source (register-type target) target) (LAP)) -(define (load-pc-relative target label-expr offset) - (LAP (MOV Q ,target (@PCRO ,label-expr ,offset)))) +(define (load-pc-relative target label-expr) + (LAP (MOV Q ,target (@PCR ,label-expr)))) -(define (load-pc-relative-address target label-expr offset) - (LAP (LEA Q ,target (@PCRO ,label-expr ,offset)))) +(define (load-pc-relative-address target label-expr) + (LAP (LEA Q ,target (@PCR ,label-expr)))) (define (compare/register*register reg1 reg2) (cond ((register-alias reg1 'GENERAL) @@ -723,7 +723,7 @@ USA. (define (invoke-hook/reentry entry) (let ((label (generate-label 'HOOK-REENTRY))) - (LAP (LEA Q (R ,rbx) (@PCRO ,label 4)) ;Skip format word. + (LAP (LEA Q (R ,rbx) (@PCRO ,label 12)) ;Skip format word and PC offset. ,@(invoke-hook entry) (LABEL ,label)))) diff --git a/src/compiler/machines/x86-64/machin.scm b/src/compiler/machines/x86-64/machin.scm index 35e9bfbfd..96a1cfe92 100644 --- a/src/compiler/machines/x86-64/machin.scm +++ b/src/compiler/machines/x86-64/machin.scm @@ -88,14 +88,15 @@ USA. (define-integrable address-units-per-closure-manifest address-units-per-object) (define-integrable address-units-per-entry-format-code 4) (define-integrable address-units-per-closure-entry-count 4) -(define-integrable address-units-per-closure-padding 4) +(define-integrable address-units-per-closure-padding -4) -;;; Just a 64-bit offset and four bytes of padding. -(define-integrable address-units-per-closure-entry-instructions 12) +(define-integrable address-units-per-closure-pc-offset 8) +(define-integrable address-units-per-closure-entry-padding 4) (define-integrable address-units-per-closure-entry (+ address-units-per-entry-format-code - address-units-per-closure-entry-instructions)) + address-units-per-closure-pc-offset + address-units-per-closure-entry-padding)) ;;; Note: ;;; @@ -119,7 +120,7 @@ USA. (define (closure-first-offset nentries entry) (if (zero? nentries) 1 - (* (- nentries entry) closure-entry-size))) + (* (- nentries entry 1) closure-entry-size))) ;;; Given the number of entry points in a closure, return the distance ;;; in objects from the address of the manifest closure to the address @@ -128,9 +129,10 @@ USA. (define (closure-object-first-offset nentries) (if (zero? nentries) 1 ;One vector manifest. - ;; One object for the closure manifest, and one object for the - ;; leading entry count and the trailing padding. - (+ 2 (* nentries closure-entry-size)))) + ;; One object for the closure manifest, half an object for the + ;; leading entry count, and minus half an object for the trailing + ;; non-padding. + (+ 1 (* nentries closure-entry-size)))) ;;; Given the number of entries in a closure, and the indices of two ;;; entries, return the number of bytes separating the two entries. diff --git a/src/compiler/machines/x86-64/rules1.scm b/src/compiler/machines/x86-64/rules1.scm index 68614917f..899b89f1b 100644 --- a/src/compiler/machines/x86-64/rules1.scm +++ b/src/compiler/machines/x86-64/rules1.scm @@ -177,15 +177,14 @@ USA. (ASSIGN (REGISTER (? target)) (ENTRY:PROCEDURE (? label))) (load-pc-relative-address (target-register-reference target) - (rtl-procedure/external-label (label->object label)) - 0)) + (rtl-procedure/external-label (label->object label)))) (define-rule statement (ASSIGN (REGISTER (? target)) (ENTRY:CONTINUATION (? label))) (let* ((target (target-register-reference target)) (get-pc (generate-label 'GET-PC))) (LAP (CALL (@PCR ,get-pc)) - (JMP (@PCRO ,label 8)) + (JMP (@PCR ,label)) (LABEL ,get-pc) (POP Q ,target)))) @@ -198,8 +197,7 @@ USA. (load-pc-relative-address/typed (target-register-reference target) type (rtl-procedure/external-label - (label->object label)) - 0)) + (label->object label)))) (define-rule statement ;; This is an intermediate rule -- not intended to produce code. @@ -210,7 +208,7 @@ USA. (let* ((target (target-register-reference target)) (pushed (generate-label 'PUSHED))) (LAP (CALL (@PCR ,pushed)) - (JMP (@PCRO ,label 8)) + (JMP (@PCR ,label)) (LABEL ,pushed) (POP Q ,target) ,@(affix-type target type)))) @@ -222,21 +220,19 @@ USA. (assert (= type type-code:compiled-return)) (let ((pushed (generate-label 'PUSHED))) (LAP (CALL (@PCR ,pushed)) - (JMP (@PCRO ,label 8)) + (JMP (@PCR ,label)) (LABEL ,pushed) ,@(affix-type (INST-EA (@R 4)) type)))) (define-rule statement (ASSIGN (REGISTER (? target)) (VARIABLE-CACHE (? name))) (load-pc-relative (target-register-reference target) - (free-reference-label name) - 0)) + (free-reference-label name))) (define-rule statement (ASSIGN (REGISTER (? target)) (ASSIGNMENT-CACHE (? name))) (load-pc-relative (target-register-reference target) - (free-assignment-label name) - 0)) + (free-assignment-label name))) (define-rule statement (ASSIGN (REGISTER (? target)) (OBJECT->DATUM (CONSTANT (? constant)))) @@ -432,8 +428,8 @@ USA. (target (target-register-reference target))) (LAP (LEA Q ,target ,source)))) -(define (load-pc-relative-address/typed target type label offset) - (LAP (LEA Q ,target (@PCRO ,label ,offset)) +(define (load-pc-relative-address/typed target type label) + (LAP (LEA Q ,target (@PCR ,label)) ,@(affix-type target type)) #| ;++ This is pretty horrid, especially since it happens for every diff --git a/src/compiler/machines/x86-64/rules3.scm b/src/compiler/machines/x86-64/rules3.scm index 3193f00e6..c240ae356 100644 --- a/src/compiler/machines/x86-64/rules3.scm +++ b/src/compiler/machines/x86-64/rules3.scm @@ -62,67 +62,61 @@ USA. (INVOCATION:APPLY (? frame-size) (? continuation)) continuation (expect-no-exit-interrupt-checks) - (let ((generic (generate-label 'GENERIC))) - (LAP ,@(clear-map!) - (POP Q (R ,rbx)) - #| - (MOV Q (R ,rdx) (&U ,frame-size)) - ,@(invoke-interface code:compiler-apply) - |# - #| - ,@(case frame-size - ((1) (invoke-hook entry:compiler-shortcircuit-apply-size-1)) - ((2) (invoke-hook entry:compiler-shortcircuit-apply-size-2)) - ((3) (invoke-hook entry:compiler-shortcircuit-apply-size-3)) - ((4) (invoke-hook entry:compiler-shortcircuit-apply-size-4)) - ((5) (invoke-hook entry:compiler-shortcircuit-apply-size-5)) - ((6) (invoke-hook entry:compiler-shortcircuit-apply-size-6)) - ((7) (invoke-hook entry:compiler-shortcircuit-apply-size-7)) - ((8) (invoke-hook entry:compiler-shortcircuit-apply-size-8)) - (else - (LAP (MOV Q (R ,rdx) (&U ,frame-size)) - ,@(invoke-hook entry:compiler-shortcircuit-apply)))) - |# - #| - (POP Q (R ,rcx)) ;Pop tagged entry into RCX. - (MOV Q (R ,rax) (R ,rcx)) ;Copy tagged entry into RAX. - (SHR Q (R ,rax) (&U ,scheme-datum-width)) ;Select tag in RAX. - (AND Q (R ,rcx) (R ,regnum:datum-mask)) ;Select datum in RCX. - (CMP B (R ,rax) (&U ,(ucode-type COMPILED-ENTRY))) ;Check tag. - (JNE (@PCR ,generic)) ;Bail if not compiled entry. - (CMP B (@RO ,rcx -4) (&U ,frame-size)) ;Check arity. - (JNE (@PCR ,generic)) ;Bail if not exact arity match. - (MOV Q (R ,rax) (@R ,rcx)) ;Load offset into RAX. - (ADD Q (R ,rax) (R ,rcx)) ;Add offset to entry address in RAX. - (JMP (R ,rax)) - (LABEL ,generic) - ,@(invoke-hook entry:compiler-shortcircuit-apply) - |# - ,@(case frame-size - ((1) (invoke-hook/subroutine entry:compiler-apply-setup-size-1)) - ((2) (invoke-hook/subroutine entry:compiler-apply-setup-size-2)) - ((3) (invoke-hook/subroutine entry:compiler-apply-setup-size-3)) - ((4) (invoke-hook/subroutine entry:compiler-apply-setup-size-4)) - ((5) (invoke-hook/subroutine entry:compiler-apply-setup-size-5)) - ((6) (invoke-hook/subroutine entry:compiler-apply-setup-size-6)) - ((7) (invoke-hook/subroutine entry:compiler-apply-setup-size-7)) - ((8) (invoke-hook/subroutine entry:compiler-apply-setup-size-8)) - (else - (LAP (MOV Q (R ,rdx) (&U ,frame-size)) - ,@(invoke-hook/subroutine entry:compiler-apply-setup)))) - (JMP (R ,rax))))) + (LAP ,@(clear-map!) + (POP Q (R ,rbx)) + #| + (MOV Q (R ,rdx) (&U ,frame-size)) + ,@(invoke-interface code:compiler-apply) + |# + #| + ,@(case frame-size + ((1) (invoke-hook entry:compiler-shortcircuit-apply-size-1)) + ((2) (invoke-hook entry:compiler-shortcircuit-apply-size-2)) + ((3) (invoke-hook entry:compiler-shortcircuit-apply-size-3)) + ((4) (invoke-hook entry:compiler-shortcircuit-apply-size-4)) + ((5) (invoke-hook entry:compiler-shortcircuit-apply-size-5)) + ((6) (invoke-hook entry:compiler-shortcircuit-apply-size-6)) + ((7) (invoke-hook entry:compiler-shortcircuit-apply-size-7)) + ((8) (invoke-hook entry:compiler-shortcircuit-apply-size-8)) + (else + (LAP (MOV Q (R ,rdx) (&U ,frame-size)) + ,@(invoke-hook entry:compiler-shortcircuit-apply)))) + |# + #| + (POP Q (R ,rcx)) ;Pop tagged entry into RCX. + (MOV Q (R ,rax) (R ,rcx)) ;Copy tagged entry into RAX. + (SHR Q (R ,rax) (&U ,scheme-datum-width)) ;Select tag in RAX. + (AND Q (R ,rcx) (R ,regnum:datum-mask)) ;Select datum in RCX. + (CMP B (R ,rax) (&U ,(ucode-type COMPILED-ENTRY))) ;Check tag. + (JNE (@PCR ,generic)) ;Bail if not compiled entry. + (CMP B (@RO ,rcx -4) (&U ,frame-size)) ;Check arity. + (JNE (@PCR ,generic)) ;Bail if not exact arity match. + (MOV Q (R ,rax) (@RO ,rcx -8)) ;Load offset into RAX. + (ADD Q (R ,rax) (R ,rcx)) ;Add offset to entry address in RAX. + (JMP (R ,rax)) + (LABEL ,generic) + ,@(invoke-hook entry:compiler-shortcircuit-apply) + |# + ,@(case frame-size + ((1) (invoke-hook/subroutine entry:compiler-apply-setup-size-1)) + ((2) (invoke-hook/subroutine entry:compiler-apply-setup-size-2)) + ((3) (invoke-hook/subroutine entry:compiler-apply-setup-size-3)) + ((4) (invoke-hook/subroutine entry:compiler-apply-setup-size-4)) + ((5) (invoke-hook/subroutine entry:compiler-apply-setup-size-5)) + ((6) (invoke-hook/subroutine entry:compiler-apply-setup-size-6)) + ((7) (invoke-hook/subroutine entry:compiler-apply-setup-size-7)) + ((8) (invoke-hook/subroutine entry:compiler-apply-setup-size-8)) + (else + (LAP (MOV Q (R ,rdx) (&U ,frame-size)) + ,@(invoke-hook/subroutine entry:compiler-apply-setup)))) + (JMP (R ,rax)))) (define-rule statement (INVOCATION:JUMP (? frame-size) (? continuation) (? label)) frame-size continuation (expect-no-exit-interrupt-checks) (LAP ,@(clear-map!) - ;; Every label for code we can jump to starts with a 64-bit - ;; offset to the actual code, always equal to 8. We could - ;; invent the bookkeeping to map the external label to the - ;; actual code label, but that's more work than I want to do - ;; right now. - (JMP (@PCRO ,label 8)))) + (JMP (@PCR ,label)))) (define-rule statement (INVOCATION:COMPUTED-JUMP (? frame-size) (? continuation)) @@ -132,7 +126,7 @@ USA. (LAP ,@(clear-map!) (POP Q (R ,rcx)) (AND Q (R ,rcx) (R ,regnum:datum-mask)) ;clear type code - (MOV Q (R ,rax) (@R ,rcx)) ;rax := PC offset + (MOV Q (R ,rax) (@RO ,rcx -8)) ;rax := PC offset (ADD Q (R ,rax) (R ,rcx)) ;rax := PC (JMP (R ,rax)))) @@ -180,8 +174,7 @@ USA. (set-address (begin (require-register! rdx) (load-pc-relative-address (INST-EA (R ,rdx)) - *block-label* - 0)))) + *block-label*)))) (delete-dead-registers!) (LAP ,@set-extension ,@set-address @@ -506,10 +499,11 @@ USA. (temp (temporary-register-reference)) (data-offset address-units-per-closure-manifest) (format-offset (+ data-offset address-units-per-closure-entry-count)) - (pc-offset (+ format-offset address-units-per-entry-format-code)) + (offset-offset (+ format-offset address-units-per-entry-format-code)) + (entry-offset (+ offset-offset address-units-per-closure-pc-offset)) (slots-offset - (+ pc-offset - address-units-per-closure-entry-instructions + (+ entry-offset + address-units-per-closure-entry-padding address-units-per-closure-padding)) (free-offset (+ slots-offset (* (+ 1 size) address-units-per-object)))) @@ -519,7 +513,7 @@ USA. (MOV L (@RO ,regnum:free-pointer ,data-offset) (&U 1)) ,@(generate-closure-entry procedure-label min max format-offset temp) ;; Load the address of the entry instruction into TARGET. - (LEA Q ,target (@RO ,regnum:free-pointer ,pc-offset)) + (LEA Q ,target (@RO ,regnum:free-pointer ,entry-offset)) ;; Bump FREE. ,@(with-signed-immediate-operand free-offset (lambda (addend) @@ -545,8 +539,10 @@ USA. (let* ((data-offset address-units-per-closure-manifest) (first-format-offset (+ data-offset address-units-per-closure-entry-count)) - (first-pc-offset + (first-offset-offset (+ first-format-offset address-units-per-entry-format-code)) + (first-entry-offset + (+ first-offset-offset address-units-per-closure-pc-offset)) (free-offset (+ first-format-offset (* nentries address-units-per-closure-entry) @@ -555,7 +551,7 @@ USA. (MOV Q (@R ,regnum:free-pointer) ,temp) (MOV L (@RO ,regnum:free-pointer ,data-offset) (&U ,nentries)) ,@(generate-entries entries first-format-offset) - (LEA Q ,target (@RO ,regnum:free-pointer ,first-pc-offset)) + (LEA Q ,target (@RO ,regnum:free-pointer ,first-entry-offset)) ,@(with-signed-immediate-operand free-offset (lambda (addend) (LAP (ADD Q (R ,regnum:free-pointer) ,addend)))) @@ -567,17 +563,16 @@ USA. (define (generate-closure-entry label min max offset temp) (let* ((procedure-label (rtl-procedure/external-label (label->object label))) - (addr-offset (+ offset address-units-per-entry-format-code)) - (padding-offset (+ addr-offset 8))) - padding-offset + (offset-offset (+ offset address-units-per-entry-format-code)) + (entry-offset (+ offset-offset address-units-per-closure-pc-offset))) (LAP (MOV L (@RO ,regnum:free-pointer ,offset) - (&U ,(make-closure-code-longword min max addr-offset))) - ;; Set temp := procedure-label + 8 - addr-offset. - (LEA Q ,temp (@PCR (- (+ ,procedure-label 8) ,addr-offset))) - ;; Set temp := procedure-label + 8 - addr-offset - free. + (&U ,(make-closure-code-longword min max entry-offset))) + ;; Set temp := procedure-label - entry-offset. + (LEA Q ,temp (@PCR (- ,procedure-label ,entry-offset))) + ;; Set temp := procedure-label - entry-offset - free. (SUB Q ,temp (R ,regnum:free-pointer)) - ;; Store temp = procedure-label + 8 - (free + addr-offset). - (MOV Q (@RO ,regnum:free-pointer ,addr-offset) ,temp)))) + ;; Store temp = procedure-label - (free + entry-offset). + (MOV Q (@RO ,regnum:free-pointer ,offset-offset) ,temp)))) (define (generate/closure-header internal-label nentries) (let* ((rtl-proc (label->object internal-label)) @@ -591,13 +586,7 @@ USA. (MOV Q (R ,rax) (&U ,(make-non-pointer-literal type 0))) (OR Q (R ,rcx) (R ,rax)) (PUSH Q (R ,rcx)) - ;; Jump past a bogus faux offset. We need this because - ;; INVOCATION:JUMP jumps to the label + 8, and at the moment - ;; I haven't found a good way to make it skip the +8 part - ;; for closures. - (JMP (@PCRO ,internal-label 8)) - (LABEL ,internal-label) - (QUAD U 8))) + (LABEL ,internal-label))) (cond ((zero? nentries) (LAP (EQUATE ,external-label ,internal-label) ,@(simple-procedure-header diff --git a/src/microcode/cmpauxmd/x86-64.m4 b/src/microcode/cmpauxmd/x86-64.m4 index 38dbabaf6..a5eb18e48 100644 --- a/src/microcode/cmpauxmd/x86-64.m4 +++ b/src/microcode/cmpauxmd/x86-64.m4 @@ -422,7 +422,7 @@ define_c_label(C_to_interface) define_hook_label(trampoline_to_interface) define_debugging_label(trampoline_to_interface) - OP(add,q) TW(IMM(24),REG(rcx)) # trampoline storage + OP(add,q) TW(IMM(16),REG(rcx)) # trampoline storage OP(mov,q) TW(REG(rcx),REG(rbx)) # argument in rbx jmp scheme_to_interface @@ -603,11 +603,11 @@ define_hook_label(apply_setup) # We now have a compiled entry, so it is safe to compute the # PC. Do that first, because it sets flags, which are used by # the caller. - OP(mov,q) TW(IND(REG(rcx)),REG(rax)) # rax := PC offset + OP(mov,q) TW(BOF(-8,REG(rcx)),REG(rax)) # rax := PC offset OP(add,q) TW(REG(rcx),REG(rax)) # rax := PC # Now check the frame size. The caller will test the flags # again for another conditional jump. - OP(movs,bq,x) TW(BOF(-4,REG(rcx)),REG(r9)) # Extract frame size + OP(movs,bq,x) TW(BOF(-12,REG(rcx)),REG(r9)) # Extract frame size OP(cmp,q) TW(REG(r9),REG(rdx)) # Compare to nargs+1 jne asm_apply_setup_fail ret @@ -627,9 +627,9 @@ define_hook_label(apply_setup_size_$1) OP(and,q) TW(rmask,REG(rcx)) # Select datum OP(cmp,b) TW(IMM(TC_COMPILED_ENTRY),REG(al)) jne asm_apply_setup_size_$1_fail - OP(mov,q) TW(IND(REG(rcx)),REG(rax)) # rax := PC offset + OP(mov,q) TW(BOF(-8,REG(rcx)),REG(rax)) # rax := PC offset OP(add,q) TW(REG(rcx),REG(rax)) # rax := PC - OP(cmp,b) TW(IMM($1),BOF(-4,REG(rcx))) # Compare frame size + OP(cmp,b) TW(IMM($1),BOF(-12,REG(rcx))) # Compare frame size jne asm_apply_setup_size_$1_fail # to nargs+1 ret @@ -655,10 +655,10 @@ define_hook_label(sc_apply) OP(and,q) TW(rmask,REG(rcx)) # Select datum OP(cmp,b) TW(IMM(TC_COMPILED_ENTRY),REG(al)) jne asm_sc_apply_generic - OP(movs,bq,x) TW(BOF(-4,REG(rcx)),REG(rax)) # Extract frame size + OP(movs,bq,x) TW(BOF(-12,REG(rcx)),REG(rax)) # Extract frame size OP(cmp,q) TW(REG(rax),REG(rdx)) # Compare to nargs+1 jne asm_sc_apply_generic - OP(mov,q) TW(IND(REG(rcx)),REG(rax)) # rax := PC offset + OP(mov,q) TW(BOF(-8,REG(rcx)),REG(rax)) # rax := PC offset OP(add,q) TW(REG(rcx),REG(rax)) # rax := PC jmp IJMP(REG(rax)) # Invoke entry @@ -675,9 +675,9 @@ define_hook_label(sc_apply_size_$1) OP(and,q) TW(rmask,REG(rcx)) # Select datum OP(cmp,b) TW(IMM(TC_COMPILED_ENTRY),REG(al)) jne asm_sc_apply_generic_$1 - OP(cmp,b) TW(IMM($1),BOF(-4,REG(rcx))) # Compare frame size + OP(cmp,b) TW(IMM($1),BOF(-12,REG(rcx))) # Compare frame size jne asm_sc_apply_generic_$1 # to nargs+1 - OP(mov,q) TW(IND(REG(rcx)),REG(rax)) # rax := PC offset + OP(mov,q) TW(BOF(-8,REG(rcx)),REG(rax)) # rax := PC offset OP(add,q) TW(REG(rcx),REG(rax)) # rax := PC jmp IJMP(REG(rax)) # Invoke entry diff --git a/src/microcode/cmpintmd/x86-64.c b/src/microcode/cmpintmd/x86-64.c index 26b3cd9e0..195d75d09 100644 --- a/src/microcode/cmpintmd/x86-64.c +++ b/src/microcode/cmpintmd/x86-64.c @@ -37,19 +37,19 @@ extern void * newspace_to_tospace (void *); bool read_cc_entry_type (cc_entry_type_t * cet, insn_t * address) { - return (decode_old_style_format_word (cet, (((uint16_t *) address) [-2]))); + return (decode_old_style_format_word (cet, (((uint16_t *) address) [-6]))); } bool write_cc_entry_type (cc_entry_type_t * cet, insn_t * address) { - return (encode_old_style_format_word (cet, ((uint16_t *) address) - 2)); + return (encode_old_style_format_word (cet, (((uint16_t *) address) - 6))); } bool read_cc_entry_offset (cc_entry_offset_t * ceo, insn_t * address) { - uint16_t n = (((uint16_t *) address) [-1]); + uint16_t n = (((uint16_t *) address) [-5]); (ceo->offset) = (n >> 1); (ceo->continued_p) = ((n & 1) != 0); return (false); @@ -60,7 +60,7 @@ write_cc_entry_offset (cc_entry_offset_t * ceo, insn_t * address) { if (! ((ceo->offset) < 0x4000)) return (true); - (((uint16_t *) address) [-1]) + (((uint16_t *) address) [-5]) = (((ceo->offset) << 1) | ((ceo->continued_p) ? 1 : 0)); return (false); } @@ -69,11 +69,11 @@ insn_t * cc_return_address_to_entry_address (insn_t * pc) { if ((pc[0]) == 0xeb) /* JMP rel8 */ - return ((pc + 2) + (* ((int8_t *) &pc[1])) - 8); + return ((pc + 2) + (* ((int8_t *) &pc[1]))); else if ((pc[0]) == 0xe9) /* JMP rel32 */ - return ((pc + 5) + (* ((int32_t *) &pc[1])) - 8); + return ((pc + 5) + (* ((int32_t *) &pc[1]))); else - return (pc - 8); + return (pc); } /* Compiled closures */ @@ -110,7 +110,7 @@ read_compiled_closure_target (insn_t * start, reloc_ref_t * ref) /* If we're relocating, find where base was in the oldspace. */ if (ref) base += (ref->old_addr - ref->new_addr); - return (base + (* ((int64_t *) addr)) - 8); + return (base + (((int64_t *) addr)[-1])); } /* write_compiled_closure_target(target, start) @@ -124,8 +124,8 @@ void write_compiled_closure_target (insn_t * target, insn_t * start) { insn_t * addr = (start + CC_ENTRY_HEADER_SIZE); - (* ((int64_t *) addr)) = - (target - ((insn_t *) (tospace_to_newspace (addr))) + 8); + (((int64_t *) addr)[-1]) = + (target - ((insn_t *) (tospace_to_newspace (addr)))); } unsigned long @@ -152,20 +152,20 @@ compiled_closure_entry (insn_t * start) insn_t * compiled_closure_next (insn_t * start) { - return (start + CC_ENTRY_HEADER_SIZE + 12); + return (start + CC_ENTRY_HEADER_SIZE + 4); } SCHEME_OBJECT * skip_compiled_closure_padding (insn_t * start) { - /* The padding is the same size as the entry header (format word). */ - return ((SCHEME_OBJECT *) (start + CC_ENTRY_HEADER_SIZE)); + /* The last entry is _not_ padded, so undo the padding skip. */ + return ((SCHEME_OBJECT *) (start - 4)); } SCHEME_OBJECT compiled_closure_entry_to_target (insn_t * entry) { - return (MAKE_CC_ENTRY (entry + (* ((int64_t *) entry)) - 8)); + return (MAKE_CC_ENTRY (entry + (((int64_t *) entry)[-1]))); } /* Execution caches (UUO links) @@ -224,10 +224,9 @@ write_uuo_target (insn_t * target, SCHEME_OBJECT * saddr) But if the target is a compiled closure pointing into a block somewhere else, the block may not have been relocated yet and so we don't know where the PC will be in the newspace. */ - if ((* ((int64_t *) (newspace_to_tospace (target)))) == 8) + if ((((int64_t *) (newspace_to_tospace (target)))[-1]) == 0) { - insn_t * pc = (target + 8); - ptrdiff_t jmprel32_offset = (pc - (&addr[15])); + ptrdiff_t jmprel32_offset = (target - (&addr[15])); if ((INT32_MIN <= jmprel32_offset) && (jmprel32_offset <= INT32_MAX)) { (addr[10]) = 0xe9; /* JMP rel32 */ @@ -237,21 +236,22 @@ write_uuo_target (insn_t * target, SCHEME_OBJECT * saddr) { (addr[10]) = 0x48; /* MOV RAX,imm64 */ (addr[11]) = 0xb8; - (* ((insn_t **) (&addr[12]))) = (target + 8); + (* ((insn_t **) (&addr[12]))) = target; (addr[20]) = 0xff; /* JMP RAX */ (addr[21]) = 0xe0; } } else { - (addr[10]) = 0x48; /* MOV RAX,(RCX) */ + (addr[10]) = 0x48; /* MOV RAX,-8(RCX) */ (addr[11]) = 0x8b; - (addr[12]) = 0x01; - (addr[13]) = 0x48; /* ADD RAX,RCX */ - (addr[14]) = 0x01; - (addr[15]) = 0xc8; - (addr[16]) = 0xff; /* JMP RAX */ - (addr[17]) = 0xe0; + (addr[12]) = 0x41; + (addr[13]) = 0xf8; + (addr[14]) = 0x48; /* ADD RAX,RCX */ + (addr[15]) = 0x01; + (addr[16]) = 0xc8; + (addr[17]) = 0xff; /* JMP RAX */ + (addr[18]) = 0xe0; } } @@ -278,19 +278,19 @@ trampoline_entry_addr (SCHEME_OBJECT * block, unsigned long index) insn_t * trampoline_return_addr (SCHEME_OBJECT * block, unsigned long index) { - return ((trampoline_entry_addr (block, index)) + 8); + return (trampoline_entry_addr (block, index)); } bool store_trampoline_insns (insn_t * entry, uint8_t code) { - (* ((int64_t *) (&entry[0]))) = 8; - (entry[8]) = 0x41; /* MOVB R9,imm8 */ - (entry[9]) = 0xb1; - (entry[10]) = code; - (entry[11]) = 0xff; /* JMP r/m64 */ - (entry[12]) = 0xa6; /* disp32(RSI) */ - (* ((uint32_t *) (&entry[13]))) = RSI_TRAMPOLINE_TO_INTERFACE_OFFSET; + (((int64_t *) entry)[-1]) = 0; + (entry[0]) = 0x41; /* MOVB R9,imm8 */ + (entry[1]) = 0xb1; + (entry[2]) = code; + (entry[3]) = 0xff; /* JMP r/m64 */ + (entry[4]) = 0xa6; /* disp32(RSI) */ + (* ((uint32_t *) (&entry[5]))) = RSI_TRAMPOLINE_TO_INTERFACE_OFFSET; return (false); } diff --git a/src/microcode/cmpintmd/x86-64.h b/src/microcode/cmpintmd/x86-64.h index 579832eae..1d775e783 100644 --- a/src/microcode/cmpintmd/x86-64.h +++ b/src/microcode/cmpintmd/x86-64.h @@ -83,10 +83,10 @@ entry 8 symbol 2 zero 7 0x1A entry 8 MOV RCX,imm64 48 b9 <addr64> ; entry address - 18 MOV RAX,(RCX) 48 8b 01 - 21 ADD RAX,RCX 48 01 c8 - 24 JMP RAX ff e0 - 26 <padding> + 18 MOV RAX,-8(RCX) 48 8b 41 f8 + 22 ADD RAX,RCX 48 01 c8 + 25 JMP RAX ff e0 + 27 <padding> 32 <next cache> @@ -99,27 +99,31 @@ nicely. 8 <entry count> 12 <type/arity info> \__ format word 14 <gc offset> / -entry0 16 <offset> - 24 <padding> + 16 <pc offset> +entry0 24 <padding> 28 <type/arity info> 30 <gc offset> -entry1 32 ... - ... - 16 + 16*n <variables> + 32 <pc offset> +entry1 40 <padding> + 44 <type/arity info> + 46 <gc offset> + 48 <pc offset> +entry2 ... + 8 + 16*n <variables> - Trampoline encoding: - -8 <padding> - -4 <type/arity info> - -2 <gc offset> -entry 0 <offset> 08 00 00 00 00 00 00 00 - 8 MOVB R9,code 41 b1 <code8> - 11 JMP n(RSI) ff a6 <n32> - 17 <padding> - 24 <trampoline dependent storage> + -16 <padding> + -12 <type/arity info> + -10 <gc offset> + -8 <offset> 08 00 00 00 00 00 00 00 +entry 0 MOVB R9,code 41 b1 <code8> + 3 JMP n(RSI) ff a6 <n32> + 9 <padding> + 16 <trampoline dependent storage> - Distance from address in rcx to storage: 24. + Distance from address in rcx to storage: 16. */ @@ -143,9 +147,11 @@ typedef uint8_t insn_t; /* Number of insn_t units preceding entry address in which header (type and offset info) is stored. */ -#define CC_ENTRY_HEADER_SIZE (CC_ENTRY_TYPE_SIZE + CC_ENTRY_OFFSET_SIZE) +#define CC_ENTRY_HEADER_SIZE \ + (CC_ENTRY_TYPE_SIZE + CC_ENTRY_OFFSET_SIZE + CC_ENTRY_PC_OFFSET_SIZE) #define CC_ENTRY_TYPE_SIZE 2 #define CC_ENTRY_OFFSET_SIZE 2 +#define CC_ENTRY_PC_OFFSET_SIZE 8 /* Number of insn_t units preceding entry header in which GC trap instructions are stored. This is an approximation: it matches only @@ -157,7 +163,7 @@ typedef uint8_t insn_t; #define CC_ENTRY_GC_TRAP_SIZE 6 #define CC_ENTRY_ADDRESS_PTR(e) (e) -#define CC_ENTRY_ADDRESS_PC(e) ((e) + (* ((const int64_t *) (e)))) +#define CC_ENTRY_ADDRESS_PC(e) ((e) + (((const int64_t *) (e))[-1])) #define CC_RETURN_ADDRESS_PTR(r) 0 #define CC_RETURN_ADDRESS_PC(r) (r) -- 2.25.1