From 44c8aaccd14c44f371d1d09e3c0f4c980b245ce1 Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Sat, 29 Dec 2018 16:40:02 +0000 Subject: [PATCH] Simplify hook calls. No need for CALL or the stack to be involved: just load PC-relative address into RBX directly. Should shave off a few bytes of code. WARNING: This changes the amd64 compiled code interface so that new compiled code requires a new microcode and vice versa. --- src/compiler/machines/x86-64/lapgen.scm | 29 ++++++++++++------------ src/compiler/machines/x86-64/rules3.scm | 30 ++++++++++++++----------- src/compiler/machines/x86-64/rules4.scm | 9 ++++---- src/compiler/machines/x86-64/rulfix.scm | 2 +- src/microcode/cmpauxmd/x86-64.m4 | 7 ++++-- 5 files changed, 43 insertions(+), 34 deletions(-) diff --git a/src/compiler/machines/x86-64/lapgen.scm b/src/compiler/machines/x86-64/lapgen.scm index 653721a87..df1e76c6f 100644 --- a/src/compiler/machines/x86-64/lapgen.scm +++ b/src/compiler/machines/x86-64/lapgen.scm @@ -712,19 +712,20 @@ USA. (define-integrable (invoke-hook entry) (LAP (JMP ,entry))) -(define (invoke-hook/call entry) - (let* ((get-pc (generate-label 'GET-PC)) - (hook-context (generate-label 'HOOK-CONTEXT))) - (LAP (CALL (@PCR ,get-pc)) - (LABEL ,get-pc) - ;; ADD r/m64,imm8 48 83 04 24 xx - ;; JMP r/m64 ff 86 yy yy yy yy - ;; Register displacement for JMP is always >=0x80, so can't - ;; fit in signed byte and thus must use 32-bit displacement. - ;; Hence xx = 0x0b = 11. - (ADD Q (@R ,rsp) (& #x0b)) - (JMP ,entry) - (LABEL ,hook-context)))) +;; Invoke a hook that will pop an untagged return address off the stack +;; and jump to it with RET, just like a C subroutine. + +(define-integrable (invoke-hook/subroutine entry) + (LAP (CALL ,entry))) + +;; Invoke a hook that expects a compiled entry address in rbx and will +;; jump to it with JMP. + +(define (invoke-hook/reentry entry) + (let ((label (generate-label 'HOOK-REENTRY))) + (LAP (LEA Q (R ,rbx) (@PCRO ,label 4)) ;Skip format word. + ,@(invoke-hook entry) + (LABEL ,label)))) (define-integrable (invoke-interface code) (LAP (MOV B (R ,r9) (& ,code)) @@ -732,7 +733,7 @@ USA. (define-integrable (invoke-interface/call code) (LAP (MOV B (R ,r9) (& ,code)) - ,@(invoke-hook/call entry:compiler-scheme-to-interface/call))) + ,@(invoke-hook/reentry entry:compiler-scheme-to-interface/call))) (define-syntax define-entries (sc-macro-transformer diff --git a/src/compiler/machines/x86-64/rules3.scm b/src/compiler/machines/x86-64/rules3.scm index 150900e23..787efe41c 100644 --- a/src/compiler/machines/x86-64/rules3.scm +++ b/src/compiler/machines/x86-64/rules3.scm @@ -99,17 +99,17 @@ USA. ,@(invoke-hook entry:compiler-shortcircuit-apply) |# ,@(case frame-size - ((1) (LAP (CALL ,entry:compiler-apply-setup-size-1))) - ((2) (LAP (CALL ,entry:compiler-apply-setup-size-2))) - ((3) (LAP (CALL ,entry:compiler-apply-setup-size-3))) - ((4) (LAP (CALL ,entry:compiler-apply-setup-size-4))) - ((5) (LAP (CALL ,entry:compiler-apply-setup-size-5))) - ((6) (LAP (CALL ,entry:compiler-apply-setup-size-6))) - ((7) (LAP (CALL ,entry:compiler-apply-setup-size-7))) - ((8) (LAP (CALL ,entry:compiler-apply-setup-size-8))) + ((1) (invoke-hook/subroutine entry:compiler-apply-setup-size-1)) + ((2) (invoke-hook/subroutine entry:compiler-apply-setup-size-2)) + ((3) (invoke-hook/subroutine entry:compiler-apply-setup-size-3)) + ((4) (invoke-hook/subroutine entry:compiler-apply-setup-size-4)) + ((5) (invoke-hook/subroutine entry:compiler-apply-setup-size-5)) + ((6) (invoke-hook/subroutine entry:compiler-apply-setup-size-6)) + ((7) (invoke-hook/subroutine entry:compiler-apply-setup-size-7)) + ((8) (invoke-hook/subroutine entry:compiler-apply-setup-size-8)) (else (LAP (MOV Q (R ,rdx) (&U ,frame-size)) - (CALL ,entry:compiler-apply-setup)))) + ,@(invoke-hook/subroutine entry:compiler-apply-setup)))) (JNE (@PCR ,generic)) (JMP (R ,rax)) (LABEL ,generic) @@ -444,7 +444,7 @@ USA. (LAP ,@(make-external-label code-word label)) (let ((gc-label (generate-label))) (LAP (LABEL ,gc-label) - ,@(invoke-hook/call entry) + ,@(invoke-hook/reentry entry) ,@(make-external-label code-word label) ,@(interrupt-check gc-label checks)))))) @@ -457,6 +457,10 @@ USA. (define-rule statement (CONTINUATION-HEADER (? internal-label)) #| + ;; Note: This is wrong -- compiler-interrupt-continuation expects a + ;; compiled return address on the stack, but this will yield compiled + ;; entry addresses. If you uncomment this, prepare to deal with the + ;; consequences. (simple-procedure-header (continuation-code-word internal-label) internal-label entry:compiler-interrupt-continuation) @@ -679,7 +683,7 @@ USA. #| ,@(invoke-interface/call code:compiler-link) |# - ,@(invoke-hook/call entry:compiler-link) + ,@(invoke-hook/reentry entry:compiler-link) ,@(make-external-label (continuation-code-word #f) (generate-label)))) @@ -696,7 +700,7 @@ USA. #| ,@(invoke-interface/call code:compiler-link) |# - ,@(invoke-hook/call entry:compiler-link) + ,@(invoke-hook/reentry entry:compiler-link) ,@(make-external-label (continuation-code-word #f) (generate-label)))) @@ -745,7 +749,7 @@ USA. (@ROI ,rdx ,(* 2 address-units-per-object) ,rax ,address-units-per-object)) ;; Invoke linker - ,@(invoke-hook/call entry:compiler-link) + ,@(invoke-hook/reentry entry:compiler-link) ,@(make-external-label (continuation-code-word false) (generate-label)) ;; Increment counter and loop diff --git a/src/compiler/machines/x86-64/rules4.scm b/src/compiler/machines/x86-64/rules4.scm index 3c20a034b..61a361b47 100644 --- a/src/compiler/machines/x86-64/rules4.scm +++ b/src/compiler/machines/x86-64/rules4.scm @@ -45,9 +45,10 @@ USA. code:compiler-safe-reference-trap code:compiler-reference-trap)) |# - ,@(invoke-hook/call (if safe? - entry:compiler-safe-reference-trap - entry:compiler-reference-trap))))) + ,@(invoke-hook/reentry + (if safe? + entry:compiler-safe-reference-trap + entry:compiler-reference-trap))))) (define-rule statement (INTERPRETER-CALL:CACHE-ASSIGNMENT (? cont) (? extension) (? value)) @@ -63,7 +64,7 @@ USA. #| ,@(invoke-interface/call code:compiler-assignment-trap) |# - ,@(invoke-hook/call entry:compiler-assignment-trap)))) + ,@(invoke-hook/reentry entry:compiler-assignment-trap)))) (define-rule statement (INTERPRETER-CALL:CACHE-UNASSIGNED? (? cont) (? extension)) diff --git a/src/compiler/machines/x86-64/rulfix.scm b/src/compiler/machines/x86-64/rulfix.scm index 0eb803436..9a0a9ad98 100644 --- a/src/compiler/machines/x86-64/rulfix.scm +++ b/src/compiler/machines/x86-64/rulfix.scm @@ -473,7 +473,7 @@ USA. ;; Clearing the map is not necessary because the hook uses ;; only rax and rcx. If the hook were changed, it would be ;; necessary to clear the map first. - ,@(invoke-hook/call entry:compiler-fixnum-shift))))) + ,@(invoke-hook/subroutine entry:compiler-fixnum-shift))))) (define (do-division target source1 source2 result-reg) (prefix-instructions! (load-machine-register! source1 rax)) diff --git a/src/microcode/cmpauxmd/x86-64.m4 b/src/microcode/cmpauxmd/x86-64.m4 index 27e8ed1c9..f98d8426c 100644 --- a/src/microcode/cmpauxmd/x86-64.m4 +++ b/src/microcode/cmpauxmd/x86-64.m4 @@ -426,10 +426,13 @@ define_debugging_label(trampoline_to_interface) OP(mov,q) TW(REG(rcx),REG(rbx)) # argument in rbx jmp scheme_to_interface +# We used to CALL this to get the return address on the stack, but now +# we use RIP-relative addressing to load directly into %rbx -- which +# doesn't ruin the return address branch target prediction stack -- so +# that this is no longer needed. define_hook_label(scheme_to_interface_call) define_debugging_label(scheme_to_interface_call) - OP(pop,q) REG(rbx) # arg1 = ret. add - OP(add,q) TW(IMM(4),REG(rbx)) # Skip format info + nop # jmp scheme_to_interface # scheme_to_interface passes control from compiled Scheme code to a -- 2.25.1