From 1f62f5c1dc6575221159605ab72e44adcafdfe7b Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Fri, 13 Nov 2009 14:52:06 -0500 Subject: [PATCH] Make x86 and x86-64 assembly hooks for out-of-line FIXNUM-LSH. I believe this now covers all cases, and is more reasonable than generating for each FIXNUM-LSH invocation the nearly two dozen instructions composing the assembly hooks. Fixes Savannah bug 27384. --- src/compiler/machines/i386/lapgen.scm | 3 +- src/compiler/machines/i386/rulfix.scm | 53 +++++-------------------- src/compiler/machines/x86-64/lapgen.scm | 3 +- src/compiler/machines/x86-64/rulfix.scm | 52 +++++------------------- src/microcode/cmpauxmd/i386.m4 | 45 +++++++++++++++++++++ src/microcode/cmpauxmd/x86-64.m4 | 47 ++++++++++++++++++++++ src/microcode/cmpintmd/i386.c | 1 + src/microcode/cmpintmd/i386.h | 1 + src/microcode/cmpintmd/x86-64.c | 2 + src/microcode/cmpintmd/x86-64.h | 1 + 10 files changed, 120 insertions(+), 88 deletions(-) diff --git a/src/compiler/machines/i386/lapgen.scm b/src/compiler/machines/i386/lapgen.scm index 988684439..125440a4f 100644 --- a/src/compiler/machines/i386/lapgen.scm +++ b/src/compiler/machines/i386/lapgen.scm @@ -672,7 +672,8 @@ USA. shortcircuit-apply-size-7 shortcircuit-apply-size-8 interrupt-continuation-2 - conditionally-serialize) + conditionally-serialize + fixnum-shift) ;; Operation tables diff --git a/src/compiler/machines/i386/rulfix.scm b/src/compiler/machines/i386/rulfix.scm index 550d04c83..39c472577 100644 --- a/src/compiler/machines/i386/rulfix.scm +++ b/src/compiler/machines/i386/rulfix.scm @@ -480,50 +480,17 @@ USA. (SAR W ,target (& ,scheme-type-width)) (IMUL W ,target ,temp)))))))) +;;; This calls an out-of-line assembly hook because it requires a lot +;;; of hair to deal with shift counts that exceed the datum width, and +;;; with negative arguments. + (define-arithmetic-method 'FIXNUM-LSH fixnum-methods/2-args - (let ((operate - (lambda (target source2) - ;; SOURCE2 is guaranteed not to be ECX because of the - ;; require-register! used below. - ;; TARGET can be ECX only if the rule has machine register - ;; ECX as the target, unlikely, but it must be handled! - (let ((with-target - (lambda (target) - (let ((jlabel (generate-label 'SHIFT-JOIN)) - (slabel (generate-label 'SHIFT-NEGATIVE)) - (zlabel (generate-label 'SHIFT-ZERO))) - (LAP (MOV W (R ,ecx) ,source2) - (SAR W (R ,ecx) (& ,scheme-type-width)) - (JS B (@PCR ,slabel)) - (CMP W (R ,ecx) (& ,scheme-datum-width)) - (JGE B (@PCR ,zlabel)) - (SHL W ,target (R ,ecx)) - (JMP B (@PCR ,jlabel)) - (LABEL ,zlabel) - (XOR W ,target ,target) - (JMP B (@PCR ,jlabel)) - (LABEL ,slabel) - (NEG W (R ,ecx)) - (CMP W (R ,ecx) (& ,scheme-datum-width)) - (JGE W (@PCR ,zlabel)) - (SHR W ,target (R ,ecx)) - ,@(word->fixnum target) - (LABEL ,jlabel)))))) - - (if (not (equal? target (INST-EA (R ,ecx)))) - (with-target target) - (let ((temp (temporary-register-reference))) - (LAP (MOV W ,temp ,target) - ,@(with-target temp) - (MOV W ,target ,temp)))))))) - (lambda (target source1 source2 overflow?) - overflow? ; ignored - (require-register! ecx) - (two-arg-register-operation operate - #f - target - source1 - source2)))) + (lambda (target source1 source2 overflow?) + (prefix-instructions! + (LAP ,@(load-machine-register! source1 eax) + ,@(load-machine-register! source2 ecx))) + (rtl-target:=machine-register! target eax) + (LAP ,@(invoke-hook/call entry:compiler-fixnum-shift)))) (define (do-division target source1 source2 result-reg) (prefix-instructions! (load-machine-register! source1 eax)) diff --git a/src/compiler/machines/x86-64/lapgen.scm b/src/compiler/machines/x86-64/lapgen.scm index 2a9546d11..f621482b7 100644 --- a/src/compiler/machines/x86-64/lapgen.scm +++ b/src/compiler/machines/x86-64/lapgen.scm @@ -780,7 +780,8 @@ USA. shortcircuit-apply-size-6 shortcircuit-apply-size-7 shortcircuit-apply-size-8 - interrupt-continuation-2) + interrupt-continuation-2 + fixnum-shift) ;; Operation tables diff --git a/src/compiler/machines/x86-64/rulfix.scm b/src/compiler/machines/x86-64/rulfix.scm index 863d17d7b..b74cc5042 100644 --- a/src/compiler/machines/x86-64/rulfix.scm +++ b/src/compiler/machines/x86-64/rulfix.scm @@ -427,51 +427,17 @@ USA. (SAR Q ,target (&U ,scheme-type-width)) (IMUL Q ,target ,temp)))))))) -;++ This is absurd -- it should just be an assembly hook. +;;; This calls an out-of-line assembly hook because it requires a lot +;;; of hair to deal with shift counts that exceed the datum width, and +;;; with negative arguments. (define-arithmetic-method 'FIXNUM-LSH fixnum-methods/2-args - (let ((operate - (lambda (target source2) - ;; SOURCE2 is guaranteed not to be RCX because of the - ;; require-register! used below. - ;; TARGET can be RCX only if the rule has machine register - ;; RCX as the target, unlikely, but it must be handled! - (let ((with-target - (lambda (target) - (let ((jlabel (generate-label 'SHIFT-JOIN)) - (slabel (generate-label 'SHIFT-NEGATIVE)) - (zlabel (generate-label 'SHIFT-ZERO))) - (LAP (MOV Q (R ,rcx) ,source2) - (SAR Q (R ,rcx) (&U ,scheme-type-width)) - (JS B (@PCR ,slabel)) - (CMP Q (R ,rcx) (& ,scheme-datum-width)) - (JGE B (@PCR ,zlabel)) - (SHL Q ,target (R ,rcx)) - (JMP B (@PCR ,jlabel)) - (LABEL ,zlabel) - (XOR Q ,target ,target) - (JMP B (@PCR ,jlabel)) - (LABEL ,slabel) - (NEG Q (R ,rcx)) - (CMP Q (R ,rcx) (& ,scheme-datum-width)) - (JGE B (@PCR ,zlabel)) - (SHR Q ,target (R ,rcx)) - ,@(word->fixnum target) - (LABEL ,jlabel)))))) - - (if (not (equal? target (INST-EA (R ,rcx)))) - (with-target target) - (let ((temp (temporary-register-reference))) - (LAP (MOV Q ,temp ,target) - ,@(with-target temp) - (MOV Q ,target ,temp)))))))) - (lambda (target source1 source2 overflow?) - overflow? ; ignored - (require-register! rcx) - (binary-register-operation operate #f 'GENERAL - (lambda (target source) - (LAP (MOV Q ,target ,source))) - target source1 source2)))) + (lambda (target source1 source2 overflow?) + (prefix-instructions! + (LAP ,@(load-machine-register! source1 rax) + ,@(load-machine-register! source2 rcx))) + (rtl-target:=machine-register! target rax) + (LAP ,@(invoke-hook/call entry:compiler-fixnum-shift)))) (define (do-division target source1 source2 result-reg) (prefix-instructions! (load-machine-register! source1 rax)) diff --git a/src/microcode/cmpauxmd/i386.m4 b/src/microcode/cmpauxmd/i386.m4 index fda7aa750..7f2cea671 100644 --- a/src/microcode/cmpauxmd/i386.m4 +++ b/src/microcode/cmpauxmd/i386.m4 @@ -290,6 +290,8 @@ define(TC_FIXNUM,26) define(TC_MANIFEST_NM_VECTOR,39) define(TC_COMPILED_ENTRY,40) +define(IMM_DETAGGED_FIXNUM_MINUS_ONE, IMM(eval((-1) * (1 << TC_LENGTH)))) + define(REGBLOCK_VAL,8) define(REGBLOCK_COMPILER_TEMP,16) define(REGBLOCK_LEXPR_ACTUALS,28) @@ -1051,6 +1053,49 @@ define_jump_indirection(nofp_quotient,37) define_jump_indirection(nofp_remainder,38) define_jump_indirection(nofp_modulo,39) +# Input and output in eax, shift count in ecx, all detagged fixnums. +# Return address is at the top of the stack. + +define_hook_label(fixnum_shift) + OP(sar,l) TW(IMM(TC_LENGTH),REG(ecx)) + js asm_fixnum_shift_negative + +asm_fixnum_lsh: + OP(cmp,l) TW(IMM(DATUM_LENGTH),REG(ecx)) + jge asm_fixnum_lsh_overflow + OP(shl,l) TW(REG(cl),REG(eax)) + ret + +asm_fixnum_lsh_overflow: + OP(xor,l) TW(REG(eax),REG(eax)) + ret + +asm_fixnum_shift_negative: + OP(neg,l) REG(ecx) + +asm_fixnum_rsh: + OP(cmp,l) TW(IMM(DATUM_LENGTH),REG(ecx)) + jge asm_fixnum_rsh_overflow + OP(sar,l) TW(REG(cl),REG(eax)) + + # Turn eax back into a detagged fixnum by masking off the low + # six bits. -1 has all bits set, but its detagged format has + # the low six bits clear. + OP(and,l) TW(IMM_DETAGGED_FIXNUM_MINUS_ONE,REG(eax)) + ret + +asm_fixnum_rsh_overflow: + OP(cmp,l) TW(IMM(0),REG(eax)) + js asm_fixnum_rsh_overflow_negative + +asm_fixnum_rsh_overflow_nonnegative: + OP(xor,l) TW(REG(eax),REG(eax)) + ret + +asm_fixnum_rsh_overflow_negative: + OP(mov,l) TW(IMM_DETAGGED_FIXNUM_MINUS_ONE,REG(eax)) + ret + IFDASM(`end') ### Edwin Variables: diff --git a/src/microcode/cmpauxmd/x86-64.m4 b/src/microcode/cmpauxmd/x86-64.m4 index 242c57eb6..66877605e 100644 --- a/src/microcode/cmpauxmd/x86-64.m4 +++ b/src/microcode/cmpauxmd/x86-64.m4 @@ -296,6 +296,7 @@ define(TC_COMPILED_ENTRY,40) # constants here. These are computed in terms of the parameters # above, and ordered lexicographically. +define(IMM_DETAGGED_FIXNUM_MINUS_ONE, `IMM(HEX(ffffffffffffffc0))') define(IMM_FALSE, `IMM(HEX(0000000000000000))') define(IMM_FIXNUM_0, `IMM(HEX(6800000000000000))') define(IMM_FLONUM_0, `IMM(HEX(1800000000000000))') @@ -857,6 +858,52 @@ define_jump_indirection(generic_quotient,37) define_jump_indirection(generic_remainder,38) define_jump_indirection(generic_modulo,39) +# Input and output in rax, shift count in rcx, all detagged fixnums. +# Return address is at the top of the stack. + +define_hook_label(fixnum_shift) + OP(sar,q) TW(IMM(TC_LENGTH),REG(rcx)) + js asm_fixnum_shift_negative + +asm_fixnum_lsh: + OP(cmp,q) TW(IMM(DATUM_LENGTH),REG(rcx)) + jge asm_fixnum_lsh_overflow + OP(shl,q) TW(REG(cl),REG(rax)) + ret + +asm_fixnum_lsh_overflow: + OP(xor,q) TW(REG(rax),REG(rax)) + ret + +asm_fixnum_shift_negative: + OP(neg,q) REG(rcx) + +asm_fixnum_rsh: + OP(cmp,q) TW(IMM(DATUM_LENGTH),REG(rcx)) + jge asm_fixnum_rsh_overflow + OP(sar,q) TW(REG(cl),REG(rax)) + + # Turn rax back into a detagged fixnum by masking off the low + # six bits. -1 has all bits set, but its detagged format has + # the low six bits clear. Use rcx as a temporary register + # because AND can't take a 64-bit immediate operand; only MOV + # can. + OP(mov,q) TW(IMM_DETAGGED_FIXNUM_MINUS_ONE,REG(rcx)) + OP(and,q) TW(REG(rcx),REG(rax)) + ret + +asm_fixnum_rsh_overflow: + OP(cmp,q) TW(IMM(0),REG(rax)) + js asm_fixnum_rsh_overflow_negative + +asm_fixnum_rsh_overflow_nonnegative: + OP(xor,q) TW(REG(rax),REG(rax)) + ret + +asm_fixnum_rsh_overflow_negative: + OP(mov,q) TW(IMM_DETAGGED_FIXNUM_MINUS_ONE,REG(rax)) + ret + IFDASM(`end') ### Edwin Variables: diff --git a/src/microcode/cmpintmd/i386.c b/src/microcode/cmpintmd/i386.c index 5bbc4697f..f6a3725b0 100644 --- a/src/microcode/cmpintmd/i386.c +++ b/src/microcode/cmpintmd/i386.c @@ -338,6 +338,7 @@ i386_reset_hook (void) SETUP_REGISTER (asm_serialize_cache); /* -7 */ else SETUP_REGISTER (asm_dont_serialize_cache); /* -7 */ + SETUP_REGISTER (asm_fixnum_shift); /* -6 */ #ifdef _MACH_UNIX { diff --git a/src/microcode/cmpintmd/i386.h b/src/microcode/cmpintmd/i386.h index fb278ac0d..918bc295c 100644 --- a/src/microcode/cmpintmd/i386.h +++ b/src/microcode/cmpintmd/i386.h @@ -267,6 +267,7 @@ extern int ASM_ENTRY_POINT (i386_interface_initialize) (void); extern void asm_assignment_trap (void); extern void asm_dont_serialize_cache (void); extern void asm_error (void); +extern void asm_fixnum_shift (void); extern void asm_generic_add (void); extern void asm_generic_decrement (void); extern void asm_generic_divide (void); diff --git a/src/microcode/cmpintmd/x86-64.c b/src/microcode/cmpintmd/x86-64.c index 3f364c144..322a61fa3 100644 --- a/src/microcode/cmpintmd/x86-64.c +++ b/src/microcode/cmpintmd/x86-64.c @@ -278,6 +278,8 @@ x86_64_reset_hook (void) renumber them now. */ SETUP_REGISTER (asm_interrupt_continuation_2); /* 39 */ + SETUP_REGISTER (asm_fixnum_shift); /* 40 */ + #ifdef _MACH_UNIX { vm_address_t addr; diff --git a/src/microcode/cmpintmd/x86-64.h b/src/microcode/cmpintmd/x86-64.h index 6b29d7db0..dcf2b5530 100644 --- a/src/microcode/cmpintmd/x86-64.h +++ b/src/microcode/cmpintmd/x86-64.h @@ -178,6 +178,7 @@ extern int ASM_ENTRY_POINT (x86_64_interface_initialize) (void); extern void asm_assignment_trap (void); extern void asm_dont_serialize_cache (void); extern void asm_error (void); +extern void asm_fixnum_shift (void); extern void asm_generic_add (void); extern void asm_generic_decrement (void); extern void asm_generic_divide (void); -- 2.25.1