From: Taylor R Campbell <campbell@mumble.net>
Date: Tue, 1 Dec 2009 20:24:50 +0000 (-0500)
Subject: Fix invocation of out-of-line FIXNUM-LSH hook for i386 and x86-64.
X-Git-Tag: 20100708-Gtk~223
X-Git-Url: https://birchwood-abbey.net/git?a=commitdiff_plain;h=9701ce2d79dfd8a2f3ebcb9f27b48308268a12ab;p=mit-scheme.git

Fix invocation of out-of-line FIXNUM-LSH hook for i386 and x86-64.

Add comment explaining why it is unnecessary to clear the register
map before invoking the hook.
---

diff --git a/src/compiler/machines/i386/rulfix.scm b/src/compiler/machines/i386/rulfix.scm
index 60616b687..98bb506c9 100644
--- a/src/compiler/machines/i386/rulfix.scm
+++ b/src/compiler/machines/i386/rulfix.scm
@@ -486,12 +486,26 @@ USA.
 
 (define-arithmetic-method 'FIXNUM-LSH fixnum-methods/2-args
   (lambda (target source1 source2 overflow?)
-    overflow?                           ;ignore
-    (prefix-instructions!
-     (LAP ,@(load-machine-register! source1 eax)
-          ,@(load-machine-register! source2 ecx)))
-    (rtl-target:=machine-register! target eax)
-    (LAP ,@(invoke-hook/call entry:compiler-fixnum-shift))))
+    overflow?				;ignore
+    ;++ This is suboptimal in the cases when SOURCE1 is stored only in
+    ;++ ecx or when SOURCE2 is stored only in eax, and either one is
+    ;++ dead (which is often the case).  In such cases, this generates
+    ;++ code to needlessly save the dead pseudo-registers into their
+    ;++ homes simply because they were stored in eax and ecx.  It'd be
+    ;++ nice to have a variant of LOAD-MACHINE-REGISTER! for multiple
+    ;++ sources and targets, which would compute a parallel assignment
+    ;++ using machine registers if available for temporaries, or the
+    ;++ homes of pseudo-registers if not.
+    (let* ((load-eax (load-machine-register! source1 eax))
+	   (load-ecx (load-machine-register! source2 ecx)))
+      (delete-dead-registers!)
+      (rtl-target:=machine-register! target eax)
+      (LAP ,@load-eax
+	   ,@load-ecx
+	   ;; Clearing the map is not necessary because the hook uses
+	   ;; only eax and ecx.  If the hook were changed to use other
+	   ;; registers, it would be necessary to clear the map first.
+	   ,@(invoke-hook/call entry:compiler-fixnum-shift)))))
 
 (define (do-division target source1 source2 result-reg)
   (prefix-instructions! (load-machine-register! source1 eax))
diff --git a/src/compiler/machines/x86-64/rulfix.scm b/src/compiler/machines/x86-64/rulfix.scm
index 81a1f5fb6..50211afc8 100644
--- a/src/compiler/machines/x86-64/rulfix.scm
+++ b/src/compiler/machines/x86-64/rulfix.scm
@@ -433,12 +433,26 @@ USA.
 
 (define-arithmetic-method 'FIXNUM-LSH fixnum-methods/2-args
   (lambda (target source1 source2 overflow?)
-    overflow?                           ;ignore
-    (prefix-instructions!
-     (LAP ,@(load-machine-register! source1 rax)
-          ,@(load-machine-register! source2 rcx)))
-    (rtl-target:=machine-register! target rax)
-    (LAP ,@(invoke-hook/call entry:compiler-fixnum-shift))))
+    overflow?				;ignore
+    ;++ This is suboptimal in the cases when SOURCE1 is stored only in
+    ;++ rcx or when SOURCE2 is stored only in rax, and either one is
+    ;++ dead (which is often the case).  In such cases, this generates
+    ;++ code to needlessly save the dead pseudo-registers into their
+    ;++ homes simply because they were stored in rax and rcx.  It'd be
+    ;++ nice to have a variant of LOAD-MACHINE-REGISTER! for multiple
+    ;++ sources and targets, which would compute a parallel assignment
+    ;++ using machine registers if available for temporaries, or the
+    ;++ homes of pseudo-registers if not.
+    (let* ((load-rax (load-machine-register! source1 rax))
+	   (load-rcx (load-machine-register! source2 rcx)))
+      (delete-dead-registers!)
+      (rtl-target:=machine-register! target rax)
+      (LAP ,@load-rax
+	   ,@load-rcx
+	   ;; Clearing the map is not necessary because the hook uses
+	   ;; only rax and rcx.  If the hook were changed to use other
+	   ;; registers, it would be necessary to clear the map first.
+	   ,@(invoke-hook/call entry:compiler-fixnum-shift)))))
 
 (define (do-division target source1 source2 result-reg)
   (prefix-instructions! (load-machine-register! source1 rax))
diff --git a/src/microcode/cmpauxmd/i386.m4 b/src/microcode/cmpauxmd/i386.m4
index ed8efedc3..c7623085b 100644
--- a/src/microcode/cmpauxmd/i386.m4
+++ b/src/microcode/cmpauxmd/i386.m4
@@ -1113,7 +1113,10 @@ define_jump_indirection(nofp_remainder,38)
 define_jump_indirection(nofp_modulo,39)
 
 # Input and output in eax, shift count in ecx, all detagged fixnums.
-# Return address is at the top of the stack.
+# Return address is at the top of the stack, untagged.  This hook must
+# not use any registers other than eax and ecx; if it does, the code
+# to generate calls to it, in compiler/machines/i386/rulfix.scm, must
+# clear the register map first.
 
 define_hook_label(fixnum_shift)
 	OP(sar,l)	TW(IMM(TC_LENGTH),REG(ecx))
diff --git a/src/microcode/cmpauxmd/x86-64.m4 b/src/microcode/cmpauxmd/x86-64.m4
index 49115b930..3066165a3 100644
--- a/src/microcode/cmpauxmd/x86-64.m4
+++ b/src/microcode/cmpauxmd/x86-64.m4
@@ -859,7 +859,10 @@ define_jump_indirection(generic_remainder,38)
 define_jump_indirection(generic_modulo,39)
 
 # Input and output in rax, shift count in rcx, all detagged fixnums.
-# Return address is at the top of the stack.
+# Return address is at the top of the stack, untagged.  This hook must
+# not use any registers other than rax and rcx; if it does, the code
+# to generate calls to it, in compiler/machines/x86-64/rulfix.scm,
+# must clear the register map first.
 
 define_hook_label(fixnum_shift)
 	OP(sar,q)	TW(IMM(TC_LENGTH),REG(rcx))