Open-code floating-point primitives on AMD x86-64.

author Taylor R Campbell <campbell@mumble.net>

Wed, 11 Nov 2009 20:24:13 +0000 (15:24 -0500)

committer Taylor R Campbell <campbell@mumble.net>

Wed, 11 Nov 2009 20:24:13 +0000 (15:24 -0500)
author Taylor R Campbell <campbell@mumble.net>
Wed, 11 Nov 2009 20:24:13 +0000 (15:24 -0500)
committer Taylor R Campbell <campbell@mumble.net>
Wed, 11 Nov 2009 20:24:13 +0000 (15:24 -0500)
diff --git a/src/compiler/machines/x86-64/insmac.scm b/src/compiler/machines/x86-64/insmac.scm

index 9f8dede030f242ea192cf4b8271efbe5ee9616c5..559354bdd069e1247189df0fbf46f44292945182 100644 (file)
--- a/src/compiler/machines/x86-64/insmac.scm
+++ b/src/compiler/machines/x86-64/insmac.scm
@@ -74,15 +74,20 @@ USA.
    (sc-macro-transformer
     (lambda (form environment)
       environment
-     (if (syntax-match? '(IDENTIFIER ? SYMBOL) (cdr form))
+     (if (syntax-match? '(IDENTIFIER * SYMBOL) (cdr form))
          `(DEFINE (,(cadr form) EXPRESSION)
             (LET ((MATCH-RESULT (PATTERN-LOOKUP ,ea-database-name EXPRESSION)))
               (AND MATCH-RESULT
-                  ,(if (pair? (cddr form))
-                       `(LET ((EA (MATCH-RESULT)))
-                          (AND (MEMQ ',(caddr form) (EA/CATEGORIES EA))
-                               EA))
-                       `(MATCH-RESULT)))))
+                  ,(let ((categories (cddr form)))
+                     (if (pair? categories)
+                         `(LET ((EA (MATCH-RESULT)))
+                            (AND
+                             (OR
+                              ,@(map (lambda (category)
+                                       `(MEMQ ',category (EA/CATEGORIES EA)))
+                                     categories))
+                             EA))
+                         `(MATCH-RESULT))))))
          (ill-formed-syntax form)))))
  \f
  (define (parse-categories categories environment context)
@@ -90,7 +95,7 @@ USA.
    (if (not (and (pair? categories)
                 (eq? 'CATEGORIES (car categories))
                 (pair? (cdr categories))
-               (memq (cadr categories) '(REGISTER MEMORY))
+               (memq (cadr categories) '(REGISTER MEMORY XMM))
                 (null? (cddr categories))))
        (error "Malformed CATEGORIES for effective address rule:"
              categories
@@ -215,24 +220,38 @@ USA.
         (values tail 0))))
  
  (define (collect-prefix options tail environment)
-  (let loop ((options options) (operand #f) (register #f) (r/m #f))
+  (let loop ((options options) (operand #f) (register #f) (r/m #f) (float #f))
      (if (pair? options)
         (case (caar options)
-         ((OPERAND) (loop (cdr options) (cadar options) register r/m))
+         ((OPERAND) (loop (cdr options) (cadar options) register r/m float))
           ((OPCODE-REGISTER)
            (loop (cdr options)
                  operand
                  (or (not (pair? (cdar options))) (cadar options))
-                r/m))
+                r/m
+                float))
           ((ModR/M)
            ;; (ModR/M <r/m>), for fixed digits
            ;; (ModR/M <reg> <r/m>), for registers
-          (if (pair? (cddar options))
-              (loop (cdr options) operand (cadar options) (caddar options))
-              (loop (cdr options) operand #f (cadar options))))
+          (receive (register r/m)
+              (if (pair? (cddar options))
+                  (values (cadar options) (caddar options))
+                  (values #f (cadar options)))
+            (loop (cdr options) operand register r/m float)))
+         ((FLOAT)
+          ;; (FLOAT <scalar/packed> <single/double>)
+          (loop (cdr options) operand register r/m (cdar options)))
           (else (error "Bad instruction prefix option:" (car options))))
-       (let ((cons-prefix (close-syntax 'CONS-PREFIX environment)))
-         `(,cons-prefix ,operand ,register ,r/m ,tail)))))
+       (if float
+           (let ((cons-float-prefix
+                  (close-syntax 'CONS-FLOAT-PREFIX environment)))
+             (if operand
+                 (error "Float instructions can't have operand size prefix:"
+                        operand))
+             `(,cons-float-prefix ,register ,r/m ,(car float) ,(cadr float)
+                                  ,tail))
+           (let ((cons-prefix (close-syntax 'CONS-PREFIX environment)))
+             `(,cons-prefix ,operand ,register ,r/m ,tail))))))
  
  (define (collect-ModR/M field tail environment)
    (let ((digit-or-reg (car field))
diff --git a/src/compiler/machines/x86-64/instrf.scm b/src/compiler/machines/x86-64/instrf.scm

index bbad64cf67812003b04f8ef6bb0ac6e0dd7799fa..7e3021d0bf5b2c67aeb9f5cc4ce0374cd53c87c7 100644 (file)
--- a/src/compiler/machines/x86-64/instrf.scm
+++ b/src/compiler/machines/x86-64/instrf.scm
@@ -23,319 +23,689 @@ USA.
  
  |#
  
-;;;; Intel i387/i486 Instruction Set
+;;;; AMD x86-64 128-bit Media Instruction Set
  ;;; package: (compiler lap-syntaxer)
  
-(declare (usual-integrations))
+;;; The mnemonics here don't entirely match the ones in the AMD
+;;; manual, or in your typical x86-64 assembler.  These mnemonics try
+;;; to adhere to a convention of treating operand sizes, precisions,
+;;; and packed/scalar choices as arguments to a common mnemonic, where
+;;; it is sensible for there to be a choice.  Sometimes this is not
+;;; entirely clear, such as PSHUF (which works only with longword-size
+;;; (32-bit) operands) and PSHUFH/PSHUFL (which work only with
+;;; word-size (16-bit) operands).  And sometimes this doesn't work
+;;; very well (e.g., MOVQ).  Most instructions for floating-point
+;;; arithmetic have F suffixed to their names; e.g., rather than
+;;; ADDSS, ADDSD, ADDPS, and ADDPD, there's a single ADDF mnemonic, to
+;;; be used as (ADDF S S ...), (ADDF S D ...), &c.
+;;;
+;;; Would it have been better just to transcribe exactly the mnemonics
+;;; in the AMD manual?  Perhaps, and it might have caused fewer errors
+;;; in transcription, since there would be fewer different formats
+;;; that way.
  
-#|
+(declare (usual-integrations))
  \f
  (let-syntax
-    ((define-binary-flonum
-       (sc-macro-transformer
-       (lambda (form environment)
-         environment
-         (let ((mnemonic (list-ref form 1))
-               (pmnemonic (list-ref form 2))
-               (imnemonic (list-ref form 3))
-               (digit (list-ref form 4))
-               (opcode1 (list-ref form 5))
-               (opcode2 (list-ref form 6)))
-           `(begin
-              (define-instruction ,mnemonic
-                (((ST 0) (ST (? i)))
-                 (BYTE (8 #xd8)
-                       (8 (+ ,opcode1 i))))
-
-                (((ST (? i)) (ST 0))
-                 (BYTE (8 #xdc)
-                       (8 (+ ,opcode2 i))))
-
-                (()
-                 (BYTE (8 #xde)
-                       (8 (+ ,opcode2 1))))
-
-                ((D (? source mW))
-                 (BYTE (8 #xdc))
-                 (ModR/M ,digit source))
-
-                ((S (? source mW))
-                 (BYTE (8 #xd8))
-                 (ModR/M ,digit source)))
-
-              (define-instruction ,pmnemonic
-                (((ST (? i)) (ST 0))
-                 (BYTE (8 #xde)
-                       (8 (+ ,opcode2 i)))))
-
-              (define-instruction ,imnemonic
-                ((L (? source mW))
-                 (BYTE (8 #xda))
-                 (ModR/M ,digit source))
-
-                ((H (? source mW))
-                 (BYTE (8 #xde))
-                 (ModR/M ,digit source)))))))))
-
-  ;; The i486 book (and 387, etc.) has inconsistent instruction
-  ;; descriptions and opcode assignments for FSUB and siblings,
-  ;; and FDIV and siblings.
-  ;; FSUB ST(i),ST is described as replacing ST(i) with ST-ST(i)
-  ;; while the opcode described replaces ST(i) with ST(i)-ST.
-
-  ;; In the following, the F% forms follow the descriptions in the
-  ;; book, namely, F%SUB computes ST-ST(i) and F%SUBR computes
-  ;; ST(i)-ST, storing into their destination (first) argument.
-
-  ;; The %-less forms follow the opcodes and usual convention,
-  ;; namely FSUB computes destination (first) argument - source
-  ;; argument FSUBR computes source - destination.
-
-  (define-binary-flonum FADD   FADDP   FIADD   0 #xc0 #xc0)
-  (define-binary-flonum F%DIV  F%DIVP  F%IDIV  6 #xf0 #xf0)
-  (define-binary-flonum F%DIVR F%DIVPR F%IDIVR 7 #xf8 #xf8)
-  (define-binary-flonum FDIV   FDIVP   FIDIV   6 #xf0 #xf8)
-  (define-binary-flonum FDIVR  FDIVPR  FIDIVR  7 #xf8 #xf0)
-  (define-binary-flonum FMUL   FMULP   FIMUL   1 #xc8 #xc8)
-  (define-binary-flonum F%SUB  F%SUBP  F%ISUB  4 #xe0 #xe0)
-  (define-binary-flonum F%SUBR F%SUBPR F%ISUBR 5 #xe8 #xe8)
-  (define-binary-flonum FSUB   FSUBP   FISUB   4 #xe0 #xe8)
-  (define-binary-flonum FSUBR  FSUBPR  FISUBR  5 #xe8 #xe0))
+    ((define-flop-instruction
+      (sc-macro-transformer
+       (lambda (form environment)
+         environment                    ;ignore
+         (let ((mnemonic (cadr form))
+               (opcode (caddr form)))
+           `(define-instruction ,mnemonic
+              (((? p/s float-packed/scalar)
+                (? p float-precision)
+                (XMM (? target))
+                (? source xmm/m-ea))
+               (PREFIX (FLOAT p/s p) (ModR/M target source))
+               (BITS (8 #x0F)
+                     (8 ,opcode))
+               (ModR/M target source))))))))
+  (define-flop-instruction ADDF #x58)
+  (define-flop-instruction DIVF #x5E)
+  (define-flop-instruction MAXF #x5F)
+  (define-flop-instruction MINF #x5D)
+  (define-flop-instruction MULF #x59)
+  (define-flop-instruction SQRTF #x51)
+  (define-flop-instruction SUBF #x5C))
+
+(let-syntax ((define-packed-flop-instruction
+              (sc-macro-transformer
+               (lambda (form environment)
+                 environment            ;ignore
+                 (let ((mnemonic (cadr form))
+                       (opcode (caddr form)))
+                   `(define-instruction ,mnemonic
+                      ((P D (XMM (? target)) (? source xmm/m-ea))
+                       (PREFIX (ModR/M target source))
+                       (BITS (8 #x66)
+                             (8 #x0F)
+                             (8 #xD0))
+                       (ModR/M target source))
+
+                      ((P S (XMM (? target)) (? source xmm/m-ea))
+                       (PREFIX (ModR/M target source))
+                       (BITS (8 #xF2)
+                             (8 #x0F)
+                             (8 #xD0))
+                       (ModR/M target source))))))))
+  (define-packed-flop-instruction ADDSUBF       #xD0)
+  (define-packed-flop-instruction HADDF         #x7C)
+  (define-packed-flop-instruction HSUBF         #x7D))
  \f
-(define-trivial-instruction F2XM1 #xd9 #xf0)
-(define-trivial-instruction FABS  #xd9 #xe1)
-
-(define-instruction FBLD
-  (((? source mW))
-   (BYTE (8 #xd8))
-   (ModR/M 4 source)))
-
-(define-instruction FBSTP
-  (((? target mW))
-   (BYTE (8 #xdf))
-   (ModR/M 6 target)))
-
-(define-trivial-instruction FCHS   #xd9 #xe0)
-(define-trivial-instruction FCLEX  #x9b #xdb #xe2) ; = (FWAIT) (FNCLEX)
-(define-trivial-instruction FNCLEX #xdb #xe2)
-
-(let-syntax
-    ((define-flonum-comparison
-       (sc-macro-transformer
-       (lambda (form environment)
-         environment
-         (let ((mnemonic (cadr form))
-               (digit (caddr form))
-               (opcode (cadddr form)))
-           `(define-instruction ,mnemonic
-              (((ST 0) (ST (? i)))
-               (BYTE (8 #xd8)
-                     (8 (+ ,opcode i))))
-
-              (()
-               (BYTE (8 #xd8)
-                     (8 (+ ,opcode 1))))
-
-              ((D (? source mW))
-               (BYTE (8 #xdc))
-               (ModR/M ,digit source))
-
-              ((S (? source mW))
-               (BYTE (8 #xd8))
-               (ModR/M ,digit source))))))))
-
-  (define-flonum-comparison FCOM  2 #xd0)
-  (define-flonum-comparison FCOMP 3 #xd8))
-
-(define-trivial-instruction FCOMPP  #xde #xd9)
-(define-trivial-instruction FCOS    #xd9 #xff)
-(define-trivial-instruction FDECSTP #xd9 #xf6)
-
-(define-instruction FFREE
-  (((ST (? i)))
-   (BYTE (8 #xdd)
-        (8 (+ #xc0 i)))))
-
-(let-syntax
-    ((define-flonum-integer-comparison
-       (sc-macro-transformer
-       (lambda (form environment)
-         environment
-         (let ((mnemonic (cadr form))
-               (digit (caddr form)))
-           `(define-instruction ,mnemonic
-              ((L (? source mW))
-               (BYTE (8 #xda))
-               (ModR/M ,digit source))
-
-              ((H (? source mW))
-               (BYTE (8 #xde))
-               (ModR/M ,digit source))))))))
-
-  (define-flonum-integer-comparison FICOM  2)
-  (define-flonum-integer-comparison FICOMP 3))
+(let-syntax ((define-packed-bitwise-instruction
+              (sc-macro-transformer
+               (lambda (form environment)
+                 environment            ;ignore
+                 (let ((mnemonic (cadr form))
+                       (opcode (caddr form)))
+                   `(define-instruction ,mnemonic
+                      ((P
+                        (? p float-precision)
+                        (XMM (? target))
+                        (? source xmm/m-ea))
+                       (PREFIX (FLOAT 'P p) (ModR/M target source))
+                       (BITS (8 #x0F)
+                             (8 ,opcode))
+                       (ModR/M target source))))))))
+  (define-packed-bitwise-instruction ANDNF      #x55)
+  (define-packed-bitwise-instruction ANDF       #x54)
+  (define-packed-bitwise-instruction ORF        #x56)
+  (define-packed-bitwise-instruction XORF       #x57)
+  ;; Not really bitwise instruction, but these two fit the pattern.
+  (define-packed-bitwise-instruction UNPCKHF    #x15)
+  (define-packed-bitwise-instruction UNPCKLF    #x15))
+
+(define-instruction CMPF
+  (((? comparator float-comparator)
+    (? p/s float-packed/scalar)
+    (? p float-precision)
+    (XMM (? source1))
+    (? source2 xmm/m-ea))
+   (PREFIX (FLOAT p/s p) (ModR/M source1 source2))
+   (BITS (8 #x0F)
+         (8 #xC2))
+   (ModR/M source1 source2)
+   (BITS (8 comparator))))
+
+(let-syntax ((define-un/ordered-compare-instruction
+              (sc-macro-transformer
+               (lambda (form environment)
+                 environment            ;ignore
+                 (let ((mnemonic (cadr form))
+                       (opcode (caddr form)))
+                   `(define-instruction ,mnemonic
+                      ((D (XMM (? source1)) (? source2 xmm/m-ea))
+                       (PREFIX (ModR/M source1 source2))
+                       (BITS (8 #x66)
+                             (8 #x0F)
+                             (8 ,opcode))
+                       (ModR/M source1 source2))
+
+                      ((S (XMM (? source1)) (? source2 xmm/m-ea))
+                       (PREFIX (ModR/M source1 source2))
+                       (BITS (8 #x0F)
+                             (8 ,opcode))
+                       (ModR/M source1 source2))))))))
+  (define-un/ordered-compare-instruction COMISF #x2F)
+  (define-un/ordered-compare-instruction UCOMISF #x2E))
  \f
-(let-syntax
-    ((define-flonum-integer-memory
-       (sc-macro-transformer
-       (lambda (form environment)
-         environment
-         (let ((mnemonic (cadr form))
-               (digit1 (caddr form))
-               (digit2 (cadddr form)))
-           `(define-instruction ,mnemonic
-              ,@(if (not digit2)
-                    `()
-                    `(((Q (? source mW))
-                       (BYTE (8 #xdf))
-                       (ModR/M ,digit2 source))))
-
-              ((L (? source mW))
-               (BYTE (8 #xdb))
-               (ModR/M ,digit1 source))
-
-              ((H (? source mW))
-               (BYTE (8 #xdf))
-               (ModR/M ,digit1 source))))))))
-
-  (define-flonum-integer-memory FILD  0 5)
-  (define-flonum-integer-memory FIST  2 #f)
-  (define-flonum-integer-memory FISTP 3 7))
-
-(define-trivial-instruction FINCSTP #xd9 #xf7)
-(define-trivial-instruction FINIT   #x9b #xdb #xe3) ; = (FWAIT) (FNINT)
-(define-trivial-instruction FNINIT  #xdb #xe3)
-
-(let-syntax
-    ((define-flonum-memory
-       (sc-macro-transformer
-       (lambda (form environment)
-         environment
-         (let ((mnemonic (list-ref form 1))
-               (digit1 (list-ref form 2))
-               (digit2 (list-ref form 3))
-               (opcode1 (list-ref form 4))
-               (opcode2 (list-ref form 5)))
-           `(define-instruction ,mnemonic
-              (((ST (? i)))
-               (BYTE (8 ,opcode1)
-                     (8 (+ ,opcode2 i))))
-
-              ((D (? operand mW))
-               (BYTE (8 #xdd))
-               (ModR/M ,digit1 operand))
-
-              ((S (? operand mW))
-               (BYTE (8 #xd9))
-               (ModR/M ,digit1 operand))
-
-              ,@(if (not digit2)
-                    `()
-                    `(((X (? operand mW))
-                       (BYTE (8 #xdb))
-                       (ModR/M ,digit2 operand))))))))))
-
-  (define-flonum-memory FLD  0 5  #xd9 #xc0)
-  (define-flonum-memory FST  2 #f #xdd #xd0)
-  (define-flonum-memory FSTP 3 7  #xdd #xd8))
-
-(define-trivial-instruction FLD1   #xd9 #xe8)
-(define-trivial-instruction FLDL2T #xd9 #xe9)
-(define-trivial-instruction FLDL2E #xd9 #xea)
-(define-trivial-instruction FLDPI  #xd9 #xeb)
-(define-trivial-instruction FLDLG2 #xd9 #xec)
-(define-trivial-instruction FLDLN2 #xd9 #xed)
-(define-trivial-instruction FLDZ   #xd9 #xee)
+(let-syntax ((define-conversion-instruction
+              (sc-macro-transformer
+               (lambda (form environment)
+                 environment            ;ignore
+                 (let ((mnemonic (cadr form))
+                       (rules-definitions (cddr form)))
+                   `(define-instruction ,mnemonic
+                      ,@(append-map
+                         (lambda (rules-definition)
+                           (if (not (eq? (car rules-definition) 'DEFINE-RULES))
+                               (error "Malformed conversion rules definition:"
+                                      rules-definition))
+                           (let ((pattern (cadr rules-definition))
+                                 (prefix-options (caddr rules-definition))
+                                 (rules (cdddr rules-definition)))
+                             (map (lambda (rule)
+                                    (let ((conversion (car rule))
+                                          (bytes
+                                           (map (lambda (byte) `(8 ,byte))
+                                                (cdr rule))))
+                                      `((,conversion ,@pattern)
+                                        (PREFIX ,@prefix-options
+                                                (ModR/M reg ea))
+                                        (BITS ,@bytes)
+                                        (ModR/M reg ea))))
+                                  rules)))
+                         rules-definitions)))))))
+
+  (define-conversion-instruction CVTF
+    (define-rules ((XMM (? reg)) (? ea xmm/m-ea))
+        ()
+      (DQ->PD   #xF3 #x0F #xE6)
+      (DQ->PS        #x0F #x5B)
+      (PD->DQ   #xF2 #x0F #xE6)
+      (PD->PS   #x66 #x0F #x5A)
+      (PS->DQ   #x66 #x0F #x5B)
+      (PS->PD        #x0F #x5A)
+      (SD->SS   #xF2 #x0F #x5A)
+      (SS->SD   #xF3 #x0F #x5A))
+
+    ;++ SIZE can be only L or Q, not W.
+    (define-rules ((? size operand-size) (R (? reg)) (? ea xmm/m-ea))
+        ((OPERAND size))
+      (SD->SI   #xF2 #x0F #x2D)
+      (SS->SI   #xF3 #x0F #x2D))
+
+    (define-rules ((? size operand-size) (XMM (? reg)) (? ea r/m-ea))
+        ((OPERAND size))
+      (SI->SD   #xF2 #x0F #x2A)
+      (SI->SS   #xF3 #x0F #x2A)))
+
+  (define-conversion-instruction CVTFT  ;Convert Truncated
+    (define-rules ((XMM (? reg)) (? ea xmm/m-ea))
+        ()
+      (PD->DQ   #x66 #x0F #xE6)
+      (PS->DQ   #xF3 #x0F #x5B))
+    (define-rules ((? size operand-size) (R (? reg)) (? ea xmm/m-ea))
+        ()
+      (SD->SI   #xF2 #x0F #x2C)
+      (SS->SI   #xF3 #x0F #x2C))))
  \f
-(let-syntax
-    ((define-flonum-state
-       (sc-macro-transformer
-       (lambda (form environment)
-         environment
-         (let ((mnemonic (list-ref form 1))
-               (opcode (list-ref form 2))
-               (digit (list-ref form 3))
-               (mnemonic2 (list-ref form 4)))
-           `(begin
-              ,@(if (not mnemonic2)
-                    `()
-                    `((define-instruction ,mnemonic2
-                        (((? source mW))
-                         (BYTE (8 #x9b) ; (FWAIT)
-                               (8 ,opcode))
-                         (ModR/M ,digit source)))))
-
-              (define-instruction ,mnemonic
-                (((? source mW))
-                 (BYTE (8 ,opcode))
-                 (ModR/M ,digit source)))))))))
-
-  (define-flonum-state FNLDCW  #xd9 5 FLDCW)
-  (define-flonum-state FLDENV  #xd9 4 #f)
-  (define-flonum-state FNSTCW  #xd9 7 FSTCW)
-  (define-flonum-state FNSTENV #xd9 6 FSTENV)
-  (define-flonum-state FRSTOR  #xdb 4 #f)
-  (define-flonum-state FNSAVE  #xdd 6 FSAVE))
-
-(define-trivial-instruction FNOP    #xd9 #xd0)
-(define-trivial-instruction FPATAN  #xd9 #xf3)
-(define-trivial-instruction FPREM   #xd9 #xf8) ; truncating remainder
-(define-trivial-instruction FPREM1  #xd9 #xf5) ; IEEE remainder
-(define-trivial-instruction FPTAN   #xd9 #xf2)
-(define-trivial-instruction FRNDINT #xd9 #xfc)
-(define-trivial-instruction FSCALE  #xd9 #xfd)
-(define-trivial-instruction FSIN    #xd9 #xfe)
-(define-trivial-instruction FSINCOS #xd9 #xfb)
-(define-trivial-instruction FSQRT   #xd9 #xfa)
-
-(define-instruction FSTSW
-  (((? target mW))
-   (BYTE (8 #x9b)                      ; (FWAIT)
-        (8 #xdf))
-   (ModR/M 7 target))
-
-  (((R 0))
-   (BYTE (8 #x9b)                      ; (FWAIT)
-        (8 #xdf)
-        (8 #xe0))))
-
-(define-instruction FNSTSW
-  (((? target mW))
-   (BYTE (8 #xdf))
-   (ModR/M 7 target))
-
-  (((R 0))
-   (BYTE (8 #xdf)
-        (8 #xe0))))
+(define-instruction EXTRQ               ;SSE4A only
+  (((? target xmm-ea)
+    (&U (? size unsigned-5bit))
+    (&U (? position unsigned-5bit)))
+   (PREFIX (ModR/M target))
+   (BITS (8 #x66)
+         (8 #x0F)
+         (8 #x78))
+   (ModR/M 0 target)
+   (BITS (8 size UNSIGNED)
+         (8 position UNSIGNED)))
+
+  (((XMM (? target)) (? source xmm-ea))
+   (PREFIX (ModR/M target source))
+   (BITS (8 #x66)
+         (8 #x0F)
+         (8 #x79))
+   (ModR/M target source)))
+
+(define-instruction INSERTQ             ;SSE4A only
+  (((XMM (? target))
+    (? source xmm-ea)
+    (&U (? size unsigned-5bit))
+    (&U (? position unsigned-5bit)))
+   (PREFIX (ModR/M target source))
+   (BITS (8 #xF2)
+         (8 #x0F)
+         (8 #x78))
+   (ModR/M target source)
+   (BITS (8 size UNSIGNED)
+         (8 position UNSIGNED)))
+
+  (((XMM (? target)) (? source xmm-ea))
+   (PREFIX (ModR/M target source))
+   (BITS (8 #xF2)
+         (8 #x0F)
+         (8 #x79))
+   (ModR/M target source)))
+
+(define-instruction FXRSTOR
+  (((? source m-ea))
+   (PREFIX (ModR/M source))
+   (BITS (8 #x0F)
+         (8 #xAE))
+   (ModR/M 1 source)))
+
+(define-instruction FXSAVE
+  (((? target m-ea))
+   (PREFIX (ModR/M target))
+   (BITS (8 #x0F)
+         (8 #xAE))
+   (ModR/M 0 target)))
+
+;;; How does LDDQU differ from MOVDQU?
+
+(define-instruction LDDQU               ;Load Double Quadword Unaligned
+  (((XMM (? target)) (? source m-ea))
+   (PREFIX (ModR/M target source))
+   (BITS (8 #xF2)
+         (8 #x0F)
+         (8 #xF0))
+   (ModR/M target source)))
+
+(define-instruction LDMXCSR
+  (((? source m-ea))
+   (PREFIX (ModR/M source))
+   (BITS (8 #x0F)
+         (8 #xAE))
+   (ModR/M 2 source)))
  \f
-(define-trivial-instruction FTST #xd9 #xe4)
-
-(let-syntax
-    ((define-binary-flonum
-       (sc-macro-transformer
-       (lambda (form environment)
-         environment
-         (let ((mnemonic (cadr form))
-               (opcode1 (caddr form))
-               (opcode2 (cadddr form)))
-           `(define-instruction ,mnemonic
-              (((ST 0) (ST (? i)))
-               (BYTE (8 ,opcode1)
-                     (8 (+ ,opcode2 i))))
-
-              (()
-               (BYTE (8 ,opcode1)
-                     (8 (+ ,opcode2 1))))))))))
-
-  (define-binary-flonum FUCOM  #xdd #xe0)
-  (define-binary-flonum FUCOMP #xdd #xe8)
-  (define-binary-flonum FXCH   #xd9 #xc8))
-
-(define-trivial-instruction FUCOMPP #xda #xe9)
-(define-trivial-instruction FWAIT   #x9b)
-(define-trivial-instruction FXAM    #xd9 #xe5)
-(define-trivial-instruction FXTRACT #xd9 #xf4)
-(define-trivial-instruction FYL2X   #xd9 #xf1)
-(define-trivial-instruction FYL2XP1 #xd9 #xf9)
-
-|#
-\ No newline at end of file
+(define-instruction MASKMOVDQU
+  (((@R 7) (XMM (? source1)) (? source2 xmm-ea))
+   (PREFIX (ModR/M source1 source2))
+   (BITS (8 #x66)
+         (8 #x0F)
+         (8 #xF7))
+   (ModR/M source1 source2)))
+
+(define-instruction MOVAF               ;Aligned
+  ((P (? p float-precision) (XMM (? target)) (? source xmm/m-ea))
+   (PREFIX (FLOAT 'P p) (ModR/M target source))
+   (BITS (8 #x0F)
+         (8 #x28))
+   (ModR/M target source))
+
+  ((P (? p float-precision) (? target xmm/m-ea) (XMM (? source)))
+   (PREFIX (FLOAT 'P p) (ModR/M source target))
+   (BITS (8 #x0F)
+         (8 #x29))
+   (ModR/M source target)))
+
+(define-instruction MOVD
+  ;++ SIZE can be only L or Q, not W.
+  (((? size operand-size) (XMM (? target)) (? source r/m-ea))
+   (PREFIX (OPERAND size) (ModR/M target source))
+   (BITS (8 #x66)
+         (8 #x0F)
+         (8 #x6E))
+   (ModR/M target source))
+
+  (((? size operand-size) (? target r/m-ea) (XMM (? source)))
+   (PREFIX (OPERAND size) (ModR/M source target))
+   (BITS (8 #x66)
+         (8 #x0F)
+         (8 #x7E))
+   (ModR/M source target)))
+
+(define-instruction MOVDDUP
+  (((XMM (? target)) (? source xmm/m-ea))
+   (PREFIX (ModR/M target source))
+   (BITS (8 #xF2)
+         (8 #x0F)
+         (8 #x12))
+   (ModR/M target source)))
+\f
+(let-syntax ((define-move-dq-instruction
+              (sc-macro-transformer
+               (lambda (form environment)
+                 environment            ;ignore
+                 (let ((mnemonic (cadr form))
+                       (prefix (caddr form)))
+
+                   `(define-instruction ,mnemonic
+                      (((XMM (? target)) (? source xmm/m-ea))
+                       (PREFIX (ModR/M target source))
+                       (BITS (8 ,prefix)
+                             (8 #x0F)
+                             (8 #x6F))
+                       (ModR/M target source))
+
+                      (((? target xmm/m-ea) (XMM (? source)))
+                       (PREFIX (ModR/M source target))
+                       (BITS (8 ,prefix)
+                             (8 #x0F)
+                             (8 #x7F))
+                       (ModR/M source target))))))))
+  (define-move-dq-instruction MOVDQA #x66)
+  (define-move-dq-instruction MOVDQU #xF3))
+
+(let-syntax ((define-move-high/low-instructions
+              (sc-macro-transformer
+               (lambda (form environment)
+                 (let ((MOVxy (cadr form))
+                       (MOVx (caddr form))
+                       (opcode1 (cadddr form))
+                       (opcode2 (car (cddddr form))))
+
+                   `(begin
+                      (define-instruction ,MOVxy
+                        ((P S (XMM (? target)) (? source xmm-ea))
+                         (PREFIX (ModR/M target source))
+                         (BITS (8 #x0F)
+                               (8 ,opcode1))
+                         (ModR/M target source)))
+
+                      (define-instruction ,MOVx
+                        ((P D (XMM (? target)) (? source m-ea))
+                         (PREFIX (ModR/M target source))
+                         (BITS (8 #x66)
+                               (8 #x0F)
+                               (8 ,opcode2))
+                         (ModR/M target source))
+
+                        ((P D (? target m-ea) (XMM (? source)))
+                         (PREFIX (ModR/M source target))
+                         (BITS (8 #x66)
+                               (8 #x0F)
+                               (8 ,(+ opcode2 1)))
+                         (ModR/M source target))
+
+                        ((P S (XMM (? target)) (? source m-ea))
+                         (PREFIX (ModR/M target source))
+                         (BITS (8 #x0F)
+                               (8 ,opcode2))
+                         (ModR/M target source))
+
+                        ((P S (? target m-ea) (XMM (? source)))
+                         (PREFIX (ModR/M source target))
+                         (BITS (8 #x0F)
+                               (8 ,(+ opcode2 1)))
+                         (ModR/M source target)))))))))
+  ;; Note: (MOVL ...) is very different from (MOV L ...)!
+  (define-move-high/low-instructions MOVHL MOVH #x12 #x16)
+  (define-move-high/low-instructions MOVLH MOVL #x12 #x16))
+\f
+(define-instruction MOVMSKF
+  ((P (? p float-precision) (R (? target)) (? source xmm-ea))
+   (PREFIX (FLOAT 'P p) (ModR/M target source))
+   (BITS (8 #x0F)
+         (8 #x50))
+   (ModR/M target source)))
+
+(define-instruction MOVNTDQ
+  (((? target m-ea) (XMM (? source)))
+   (PREFIX (FLOAT 'P 'D) (ModR/M source target))
+   (BITS (8 #x0F)
+         (8 #xE7))
+   (ModR/M source target)))
+
+(define-instruction MOVNTF
+  (((? p/s float-packed/scalar)
+    (? p float-precision)
+    (? target m-ea)
+    (XMM (? source)))
+   (PREFIX (FLOAT p/s p) (ModR/M source target))
+   (BITS (8 #x0F)
+         (8 #x2B))
+   (ModR/M source target)))
+
+;; Note: (MOVQ ...) is very different from (MOV Q ...)!
+(define-instruction MOVQ
+  (((XMM (? target)) (? source xmm/m-ea))
+   (PREFIX (ModR/M target source))
+   (BITS (8 #xF3)
+         (8 #x0F)
+         (8 #x7E))
+   (ModR/M target source))
+
+  (((? target xmm/m-ea) (XMM (? source)))
+   (PREFIX (ModR/M source target))
+   (BITS (8 #x66)
+         (8 #x0F)
+         (8 #xD6))
+   (ModR/M source target)))
+
+;;; Using the mnemonic MOVF avoids conflict with the general MOVS
+;;; instruction.
+
+(define-instruction MOVF
+  ((S (? p float-precision) (XMM (? target)) (? source xmm/m-ea))
+   (PREFIX (FLOAT 'S p) (ModR/M target source))
+   (BITS (8 #x0F)
+         (8 #x10))
+   (ModR/M target source))
+
+  ((S (? p float-precision) (? target xmm/m-ea) (XMM (? source)))
+   (PREFIX (FLOAT 'S p) (ModR/M source target))
+   (BITS (8 #x0F)
+         (8 #x11))
+   (ModR/M source target)))
+\f
+(let-syntax ((define-mov/dup-instruction
+              (sc-macro-transformer
+               (lambda (form environment)
+                 environment            ;ignore
+                 (let ((mnemonic (cadr form))
+                       (opcode (caddr form)))
+                   `(define-instruction ,mnemonic
+                      (((XMM (? target)) (? source xmm/m-ea))
+                       (PREFIX (ModR/M target source))
+                       (BITS (8 #xF3)
+                             (8 #x0F)
+                             (8 ,opcode))
+                       (ModR/M target source))))))))
+  (define-mov/dup-instruction MOVSHDUP #x16)
+  (define-mov/dup-instruction MOVSLDUP #x12))
+
+(define-instruction MOVUF               ;Unaligned
+  ((P (? p float-precision) (XMM (? target)) (? source xmm/m-ea))
+   (PREFIX (FLOAT 'P p) (ModR/M target source))
+   (BITS (8 #x0F)
+         (8 #x10))
+   (ModR/M target source))
+  ((P (? p float-precision) (? target xmm/m-ea) (XMM (? source)))
+   (PREFIX (FLOAT 'P p) (ModR/M source target))
+   (BITS (8 #x0F)
+         (8 #x11))
+   (ModR/M source target)))
+
+(let-syntax ((define-packed-sized-instruction
+              (sc-macro-transformer
+               (lambda (form environment)
+                 environment            ;ignore
+                 (let ((mnemonic (cadr form))
+                       (size/opcode-list (cddr form)))
+                   `(define-instruction ,mnemonic
+                      ,@(map (lambda (size/opcode)
+                               (let ((size (car size/opcode))
+                                     (opcode (cadr size/opcode)))
+                                 `((,size (XMM (? target)) (? source xmm/m-ea))
+                                   (PREFIX (ModR/M target source))
+                                   (BITS (8 #x66)
+                                         (8 #x0F)
+                                         (8 ,opcode))
+                                   (ModR/M target source))))
+                             size/opcode-list)))))))
+  (define-packed-sized-instruction PACKS (B #x63) (UB #x67) (W #x6B))
+  (define-packed-sized-instruction PADD (B #xFC) (W #xFD) (L #xFE) (Q #xD4))
+  ;++ Should PADDU be considered a separate instruction from PADDS?
+  (define-packed-sized-instruction PADDS (B #xEC) (W #xED) (UB #xDC) (UW #xDD))
+  (define-packed-sized-instruction PAVG (B #xE0) (W #xE3))
+  (define-packed-sized-instruction PCMPEQ (B #x74) (W #x75) (L #x76))
+  (define-packed-sized-instruction PCMPGT (B #x64) (W #x65) (L #x66))
+  (define-packed-sized-instruction PMAX (W #xEE) (UB #xDE))
+  (define-packed-sized-instruction PMIN (W #xEA) (UB #xDA))
+  (define-packed-sized-instruction PSUB (B #xF8) (W #xF9) (L #xFA) (Q #xFB))
+  ;++ Should PSUBSU be considered a separate instruction from PSUBS?
+  (define-packed-sized-instruction PSUBS (B #xE8) (W #xE9) (UB #xD8) (UW #xD9))
+  ;++ Should the size indicate the source or target size?  Right now
+  ;++ it indicates the source size.
+  (define-packed-sized-instruction PUNPCKH (B #x68) (W #x69) (L #x6A) (Q #x6D))
+  (define-packed-sized-instruction PUNPCKL (B #x60) (W #x61) (L #x62) (Q #x6C))
+  )
+\f
+(let-syntax ((define-packed-instruction
+              (sc-macro-transformer
+               (lambda (form environment)
+                 environment            ;ignore
+                 (let ((mnemonic (cadr form))
+                       (opcode (caddr form)))
+                   `(define-instruction ,mnemonic
+                      (((XMM (? target)) (? source xmm/m-ea))
+                       (PREFIX (ModR/M target source))
+                       (BITS (8 #x66)
+                             (8 #x0F)
+                             (8 ,opcode))
+                       (ModR/M target source))))))))
+  (define-packed-instruction PAND #xDB)
+  (define-packed-instruction PANDN #xDF)
+  (define-packed-instruction PMADDWL #xF5)
+  (define-packed-instruction PMULUDQ #xF4)
+  (define-packed-instruction POR #xEB)
+  (define-packed-instruction PSADBW #xF6)
+  (define-packed-instruction PXOR #xEF))
+
+(define-instruction PEXTR
+  ((W (R (? target)) (? source xmm-ea) (&U (? position unsigned-3bit)))
+   (PREFIX (ModR/M target source))
+   (BITS (8 #x66)
+         (8 #x0F)
+         (8 #xC5))
+   (ModR/M target source)
+   (BITS (8 position UNSIGNED))))
+
+(define-instruction PINSR
+  ((W (XMM (? target)) (? source r-ea) (&U (? position unsigned-3bit)))
+   (PREFIX (ModR/M target source))
+   (BITS (8 #x66)
+         (8 #x0F)
+         (8 #xC4))
+   (ModR/M target source)
+   (BITS (8 position UNSIGNED))))
+
+(define-instruction PMOVMSKB
+  (((R (? target)) (? source xmm-ea))
+   (PREFIX (ModR/M target source))
+   (BITS (8 #x66)
+         (8 #x0F)
+         (8 #xD7))
+   (ModR/M target source)))
+\f
+(define-instruction PMULH
+  ((W (XMM (? target)) (? source xmm/m-ea))
+   (PREFIX (ModR/M target source))
+   (BITS (8 #x66)
+         (8 #x0F)
+         (8 #xE5))
+   (ModR/M target source))
+
+  ((UW (XMM (? target)) (? source xmm/m-ea))
+   (PREFIX (ModR/M target source))
+   (BITS (8 #x66)
+         (8 #x0F)
+         (8 #xE4))
+   (ModR/M target source)))
+
+(define-instruction PMULL
+  ((W (XMM (? target)) (? source xmm/m-ea))
+   (PREFIX (ModR/M target source))
+   (BITS (8 #x66)
+         (8 #x0F)
+         (8 #xD5))
+   (ModR/M target source)))
+
+(define-instruction PSHUF
+  ((L (XMM (? target)) (? source xmm/m-ea) (&U (? wibblethwop unsigned-byte)))
+   (PREFIX (ModR/M target source))
+   (BITS (8 #x66)
+         (8 #x0F)
+         (8 #x70))
+   (ModR/M target source)
+   (BITS (8 wibblethwop))))
+
+(define-instruction PSHUFH
+  ((W (XMM (? target)) (? source xmm/m-ea) (&U (? zob unsigned-byte)))
+   (PREFIX (ModR/M target source))
+   (BITS (8 #xF3)
+         (8 #x0F)
+         (8 #x70))
+   (ModR/M target source)
+   (BITS (8 zob))))
+
+;;; Note: (PSHUF L ...) is very different from (PSHUFL ...)!  (The
+;;; latter must be (PSHUFL W ...) in any case.)
+
+(define-instruction PSHUFL
+  ((W (XMM (? target)) (? source xmm/m-ea) (&U (? veeblefitzer unsigned-byte)))
+   (PREFIX (ModR/M target source))
+   (BITS (8 #xF3)
+         (8 #x0F)
+         (8 #x70))
+   (ModR/M target source)
+   (BITS (8 veeblefitzer))))
+\f
+(let-syntax ((define-shift-instruction
+              (sc-macro-transformer
+               (lambda (form environment)
+                 environment            ;ignore
+                 (let ((mnemonic (cadr form))
+                       (digit (caddr form))
+                       (dq-digit (cadddr form))
+                       (opcode (car (cddddr form)))
+                       (size/opcode-list (cddddr form)))
+                   `(define-instruction ,mnemonic
+                      ((DQ (? target xmm-ea) (&U count unsigned-byte))
+                       (PREFIX (ModR/M target))
+                       (BITS (8 #x66)
+                             (8 #x0F)
+                             (8 #x73))
+                       (ModR/M ,dq-digit target)
+                       (BITS (8 count UNSIGNED)))
+
+                      (((? size operand-size)
+                        (? target xmm-ea)
+                        (&U (? count unsigned-byte)))
+                       (PREFIX (ModR/M target))
+                       (BITS (8 #x66)
+                             (8 #x0F)
+                             (8 (case size ((Q) #x73) ((L) #x72) ((W) #x71))))
+                       (ModR/M ,digit target)
+                       (BITS (8 count UNSIGNED)))
+
+                      (((? size operand-size)
+                        (XMM (? target))
+                        (? source xmm/m-ea))
+                       (PREFIX (ModR/M target source))
+                       (BITS (8 #x66)
+                             (8 #x0F)
+                             (8 (case size
+                                  ((Q) ,(+ opcode 1))
+                                  ((L) ,(+ opcode 2))
+                                  ((W) ,(+ opcode 3)))))
+                       (ModR/M target source))))))))
+  (define-shift-instruction PSLL 6 7 #xF0)
+  (define-shift-instruction PSRL 2 3 #xD0))
+\f
+(define-instruction PSRA
+  ;++ This does not admit an operand size of Q.
+  (((? size operand-size) (? target xmm-ea) (&U (? count unsigned-byte)))
+   (PREFIX (ModR/M target))
+   (BITS (8 #x66)
+         (8 #x0F)
+         (8 (case size ((L) #x72) ((W) #x71))))
+   (ModR/M 4 target)
+   (BITS (8 count UNSIGNED)))
+
+  (((? size operand-size) (XMM (? target)) (? source xmm/m-ea))
+   (PREFIX (ModR/M target source))
+   (BITS (8 #x66)
+         (8 #x0F)
+         (8 (case size ((L) #xE2) ((W) #xE1))))
+   (ModR/M target source)))
+
+(let-syntax ((define-reciprocal-instruction
+              (sc-macro-transformer
+               (lambda (form environment)
+                 (let ((mnemonic (cadr form))
+                       (opcode (caddr form)))
+                   `(define-instruction ,mnemonic
+                      (((? p/s float-packed/scalar)
+                        S               ;Single-precision only.
+                        (XMM (? target))
+                        (? source xmm/m-ea))
+                       (PREFIX (FLOAT p/s 'S) (ModR/M target source))
+                       (BITS (8 #x0F)
+                             (8 ,opcode))
+                       (ModR/M target source))))))))
+  (define-reciprocal-instruction RCPF #x53)
+  (define-reciprocal-instruction RSQRTF #x52))
+
+(define-instruction SHUF
+  ((P (? p float-precision)
+      (XMM (? target))
+      (? source xmm/m-ea)
+      (&U (? command unsigned-2bit)))
+   (PREFIX (FLOAT 'P p) (ModR/M target source))
+   (BITS (8 #x0F)
+         (8 #xC6))
+   (ModR/M target source)
+   (BITS (8 command UNSIGNED))))
+
+(define-instruction STMXCSR
+  (((? target m-ea))
+   (PREFIX (ModR/M target))
+   (BITS (8 #x0F)
+         (8 #xAE))
+   (ModR/M 3 target)))
diff --git a/src/compiler/machines/x86-64/insutl.scm b/src/compiler/machines/x86-64/insutl.scm

index 752816b210fe410c3e374d732eadb90e40fbec9f..fa3ab3b2a4fcaf267620376159dc371dca250c16 100644 (file)
--- a/src/compiler/machines/x86-64/insutl.scm
+++ b/src/compiler/machines/x86-64/insutl.scm
@@ -42,6 +42,12 @@ USA.
     (MODE #b11)
     (R/M (register-bits r)))
  
+  ((XMM (? r))
+   (CATEGORIES XMM)
+   (REX (B r))
+   (MODE #b11)
+   (R/M (register-bits r)))
+
  ;;;; Register-indirect
  
    ((@R (? r indirect-reg))
@@ -181,8 +187,11 @@ USA.
     (R/M 5)
     (BITS (32 offset SIGNED))))
  \f
-(define-ea-transformer r/m-ea)
+(define-ea-transformer r-ea REGISTER)
+(define-ea-transformer xmm-ea XMM)
  (define-ea-transformer m-ea MEMORY)
+(define-ea-transformer r/m-ea REGISTER MEMORY)
+(define-ea-transformer xmm/m-ea XMM MEMORY)
  
  (define-structure (effective-address
                    (conc-name ea/)
@@ -197,10 +206,53 @@ USA.
  (declare (integrate-operator register-rex))
  (define-integrable (register-rex register rex)
    (declare (integrate register))
-  (if (>= register r8)
+  (if (>= register 8)
        rex
        0))
  
+(define (cons-ModR/M digit ea tail)
+  (cons-syntax (ea/register ea)
+    (cons-syntax digit
+      (cons-syntax (ea/mode ea)
+       (append-syntax! (ea/extra ea) tail)))))
+
+(declare (integrate-operator opcode-register))
+(define (opcode-register opcode register)
+  (declare (integrate opcode))
+  (+ opcode (if (>= register 8) (- register 8) register)))
+
+(declare (integrate-operator float-comparator))
+(define (float-comparator comparator)
+  (case comparator
+    ((=) 0)
+    ((<) 1)
+    ((<=) 2)
+    ((UNORDERED) 3)
+    ((/=) 4)
+    ((>=) 5)
+    ((>) 6)
+    ((ORDERED) 7)
+    (else (error "Bad float comparator:" comparator))))
+
+(declare (integrate-operator operand-size))
+(define (operand-size s)
+  ;; B must be handled separately in general.
+  (case s
+    ((W L Q) s)
+    (else #f)))
+
+(declare (integrate-operator float-packed/scalar))
+(define (float-packed/scalar s)
+  (case s
+    ((S P) s)
+    (else #f)))
+
+(declare (integrate-operator float-precision))
+(define (float-precision s)
+  (case s
+    ((D S) s)
+    (else #f)))
+\f
  (define (cons-prefix operand-size register ea tail)
    (let ((tail
          (if (eq? operand-size 'W)
@@ -222,28 +274,40 @@ USA.
         (else (error "Invalid operand size:" operand-size)))
        (let ((extended-register?
              (or (eqv? register #t)
-                (and register (>= register r8)))))
+                (and register (>= register 8)))))
         (if ea
             (fix:or (if extended-register? #x44 0) (ea/rex-prefix ea))
             (if extended-register? #x41 0)))))))
  
-(define (cons-ModR/M digit ea tail)
-  (cons-syntax (ea/register ea)
-    (cons-syntax digit
-      (cons-syntax (ea/mode ea)
-       (append-syntax! (ea/extra ea) tail)))))
-
-(declare (integrate-operator opcode-register))
-(define (opcode-register opcode register)
-  (declare (integrate opcode))
-  (+ opcode (if (>= register r8) (- register r8) register)))
+;;; The SSE instructions don't consistently use this pattern for their
+;;; opcodes, but enough of them do that this approximate abstraction
+;;; helps to clarify the instruction syntax.
+
+(define (cons-float-prefix register ea packed/scalar precision tail)
+  (let* ((tail
+         (let ((float (list packed/scalar precision)))
+           (if (equal? float '(P S))
+               tail
+               (cons
+                (syntax-evaluation
+                 (cond ((equal? float '(P D)) #x66)
+                       ((equal? float '(S D)) #xF2)
+                       ((equal? float '(S S)) #xF3)
+                       (else (error "Bad float type:" float)))
+                 coerce-8-bit-unsigned)
+                tail))))
+        (rex-prefix
+         (let ((extended-register?
+                (or (eqv? register #t)
+                    (and register (>= register 8)))))
+           (if ea
+               (fix:or (if extended-register? #x44 0) (ea/rex-prefix ea))
+               (if extended-register? #x41 0)))))
+    (if (zero? rex-prefix)
+       tail
+       (cons-syntax (syntax-evaluation rex-prefix coerce-8-bit-unsigned)
+                    tail))))
  \f
-(define (operand-size s)
-  ;; B must be handled separately in general.
-  (case s
-    ((W L Q) s)
-    (else #f)))
-
  (define-integrable (register-bits r)
    (fix:and r #b111))
  
@@ -297,6 +361,18 @@ USA.
      ((8) #b11)
      (else false)))
  \f
+(declare (integrate-operator unsigned-2bit))
+(define (unsigned-2bit value)
+  (and (<= 0 value #b11) value))
+
+(declare (integrate-operator unsigned-3bit))
+(define (unsigned-3bit value)
+  (and (<= 0 value #b111) value))
+
+(declare (integrate-operator unsigned-5bit))
+(define (unsigned-5bit value)
+  (and (<= 0 value #b11111) value))
+
  (define (signed-byte value)
    (and (fits-in-signed-byte? value)
         value))
@@ -328,7 +404,7 @@ USA.
  (define (unsigned-quad value)
    (and (fits-in-unsigned-quad? value)
         value))
-
+\f
  (define (sign-extended-byte value)
    (and (fits-in-signed-byte? value)
         value))
diff --git a/src/compiler/machines/x86-64/lapgen.scm b/src/compiler/machines/x86-64/lapgen.scm

index a28795447f4246a1175b8cf79bcaef60515b1323..55f9f85c129ece9e0c4ac07636914a2a4e497695 100644 (file)
--- a/src/compiler/machines/x86-64/lapgen.scm
+++ b/src/compiler/machines/x86-64/lapgen.scm
@@ -35,48 +35,20 @@ USA.
    ;; rbp holds the pointer mask
    ;; rsi holds the register array pointer
    ;; rdi holds the free pointer
-  ;++ float
-  ;; fr7 is not used so that we can always push on the stack once.
    (list rax rcx rdx rbx r8 r9 r10 r11 r12 r13 r14 r15
-       ;++ float
-       ;; fr0 fr1 fr2 fr3 fr4 fr5 fr6
-       ;; mmx0 mmx1 mmx2 mmx3 mmx4 mmx5 mmx6 mmx7
-       ;; xmm0 xmm1 xmm2 xmm3 xmm4 xmm5 xmm6 xmm7
-       ;; xmm8 xmm9 xmm10 xmm11 xmm12 xmm13 xmm14 xmm15
-       ))
+       xmm0 xmm1 xmm2 xmm3 xmm4 xmm5 xmm6 xmm7
+       xmm8 xmm9 xmm10 xmm11 xmm12 xmm13 xmm14 xmm15))
  
  (define (sort-machine-registers registers)
    registers)
  
-;++ float
-
-#;
-(define (sort-machine-registers registers)
-  ;; FR0 is preferable to other FPU regs.  We promote it to the front
-  ;; if we find another FPU reg in front of it.
-  (let loop ((regs registers))
-    (cond ((null? regs) registers)     ; no float regs at all
-         ((general-register? (car regs)); ignore general regs
-          (loop (cdr regs)))
-         ((= (car regs) fr0)           ; found FR0 first
-          registers)
-         ((memq fr0 regs)              ; FR0 not first, is it present?
-          (cons fr0 (delq fr0 registers)) ; move to front
-          registers)
-         (else                         ; FR0 absent
-          registers))))
-
  (define (register-type register)
    (cond ((machine-register? register)
          (vector-ref
           '#(GENERAL GENERAL GENERAL GENERAL GENERAL GENERAL GENERAL GENERAL
              GENERAL GENERAL GENERAL GENERAL GENERAL GENERAL GENERAL GENERAL
-            ;++ float
-            ;; FLOAT FLOAT FLOAT FLOAT FLOAT FLOAT FLOAT FLOAT   ;x87 fp
-            ;; FLOAT FLOAT FLOAT FLOAT FLOAT FLOAT FLOAT FLOAT   ;MMX 64bit
-            ;; MEDIA MEDIA MEDIA MEDIA MEDIA MEDIA MEDIA MEDIA   ;XMM 128bit
-            ;; MEDIA MEDIA MEDIA MEDIA MEDIA MEDIA MEDIA MEDIA
-            )
+            FLOAT FLOAT FLOAT FLOAT FLOAT FLOAT FLOAT FLOAT
+            FLOAT FLOAT FLOAT FLOAT FLOAT FLOAT FLOAT FLOAT)
           register))
         ((register-value-class=word? register)
          'GENERAL)
@@ -87,19 +59,14 @@ USA.
  \f
  (define register-reference
    (let ((references (make-vector number-of-machine-registers)))
-    (do ((i rax (+ i 1)))
-       ((> i r15))
-      (vector-set! references i (INST-EA (R ,i))))
-    ;++ float
-    ;; (do ((i fr0 (+ i 1)))
-    ;;         ((>= i fr7))
-    ;;   (vector-set! references i (INST-EA (ST ,(floreg->sti i)))))
-    ;; (do ((i mmx0 (+ i 1)))
-    ;;         ((>= i mmx7))
-    ;;   (vector-set! references i (INST-EA (MMX ...))))
-    ;; (do ((i xmm0 (+ i 1)))
-    ;;         ((>= i xmm15))
-    ;;   (vector-set! references i (INST-EA (XMM ...))))
+    (do ((r rax (+ r 1))
+        (i 0 (+ i 1)))
+       ((> r r15))
+      (vector-set! references r (INST-EA (R ,i))))
+    (do ((r xmm0 (+ r 1))
+        (i 0 (+ i 1)))
+       ((> r xmm15))
+      (vector-set! references r (INST-EA (XMM ,i))))
      (lambda (register)
        (vector-ref references register))))
  
@@ -109,10 +76,9 @@ USA.
  (define (reference->register-transfer source target)
    (cond ((equal? (register-reference target) source)
          (LAP))
-       ;++ float
         ((float-register-reference? source)
          ;; Assume target is a float register
-        (LAP (FLD ,source)))
+        (LAP (MOVF S D ,(register-reference target) ,source)))
         (else
          (memory->machine-register source target))))
  
@@ -126,14 +92,9 @@ USA.
  (define (register->home-transfer source target)
    (machine->pseudo-register source target))
  
-;++ float
-
  (define-integrable (float-register-reference? ea)
-  ea
-  #f
-  #;
    (and (pair? ea)
-       (eq? (car ea) 'ST)))
+       (eq? (car ea) 'XMM)))
  \f
  ;;;; Linearizer interface
  
@@ -162,34 +123,22 @@ USA.
  \f
  ;;;; Utilities for the register allocator interface
  
-(define-integrable (machine->machine-register source target)
+(define (generate-move register source-ref target-ref)
+  (if (float-register? register)
+      (LAP (MOVF S D ,target-ref ,source-ref))
+      (LAP (MOV Q ,target-ref ,source-ref))))
+
+(define (machine->machine-register source target)
    (guarantee-registers-compatible source target)
-  ;++ float
-  (if (not (float-register? source))
-      (LAP (MOV Q ,(register-reference target) ,(register-reference source)))
-      (let ((ssti (floreg->sti source))
-           (tsti (floreg->sti target)))
-       (if (zero? ssti)
-           (LAP (FST (ST ,tsti)))
-           (LAP (FLD (ST ,ssti))
-                (FSTP (ST ,(1+ tsti))))))))
+  (generate-move source
+                (register-reference source)
+                (register-reference target)))
  
  (define (machine-register->memory source target)
-  ;++ float
-  (if (not (float-register? source))
-      (LAP (MOV Q ,target ,(register-reference source)))
-      (let ((ssti (floreg->sti source)))
-       (if (zero? ssti)
-           (LAP (FST D ,target))
-           (LAP (FLD (ST ,ssti))
-                (FSTP D ,target))))))
+  (generate-move source (register-reference source) target))
  
  (define (memory->machine-register source target)
-  ;++ float
-  (if (not (float-register? target))
-      (LAP (MOV Q ,(register-reference target) ,source))
-      (LAP (FLD D ,source)
-          (FSTP (ST ,(1+ (floreg->sti target)))))))
+  (generate-move target source (register-reference target)))
  
  (define-integrable (offset-referenceable? offset)
    (byte-offset-referenceable? (* address-units-per-object offset)))
@@ -213,7 +162,7 @@ USA.
        (error "Negative unsigned offset:" offset))
    ;; We don't have unsigned addressing modes.
    (byte-offset-reference register offset))
-\f
+
  ;;; This returns an offset in objects, not bytes.
  
  (define-integrable (pseudo-register-offset register)
@@ -226,29 +175,11 @@ USA.
  (define-integrable (machine->pseudo-register source target)
    (machine-register->memory source (pseudo-register-home target)))
  
-;++ float
-
-(define (general-register? register)
-  register
-  #t)
-
-(define (float-register? register)
-  register
-  #f)
-
-(define (floreg->sti reg)
-  (error "x87 floating-point not supported:" `(FLOREG->STI ,reg)))
-
-#|
-(define-integrable (floreg->sti reg)
-  (- reg fr0))
-
  (define-integrable (general-register? register)
-  (< register fr0))
+  (< register xmm0))
  
  (define-integrable (float-register? register)
-  (<= fr0 register fr7))
-|#
+  (>= register xmm0))
  \f
  ;;;; Utilities for the rules
  
@@ -271,6 +202,10 @@ USA.
         (flush-register! machine-reg)
         (add-pseudo-register-alias! rtl-reg machine-reg))))
  
+;;; OBJECT->MACHINE-REGISTER! takes only general registers, not float
+;;; registers.  Otherwise, (INST-EA (R ,mreg)) would need to be
+;;; (register-reference mreg).
+
  (define (object->machine-register! object mreg)
    ;; This ordering allows LOAD-CONSTANT to use MREG as a temporary.
    (let ((code (load-constant (INST-EA (R ,mreg)) object)))
@@ -391,7 +326,7 @@ USA.
      temp                               ;ignore
      (LAP ,@prefix
          ,@(receiver operand))))
-
+\f
  ;;; SIGNED-IMMEDIATE-OPERAND and UNSIGNED-IMMEDIATE-OPERAND abstract
  ;;; the pattern of performing an operation with an instruction that
  ;;; takes an immediate operand of 32 bits, but using a value that may
@@ -410,6 +345,9 @@ USA.
      (cond ((fits-in-signed-long? value)
            (values #f (LAP) operand))
           ((fits-in-signed-quad? value)
+          ;; (values #f
+          ;;         (LAP)
+          ;;         (INST-EA (@PCR ,(allocate-signed-quad-label value))))
            (let ((temp (temporary-reference)))
              (values temp (LAP (MOV Q ,temp ,operand)) temp)))
           (else
@@ -420,10 +358,39 @@ USA.
      (cond ((fits-in-unsigned-long? value)
            (values #f (LAP) operand))
           ((fits-in-unsigned-quad? value)
+          ;; (values #f
+          ;;         (LAP)
+          ;;         (INST-EA (@PCR ,(allocate-unsigned-quad-label value))))
            (let ((temp (temporary-reference)))
              (values temp (LAP (MOV Q ,temp ,operand)) temp)))
           (else
            (error "Unsigned immediate value too large:" value)))))
+
+(define (allocate-data-label datum block-name offset alignment data)
+  (let* ((block
+         (or (find-extra-code-block block-name)
+             (let ((block
+                    (declare-extra-code-block! block-name 'ANYWHERE '())))
+               (add-extra-code!
+                block
+                (LAP (PADDING ,offset ,alignment ,padding-string)))
+               block)))
+        (pairs (extra-code-block/xtra block))
+        (place (assoc datum pairs)))
+    (if place
+       (cdr place)
+       (let ((label (generate-label block-name)))
+         (set-extra-code-block/xtra!
+          block
+          (cons (cons datum label) pairs))
+         (add-extra-code! block (LAP (LABEL ,label) ,@data))
+         label))))
+
+(define (allocate-unsigned-quad-label quad)
+  (allocate-data-label quad 'QUADS 0 8 (LAP (QUAD U ,quad))))
+
+(define (allocate-signed-quad-label quad)
+  (allocate-data-label quad 'QUADS 0 8 (LAP (QUAD S ,quad))))
  \f
  (define (target-register target)
    (delete-dead-registers!)
@@ -461,6 +428,48 @@ USA.
  
  (define-integrable (allocate-indirection-register! register)
    (load-alias-register! register 'GENERAL))
+
+(define (binary-register-operation operate commutative? type move
+                                  target source1 source2)
+  (let* ((worst-case
+         (lambda (target source1 source2)
+           (LAP ,@(move target source1)
+                ,@(operate target source2))))
+        (new-target-alias!
+         (lambda ()
+           (let ((source1 (standard-register-reference source1 type #f))
+                 (source2 (standard-register-reference source2 type #f)))
+             (delete-dead-registers!)
+             (worst-case
+              (register-reference
+               (or (register-alias target type)
+                   (allocate-alias-register! target type)))
+              source1
+              source2)))))
+    (cond ((not (pseudo-register? target))
+          (if (not (eq? (register-type target) type))
+              (error "binary-register-operation: Wrong type register"
+                     target
+                     type)
+              (worst-case (register-reference target)
+                          (standard-register-reference source1 type #f)
+                          (standard-register-reference source2 type #f))))
+         ((register-copy-if-available source1 type target)
+          => (lambda (get-alias-ref)
+               (if (= source2 source1)
+                   (let ((ref (get-alias-ref)))
+                     (operate ref ref))
+                   (let ((source2
+                          (standard-register-reference source2 type #f)))
+                     (operate (get-alias-ref) source2)))))
+         ((not commutative?)
+          (new-target-alias!))
+         ((register-copy-if-available source2 type target)
+          => (lambda (get-alias-ref)
+               (let ((source1 (standard-register-reference source1 type #f)))
+                 (operate (get-alias-ref) source1))))
+         (else
+          (new-target-alias!)))))
  \f
  (define (with-indexed-address base* index* scale b-offset protect recvr)
    (let* ((base (allocate-indirection-register! base*))
diff --git a/src/compiler/machines/x86-64/machin.scm b/src/compiler/machines/x86-64/machin.scm

index c28e8c63f517529ae3e81c02de9bb4fd47a792af..f6aa1e57d38dec5e514ec47ea213fad90c321735 100644 (file)
--- a/src/compiler/machines/x86-64/machin.scm
+++ b/src/compiler/machines/x86-64/machin.scm
@@ -169,48 +169,26 @@ USA.
  (define r14 14)
  (define r15 15)
  
-;;; x87 floating-point stack locations, allocated as if registers.
-
-(define fr0 16)
-(define fr1 17)
-(define fr2 18)
-(define fr3 19)
-(define fr4 20)
-(define fr5 21)
-(define fr6 22)
-(define fr7 23)
-
-;;; 64-bit media registers (deprecated).
-
-(define mmx0 24)
-(define mmx1 25)
-(define mmx2 26)
-(define mmx3 27)
-(define mmx4 28)
-(define mmx5 29)
-(define mmx6 30)
-(define mmx7 31)
-
  ;;; 128-bit media registers.
  
-(define xmm0 32)
-(define xmm1 33)
-(define xmm2 34)
-(define xmm3 35)
-(define xmm4 36)
-(define xmm5 37)
-(define xmm6 38)
-(define xmm7 39)
-(define xmm8 40)
-(define xmm9 41)
-(define xmm10 42)
-(define xmm11 43)
-(define xmm12 44)
-(define xmm13 45)
-(define xmm14 46)
-(define xmm15 47)
+(define xmm0 16)
+(define xmm1 17)
+(define xmm2 18)
+(define xmm3 19)
+(define xmm4 20)
+(define xmm5 21)
+(define xmm6 22)
+(define xmm7 23)
+(define xmm8 24)
+(define xmm9 25)
+(define xmm10 26)
+(define xmm11 27)
+(define xmm12 28)
+(define xmm13 29)
+(define xmm14 30)
+(define xmm15 31)
  \f
-(define number-of-machine-registers 16)
+(define number-of-machine-registers 32)
  (define number-of-temporary-registers 256)
  
  (define-integrable regnum:stack-pointer rsp)
@@ -233,12 +211,8 @@ USA.
          value-class=address)
         ((<= r8 register r15)
          value-class=object)
-       ((<= fr0 register fr7)
-        value-class=float)
-       ((<= mmx0 register mmx7)
-        (error "MMX media registers not allocated:" register))
         ((<= xmm0 register xmm15)
-        (error "XMM media registers not allocated:" register))
+        value-class=float)
         (else
          (error "Invalid machine register:" register))))
  
@@ -392,64 +366,54 @@ USA.
        (error "Unknown register type" locative)))
  \f
  (define (rtl:constant-cost expression)
-  ;; i486 clock count for instruction to construct/fetch into register.
-  (let ((if-integer
-        (lambda (value)
-          value                        ; ignored
-          ;; Can this be done in fewer bytes for suitably small values?
-          1))                          ; MOV immediate
-       (get-pc-cost
-        (+ 3                           ; CALL
-           4))                         ; POP
-       (based-reference-cost
-        1)                             ; MOV r/m
-       (address-offset-cost
-        1))                            ; LEA instruction
-
-    (define (if-synthesized-constant type datum)
-      (if-integer (make-non-pointer-literal type datum)))
-
+  ;; Counts derived from the AMD64 Software Optimization Guide, Rev
+  ;; 3.06, from September 2005.  Scaled by two because LEA costs 1/2!
+  ;; This is pretty silly, but probably better than using i486 clock
+  ;; counts.
+  (let ((cost:lea 1)
+       (cost:mov-mem 6)
+       (cost:mov-imm 2)
+       (cost:or 2))
      (case (rtl:expression-type expression)
        ((CONSTANT)
         (let ((value (rtl:constant-value expression)))
          (if (non-pointer-object? value)
-            (if-synthesized-constant (object-type value)
-                                     (careful-object-datum value))
-            (+ get-pc-cost based-reference-cost))))
+            cost:mov-imm
+            cost:mov-mem)))
        ((MACHINE-CONSTANT)
-       (if-integer (rtl:machine-constant-value expression)))
-      ((ENTRY:PROCEDURE
-       ENTRY:CONTINUATION)
-       (+ get-pc-cost address-offset-cost))
-      ((ASSIGNMENT-CACHE
-       VARIABLE-CACHE)
-       (+ get-pc-cost based-reference-cost))
-      ((OFFSET-ADDRESS
-       BYTE-OFFSET-ADDRESS
-       FLOAT-OFFSET-ADDRESS)
-       address-offset-cost)
+       cost:mov-imm)
+      ((ENTRY:PROCEDURE ENTRY:CONTINUATION)
+       (+ cost:mov-imm cost:lea cost:or))
+      ((OFFSET-ADDRESS BYTE-OFFSET-ADDRESS FLOAT-OFFSET-ADDRESS)
+       (receive (offset-selector scale)
+          (case (rtl:expression-type expression)
+            ((OFFSET-ADDRESS)
+             (values rtl:offset-address-offset address-units-per-object))
+            ((BYTE-OFFSET-ADDRESS)
+             (values rtl:byte-offset-address-offset 1))
+            ((FLOAT-OFFSET-ADDRESS)
+             (values rtl:float-offset-address-offset
+                     address-units-per-float)))
+        (let ((offset (offset-selector expression)))
+          (if (and (rtl:machine-constant? offset)
+                   (not
+                    (fits-in-signed-long?
+                     (* scale (rtl:machine-constant-value offset)))))
+              (+ cost:mov-imm cost:lea)
+              cost:lea))))
        ((CONS-POINTER)
         (and (rtl:machine-constant? (rtl:cons-pointer-type expression))
             (rtl:machine-constant? (rtl:cons-pointer-datum expression))
-           (if-synthesized-constant
-            (rtl:machine-constant-value (rtl:cons-pointer-type expression))
-            (rtl:machine-constant-value
-             (rtl:cons-pointer-datum expression)))))
-      (else
-       false))))
+           cost:mov-imm))
+      (else #f))))
  
  (define compiler:open-code-floating-point-arithmetic?
-  false)
+  #t)
  
  (define compiler:primitives-with-no-open-coding
    '(DIVIDE-FIXNUM
      &/
-    FLOATING-VECTOR-CONS FLOATING-VECTOR-LENGTH
-    FLOATING-VECTOR-REF FLOATING-VECTOR-SET! FLONUM-ABS
-    FLONUM-ACOS FLONUM-ADD FLONUM-ASIN FLONUM-ATAN FLONUM-ATAN2
-    FLONUM-CEILING FLONUM-COS FLONUM-DIVIDE FLONUM-EQUAL?
-    FLONUM-EXP FLONUM-FLOOR FLONUM-GREATER? FLONUM-LESS?
-    FLONUM-LOG FLONUM-MULTIPLY FLONUM-NEGATE FLONUM-NEGATIVE?
-    FLONUM-POSITIVE? FLONUM-ROUND FLONUM-SIN FLONUM-SQRT
-    FLONUM-SUBTRACT FLONUM-TAN FLONUM-TRUNCATE FLONUM-ZERO?
-    FLONUM? GCD-FIXNUM STRING-ALLOCATE VECTOR-CONS))
-\ No newline at end of file
+    FLOATING-VECTOR-CONS FLONUM-ACOS FLONUM-ASIN FLONUM-ATAN
+    FLONUM-ATAN2 FLONUM-CEILING FLONUM-COS FLONUM-EXP FLONUM-FLOOR
+    FLONUM-LOG FLONUM-ROUND FLONUM-SIN FLONUM-TAN FLONUM-TRUNCATE
+    GCD-FIXNUM STRING-ALLOCATE VECTOR-CONS))
+\ No newline at end of file
diff --git a/src/compiler/machines/x86-64/rules1.scm b/src/compiler/machines/x86-64/rules1.scm

index d65fa052eab976a0136595a5de531131e5ff84cc..d3b265155dc0e4b069e02feb0ad9320b6d7a38ff 100644 (file)
--- a/src/compiler/machines/x86-64/rules1.scm
+++ b/src/compiler/machines/x86-64/rules1.scm
@@ -372,6 +372,7 @@ USA.
                                ,(if signed?
                                     (INST-EA (& ,n))
                                     (INST-EA (&U ,n))))
+                         ;++ Check that SCALE is a valid SIB scale.
                           (LEA Q ,target (@RI ,source ,temp ,scale)))))))))))
  
  (define-integrable (load-displaced-register target source n scale)
@@ -394,28 +395,64 @@ USA.
  
  (define (load-pc-relative-address/typed target type label)
    ;++ This is pretty horrid, especially since it happens for every
-  ;++ continuation pushed!  Neither alternative is much good.
-  ;; Twenty bytes.
+  ;++ continuation pushed!  None of the alternatives is much good.
+  ;; Twenty bytes, but only three instructions and no extra memory.
    (let ((temp (temporary-register-reference)))
      (LAP (MOV Q ,temp (&U ,(make-non-pointer-literal type 0)))
          (LEA Q ,target (@PCR ,label))
          (OR Q ,target ,temp)))
    #|
-  ;; Nineteen bytes.
+  ;; Nineteen bytes, but rather complicated (and needs syntax for an
+  ;; addressing mode not presently supported).
+  (cond ((zero? type)
+        (LAP (LEA Q ,target (@PCR ,label))))
+       ((zero? (remainder type 8))
+        (receive (type-divisor scale scale-log)
+            (cond ((not (zero? (remainder type #x10))) (values 8 8 3))
+                  ((not (zero? (remainder type #x20))) (values #x10 4 2))
+                  ((not (zero? (remainder type #x40))) (values #x20 2 1))
+                  (else (error "Type too large:" type)))
+          (let ((offset (quotient type type-divisor)))
+            (LAP (LEA Q ,target (@PCR ,label))
+                 (LEA Q ,target (@OI ,offset ,target ,scale))
+                 (ROR Q ,target (&U ,scale-log))))))
+       (else ...))
+  |#
+  #|
+  ;; This would be brilliant, except that it needs (PC * 2^6)-relative
+  ;; addressing, rather than PC-relative addressing.
+  (let* ((reference-point (generate-label 'PC))
+        (offset
+         `(+ ,type
+             (* ,(expt 2 scheme-type-width) (- ,label ,reference-point)))))
+    (LAP (LEA Q ,target (@PCO ,offset))
+        (LABEL ,reference-point)
+        (ROR Q ,target (&U ,scheme-type-width))))
+  |#
+  #|
+  ;; Nineteen bytes and no temporaries, but four instructions.
    (LAP (LEA Q ,target (@PCR ,label))
         (SHL Q ,target (&U ,scheme-type-width))
         (OR Q ,target (&U ,type))
         (ROR Q ,target (&U ,scheme-type-width)))
    |#
-  ;++ This doesn't work because CONSTANT->LABEL will give us a label
-  ;++ for the Scheme number object, not for the machine bit string.
    #|
-  ;; Seventeen bytes -- but we need the label to work.
+  ;; Seventeen bytes, but this requires reading eight bytes of memory.
    (let ((temp (temporary-register-reference))
         (literal (make-non-pointer-literal type 0)))
-    (LAP (MOV Q ,temp (@PCR ,(constant->label literal)))
+    (LAP (MOV Q ,temp (@PCR ,(allocate-unsigned-quad-label literal)))
          (LEA Q ,target (@PCR ,label))
          (OR Q ,target ,temp)))
+  |#
+  #|
+  ;; Fourteen bytes and no temporaries, but this requires reading
+  ;; eight bytes of memory.
+  (let* ((reference-point (generate-label 'REFERENCE-POINT))
+        (expression
+         `(+ ,(make-non-pointer-literal type 0) (- ,label ,reference-point))))
+    (LAP (LABEL ,reference-point)
+        (LEA Q ,target (@PCR ,reference-point))
+        (ADD Q ,target (@PCR ,(allocate-unsigned-quad-label expression)))))
    |#)
  
  (define (load-char-into-register type source target)
@@ -464,17 +501,18 @@ USA.
                                    (rtl:offset-address-offset base))))
         expression))
  
-(define (with-decoded-detagged-offset expression recvr)
+(define (with-decoded-detagged-offset expression receiver)
    (let ((base (rtl:offset-base expression)))
      (let ((base* (rtl:offset-address-base base))
           (index (rtl:offset-address-offset base)))
-      (recvr (rtl:register-number (if (rtl:register? base*)
-                                     base*
-                                     (rtl:object->address-expression base*)))
-            (rtl:register-number (if (rtl:register? index)
-                                     index
-                                     (rtl:object->datum-expression index)))
-            (rtl:machine-constant-value (rtl:offset-offset expression))))))
+      (receiver
+       (rtl:register-number (if (rtl:register? base*)
+                               base*
+                               (rtl:object->address-expression base*)))
+       (rtl:register-number (if (rtl:register? index)
+                               index
+                               (rtl:object->datum-expression index)))
+       (rtl:machine-constant-value (rtl:offset-offset expression))))))
  \f
  ;;;; Improved string references
  
diff --git a/src/compiler/machines/x86-64/rulfix.scm b/src/compiler/machines/x86-64/rulfix.scm

index 9ca807e4297add2ad17350c47829774f96e7d672..863d17d7bedbb8b62b8e8117da84a1a394666d5b 100644 (file)
--- a/src/compiler/machines/x86-64/rulfix.scm
+++ b/src/compiler/machines/x86-64/rulfix.scm
@@ -316,54 +316,14 @@ USA.
                    FIXNUM-AND
                    FIXNUM-OR
                    FIXNUM-XOR)))
-\f           
+            
  (define ((fixnum-2-args/standard commutative? operate) target source1
                                                        source2 overflow?)
    overflow?                            ; ignored
-  (two-arg-register-operation operate
-                             commutative?
-                             target
-                             source1
-                             source2))
-
-(define (two-arg-register-operation operate commutative?
-                                   target source1 source2)
-  (let* ((worst-case
-         (lambda (target source1 source2)
-           (LAP (MOV Q ,target ,source1)
-                ,@(operate target source2))))
-        (new-target-alias!
-         (lambda ()
-           (let ((source1 (any-reference source1))
-                 (source2 (any-reference source2)))
-             (delete-dead-registers!)
-             (worst-case (target-register-reference target)
-                         source1
-                         source2)))))
-    (cond ((not (pseudo-register? target))
-          (if (not (eq? (register-type target) 'GENERAL))
-              (error "two-arg-register-operation: Wrong type register"
-                     target 'GENERAL)
-              (worst-case (register-reference target)
-                          (any-reference source1)
-                          (any-reference source2))))
-         ((register-copy-if-available source1 'GENERAL target)
-          =>
-          (lambda (get-alias-ref)
-            (if (= source2 source1)
-                (let ((ref (get-alias-ref)))
-                  (operate ref ref))
-                (let ((source2 (any-reference source2)))
-                  (operate (get-alias-ref) source2)))))
-         ((not commutative?)
-          (new-target-alias!))
-         ((register-copy-if-available source2 'GENERAL target)
-          =>
-          (lambda (get-alias-ref)
-            (let ((source1 (any-reference source1)))
-              (operate (get-alias-ref) source1))))
-         (else
-          (new-target-alias!)))))
+  (binary-register-operation operate commutative? 'GENERAL
+                            (lambda (target source)
+                              (LAP (MOV Q ,target ,source)))
+                            target source1 source2))
  
  (define (fixnum-2-args/register*constant operator target
                                          source constant overflow?)
@@ -508,11 +468,10 @@ USA.
      (lambda (target source1 source2 overflow?)
        overflow?                                ; ignored
        (require-register! rcx)
-      (two-arg-register-operation operate
-                                 #f
-                                 target
-                                 source1
-                                 source2))))
+      (binary-register-operation operate #f 'GENERAL
+                                (lambda (target source)
+                                  (LAP (MOV Q ,target ,source)))
+                                target source1 source2))))
  \f
  (define (do-division target source1 source2 result-reg)
    (prefix-instructions! (load-machine-register! source1 rax))
diff --git a/src/compiler/machines/x86-64/rulflo.scm b/src/compiler/machines/x86-64/rulflo.scm

index 88562401484364f9e6574a967048c79d8e69953d..e5ca59fb85fe8a6bf27d0f8f0773ad12ede4e969 100644 (file)
--- a/src/compiler/machines/x86-64/rulflo.scm
+++ b/src/compiler/machines/x86-64/rulflo.scm
@@ -27,805 +27,369 @@ USA.
  ;;; package: (compiler lap-syntaxer)
  
  (declare (usual-integrations))
-
-#|
  \f
-;; ****
-;; Missing: 2 argument operations and predicates with non-trivial
-;; constant arguments.
-;; Also missing with (OBJECT->FLOAT (REGISTER ...)) operands.
-;; ****
+(define (flonum-source! source)
+  (or (register-alias source 'FLOAT)
+      (load-alias-register! source 'FLOAT)))
  
-(define (flonum-source! register)
-  (floreg->sti (load-alias-register! register 'FLOAT)))
+(define-integrable (flonum-source-reference! source)
+  (register-reference (flonum-source! source)))
  
-(define (flonum-target! pseudo-register)
+(define (flonum-target! target)
    (delete-dead-registers!)
-  (floreg->sti (allocate-alias-register! pseudo-register 'FLOAT)))
+  (or (register-alias target 'FLOAT)
+      (allocate-alias-register! target 'FLOAT)))
+
+(define-integrable (flonum-target-reference! target)
+  (register-reference (flonum-target! target)))
+
+;;; FLONUM-DATA-OFFSET is the number of bytes after the location
+;;; addressed by a FLONUM-tagged pointer before the actual flonum data
+;;; begin.
  
-(define (flonum-temporary!)
-  (allocate-temporary-register! 'FLOAT))
+(define-integrable flonum-data-offset address-units-per-object)
  
  (define-rule statement
-  ;; convert a floating-point number to a flonum object
-  (ASSIGN (REGISTER (? target))
-         (FLOAT->OBJECT (REGISTER (? source))))
-  (let* ((source (register-alias source 'FLOAT))
-        (target (target-register-reference target)))
-    (LAP (MOV W (@R ,regnum:free-pointer)
-             (&U ,(make-non-pointer-literal
-                   (ucode-type manifest-nm-vector)
-                   2)))
-        ,@(if (not source)
-              ;; Value is in memory home
-              (let ((off (pseudo-register-offset source))
-                    (temp (temporary-register-reference)))
-                (LAP (MOV W ,target
-                          ,(offset-reference regnum:regs-pointer off))
-                     (MOV W ,temp
-                          ,(offset-reference regnum:regs-pointer (1+ off)))
-                     (MOV W (@RO B ,regnum:free-pointer 4) ,target)
-                     (MOV W (@RO B ,regnum:free-pointer 8) ,temp)))
-              (store-float (floreg->sti source)
-                           (INST-EA (@RO B ,regnum:free-pointer 4))))
-        (LEA ,target
-             (@RO UW ,regnum:free-pointer
-                  ,(make-non-pointer-literal (ucode-type flonum) 0)))
-        (ADD W (R ,regnum:free-pointer) (& 12)))))
-
-#|
+  (ASSIGN (REGISTER (? target)) (FLOAT->OBJECT (REGISTER (? source))))
+  (let* ((source (flonum-source-reference! source))
+         (target (target-register-reference target)))
+    (LAP ,@(with-unsigned-immediate-operand
+               (make-non-pointer-literal (ucode-type MANIFEST-NM-VECTOR) 1)
+             (lambda (operand)
+               (LAP (MOV Q (@R ,regnum:free-pointer) ,operand))))
+         (MOVF S D (@RO ,regnum:free-pointer ,flonum-data-offset) ,source)
+         (MOV Q ,target (&U ,(make-non-pointer-literal (ucode-type FLONUM) 0)))
+         (OR Q ,target (R ,regnum:free-pointer))
+         (LEA Q (R ,regnum:free-pointer)
+              (@RO ,regnum:free-pointer
+                   ,(+ flonum-data-offset address-units-per-float))))))
+
  (define-rule statement
-  ;; convert a flonum object to a floating-point number
    (ASSIGN (REGISTER (? target)) (OBJECT->FLOAT (REGISTER (? source))))
    (let* ((source (move-to-temporary-register! source 'GENERAL))
-        (target (flonum-target! target)))
+         (target (flonum-target-reference! target)))
      (LAP ,@(object->address (register-reference source))
-        ,@(load-float (INST-EA (@RO B ,source 4)) target))))
-|#
-
-(define-rule statement
-  ;; Convert a flonum object to a floating-point number.  Unlike the
-  ;; version above which has an implicits OBJECT->ADDRESS, this one
-  ;; uses the addressing mode to remove the type-code.  Saves a cycle
-  ;; and maybe a register spill if SOURCE is live after instruction.
-  (ASSIGN (REGISTER (? target)) (OBJECT->FLOAT (REGISTER (? source))))
-  (let* ((source (source-register source))
-        (target (flonum-target! target)))
-    (object->float source target)))
-
-(define (object->float source-register target)
-  (let ((untagging+offset
-        (- 4 (make-non-pointer-literal (ucode-type flonum) 0))))
-    (load-float (INST-EA (@RO W ,source-register ,untagging+offset)) target)))
+         (MOVF S D ,target (@RO ,source ,flonum-data-offset)))))
  \f
-;;;; Floating-point vector support.
-
  (define-rule statement
    (ASSIGN (REGISTER (? target)) (? expression rtl:simple-float-offset?))
    (let* ((source (float-offset->reference! expression))
-        (target (flonum-target! target)))
-    (load-float source target)))
+         (target (flonum-target-reference! target)))
+    (LAP (MOVF S D ,target ,source))))
  
  (define-rule statement
    (ASSIGN (? expression rtl:simple-float-offset?) (REGISTER (? source)))
-  (let ((source (flonum-source! source))
-       (target (float-offset->reference! expression)))
-    (store-float source target)))
+  (let ((source (flonum-source-reference! source))
+        (target (float-offset->reference! expression)))
+    (LAP (MOVF S D ,target ,source))))
  
  (define-rule statement
-  (ASSIGN (REGISTER (? target))
-         (? expression rtl:detagged-float-offset?))
+  (ASSIGN (REGISTER (? target)) (? expression rtl:detagged-float-offset?))
    (with-detagged-float-location expression
-    (lambda (temp)
-      (load-float temp target))))
+    (lambda (source)
+      (LAP (MOVF S D ,(flonum-target-reference! target) ,source)))))
  
  (define-rule statement
-  (ASSIGN (? expression rtl:detagged-float-offset?)
-         (REGISTER (? source)))
+  (ASSIGN (? expression rtl:detagged-float-offset?) (REGISTER (? source)))
    (with-detagged-float-location expression
-    (lambda (temp)
-      (store-float (flonum-source! source) temp))))
+    (lambda (target)
+      (LAP (MOVF S D ,target ,(flonum-source-reference! source))))))
  
-(define (with-detagged-float-location rtl-expression recvr)
-  ;; Never needs to protect a register because it is a float register!
+(define (with-detagged-float-location rtl-expression receiver)
    (with-decoded-detagged-float-offset rtl-expression
-    (lambda (base index w-offset)
-      (with-indexed-address base index 8 (* 4 w-offset) false recvr))))
+    (lambda (base float-index object-offset)
+      (with-indexed-address base float-index address-units-per-float
+          (* address-units-per-object object-offset)
+          ;; No general registers to protect -- the target and source
+          ;; will always be float registers.
+          #f
+        receiver))))
+
+;;; These are nearly identical copies of RTL:DETAGGED-OFFSET? and
+;;; WITH-DECODED-DETAGGED-OFFSET, with FLOAT-OFFSET substituted for
+;;; OFFSET.  It is unfortunate that the RTL doesn't have a clearer
+;;; abstraction of offsets and addresses with arbitrary data.
  
  (define (rtl:detagged-float-offset? expression)
    (and (rtl:float-offset? expression)
-       (let ((base (rtl:float-offset-base expression))
-            (offset (rtl:float-offset-offset expression)))
-        (and (rtl:offset-address? base)
-             (rtl:machine-constant? (rtl:offset-address-offset base))
-             (rtl:detagged-index? (rtl:offset-address-base base)
-                                  offset)))
+       (rtl:machine-constant? (rtl:float-offset-offset expression))
+       (let ((base (rtl:float-offset-base expression)))
+         (and (rtl:float-offset-address? base)
+              (rtl:detagged-index? (rtl:float-offset-address-base base)
+                                   (rtl:float-offset-address-offset base))))
         expression))
  
-(define (with-decoded-detagged-float-offset expression recvr)
-  (let ((base (rtl:float-offset-base expression))
-       (index (rtl:float-offset-offset expression)))
-    (let ((base* (rtl:offset-address-base base)))
-      (recvr (rtl:register-number (if (rtl:register? base*)
-                                     base*
-                                     (rtl:object->address-expression base*)))
-            (rtl:register-number (if (rtl:register? index)
-                                     index
-                                     (rtl:object->datum-expression index)))
-            (rtl:machine-constant-value (rtl:offset-address-offset base))))))
-
-(define (load-float ea sti)
-  (LAP (FLD D ,ea)
-       (FSTP (ST ,(1+ sti)))))
-
-(define (store-float sti ea)
-  (if (zero? sti)
-      (LAP (FST D ,ea))
-      (LAP (FLD (ST ,sti))
-          (FSTP D ,ea))))
+(define (with-decoded-detagged-float-offset expression receiver)
+  (let ((base (rtl:float-offset-base expression)))
+    (let ((base* (rtl:float-offset-address-base base))
+          (index (rtl:float-offset-address-offset base)))
+      (receiver
+       (rtl:register-number (if (rtl:register? base*)
+                                base*
+                                (rtl:object->address-expression base*)))
+       (rtl:register-number (if (rtl:register? index)
+                                index
+                                (rtl:object->datum-expression index)))
+       (rtl:machine-constant-value (rtl:float-offset-offset expression))))))
  \f
  ;;;; Flonum Arithmetic
  
  (define-rule statement
    (ASSIGN (REGISTER (? target))
-         (FLONUM-1-ARG (? operation) (REGISTER (? source)) (? overflow?)))
-  overflow?                            ;ignore
-  ((flonum-1-arg/operator operation) target source))
-
-(define ((flonum-unary-operation/general operate) target source)
-  (define (default)
-    (let* ((source (flonum-source! source))
-          (target (flonum-target! target)))
-      (operate target source)))
-  ;; Attempt to reuse source for target if it is in ST(0).
-  ;; Otherwise we will target ST(0) by sorting the machine registers.
-  (cond ((and (pseudo-register? target) (pseudo-register? source)
-             (eqv? fr0 (pseudo-register-alias *register-map* 'FLOAT source)))
-        (reuse-pseudo-register-alias
-         source 'FLOAT
-         (lambda (alias)
-           (let* ((sti (floreg->sti alias)))
-             (delete-register! alias)
-             (delete-dead-registers!)
-             (add-pseudo-register-alias! target alias)
-             (operate sti sti)))
-         default))
-       (else (default))))
-
-'(define ((flonum-unary-operation/general operate) target source)
-  (define (default)
-    (let* ((source (flonum-source! source))
-          (target (flonum-target! target)))
-      (operate target source)))
-  ;; Attempt to reuse source for target.  This works well when the
-  ;; source is ST(0).  We try to arrange this by sorting the registers
-  ;; to give allocation preference to ST(0).
-  (cond ((pseudo-register? target)
-        (reuse-pseudo-register-alias
-         source 'FLOAT
-         (lambda (alias)
-           (let* ((sti (floreg->sti alias)))
-             (delete-register! alias)
-             (delete-dead-registers!)
-             (add-pseudo-register-alias! target alias)
-             (operate sti sti)))
-         default))
-       (else (default))))
-
-'(define ((flonum-unary-operation/general operate) target source)
-  (define (default)
-    (let* ((source (flonum-source! source))
-          (target (flonum-target! target)))
-      (operate target source)))
-  ;; Attempt to reuse source for target.  This works well when the
-  ;; source is ST(0).  We try to arrange this by sorting the registers
-  ;; to give allocation preference to ST(0).
-  (cond ((pseudo-register? target)
-        (let ((alias
-               (and (dead-register? source)
-                    (pseudo-register-alias *register-map* 'FLOAT source))))
-          (if alias
-              (default)))
-       
-       (reuse-pseudo-register-alias
-         source 'FLOAT
-         (lambda (alias)
-           (let* ((sti (floreg->sti alias)))
-               (delete-register! alias)
-               (delete-dead-registers!)
-               (add-pseudo-register-alias! target alias)
-               (operate sti sti)))
-         default))
-       (else (default))))
-
-(define (flonum-1-arg/operator operation)
+          (FLONUM-1-ARG (? operator) (REGISTER (? source)) (? overflow?)))
+  overflow?                             ;ignore
+  (let* ((source (flonum-source-reference! source))
+         (target (flonum-target-reference! target)))
+    ((flonum-1-arg/operator operator) target source)))
+
+(define-integrable (flonum-1-arg/operator operation)
    (lookup-arithmetic-method operation flonum-methods/1-arg))
  
  (define flonum-methods/1-arg
    (list 'FLONUM-METHODS/1-ARG))
-\f
-;;; Notice the weird ,', syntax here.
-;;; If LAP changes, this may also have to change.
  
-(let-syntax
-    ((define-flonum-operation
-       (sc-macro-transformer
-       (lambda (form environment)
-         environment
-         (let ((primitive-name (cadr form))
-               (opcode (caddr form)))
-           `(define-arithmetic-method ',primitive-name flonum-methods/1-arg
-              (flonum-unary-operation/general
-               (lambda (target source)
-                 (if (and (zero? target) (zero? source))
-                     (LAP (,opcode))
-                     (LAP (FLD (ST ,', source))
-                          (,opcode)
-                          (FSTP (ST ,',(1+ target)))))))))))))
-  (define-flonum-operation FLONUM-NEGATE FCHS)
-  (define-flonum-operation FLONUM-ABS FABS)
-  ;; Disabled: FSIN and FCOS limited to pi * 2^62.
-  ;;(define-flonum-operation FLONUM-SIN FSIN)
-  ;;(define-flonum-operation FLONUM-COS FCOS)
-  (define-flonum-operation FLONUM-SQRT FSQRT)
-  (define-flonum-operation FLONUM-ROUND FRNDINT))
-
-;; These (and FLONUM-ROUND above) presume that the default rounding mode
-;; is round-to-nearest/even
-
-(define (define-rounding prim-name mode)
-  (define-arithmetic-method prim-name flonum-methods/1-arg
-    (flonum-unary-operation/general
-     (lambda (target source)
-       (let ((temp (temporary-register-reference)))
-        (LAP (FSTCW (@R ,regnum:free-pointer))
-             ,@(if (and (zero? target) (zero? source))
-                   (LAP)
-                   (LAP (FLD (ST ,source))))
-             (MOV B ,temp (@RO B ,regnum:free-pointer 1))
-             (OR B (@RO B ,regnum:free-pointer 1) (&U ,mode))
-             (FNLDCW (@R ,regnum:free-pointer))
-             (FRNDINT)
-             (MOV B (@RO B ,regnum:free-pointer 1) ,temp)
-             ,@(if (and (zero? target) (zero? source))
-                   (LAP)
-                   (LAP (FSTP (ST ,(1+ target)))))
-             (FNLDCW (@R ,regnum:free-pointer))))))))
-
-(define-rounding 'FLONUM-CEILING #x08)
-(define-rounding 'FLONUM-FLOOR #x04)
-(define-rounding 'FLONUM-TRUNCATE #x0c)
-\f
-;; This is used in order to avoid using two stack locations for
-;; the remainder unary operations.
-
-(define ((flonum-unary-operation/stack-top operate) target source)
-  (define (finish source->top)
-    ;; Perhaps this can be improved?
-    (rtl-target:=machine-register! target fr0)
-    (LAP ,@source->top
-        ,@(operate)))
-
-  (if (or (machine-register? source)
-         (not (is-alias-for-register? fr0 source))
-         (not (dead-register? source)))
-      (finish (load-machine-register! source fr0))
-      (begin
-       (delete-dead-registers!)
-       (finish (LAP)))))
-
-(define-arithmetic-method 'FLONUM-LOG flonum-methods/1-arg
-  (flonum-unary-operation/stack-top
-   (lambda ()
-     (LAP (FLDLN2)
-         (FXCH (ST 0) (ST 1))
-         (FYL2X)))))
-
-(define-arithmetic-method 'FLONUM-EXP flonum-methods/1-arg
-  (flonum-unary-operation/stack-top
-   (lambda ()
-     ;; Hair to avoid arithmetic for non-finite inputs: exp(-inf) = 0,
-     ;; but exp(x) = x for any other non-finite x.  We use the first
-     ;; free slot (1) to pick apart the double format to check for
-     ;; non-finite inputs, and (2) to avoid using two stack slots.
-     (let ((temp (temporary-register-reference))
-          (infinity-or-nan (generate-label 'INFINITY-OR-NAN))
-          (join (generate-label 'JOIN))
-          (temp-pointer regnum:free-pointer))
-       (LAP (FST D (@R ,temp-pointer))
-           (MOV W ,temp (@RO W ,temp-pointer 4))
-           (AND W ,temp (&U #x7FFFFFFF))
-           (CMP W ,temp (&U #x7FF00000))
-           (JAE B (@PCR ,infinity-or-nan))
-           ;; Compute 2^(x log_2 e) with F2XM1 and FSCALE.
-           (FLDL2E)                    ;st0 = lg e, st1 = x
-           (FMULP (ST 1) (ST 0))       ;st0 = x lg e
-           (FLD (ST 0))                ;st0 = x lg e, st1 = x lg e
-           (FRNDINT)                   ;st0 = I(x lg e), st1 = x lg e
-           (FSUB (ST 1) (ST 0))        ;st0 = I(x lg e), st1 = F(x lg e)
-           (FSTP D (@R ,temp-pointer)) ;st0 = F(x lg e), save I(x lg e)
-           (F2XM1)                     ;st0 = 2^F(x lg e) - 1
-           (FLD1)                      ;st0 = 1, st1 = 2^F(x lg e) - 1
-           (FADD)                      ;st0 = 2^F(x lg e)
-           (FLD D (@R ,temp-pointer))  ;st0 = I(x lg e), st1 = 2^F(x lg e)
-           (FXCH (ST 0) (ST 1))        ;st0 = 2^F(x lg e), st1 = I(x lg e)
-           (FSCALE)                    ;st0 = 2^F(x lg e) * 2^I(x lg e),
-                                       ;st1 = I(x lg e)
-           (FSTP (ST 1))               ;Drop st1, leaving in st0 the value
-           (JMP B (@PCR ,join))        ; 2^(F(x lg e) + I(x lg e)) = e^x.
-         (LABEL ,infinity-or-nan)
-           (CMP W (@RO W ,temp-pointer 4) (&U #xFFF00000))
-           (JNE B (@PCR ,join))
-           (CMP W (@RO W ,temp-pointer 0) (& 0))
-           (JNE B (@PCR ,join))
-           (FSTP (ST 0))               ;Pop argument.
-           (FLDZ)                      ;Return zero.
-         (LABEL ,join))))))
+(define ((flonum-unary-operation/target-bits bit-string operate) target source)
+  (LAP (MOVF S D ,target (@PCR ,(allocate-double-float-bits-label bit-string)))
+       ,@(operate target source)))
+
+(define double-flobits:negative-zero
+  (let ((bit-string (make-bit-string 64 #f)))
+    (bit-string-set! bit-string 63)
+    bit-string))
+
+(define-arithmetic-method 'FLONUM-ABS flonum-methods/1-arg
+  (flonum-unary-operation/target-bits
+   (bit-string-not double-flobits:negative-zero)
+   (lambda (target source)
+     ;; No scalar version, but doing this packed is harmless.
+     (LAP (ANDF P D ,target ,source)))))
+
+(define-arithmetic-method 'FLONUM-NEGATE flonum-methods/1-arg
+  (flonum-unary-operation/target-bits
+   double-flobits:negative-zero
+   (lambda (target source)
+     ;; No scalar version, but doing this packed is harmless.
+     (LAP (XORF P D ,target ,source)))))
+
+(define-arithmetic-method 'FLONUM-SQRT flonum-methods/1-arg
+  (lambda (target source)
+    (LAP (SQRTF S D ,target ,source))))
  \f
-#|
-;; Disabled: FPTAN limited to pi * 2^62.
-(define-arithmetic-method 'FLONUM-TAN flonum-methods/1-arg
-  (flonum-unary-operation/stack-top
-   (lambda ()
-     (LAP (FPTAN)
-         (FSTP (ST 0))                 ; FPOP
-         ))))
-|#
+(define-rule statement
+  (ASSIGN (REGISTER (? target))
+          (FLONUM-2-ARGS (? operator)
+                         (REGISTER (? source1))
+                         (REGISTER (? source2))
+                         (? overflow?)))
+  overflow?                             ;ignore
+  ((flonum-2-args/operator operator) target source1 source2))
  
-(define-arithmetic-method 'FLONUM-ATAN flonum-methods/1-arg
-  (flonum-unary-operation/stack-top
-   (lambda ()
-     (LAP (FLD1)
-         (FPATAN)))))
-
-;; For now, these preserve values in memory
-;; in order to avoid flushing a stack location.
-
-(define-arithmetic-method 'FLONUM-ACOS flonum-methods/1-arg
-  (flonum-unary-operation/stack-top
-   (lambda ()
-     (LAP (FST D (@R ,regnum:free-pointer))
-         (FMUL (ST 0) (ST 0))
-         (FLD1)
-         (F%SUBP (ST 1) (ST 0))
-         (FSQRT)
-         (FLD D (@R ,regnum:free-pointer))
-         (FPATAN)))))
-
-(define-arithmetic-method 'FLONUM-ASIN flonum-methods/1-arg
-  (flonum-unary-operation/stack-top
-   (lambda ()
-     (LAP (FST D (@R ,regnum:free-pointer))
-         (FMUL (ST 0) (ST 0))
-         (FLD1)
-         (F%SUBP (ST 1) (ST 0))
-         (FSQRT)
-         (FLD D (@R ,regnum:free-pointer))
-         (FXCH (ST 0) (ST 1))
-         (FPATAN)))))
-\f
  (define-rule statement
    (ASSIGN (REGISTER (? target))
-         (FLONUM-2-ARGS (? operation)
-                        (REGISTER (? source1))
-                        (REGISTER (? source2))
-                        (? overflow?)))
-  overflow?                            ;ignore
-  ((flonum-2-args/operator operation) target source1 source2))
-
-;; Binary instructions all use ST(0), and are of the forms
-;;   Fop ST(0),ST(i)
-;;   Fop ST(i),ST(0)
-;;   FopP ST(i),ST(0)
-;;   Fop ST(0),memory
-;;
-;; If possible, we like to target ST(0) since it is likely to be the
-;; source of a subsequent operation.  Failing that, it is good to
-;; reuse one of the source aliases.
-
-(define ((flonum-binary-operation operate) target source1 source2)
-  (define (default)
-    (let* ((sti1 (flonum-source! source1))
-          (sti2 (flonum-source! source2)))
-      (operate (flonum-target! target) sti1 sti2)))
-  (define (try-reuse-1 if-cannot)
-    (reuse-pseudo-register-alias
-     source1 'FLOAT
-     (lambda (alias1)
-       (let* ((sti1 (floreg->sti alias1))
-             (sti2 (if (= source1 source2)
-                       sti1
-                       (flonum-source! source2))))
-        (delete-register! alias1)
-        (delete-dead-registers!)
-        (add-pseudo-register-alias! target alias1)
-        (operate sti1 sti1 sti2)))
-     if-cannot))
-  (define (try-reuse-2 if-cannot)
-    (reuse-pseudo-register-alias
-     source2 'FLOAT
-     (lambda (alias2)
-       (let* ((sti2 (floreg->sti alias2))
-             (sti1 (if (= source1 source2)
-                       sti2
-                       (flonum-source! source1))))
-        (delete-register! alias2)
-        (delete-dead-registers!)
-        (add-pseudo-register-alias! target alias2)
-        (operate sti2 sti1 sti2)))
-     if-cannot))
-  (cond ((pseudo-register? target)
-        (if (is-alias-for-register? fr0 source1)
-            (try-reuse-1 (lambda () (try-reuse-2 default)))
-            (try-reuse-2 (lambda () (try-reuse-1 default)))))
-       ((not (eq? (register-type target) 'FLOAT))
-        (error "flonum-2-args: Wrong type register" target 'FLOAT))
-       (else (default))))
-
-(define (flonum-2-args/operator operation)
-  (lookup-arithmetic-method operation flonum-methods/2-args))
+          (FLONUM-2-ARGS (? operator)
+                         (REGISTER (? source))
+                         (OBJECT->FLOAT (CONSTANT (? constant)))
+                         (? overflow?)))
+  overflow?                             ;ignore
+  ((flonum-register*constant/operator operator) target source constant))
+
+(define-rule statement
+  (ASSIGN (REGISTER (? target))
+          (FLONUM-2-ARGS (? operator)
+                         (OBJECT->FLOAT (CONSTANT (? constant)))
+                         (REGISTER (? source))
+                         (? overflow?)))
+  overflow?                             ;ignore
+  ((flonum-constant*register/operator operator) target constant source))
  
  (define flonum-methods/2-args
    (list 'FLONUM-METHODS/2-ARGS))
  
-(define (flonum-1-arg%1/operator operation)
-  (lookup-arithmetic-method operation flonum-methods/1-arg%1))
+(define flonum-methods/register*constant
+  (list 'FLONUM-METHODS/REGISTER*CONSTANT))
  
-(define flonum-methods/1-arg%1
-  (list 'FLONUM-METHODS/1-ARG%1))
+(define flonum-methods/constant*register
+  (list 'FLONUM-METHODS/CONSTANT*REGISTER))
  
-(define (flonum-1%1-arg/operator operation)
-  (lookup-arithmetic-method operation flonum-methods/1%1-arg))
+(define-integrable (flonum-2-args/operator operator)
+  (lookup-arithmetic-method operator flonum-methods/2-args))
  
-(define flonum-methods/1%1-arg
-  (list 'FLONUM-METHODS/1%1-ARG))
+(define-integrable (flonum-register*constant/operator operator)
+  (lookup-arithmetic-method operator flonum-methods/register*constant))
  
-(define (binary-flonum-arithmetic? operation)
-  (memq operation '(FLONUM-ADD FLONUM-SUBTRACT FLONUM-MULTIPLY FLONUM-DIVIDE)))
+(define-integrable (flonum-constant*register/operator operator)
+  (lookup-arithmetic-method operator flonum-methods/constant*register))
  \f
-(let-syntax
-    ((define-flonum-operation
-       (sc-macro-transformer
-       (lambda (form environment)
-         environment
-         (let ((primitive-name (list-ref form 1))
-               (op1%2 (list-ref form 2))
-               (op1%2p (list-ref form 3))
-               (op2%1 (list-ref form 4))
-               (op2%1p (list-ref form 5)))
-           `(begin
-              (define-arithmetic-method ',primitive-name flonum-methods/2-args
-                (flonum-binary-operation
-                 (lambda (target source1 source2)
-                   (cond ((= target source1)
-                          (cond ((zero? target)
-                                 (LAP (,op1%2 (ST 0) (ST ,',source2))))
-                                ((zero? source2)
-                                 (LAP (,op2%1 (ST ,',target) (ST 0))))
-                                (else
-                                 (LAP (FLD (ST ,',source2))
-                                      (,op2%1p (ST ,',(1+ target)) (ST 0))))))
-                         ((= target source2)
-                          (cond ((zero? target)
-                                 (LAP (,op2%1 (ST 0) (ST ,',source1))))
-                                ((zero? source1)
-                                 (LAP (,op1%2 (ST ,',target) (ST 0))))
-                                (else
-                                 (LAP (FLD (ST ,',source1))
-                                      (,op1%2p (ST ,',(1+ target)) (ST 0))))))
-                         (else
-                          (LAP (FLD (ST ,',source1))
-                               (,op1%2 (ST 0) (ST ,',(1+ source2)))
-                               (FSTP (ST ,',(1+ target)))))))))
-
-              (define-arithmetic-method ',primitive-name
-                flonum-methods/1%1-arg
-                (flonum-unary-operation/general
-                 (lambda (target source)
-                   (if (= source target)
-                       (LAP (FLD1)
-                            (,op1%2p (ST ,',(1+ target)) (ST 0)))
-                       (LAP (FLD1)
-                            (,op1%2 (ST 0) (ST ,',(1+ source)))
-                            (FSTP (ST ,',(1+ target))))))))
-
-              (define-arithmetic-method ',primitive-name
-                flonum-methods/1-arg%1
-                (flonum-unary-operation/general
-                 (lambda (target source)
-                   (if (= source target)
-                       (LAP (FLD1)
-                            (,op2%1p (ST ,',(1+ target)) (ST 0)))
-                       (LAP (FLD1)
-                            (,op2%1 (ST 0) (ST ,',(1+ source)))
-                            (FSTP (ST ,',(1+ target))))))))))))))
-
-  (define-flonum-operation FLONUM-ADD FADD FADDP FADD FADDP)
-  (define-flonum-operation FLONUM-SUBTRACT F%SUB F%SUBP F%SUBR F%SUBPR)
-  (define-flonum-operation FLONUM-MULTIPLY FMUL FMULP FMUL FMULP)
-  (define-flonum-operation FLONUM-DIVIDE F%DIV F%DIVP F%DIVR F%DIVPR))
-\f
-(define-arithmetic-method 'FLONUM-ATAN2 flonum-methods/2-args
-  (lambda (target source1 source2)
-    (if (and (not (machine-register? source1))
-            (is-alias-for-register? fr0 source1)
-            (dead-register? source1))
-       (let ((source2 (flonum-source! source2)))
-         (delete-dead-registers!)
-         (rtl-target:=machine-register! target fr0)
-         (LAP (FLD (ST ,source2))
-              (FPATAN)))
-       (begin
-         (prefix-instructions! (load-machine-register! source1 fr0))
-         (need-register! fr0)
-         (let ((source2
-                (if (= source2 source1) fr0 (flonum-source! source2))))
-           (delete-dead-registers!)
-           (rtl-target:=machine-register! target fr0)
-           (LAP (FLD (ST ,source2))
-                (FPATAN)))))))
-
-(define-arithmetic-method 'FLONUM-REMAINDER flonum-methods/2-args
-  (flonum-binary-operation
-   (lambda (target source1 source2)
-     (if (zero? source2)
-        (LAP (FLD (ST ,source1))
-             (FPREM1)
-             (FSTP (ST ,(1+ target))))
-        #|
-        ;; This sequence is one cycle shorter than the one below,
-        ;; but needs two spare stack locations instead of one.
-        ;; Since FPREM1 is a variable, very slow instruction,
-        ;; the difference in time will hardly be noticeable
-        ;; but the availability of an extra "register" may be.
-        (LAP (FLD (ST ,source2))
-             (FLD (ST ,source1))
-             (FPREM1)
-             (FSTP (ST ,(+ target 2)))
-             (FSTP (ST 0)))            ; FPOP
-        |#
-        (LAP (FXCH (ST 0) (ST ,source2))
-             (FLD (ST ,(if (zero? source1) source2 source1)))
-             (FPREM1)
-             (FSTP (ST ,(1+ (if (= target source2)
-                                0
-                                target))))
-             (FXCH (ST 0) (ST ,source2)))))))
-\f
-(define-rule statement
-  (ASSIGN (REGISTER (? target))
-         (FLONUM-2-ARGS FLONUM-SUBTRACT
-                        (OBJECT->FLOAT (CONSTANT 0.))
-                        (REGISTER (? source))
-                        (? overflow?)))
-  overflow?                            ;ignore
-  ((flonum-unary-operation/general
-    (lambda (target source)
-      (if (and (zero? target) (zero? source))
-         (LAP (FCHS))
-         (LAP (FLD (ST ,source))
-              (FCHS)
-              (FSTP (ST ,(1+ target)))))))
-   target source))
+(define ((flonum-2-args/standard commutative? operate) target source1 source2)
+  (binary-register-operation operate commutative? 'FLOAT
+                             (lambda (target source)
+                               (LAP (MOVF S D ,target ,source)))
+                             target source1 source2))
+
+(define ((flonum-register*constant/standard operate) target source constant)
+  (with-float-operand constant
+    (lambda (operand)
+      (operate
+       (register-reference (move-to-alias-register! source 'FLOAT target))
+       operand))))
+
+;++ Possible improvement, not currently easy with the generic register
+;++ allocator operations provided: if the constant is zero and we have
+;++ a temporary register available, we can zero that with XOR and use
+;++ it in the place of loading a PC-relative double in memory.
+
+(define ((flonum-constant*register/commutative operate) target constant source)
+  (with-float-operand constant
+    (lambda (operand)
+      (operate
+       (register-reference (move-to-alias-register! source 'FLOAT target))
+       operand))))
+
+(define ((flonum-constant*register/noncommutative operate)
+         target constant source)
+  (let* ((source (flonum-source-reference! source))
+         (target (flonum-target-reference! target)))
+    (with-float-operand constant
+      (lambda (operand)
+        (LAP (MOVF S D ,target ,operand)
+             ,@(operate target source))))))
  
-(define-rule statement
-  (ASSIGN (REGISTER (? target))
-         (FLONUM-2-ARGS (? operation)
-                        (REGISTER (? source))
-                        (OBJECT->FLOAT (CONSTANT 1.))
-                        (? overflow?)))
-  (QUALIFIER (binary-flonum-arithmetic? operation))
-  overflow?                            ;ignore
-  ((flonum-1-arg%1/operator operation) target source))
-
-(define-rule statement
-  (ASSIGN (REGISTER (? target))
-         (FLONUM-2-ARGS (? operation)
-                        (OBJECT->FLOAT (CONSTANT 1.))
-                        (REGISTER (? source))
-                        (? overflow?)))
-  (QUALIFIER (binary-flonum-arithmetic? operation))
-  overflow?                            ;ignore
-  ((flonum-1%1-arg/operator operation) target source))
+(let-syntax
+    ((binary-operation
+      (sc-macro-transformer
+       (lambda (form environment)
+         environment                    ;ignore
+         (let ((name (cadr form))
+               (op (caddr form))
+               (commutative? (cadddr form)))
+           `(let ((operate
+                   (lambda (target source)
+                     (LAP (,op S D ,',target ,',source)))))
+              (define-arithmetic-method ',name flonum-methods/2-args
+                (flonum-2-args/standard ,commutative? operate))
+              (define-arithmetic-method ',name flonum-methods/register*constant
+                (flonum-register*constant/standard operate))
+              (define-arithmetic-method ',name flonum-methods/constant*register
+                (,(if commutative?
+                      'flonum-constant*register/commutative
+                      'flonum-constant*register/noncommutative)
+                 operate))))))))
+  (binary-operation FLONUM-ADD ADDF #t)
+  (binary-operation FLONUM-DIVIDE DIVF #f)
+  (binary-operation FLONUM-MULTIPLY MULF #t)
+  (binary-operation FLONUM-SUBTRACT SUBF #f))
  \f
  ;;;; Flonum Predicates
  
-(define-rule predicate
-  (FLONUM-PRED-1-ARG (? predicate) (REGISTER (? source)))
-  (flonum-compare-zero predicate source))
-
-(define-rule predicate
-  (FLONUM-PRED-2-ARGS (? predicate)
-                     (REGISTER (? source1))
-                     (REGISTER (? source2)))
-  (let* ((st1 (flonum-source! source1))
-        (st2 (flonum-source! source2)))
-    (cond ((zero? st1)
-          (flonum-branch! predicate
-                          (LAP (FCOM (ST 0) (ST ,st2)))))
-         ((zero? st2)
-          (flonum-branch! (commute-flonum-predicate predicate)
-                          (LAP (FCOM (ST 0) (ST ,st1)))))
-         (else
-          (flonum-branch! predicate
-                          (LAP (FLD (ST ,st1))
-                               (FCOMP (ST 0) (ST ,(1+ st2)))))))))
+(define double-flobits:zero
+  (make-bit-string 64 #f))
  
  (define-rule predicate
-  (FLONUM-PRED-2-ARGS (? predicate)
-                     (REGISTER (? source))
-                     (OBJECT->FLOAT (CONSTANT 0.)))
-  (flonum-compare-zero predicate source))
+  (FLONUM-PRED-1-ARG (? predicate) (REGISTER (? source)))
+  (flonum-branch!
+   predicate
+   (flonum-source-reference! source)
+   (INST-EA (@PCR ,(allocate-double-float-bits-label double-flobits:zero)))))
  
  (define-rule predicate
    (FLONUM-PRED-2-ARGS (? predicate)
-                     (OBJECT->FLOAT (CONSTANT 0.))
-                     (REGISTER (? source)))
-  (flonum-compare-zero (commute-flonum-predicate predicate) source))
+                      (REGISTER (? source1))
+                      (REGISTER (? source2)))
+  (flonum-branch! predicate
+                  (flonum-source-reference! source1)
+                  (flonum-source-reference! source2)))
  
  (define-rule predicate
    (FLONUM-PRED-2-ARGS (? predicate)
-                     (REGISTER (? source))
-                     (OBJECT->FLOAT (CONSTANT 1.)))
-  (flonum-compare-one predicate source))
+                      (REGISTER (? source))
+                      (OBJECT->FLOAT (CONSTANT (? constant))))
+  (with-float-operand constant
+    (lambda (operand)
+      (flonum-branch! predicate (flonum-source-reference! source) operand))))
  
  (define-rule predicate
    (FLONUM-PRED-2-ARGS (? predicate)
-                     (OBJECT->FLOAT (CONSTANT 1.))
-                     (REGISTER (? source)))
-  (flonum-compare-one (commute-flonum-predicate predicate) source))
-\f
-(define (flonum-compare-zero predicate source)
-  (let ((sti (flonum-source! source)))
-    (if (zero? sti)
-       (flonum-branch! predicate
-                       (LAP (FTST)))
-       (flonum-branch! (commute-flonum-predicate predicate)
-                       (LAP (FLDZ)
-                            (FCOMP (ST 0) (ST ,(1+ sti))))))))
-
-(define (flonum-compare-one predicate source)
-  (let ((sti (flonum-source! source)))
-    (flonum-branch! (commute-flonum-predicate predicate)
-                   (LAP (FLD1)
-                        (FCOMP (ST 0) (ST ,(1+ sti)))))))
-
-(define (commute-flonum-predicate pred)
-  (case pred
+                      (OBJECT->FLOAT (CONSTANT (? constant)))
+                      (REGISTER (? source)))
+  (with-float-operand constant
+    (lambda (operand)
+      (flonum-branch! (commute-flonum-predicate predicate)
+                      (flonum-source-reference! source)
+                      operand))))
+
+(define (commute-flonum-predicate predicate)
+  (case predicate
      ((FLONUM-EQUAL? FLONUM-ZERO?) 'FLONUM-EQUAL?)
      ((FLONUM-LESS? FLONUM-NEGATIVE?) 'FLONUM-GREATER?)
      ((FLONUM-GREATER? FLONUM-POSITIVE?) 'FLONUM-LESS?)
-    (else
-     (error "commute-flonum-predicate: Unknown predicate" pred))))
+    (else (error "commute-flonum-predicate: Unknown predicate" predicate))))
  
-(define (flonum-branch! predicate prefix)
+(define (flonum-branch! predicate source1 source2)
    (case predicate
      ((FLONUM-EQUAL? FLONUM-ZERO?)
       (set-current-branches! (lambda (label)
-                             (let ((unordered (generate-label 'UNORDERED)))
-                               (LAP (JP (@PCR ,unordered))
-                                    (JE (@PCR ,label))
-                                    (LABEL ,unordered))))
-                           (lambda (label)
-                             (LAP (JNE (@PCR ,label))
-                                  (JP (@PCR ,label))))))
+                              (let ((unordered (generate-label 'UNORDERED)))
+                                (LAP (JP (@PCR ,unordered))
+                                     (JE (@PCR ,label))
+                                     (LABEL ,unordered))))
+                            (lambda (label)
+                              (LAP (JNE (@PCR ,label))
+                                   (JP (@PCR ,label))))))
      ((FLONUM-LESS? FLONUM-NEGATIVE?)
       (set-current-branches! (lambda (label)
-                             (let ((unordered (generate-label 'UNORDERED)))
-                               (LAP (JP (@PCR ,unordered))
-                                    (JB (@PCR ,label))
-                                    (LABEL ,unordered))))
-                           (lambda (label)
-                             (LAP (JAE (@PCR ,label))
-                                  (JP (@PCR ,label))))))
+                              (let ((unordered (generate-label 'UNORDERED)))
+                                (LAP (JP (@PCR ,unordered))
+                                     (JB (@PCR ,label))
+                                     (LABEL ,unordered))))
+                            (lambda (label)
+                              (LAP (JAE (@PCR ,label))
+                                   (JP (@PCR ,label))))))
      ((FLONUM-GREATER? FLONUM-POSITIVE?)
       (set-current-branches! (lambda (label)
-                             (LAP (JA (@PCR ,label))))
-                           (lambda (label)
-                             (LAP (JBE (@PCR ,label))))))
+                              (LAP (JA (@PCR ,label))))
+                            (lambda (label)
+                              (LAP (JBE (@PCR ,label))))))
      (else
       (error "flonum-branch!: Unknown predicate" predicate)))
-  (flush-register! eax)
-  (LAP ,@prefix
-       (FSTSW (R ,eax))
-       (SAHF)))
-\f
-;; This is endianness dependent!
-
-(define (flonum-value->data-decl value)
-  (let ((high (make-bit-string 32 false))
-       (low (make-bit-string 32 false)))
-    (read-bits! value 32 high)
-    (read-bits! value 64 low)
-    (LAP ,@(lap:comment `(FLOAT ,value))
-        (LONG U ,(bit-string->unsigned-integer high))
-        (LONG U ,(bit-string->unsigned-integer low)))))
-
-(define (flo:32-bit-representation-exact? value)
-  ;; Returns unsigned long representation if 32 bit representation
-  ;; exists, i.e. if all `1' significant mantissa bits fit in the 32
-  ;; bit format and the exponent is within range.
-  (let ((mant-diff (make-bit-string (- 52 23) false)))
-    (read-bits! value (+ 32 0) mant-diff)
-    (and (bit-string-zero? mant-diff)
-        (let ((expt64 (make-bit-string 11 false)))
-          (read-bits! value (+ 32 52) expt64)
-          (let ((expt (- (bit-string->unsigned-integer expt64) 1022)))
-            (and (<= -127 expt 127)
-                 (let ((sign (make-bit-string 1  false))
-                       (mant32 (make-bit-string 23 false)))
-                   (read-bits! value (+ 32 52 11) sign)
-                   (read-bits! value (+ 32 52 -23) mant32)
-                   (bit-string->unsigned-integer
-                    (bit-string-append
-                     (bit-string-append
-                      mant32
-                      (unsigned-integer->bit-string 8 (+ 126 expt)))
-                     sign)))))))))
-
-(define (flonum->label value block-name alignment offset data)
-  (let* ((block
-         (or (find-extra-code-block block-name)
-             (let ((block (declare-extra-code-block! block-name
-                                                     'ANYWHERE
-                                                     '())))
-               (add-extra-code!
-                block
-                (LAP (PADDING ,offset ,alignment ,padding-string)))
-               block)))
-        (pairs (extra-code-block/xtra block))
-        (place (assoc value pairs)))
-    (if place
-       (cdr place)
-       (let ((label (generate-label block-name)))
-         (set-extra-code-block/xtra!
-          block
-          (cons (cons value label) pairs))
-         (add-extra-code! block
-                          (LAP (LABEL ,label)
-                               ,@data))
-         label))))
-
-(define (double-flonum->label fp-value)
-  (flonum->label fp-value 'DOUBLE-FLOATS 8 0
-                (flonum-value->data-decl fp-value)))
-
-(define (single-flonum->label fp-value)
-  (flonum->label fp-value 'SINGLE-FLOATS 4 0
-                (LAP ,@(lap:comment `(SINGLE-FLOAT ,fp-value))
-                     (LONG U ,(flo:32-bit-representation-exact? fp-value)))))
-\f                                   
+  (LAP (UCOMISF D ,source1 ,source2)))
+\f                                   
  (define-rule statement
    (ASSIGN (REGISTER (? target)) (OBJECT->FLOAT (CONSTANT (? fp-value))))
    (cond ((not (flo:flonum? fp-value))
-        (error "OBJECT->FLOAT: Not a floating-point value" fp-value))
-       ((flo:= fp-value 0.0)
-        (let ((target (flonum-target! target)))
-          (LAP (FLDZ)
-               (FSTP (ST ,(1+ target))))))
-       ((flo:= fp-value 1.0)
-        (let ((target (flonum-target! target)))
-          (LAP (FLD1)
-               (FSTP (ST ,(1+ target))))))
-       (compiler:cross-compiling?
-        (let* ((temp (allocate-temporary-register! 'GENERAL))
-               (target (flonum-target! target)))
-          (LAP ,@(load-constant (register-reference temp) fp-value)
-               ,@(object->float temp target))))
-       (else
-        (let ((target (flonum-target! target)))
-          (with-pcr-float fp-value
-             (lambda (ea size)
-               (LAP (FLD ,size ,ea)
-                    (FSTP (ST ,(1+ target))))))))))
-
-(define (with-pcr-float fp-value receiver)
-  (define (generate-ea label-expr size)
-    (with-pc
-     (lambda (pc-label pc-register)
-       (receiver (INST-EA (@RO W ,pc-register (- ,label-expr ,pc-label)))
-                size))))
-  (if (flo:32-bit-representation-exact? fp-value)
-      (generate-ea (single-flonum->label fp-value) 'S)
-      (generate-ea (double-flonum->label fp-value) 'D)))
-|#
+         (error "OBJECT->FLOAT: Not a floating-point value" fp-value))
+        ((flo:= fp-value 0.0)
+         (let ((target (flonum-target-reference! target)))
+           (LAP (XORF P D ,target ,target))))
+        (else
+         (with-float-operand fp-value
+           (lambda (operand)
+             (LAP (MOVF S D ,(flonum-target-reference! target) ,operand)))))))
+
+(define (with-float-operand fp-value receiver)
+  (if (not (flo:flonum? fp-value))
+      (error "Invalid constant flonum operand:" fp-value))
+  (if compiler:cross-compiling?
+      (let ((temp (allocate-temporary-register! 'GENERAL)))
+        (LAP ,@(load-constant (register-reference temp) fp-value)
+             ,@(object->address (register-reference temp))
+             ,@(receiver (INST-EA (@RO ,temp ,flonum-data-offset)))))
+      (receiver (INST-EA (@PCR ,(allocate-double-float-label fp-value))))))
+
+(define (allocate-double-float-bits-label bit-string)
+  (allocate-data-label bit-string 'DOUBLE-FLOATS 0 8
+    (LAP (QUAD U ,(bit-string->unsigned-integer bit-string)))))
+
+(define (allocate-single-float-bits-label bit-string)
+  (allocate-data-label bit-string 'SINGLE-FLOATS 0 4
+    (LAP (LONG U ,(bit-string->unsigned-integer bit-string)))))
+
+(define (allocate-double-float-label flonum)
+  (allocate-double-float-bits-label
+   (let ((bit-string (make-bit-string 64 #f)))
+     ;; Skip the manifest preceding the flonum data.  Is there a
+     ;; better way to express this?
+     (let* ((bytes-per-object (vector-ref (gc-space-status) 0))
+            (bits-per-object (* 8 bytes-per-object))
+            (flonum-data-offset-in-bits bits-per-object))
+       (read-bits! flonum flonum-data-offset-in-bits bit-string))
+     bit-string)))
diff --git a/src/compiler/machines/x86-64/rulrew.scm b/src/compiler/machines/x86-64/rulrew.scm

index 70259dc8ef6cbed06e82cb529ad181ee606ae824..185d1bd4c577e7935481a6a014aed1f35995c376 100644 (file)
--- a/src/compiler/machines/x86-64/rulrew.scm
+++ b/src/compiler/machines/x86-64/rulrew.scm
@@ -215,94 +215,24 @@ USA.
  \f
  (define-rule rewriting
    (OBJECT->FLOAT (REGISTER (? operand register-known-value)))
-  (QUALIFIER
-   (rtl:constant-flonum-test operand (lambda (v) v #T)))
+  ;; This is not quite what we want.  We really want to rewrite all
+  ;; OBJECT->FLOAT expressions with known constant operands, not just
+  ;; the nonzero ones, and then decide later whether to put it in
+  ;; memory based on whether there is a temporary register that we can
+  ;; zero with XOR.  By not rewriting this case when the constant is
+  ;; zero, using a temporary may cause some other register to be
+  ;; written to memory, which defeats the purpose of using XOR to
+  ;; avoid memory access.
+  (QUALIFIER (rtl:constant-flonum-test operand flo:nonzero?))
    (rtl:make-object->float operand))
  
  (define-rule rewriting
    (FLONUM-2-ARGS FLONUM-SUBTRACT
-                (REGISTER (? operand-1 register-known-value))
-                (? operand-2)
-                (? overflow?))
-  (QUALIFIER (rtl:constant-flonum-test operand-1 flo:zero?))
-  (rtl:make-flonum-2-args 'FLONUM-SUBTRACT operand-1 operand-2 overflow?))
-
-(define-rule rewriting
-  (FLONUM-2-ARGS (? operation)
-                (REGISTER (? operand-1 register-known-value))
-                (? operand-2)
-                (? overflow?))
-  (QUALIFIER
-   (and (memq operation
-             '(FLONUM-ADD FLONUM-SUBTRACT FLONUM-MULTIPLY FLONUM-DIVIDE))
-       (rtl:constant-flonum-test operand-1 flo:one?)))
-  (rtl:make-flonum-2-args operation operand-1 operand-2 overflow?))
-
-(define-rule rewriting
-  (FLONUM-2-ARGS (? operation)
-                (? operand-1)
-                (REGISTER (? operand-2 register-known-value))
-                (? overflow?))
-  (QUALIFIER
-   (and (memq operation
-             '(FLONUM-ADD FLONUM-SUBTRACT FLONUM-MULTIPLY FLONUM-DIVIDE))
-       (rtl:constant-flonum-test operand-2 flo:one?)))
-  (rtl:make-flonum-2-args operation operand-1 operand-2 overflow?))
-
-(define-rule rewriting
-  (FLONUM-PRED-2-ARGS (? predicate)
-                     (? operand-1)
-                     (REGISTER (? operand-2 register-known-value)))
-  (QUALIFIER (rtl:constant-flonum-test operand-2 flo:zero?))
-  (list 'FLONUM-PRED-2-ARGS predicate operand-1 operand-2))
-
-(define-rule rewriting
-  (FLONUM-PRED-2-ARGS (? predicate)
-                     (REGISTER (? operand-1 register-known-value))
-                     (? operand-2))
-  (QUALIFIER (rtl:constant-flonum-test operand-1 flo:zero?))
-  (list 'FLONUM-PRED-2-ARGS predicate operand-1 operand-2))
-\f
-#|
-;; These don't work as written.  They are not simplified and are
-;; therefore passed whole to the back end, and there is no way to
-;; construct the graph at this level.
-
-;; acos (x) = atan ((sqrt (1 - x^2)) / x)
-
-(define-rule pre-cse-rewriting
-  (FLONUM-1-ARG FLONUM-ACOS (? operand) #f)
-  (rtl:make-flonum-2-args
-   'FLONUM-ATAN2
-   (rtl:make-flonum-1-arg
-    'FLONUM-SQRT
-    (rtl:make-flonum-2-args
-     'FLONUM-SUBTRACT
-     (rtl:make-object->float (rtl:make-constant 1.))
-     (rtl:make-flonum-2-args 'FLONUM-MULTIPLY operand operand false)
-     false)
-    false)
-   operand
-   false))
-
-;; asin (x) = atan (x / (sqrt (1 - x^2)))
-
-(define-rule pre-cse-rewriting
-  (FLONUM-1-ARG FLONUM-ASIN (? operand) #f)
-  (rtl:make-flonum-2-args
-   'FLONUM-ATAN2
-   operand
-   (rtl:make-flonum-1-arg
-    'FLONUM-SQRT
-    (rtl:make-flonum-2-args
-     'FLONUM-SUBTRACT
-     (rtl:make-object->float (rtl:make-constant 1.))
-     (rtl:make-flonum-2-args 'FLONUM-MULTIPLY operand operand false)
-     false)
-    false)
-   false))
-
-|#
+                 (REGISTER (? operand1 register-known-value))
+                 (? operand2)
+                 (? overflow?))
+  (QUALIFIER (rtl:constant-flonum-test operand1 flo:zero?))
+  (rtl:make-flonum-1-arg 'FLONUM-NEGATE operand2 overflow?))
  
  (define (rtl:constant-flonum-test expression predicate)
    (and (rtl:object->float? expression)
@@ -312,8 +242,8 @@ USA.
                 (and (flo:flonum? n)
                      (predicate n)))))))
  
-(define (flo:one? value)
-  (flo:= value 1.))
+(define-integrable (flo:nonzero? value)
+  (not (flo:= value 0.)))
  \f
  ;;;; Indexed addressing modes
  
@@ -336,19 +266,6 @@ USA.
                 (MACHINE-CONSTANT (? value)))
    (QUALIFIER (and (rtl:float-offset-address? base)
                   (rtl:simple-subexpressions? base)))
-  (if (zero? value)
-      (rtl:make-float-offset
-       (rtl:float-offset-address-base base)
-       (rtl:float-offset-address-offset base))
-      (rtl:make-float-offset base (rtl:make-machine-constant value))))
-
-(define-rule rewriting
-  (FLOAT-OFFSET (REGISTER (? base register-known-value))
-               (MACHINE-CONSTANT (? value)))
-  (QUALIFIER
-   (and (rtl:offset-address? base)
-       (rtl:simple-subexpressions? base)
-       (rtl:machine-constant? (rtl:offset-address-offset base))))   
    (rtl:make-float-offset base (rtl:make-machine-constant value)))
  
  ;; This is here to avoid generating things like
diff --git a/src/microcode/cmpauxmd/x86-64.m4 b/src/microcode/cmpauxmd/x86-64.m4

index d134362f5851f6815290d001aec3743c3ce43336..52cf079953a29ff6b167b7aca34e4d66a02a174f 100644 (file)
--- a/src/microcode/cmpauxmd/x86-64.m4
+++ b/src/microcode/cmpauxmd/x86-64.m4
@@ -170,7 +170,7 @@ ifdef(`SUPPRESS_LEADING_UNDERSCORE',
  ifdef(`WCC386R',
        `define(EFR,`$1_')',
        `define(EFR,`EPFR($1)')')
-
+\f
  define(hook_reference,`EFR(asm_$1)')
  
  define(define_data,`export_label(EVR($1))')
@@ -211,6 +211,10 @@ ifdef(`DASM',
        `define(allocate_space,`EVR($1) db $2 dup (0)')',
        `define(allocate_space,`EVR($1):
         .space $2')')
+
+ifdef(`DASM',
+      `define(define_double,`EVR($1) dq $2')',
+      `define(define_double,`EVR($1):  .double $2')')
  \f
  ifdef(`DASM',
        `define(HEX, `0$1H')',
@@ -257,8 +261,8 @@ ifdef(`DASM',
        `define(LOF,`$1($2)')')
  
  ifdef(`DASM',
-      `define(DOF,`qword ptr $1[$2]')',
-      `define(DOF,`$1($2)')')
+      `define(QOF,`qword ptr $1[$2]')',
+      `define(QOF,`$1($2)')')
  
  ifdef(`DASM',
        `define(IDX,`dword ptr [$1] [$2]')',
@@ -290,11 +294,21 @@ define(TC_COMPILED_ENTRY,40)
  
  # TAG doesn't work due to m4 stupidity, so define these magic
  # constants here.  These are computed in terms of the parameters
-# above.
+# above, and ordered lexicographically.
  
+define(IMM_FALSE, `IMM(HEX(0000000000000000))')
+define(IMM_FIXNUM_0, `IMM(HEX(6800000000000000))')
+define(IMM_FLONUM_0, `IMM(HEX(1800000000000000))')
  define(IMM_MANIFEST_NM_VECTOR_1, `IMM(HEX(9c00000000000001))')
  define(IMM_TRUE, `IMM(HEX(2000000000000000))')
-define(IMM_FALSE, `IMM(HEX(0000000000000000))')
+
+# Flonums are represented by tagged pointers to the first of two
+# quadwords (sixteen bytes) in memory, the first of which is a
+# non-marked vector manifest of length 1, so that the GC will not
+# trace the other one, which is an IEEE 754 double-precision format
+# value.
+define(FLONUM_DATA_OFFSET,8)
+define(FLONUM_STORAGE_SIZE,16)
  
  define(REGBLOCK_VAL,16)
  define(REGBLOCK_COMPILER_TEMP,32)
@@ -333,6 +347,10 @@ allocate_quadword(C_Stack_Pointer)
  
  define_data(C_Frame_Pointer)
  allocate_quadword(C_Frame_Pointer)
+
+declare_alignment(8)
+define_double(flonum_zero,0.0)
+define_double(flonum_one,1.0)
  \f
  DECLARE_CODE_SEGMENT()
  declare_alignment(2)
@@ -421,12 +439,12 @@ ifdef(`WIN32',                                            # Register block = %rsi
  `      OP(mov,q)       TW(ABS(EVR(RegistersPtr)),regs)',
  `      OP(lea,q)       TW(ABS(EVR(Registers)),regs)')
         OP(mov,q)       TW(ABS(EVR(Free)),rfree)        # Free pointer = %rdi
-       OP(mov,q)       TW(DOF(REGBLOCK_VAL(),regs),REG(rax)) # Value/dynamic link
+       OP(mov,q)       TW(QOF(REGBLOCK_VAL(),regs),REG(rax)) # Value/dynamic link
         OP(mov,q)       TW(IMM(ADDRESS_MASK),rmask)     # = %rbp
         OP(mov,q)       TW(ABS(EVR(stack_pointer)),REG(rsp))
         OP(mov,q)       TW(REG(rax),REG(rcx))           # Preserve if used
         OP(and,q)       TW(rmask,REG(rcx))              # Restore potential dynamic link
-       OP(mov,q)       TW(REG(rcx),DOF(REGBLOCK_DLINK(),regs))
+       OP(mov,q)       TW(REG(rcx),QOF(REGBLOCK_DLINK(),regs))
         jmp             IJMP(REG(rdx))
  
  IF_WIN32(`
@@ -471,7 +489,7 @@ define_jump_indirection(interrupt_closure,18)
  define_jump_indirection(interrupt_continuation_2,3b)
  
  define_hook_label(interrupt_dlink)
-       OP(mov,q)       TW(DOF(REGBLOCK_DLINK(),regs),REG(rdx))
+       OP(mov,q)       TW(QOF(REGBLOCK_DLINK(),regs),REG(rdx))
         OP(mov,b)       TW(IMM(HEX(19)),REG(al))
         jmp     scheme_to_interface_call
  
@@ -551,21 +569,33 @@ asm_generic_fixnum_result:
         OP(and,q)       TW(rmask,IND(REG(rsp)))
         OP(or,b)        TW(IMM(TC_FIXNUM),REG(al))
         OP(ror,q)       TW(IMM(TC_LENGTH),REG(rax))
-       OP(mov,q)       TW(REG(rax),LOF(REGBLOCK_VAL(),regs))
+       OP(mov,q)       TW(REG(rax),QOF(REGBLOCK_VAL(),regs))
+       ret
+
+declare_alignment(2)
+asm_generic_flonum_result:
+       OP(and,q)       TW(rmask,IND(REG(rsp)))
+       OP(mov,q)       TW(IMM_MANIFEST_NM_VECTOR_1,REG(rcx))
+       OP(mov,q)       TW(REG(rcx),IND(rfree))
+       movsd           TW(REG(xmm0),QOF(FLONUM_DATA_OFFSET,rfree))
+       OP(mov,q)       TW(IMM_FLONUM_0,REG(rax))
+       OP(or,q)        TW(rfree,REG(rax))
+       OP(lea,q)       TW(QOF(FLONUM_STORAGE_SIZE,rfree),rfree)
+       OP(mov,q)       TW(REG(rax),QOF(REGBLOCK_VAL(),regs))
         ret
  
  declare_alignment(2)
  asm_generic_return_sharp_t:
         OP(and,q)       TW(rmask,IND(REG(rsp)))
         OP(mov,q)       TW(IMM_TRUE,REG(rax))
-       OP(mov,q)       TW(REG(rax),LOF(REGBLOCK_VAL(),regs))
+       OP(mov,q)       TW(REG(rax),QOF(REGBLOCK_VAL(),regs))
         ret
  
  declare_alignment(2)
  asm_generic_return_sharp_f:
         OP(and,q)       TW(rmask,IND(REG(rsp)))
         OP(mov,q)       TW(IMM_FALSE,REG(rax))
-       OP(mov,q)       TW(REG(rax),LOF(REGBLOCK_VAL(),regs))
+       OP(mov,q)       TW(REG(rax),QOF(REGBLOCK_VAL(),regs))
         ret
  \f
  define(define_unary_operation,
@@ -575,8 +605,16 @@ define_hook_label(generic_$1)
         OP(mov,q)       TW(REG(rdx),REG(rax))
         OP(shr,q)       TW(IMM(DATUM_LENGTH),REG(rax))
         OP(cmp,b)       TW(IMM(TC_FIXNUM),REG(al))
+       je      asm_generic_$1_fix
+       OP(cmp,b)       TW(IMM(TC_FLONUM),REG(al))
         jne     asm_generic_$1_fail
  
+asm_generic_$1_flo:
+       OP(and,q)       TW(rmask,REG(rdx))
+       movsd           TW(QOF(FLONUM_DATA_OFFSET,REG(rdx)),REG(xmm0))
+       $4              TW(ABS(flonum_one),REG(xmm0))
+       jmp     asm_generic_flonum_result
+
  asm_generic_$1_fix:
         OP(mov,q)       TW(REG(rdx),REG(rax))
         OP(shl,q)       TW(IMM(TC_LENGTH),REG(rax))
@@ -595,8 +633,17 @@ define_hook_label(generic_$1)
         OP(mov,q)       TW(REG(rdx),REG(rax))
         OP(shr,q)       TW(IMM(DATUM_LENGTH),REG(rax))
         OP(cmp,b)       TW(IMM(TC_FIXNUM),REG(al))
+       je      asm_generic_$1_fix
+       OP(cmp,b)       TW(IMM(TC_FLONUM),REG(al))
         jne     asm_generic_$1_fail
  
+asm_generic_$1_flo:
+       OP(and,q)       TW(rmask,REG(rdx))
+       movsd           TW(QOF(FLONUM_DATA_OFFSET,REG(rdx)),REG(xmm0))
+       ucomisd         TW(ABS(flonum_zero),REG(xmm0))
+       $3      asm_generic_return_sharp_t
+       jmp     asm_generic_return_sharp_f
+
  asm_generic_$1_fix:
         OP(mov,q)       TW(REG(rdx),REG(rax))
         OP(shl,q)       TW(IMM(TC_LENGTH),REG(rax))
@@ -610,7 +657,7 @@ asm_generic_$1_fail:
         jmp     scheme_to_interface')
  \f
  define(define_binary_operation,
-`define_binary_operation_with_fixup($1,$2,$3,
+`define_binary_operation_with_fixup($1,$2,$3,$4,
         `OP(shl,q)      TW(IMM(TC_LENGTH),REG(rax))')')
  
  define(define_binary_operation_with_fixup,
@@ -625,12 +672,21 @@ define_hook_label(generic_$1)
         OP(cmp,b)       TW(REG(al),REG(cl))
         jne     asm_generic_$1_fail
         OP(cmp,b)       TW(IMM(TC_FIXNUM),REG(al))
+       je      asm_generic_$1_fix
+       OP(cmp,b)       TW(IMM(TC_FLONUM),REG(al))
         jne     asm_generic_$1_fail
  
+asm_generic_$1_flo:
+       OP(and,q)       TW(rmask,REG(rdx))
+       OP(and,q)       TW(rmask,REG(rbx))
+       movsd           TW(QOF(FLONUM_DATA_OFFSET,REG(rdx)),REG(xmm0))
+       $4              TW(QOF(FLONUM_DATA_OFFSET,REG(rbx)),REG(xmm0))
+       jmp     asm_generic_flonum_result
+
  asm_generic_$1_fix:
         OP(mov,q)       TW(REG(rdx),REG(rax))
         OP(mov,q)       TW(REG(rbx),REG(rcx))
-       $4                                              # Set up rax.
+       $5                                              # Set up rax.
         OP(shl,q)       TW(IMM(TC_LENGTH),REG(rcx))
         OP($3,q)        TW(REG(rcx),REG(rax))           # subq
         jno     asm_generic_fixnum_result
@@ -653,8 +709,18 @@ define_hook_label(generic_$1)
         OP(cmp,b)       TW(REG(al),REG(cl))
         jne     asm_generic_$1_fail
         OP(cmp,b)       TW(IMM(TC_FIXNUM),REG(al))
+       je      asm_generic_$1_fix
+       OP(cmp,b)       TW(IMM(TC_FLONUM),REG(al))
         jne     asm_generic_$1_fail
  
+asm_generic_$1_flo:
+       OP(and,q)       TW(rmask,REG(rdx))
+       OP(and,q)       TW(rmask,REG(rbx))
+       movsd           TW(QOF(FLONUM_DATA_OFFSET,REG(rdx)),REG(xmm0))
+       ucomisd         TW(QOF(FLONUM_DATA_OFFSET,REG(rbx)),REG(xmm0))
+       $3      asm_generic_return_sharp_t
+       jmp     asm_generic_return_sharp_f
+
  asm_generic_$1_fix:
         OP(shl,q)       TW(IMM(TC_LENGTH),REG(rdx))
         OP(shl,q)       TW(IMM(TC_LENGTH),REG(rbx))
@@ -668,43 +734,122 @@ asm_generic_$1_fail:
         OP(mov,b)       TW(IMM(HEX($2)),REG(al))
         jmp     scheme_to_interface')
  \f
-#define_unary_operation(decrement,22,sub)
-#define_unary_operation(increment,26,add)
+# Division is hairy.  I'm not sure whether this will do the right
+# thing for infinities and NaNs.
+
+define_hook_label(generic_divide)
+       OP(pop,q)       REG(rdx)
+       OP(pop,q)       REG(rbx)
+       # We want to divide rdx by rbx.  First put the numerator's tag
+       # in al and the denominator's tag in cl.
+       OP(mov,q)       TW(REG(rdx),REG(rax))
+       OP(mov,q)       TW(REG(rbx),REG(rcx))
+       OP(shr,q)       TW(IMM(DATUM_LENGTH),REG(rax))
+       OP(shr,q)       TW(IMM(DATUM_LENGTH),REG(rcx))
+       OP(cmp,b)       TW(IMM(TC_FIXNUM),REG(al))
+       je      asm_generic_divide_fix
+       OP(cmp,b)       TW(IMM(TC_FLONUM),REG(al))
+       jne     asm_generic_divide_fail
+       OP(cmp,b)       TW(IMM(TC_FLONUM),REG(cl))
+       je      asm_generic_divide_flo_by_flo
+       OP(cmp,b)       TW(IMM(TC_FIXNUM),REG(cl))
+       jne     asm_generic_divide_fail
+
+asm_generic_divide_flo_by_fix:
+       # Numerator (rdx) is a flonum, denominator (rbx) is a fixnum.
+       OP(mov,q)       TW(REG(rbx),REG(rcx))
+       OP(shl,q)       TW(IMM(TC_LENGTH),REG(rcx))
+       # Division by zero -- bail.
+       jz      asm_generic_divide_fail
+       OP(and,q)       TW(rmask,REG(rdx))
+       OP(sar,q)       TW(IMM(TC_LENGTH),REG(rcx))
+       movsd           TW(QOF(FLONUM_DATA_OFFSET,REG(rdx)),REG(xmm0))
+       OP(cvtsi2sd,q)  TW(REG(rcx),REG(xmm1))
+       divsd           TW(REG(xmm1),REG(xmm0))
+       jmp     asm_generic_flonum_result
+
+asm_generic_divide_fix:
+       OP(cmp,b)       TW(IMM(TC_FLONUM),REG(cl))
+       jne asm_generic_divide_fail
+\f
+asm_generic_divide_fix_by_flo:
+       # Numerator (rdx) is a fixnum, denominator (rbx) is a flonum.
+       OP(mov,q)       TW(REG(rbx),REG(rax))
+       OP(and,q)       TW(rmask,REG(rax))
+       OP(mov,q)       TW(REG(rdx),REG(rcx))
+       movsd           TW(QOF(FLONUM_DATA_OFFSET,REG(rax)),REG(xmm1))
+       OP(shl,q)       TW(IMM(TC_LENGTH),REG(rcx))
+       jz      asm_generic_divide_zero_by_flo
+       OP(sar,q)       TW(IMM(TC_LENGTH),REG(rcx))
+       OP(cvtsi2sd,q)  TW(REG(rcx),REG(xmm0))
+       divsd           TW(REG(xmm1),REG(xmm0))
+       jmp     asm_generic_flonum_result
+
+asm_generic_divide_zero_by_flo:
+       # rcx contains zero, representing a numerator exactly zero.
+       # Defer division of 0 by 0.0; otherwise, yield exactly zero.
+       OP(cvtsi2sd,q)  TW(REG(rcx),REG(xmm0))
+       ucomisd         TW(REG(xmm1),REG(xmm0))
+       je      asm_generic_divide_fail
+       OP(and,q)       TW(rmask,IND(REG(rsp)))
+       OP(mov,q)       TW(IMM_FIXNUM_0,REG(rax))
+       OP(mov,q)       TW(REG(rax),QOF(REGBLOCK_VAL(),regs))
+       ret
+
+asm_generic_divide_flo_by_flo:
+       # Numerator (rdx) and denominator (rbx) are both flonums.
+       OP(mov,q)       TW(REG(rdx),REG(rax))
+       OP(mov,q)       TW(REG(rbx),REG(rcx))
+       OP(and,q)       TW(rmask,REG(rax))
+       OP(and,q)       TW(rmask,REG(rcx))
+       movsd           TW(QOF(FLONUM_DATA_OFFSET,REG(rax)),REG(xmm0))
+       movsd           TW(QOF(FLONUM_DATA_OFFSET,REG(rcx)),REG(xmm1))
+       ucomisd         TW(ABS(flonum_zero),REG(xmm1))
+       je      asm_generic_divide_fail
+       divsd           TW(REG(xmm1),REG(xmm0))
+       jmp     asm_generic_flonum_result
+
+asm_generic_divide_fail:
+       OP(push,q)      REG(rbx)
+       OP(push,q)      REG(rdx)
+       OP(mov,b)       TW(IMM(HEX(23)),REG(al))
+       jmp     scheme_to_interface
+\f
+define_unary_operation(decrement,22,sub,subsd)
+define_unary_operation(increment,26,add,addsd)
  
-#define_unary_predicate(negative,2a,jl)
-#define_unary_predicate(positive,2c,jg)
-#define_unary_predicate(zero,2d,je)
+# define_unary_predicate(name,index,jcc)
+# define_unary_predicate(  $1,   $2, $3)
+define_unary_predicate(negative,2a,jl)
+define_unary_predicate(positive,2c,jg)
+define_unary_predicate(zero,2d,je)
  
-# define_binary_operation(name,index,op)
-# define_binary_operation(  $1,   $2,$3)
-#define_binary_operation(add,2b,add)
-#define_binary_operation(subtract,28,sub)
+# define_binary_operation(name,index,fxop,flop)
+# define_binary_operation(  $1,   $2,  $3,  $4)
+define_binary_operation(add,2b,add,addsd)
+define_binary_operation(subtract,28,sub,subsd)
  
  # No fixup -- leave it unshifted.
-#define_binary_operation_with_fixup(multiply,29,imul)
+define_binary_operation_with_fixup(multiply,29,imul,mulsd)
  
  # define_binary_predicate(name,index,jcc)
-#define_binary_predicate(equal,24,je)
-#define_binary_predicate(greater,25,jg)
-#define_binary_predicate(less,27,jl)
-
-# At the moment, there is no advantage to using the above code, and in
-# fact using it is a waste, since the compiler open-codes the fixnum
-# case already.  Later, the above code will also handle floating-point
-# arguments, which the compiler does not open-code.
-
-define_jump_indirection(generic_decrement,22)
-define_jump_indirection(generic_divide,23)
-define_jump_indirection(generic_equal,24)
-define_jump_indirection(generic_greater,25)
-define_jump_indirection(generic_increment,26)
-define_jump_indirection(generic_less,27)
-define_jump_indirection(generic_subtract,28)
-define_jump_indirection(generic_multiply,29)
-define_jump_indirection(generic_negative,2a)
-define_jump_indirection(generic_add,2b)
-define_jump_indirection(generic_positive,2c)
-define_jump_indirection(generic_zero,2d)
+# define_binary_predicate(  $1,   $2, $3)
+define_binary_predicate(equal,24,je)
+define_binary_predicate(greater,25,jg)
+define_binary_predicate(less,27,jl)
+
+#define_jump_indirection(generic_decrement,22)
+#define_jump_indirection(generic_divide,23)
+#define_jump_indirection(generic_equal,24)
+#define_jump_indirection(generic_greater,25)
+#define_jump_indirection(generic_increment,26)
+#define_jump_indirection(generic_less,27)
+#define_jump_indirection(generic_subtract,28)
+#define_jump_indirection(generic_multiply,29)
+#define_jump_indirection(generic_negative,2a)
+#define_jump_indirection(generic_add,2b)
+#define_jump_indirection(generic_positive,2c)
+#define_jump_indirection(generic_zero,2d)
  define_jump_indirection(generic_quotient,37)
  define_jump_indirection(generic_remainder,38)
  define_jump_indirection(generic_modulo,39)
author	Taylor R Campbell <campbell@mumble.net>
	Wed, 11 Nov 2009 20:24:13 +0000 (15:24 -0500)
committer	Taylor R Campbell <campbell@mumble.net>
	Wed, 11 Nov 2009 20:24:13 +0000 (15:24 -0500)
src/compiler/machines/x86-64/insmac.scm		patch \| blob \| history
src/compiler/machines/x86-64/instrf.scm		patch \| blob \| history
src/compiler/machines/x86-64/insutl.scm		patch \| blob \| history
src/compiler/machines/x86-64/lapgen.scm		patch \| blob \| history
src/compiler/machines/x86-64/machin.scm		patch \| blob \| history
src/compiler/machines/x86-64/rules1.scm		patch \| blob \| history
src/compiler/machines/x86-64/rulfix.scm		patch \| blob \| history
src/compiler/machines/x86-64/rulflo.scm		patch \| blob \| history
src/compiler/machines/x86-64/rulrew.scm		patch \| blob \| history
src/microcode/cmpauxmd/x86-64.m4		patch \| blob \| history