From: Taylor R Campbell <campbell@mumble.net>
Date: Thu, 12 Nov 2009 18:05:26 +0000 (-0500)
Subject: Fix bug in x86-64 instruction encoder: REX prefix must be last.
X-Git-Tag: 20100708-Gtk~249
X-Git-Url: https://birchwood-abbey.net/git?a=commitdiff_plain;h=4e53fb3b334ae687c6804a7308037c615c656d8c;p=mit-scheme.git

Fix bug in x86-64 instruction encoder: REX prefix must be last.

(Thanks, AMD, for making it seem like the 66, F2, and F3 bytes are
part of the `opcode' all throughout the media instruction set
manual.)

Also change ([U]COMISF ...) to be ([U]COMIF S ...) for consistency.
---

diff --git a/src/compiler/machines/x86-64/instrf.scm b/src/compiler/machines/x86-64/instrf.scm
index 7e3021d0b..7f81f4bb4 100644
--- a/src/compiler/machines/x86-64/instrf.scm
+++ b/src/compiler/machines/x86-64/instrf.scm
@@ -78,16 +78,16 @@ USA.
                        (opcode (caddr form)))
                    `(define-instruction ,mnemonic
                       ((P D (XMM (? target)) (? source xmm/m-ea))
+                       (BITS (8 #x66))
                        (PREFIX (ModR/M target source))
-                       (BITS (8 #x66)
-                             (8 #x0F)
+                       (BITS (8 #x0F)
                              (8 #xD0))
                        (ModR/M target source))
 
                       ((P S (XMM (? target)) (? source xmm/m-ea))
+                       (BITS (8 #xF2))
                        (PREFIX (ModR/M target source))
-                       (BITS (8 #xF2)
-                             (8 #x0F)
+                       (BITS (8 #x0F)
                              (8 #xD0))
                        (ModR/M target source))))))))
   (define-packed-flop-instruction ADDSUBF       #xD0)
@@ -136,20 +136,21 @@ USA.
                  (let ((mnemonic (cadr form))
                        (opcode (caddr form)))
                    `(define-instruction ,mnemonic
-                      ((D (XMM (? source1)) (? source2 xmm/m-ea))
+                      ((S D (XMM (? source1)) (? source2 xmm/m-ea))
+                       (BITS (8 #x66))
                        (PREFIX (ModR/M source1 source2))
-                       (BITS (8 #x66)
-                             (8 #x0F)
+                       (BITS (8 #x0F)
                              (8 ,opcode))
                        (ModR/M source1 source2))
 
-                      ((S (XMM (? source1)) (? source2 xmm/m-ea))
+                      ((S S (XMM (? source1)) (? source2 xmm/m-ea))
                        (PREFIX (ModR/M source1 source2))
                        (BITS (8 #x0F)
                              (8 ,opcode))
                        (ModR/M source1 source2))))))))
-  (define-un/ordered-compare-instruction COMISF #x2F)
-  (define-un/ordered-compare-instruction UCOMISF #x2E))
+  ;; What does the `I' stand for?
+  (define-un/ordered-compare-instruction COMIF #x2F)
+  (define-un/ordered-compare-instruction UCOMIF #x2E))
 
 (let-syntax ((define-conversion-instruction
               (sc-macro-transformer
@@ -168,10 +169,15 @@ USA.
                                  (rules (cdddr rules-definition)))
                              (map (lambda (rule)
                                     (let ((conversion (car rule))
+                                          (pre-prefix
+                                           (if (cadr rule)
+                                               `((BITS (8 ,(cadr rule))))
+                                               '()))
                                           (bytes
                                            (map (lambda (byte) `(8 ,byte))
-                                                (cdr rule))))
+                                                (cddr rule))))
                                       `((,conversion ,@pattern)
+                                        ,@pre-prefix
                                         (PREFIX ,@prefix-options
                                                 (ModR/M reg ea))
                                         (BITS ,@bytes)
@@ -183,11 +189,11 @@ USA.
     (define-rules ((XMM (? reg)) (? ea xmm/m-ea))
         ()
       (DQ->PD   #xF3 #x0F #xE6)
-      (DQ->PS        #x0F #x5B)
+      (DQ->PS   #f   #x0F #x5B)
       (PD->DQ   #xF2 #x0F #xE6)
       (PD->PS   #x66 #x0F #x5A)
       (PS->DQ   #x66 #x0F #x5B)
-      (PS->PD        #x0F #x5A)
+      (PS->PD   #f   #x0F #x5A)
       (SD->SS   #xF2 #x0F #x5A)
       (SS->SD   #xF3 #x0F #x5A))
 
@@ -216,18 +222,18 @@ USA.
   (((? target xmm-ea)
     (&U (? size unsigned-5bit))
     (&U (? position unsigned-5bit)))
+   (BITS (8 #x66))
    (PREFIX (ModR/M target))
-   (BITS (8 #x66)
-         (8 #x0F)
+   (BITS (8 #x0F)
          (8 #x78))
    (ModR/M 0 target)
    (BITS (8 size UNSIGNED)
          (8 position UNSIGNED)))
 
   (((XMM (? target)) (? source xmm-ea))
+   (BITS (8 #x66))
    (PREFIX (ModR/M target source))
-   (BITS (8 #x66)
-         (8 #x0F)
+   (BITS (8 #x0F)
          (8 #x79))
    (ModR/M target source)))
 
@@ -236,18 +242,18 @@ USA.
     (? source xmm-ea)
     (&U (? size unsigned-5bit))
     (&U (? position unsigned-5bit)))
+   (BITS (8 #xF2))
    (PREFIX (ModR/M target source))
-   (BITS (8 #xF2)
-         (8 #x0F)
+   (BITS (8 #x0F)
          (8 #x78))
    (ModR/M target source)
    (BITS (8 size UNSIGNED)
          (8 position UNSIGNED)))
 
   (((XMM (? target)) (? source xmm-ea))
+   (BITS (8 #xF2))
    (PREFIX (ModR/M target source))
-   (BITS (8 #xF2)
-         (8 #x0F)
+   (BITS (8 #x0F)
          (8 #x79))
    (ModR/M target source)))
 
@@ -269,9 +275,9 @@ USA.
 
 (define-instruction LDDQU               ;Load Double Quadword Unaligned
   (((XMM (? target)) (? source m-ea))
+   (BITS (8 #xF2))
    (PREFIX (ModR/M target source))
-   (BITS (8 #xF2)
-         (8 #x0F)
+   (BITS (8 #x0F)
          (8 #xF0))
    (ModR/M target source)))
 
@@ -284,9 +290,9 @@ USA.
 
 (define-instruction MASKMOVDQU
   (((@R 7) (XMM (? source1)) (? source2 xmm-ea))
+   (BITS (8 #x66))
    (PREFIX (ModR/M source1 source2))
-   (BITS (8 #x66)
-         (8 #x0F)
+   (BITS (8 #x0F)
          (8 #xF7))
    (ModR/M source1 source2)))
 
@@ -306,24 +312,24 @@ USA.
 (define-instruction MOVD
   ;++ SIZE can be only L or Q, not W.
   (((? size operand-size) (XMM (? target)) (? source r/m-ea))
+   (BITS (8 #x66))
    (PREFIX (OPERAND size) (ModR/M target source))
-   (BITS (8 #x66)
-         (8 #x0F)
+   (BITS (8 #x0F)
          (8 #x6E))
    (ModR/M target source))
 
   (((? size operand-size) (? target r/m-ea) (XMM (? source)))
+   (BITS (8 #x66))
    (PREFIX (OPERAND size) (ModR/M source target))
-   (BITS (8 #x66)
-         (8 #x0F)
+   (BITS (8 #x0F)
          (8 #x7E))
    (ModR/M source target)))
 
 (define-instruction MOVDDUP
   (((XMM (? target)) (? source xmm/m-ea))
+   (BITS (8 #xF2))
    (PREFIX (ModR/M target source))
-   (BITS (8 #xF2)
-         (8 #x0F)
+   (BITS (8 #x0F)
          (8 #x12))
    (ModR/M target source)))
 
@@ -336,16 +342,16 @@ USA.
 
                    `(define-instruction ,mnemonic
                       (((XMM (? target)) (? source xmm/m-ea))
+                       (BITS (8 ,prefix))
                        (PREFIX (ModR/M target source))
-                       (BITS (8 ,prefix)
-                             (8 #x0F)
+                       (BITS (8 #x0F)
                              (8 #x6F))
                        (ModR/M target source))
 
                       (((? target xmm/m-ea) (XMM (? source)))
+                       (BITS (8 ,prefix))
                        (PREFIX (ModR/M source target))
-                       (BITS (8 ,prefix)
-                             (8 #x0F)
+                       (BITS (8 #x0F)
                              (8 #x7F))
                        (ModR/M source target))))))))
   (define-move-dq-instruction MOVDQA #x66)
@@ -369,16 +375,16 @@ USA.
 
                       (define-instruction ,MOVx
                         ((P D (XMM (? target)) (? source m-ea))
+                         (BITS (8 #x66))
                          (PREFIX (ModR/M target source))
-                         (BITS (8 #x66)
-                               (8 #x0F)
+                         (BITS (8 #x0F)
                                (8 ,opcode2))
                          (ModR/M target source))
 
                         ((P D (? target m-ea) (XMM (? source)))
+                         (BITS (8 #x66))
                          (PREFIX (ModR/M source target))
-                         (BITS (8 #x66)
-                               (8 #x0F)
+                         (BITS (8 #x0F)
                                (8 ,(+ opcode2 1)))
                          (ModR/M source target))
 
@@ -424,16 +430,16 @@ USA.
 ;; Note: (MOVQ ...) is very different from (MOV Q ...)!
 (define-instruction MOVQ
   (((XMM (? target)) (? source xmm/m-ea))
+   (BITS (8 #xF3))
    (PREFIX (ModR/M target source))
-   (BITS (8 #xF3)
-         (8 #x0F)
+   (BITS (8 #x0F)
          (8 #x7E))
    (ModR/M target source))
 
   (((? target xmm/m-ea) (XMM (? source)))
+   (BITS (8 #x66))
    (PREFIX (ModR/M source target))
-   (BITS (8 #x66)
-         (8 #x0F)
+   (BITS (8 #x0F)
          (8 #xD6))
    (ModR/M source target)))
 
@@ -461,9 +467,9 @@ USA.
                        (opcode (caddr form)))
                    `(define-instruction ,mnemonic
                       (((XMM (? target)) (? source xmm/m-ea))
+                       (BITS (8 #xF3))
                        (PREFIX (ModR/M target source))
-                       (BITS (8 #xF3)
-                             (8 #x0F)
+                       (BITS (8 #x0F)
                              (8 ,opcode))
                        (ModR/M target source))))))))
   (define-mov/dup-instruction MOVSHDUP #x16)
@@ -492,9 +498,9 @@ USA.
                                (let ((size (car size/opcode))
                                      (opcode (cadr size/opcode)))
                                  `((,size (XMM (? target)) (? source xmm/m-ea))
+                                   (BITS (8 #x66))
                                    (PREFIX (ModR/M target source))
-                                   (BITS (8 #x66)
-                                         (8 #x0F)
+                                   (BITS (8 #x0F)
                                          (8 ,opcode))
                                    (ModR/M target source))))
                              size/opcode-list)))))))
@@ -524,9 +530,9 @@ USA.
                        (opcode (caddr form)))
                    `(define-instruction ,mnemonic
                       (((XMM (? target)) (? source xmm/m-ea))
+                       (BITS (8 #x66))
                        (PREFIX (ModR/M target source))
-                       (BITS (8 #x66)
-                             (8 #x0F)
+                       (BITS (8 #x0F)
                              (8 ,opcode))
                        (ModR/M target source))))))))
   (define-packed-instruction PAND #xDB)
@@ -539,67 +545,67 @@ USA.
 
 (define-instruction PEXTR
   ((W (R (? target)) (? source xmm-ea) (&U (? position unsigned-3bit)))
+   (BITS (8 #x66))
    (PREFIX (ModR/M target source))
-   (BITS (8 #x66)
-         (8 #x0F)
+   (BITS (8 #x0F)
          (8 #xC5))
    (ModR/M target source)
    (BITS (8 position UNSIGNED))))
 
 (define-instruction PINSR
   ((W (XMM (? target)) (? source r-ea) (&U (? position unsigned-3bit)))
+   (BITS (8 #x66))
    (PREFIX (ModR/M target source))
-   (BITS (8 #x66)
-         (8 #x0F)
+   (BITS (8 #x0F)
          (8 #xC4))
    (ModR/M target source)
    (BITS (8 position UNSIGNED))))
 
 (define-instruction PMOVMSKB
   (((R (? target)) (? source xmm-ea))
+   (BITS (8 #x66))
    (PREFIX (ModR/M target source))
-   (BITS (8 #x66)
-         (8 #x0F)
+   (BITS (8 #x0F)
          (8 #xD7))
    (ModR/M target source)))
 
 (define-instruction PMULH
   ((W (XMM (? target)) (? source xmm/m-ea))
+   (BITS (8 #x66))
    (PREFIX (ModR/M target source))
-   (BITS (8 #x66)
-         (8 #x0F)
+   (BITS (8 #x0F)
          (8 #xE5))
    (ModR/M target source))
 
   ((UW (XMM (? target)) (? source xmm/m-ea))
+   (BITS (8 #x66))
    (PREFIX (ModR/M target source))
-   (BITS (8 #x66)
-         (8 #x0F)
+   (BITS (8 #x0F)
          (8 #xE4))
    (ModR/M target source)))
 
 (define-instruction PMULL
   ((W (XMM (? target)) (? source xmm/m-ea))
+   (BITS (8 #x66))
    (PREFIX (ModR/M target source))
-   (BITS (8 #x66)
-         (8 #x0F)
+   (BITS (8 #x0F)
          (8 #xD5))
    (ModR/M target source)))
 
 (define-instruction PSHUF
   ((L (XMM (? target)) (? source xmm/m-ea) (&U (? wibblethwop unsigned-byte)))
+   (BITS (8 #x66))
    (PREFIX (ModR/M target source))
-   (BITS (8 #x66)
-         (8 #x0F)
+   (BITS (8 #x0F)
          (8 #x70))
    (ModR/M target source)
    (BITS (8 wibblethwop))))
 
 (define-instruction PSHUFH
   ((W (XMM (? target)) (? source xmm/m-ea) (&U (? zob unsigned-byte)))
+   (BITS (8 #xF3))
    (PREFIX (ModR/M target source))
-   (BITS (8 #xF3)
-         (8 #x0F)
+   (BITS (8 #x0F)
          (8 #x70))
    (ModR/M target source)
    (BITS (8 zob))))
@@ -609,9 +615,9 @@ USA.
 
 (define-instruction PSHUFL
   ((W (XMM (? target)) (? source xmm/m-ea) (&U (? veeblefitzer unsigned-byte)))
+   (BITS (8 #xF3))
    (PREFIX (ModR/M target source))
-   (BITS (8 #xF3)
-         (8 #x0F)
+   (BITS (8 #x0F)
          (8 #x70))
    (ModR/M target source)
    (BITS (8 veeblefitzer))))
@@ -627,9 +633,9 @@ USA.
                        (size/opcode-list (cddddr form)))
                    `(define-instruction ,mnemonic
                       ((DQ (? target xmm-ea) (&U count unsigned-byte))
+                       (BITS (8 #x66))
                        (PREFIX (ModR/M target))
-                       (BITS (8 #x66)
-                             (8 #x0F)
+                       (BITS (8 #x0F)
                              (8 #x73))
                        (ModR/M ,dq-digit target)
                        (BITS (8 count UNSIGNED)))
@@ -637,9 +643,9 @@ USA.
                       (((? size operand-size)
                         (? target xmm-ea)
                         (&U (? count unsigned-byte)))
+                       (BITS (8 #x66))
                        (PREFIX (ModR/M target))
-                       (BITS (8 #x66)
-                             (8 #x0F)
+                       (BITS (8 #x0F)
                              (8 (case size ((Q) #x73) ((L) #x72) ((W) #x71))))
                        (ModR/M ,digit target)
                        (BITS (8 count UNSIGNED)))
@@ -647,9 +653,9 @@ USA.
                       (((? size operand-size)
                         (XMM (? target))
                         (? source xmm/m-ea))
+                       (BITS (8 #x66))
                        (PREFIX (ModR/M target source))
-                       (BITS (8 #x66)
-                             (8 #x0F)
+                       (BITS (8 #x0F)
                              (8 (case size
                                   ((Q) ,(+ opcode 1))
                                   ((L) ,(+ opcode 2))
@@ -661,17 +667,17 @@ USA.
 (define-instruction PSRA
   ;++ This does not admit an operand size of Q.
   (((? size operand-size) (? target xmm-ea) (&U (? count unsigned-byte)))
+   (BITS (8 #x66))
    (PREFIX (ModR/M target))
-   (BITS (8 #x66)
-         (8 #x0F)
+   (BITS (8 #x0F)
          (8 (case size ((L) #x72) ((W) #x71))))
    (ModR/M 4 target)
    (BITS (8 count UNSIGNED)))
 
   (((? size operand-size) (XMM (? target)) (? source xmm/m-ea))
+   (BITS (8 #x66))
    (PREFIX (ModR/M target source))
-   (BITS (8 #x66)
-         (8 #x0F)
+   (BITS (8 #x0F)
          (8 (case size ((L) #xE2) ((W) #xE1))))
    (ModR/M target source)))
 
diff --git a/src/compiler/machines/x86-64/insutl.scm b/src/compiler/machines/x86-64/insutl.scm
index fa3ab3b2a..84da6be97 100644
--- a/src/compiler/machines/x86-64/insutl.scm
+++ b/src/compiler/machines/x86-64/insutl.scm
@@ -253,60 +253,57 @@ USA.
     ((D S) s)
     (else #f)))
 
+;;; The REX prefix must come last, just before the `actual' opcode.
+
 (define (cons-prefix operand-size register ea tail)
-  (let ((tail
-	 (if (eq? operand-size 'W)
-	     (cons-syntax (syntax-evaluation #x66 coerce-8-bit-unsigned)
-			  tail)
-	     tail)))
-    ((lambda (rex-prefix)
-       (if (zero? rex-prefix)
-	   tail
-	   (cons-syntax (syntax-evaluation rex-prefix coerce-8-bit-unsigned)
-			tail)))
-     (fix:or
-      (case operand-size
-	;; B must be handled separately; there is no prefix for it.
-	;; W is handled with a #x66 prefix.
-	;; L is the default.
-	((#F W L) 0)
-	((Q) #x48)
-	(else (error "Invalid operand size:" operand-size)))
-      (let ((extended-register?
-	     (or (eqv? register #t)
-		 (and register (>= register 8)))))
-	(if ea
-	    (fix:or (if extended-register? #x44 0) (ea/rex-prefix ea))
-	    (if extended-register? #x41 0)))))))
-
-;;; The SSE instructions don't consistently use this pattern for their
-;;; opcodes, but enough of them do that this approximate abstraction
-;;; helps to clarify the instruction syntax.
+  ((lambda (tail)
+     (if (eq? operand-size 'W)
+	 (cons-syntax (syntax-evaluation #x66 coerce-8-bit-unsigned)
+		      tail)
+	 tail))
+   ((lambda (rex-prefix)
+      (if (zero? rex-prefix)
+	  tail
+	  (cons-syntax (syntax-evaluation rex-prefix coerce-8-bit-unsigned)
+		       tail)))
+    (fix:or
+     (case operand-size
+       ;; B must be handled separately; there is no prefix for it.
+       ;; W is handled with a #x66 prefix.
+       ;; L is the default.
+       ((#F W L) 0)
+       ((Q) #x48)
+       (else (error "Invalid operand size:" operand-size)))
+     (let ((extended-register?
+	    (or (eqv? register #t)
+		(and register (>= register 8)))))
+       (if ea
+	   (fix:or (if extended-register? #x44 0) (ea/rex-prefix ea))
+	   (if extended-register? #x41 0)))))))
 
 (define (cons-float-prefix register ea packed/scalar precision tail)
-  (let* ((tail
-	  (let ((float (list packed/scalar precision)))
-	    (if (equal? float '(P S))
-		tail
-		(cons
-		 (syntax-evaluation
-		  (cond ((equal? float '(P D)) #x66)
-			((equal? float '(S D)) #xF2)
-			((equal? float '(S S)) #xF3)
-			(else (error "Bad float type:" float)))
-		  coerce-8-bit-unsigned)
-		 tail))))
-	 (rex-prefix
+  ((lambda (tail)
+     (let ((float (list packed/scalar precision)))
+       (if (equal? float '(P S))
+	   tail
+	   (cons-syntax (syntax-evaluation
+			 (cond ((equal? float '(P D)) #x66)
+			       ((equal? float '(S D)) #xF2)
+			       ((equal? float '(S S)) #xF3)
+			       (else (error "Bad float type:" float)))
+			 coerce-8-bit-unsigned)
+			tail))))
+   (let ((rex-prefix
 	  (let ((extended-register?
 		 (or (eqv? register #t)
 		     (and register (>= register 8)))))
 	    (if ea
 		(fix:or (if extended-register? #x44 0) (ea/rex-prefix ea))
 		(if extended-register? #x41 0)))))
-    (if (zero? rex-prefix)
-	tail
-	(cons-syntax (syntax-evaluation rex-prefix coerce-8-bit-unsigned)
-		     tail))))
+     (if (zero? rex-prefix)
+	 tail
+	 (cons-syntax (syntax-evaluation rex-prefix coerce-8-bit-unsigned)
+		      tail)))))
 
 (define-integrable (register-bits r)
   (fix:and r #b111))
diff --git a/src/compiler/machines/x86-64/rulflo.scm b/src/compiler/machines/x86-64/rulflo.scm
index e5ca59fb8..03ada7501 100644
--- a/src/compiler/machines/x86-64/rulflo.scm
+++ b/src/compiler/machines/x86-64/rulflo.scm
@@ -351,7 +351,7 @@ USA.
                               (LAP (JBE (@PCR ,label))))))
     (else
      (error "flonum-branch!: Unknown predicate" predicate)))
-  (LAP (UCOMISF D ,source1 ,source2)))
+  (LAP (UCOMIF S D ,source1 ,source2)))
                                    
 (define-rule statement
   (ASSIGN (REGISTER (? target)) (OBJECT->FLOAT (CONSTANT (? fp-value))))