(sc-macro-transformer
(lambda (form environment)
environment
- (if (syntax-match? '(IDENTIFIER ? SYMBOL) (cdr form))
+ (if (syntax-match? '(IDENTIFIER * SYMBOL) (cdr form))
`(DEFINE (,(cadr form) EXPRESSION)
(LET ((MATCH-RESULT (PATTERN-LOOKUP ,ea-database-name EXPRESSION)))
(AND MATCH-RESULT
- ,(if (pair? (cddr form))
- `(LET ((EA (MATCH-RESULT)))
- (AND (MEMQ ',(caddr form) (EA/CATEGORIES EA))
- EA))
- `(MATCH-RESULT)))))
+ ,(let ((categories (cddr form)))
+ (if (pair? categories)
+ `(LET ((EA (MATCH-RESULT)))
+ (AND
+ (OR
+ ,@(map (lambda (category)
+ `(MEMQ ',category (EA/CATEGORIES EA)))
+ categories))
+ EA))
+ `(MATCH-RESULT))))))
(ill-formed-syntax form)))))
\f
(define (parse-categories categories environment context)
(if (not (and (pair? categories)
(eq? 'CATEGORIES (car categories))
(pair? (cdr categories))
- (memq (cadr categories) '(REGISTER MEMORY))
+ (memq (cadr categories) '(REGISTER MEMORY XMM))
(null? (cddr categories))))
(error "Malformed CATEGORIES for effective address rule:"
categories
(values tail 0))))
(define (collect-prefix options tail environment)
- (let loop ((options options) (operand #f) (register #f) (r/m #f))
+ (let loop ((options options) (operand #f) (register #f) (r/m #f) (float #f))
(if (pair? options)
(case (caar options)
- ((OPERAND) (loop (cdr options) (cadar options) register r/m))
+ ((OPERAND) (loop (cdr options) (cadar options) register r/m float))
((OPCODE-REGISTER)
(loop (cdr options)
operand
(or (not (pair? (cdar options))) (cadar options))
- r/m))
+ r/m
+ float))
((ModR/M)
;; (ModR/M <r/m>), for fixed digits
;; (ModR/M <reg> <r/m>), for registers
- (if (pair? (cddar options))
- (loop (cdr options) operand (cadar options) (caddar options))
- (loop (cdr options) operand #f (cadar options))))
+ (receive (register r/m)
+ (if (pair? (cddar options))
+ (values (cadar options) (caddar options))
+ (values #f (cadar options)))
+ (loop (cdr options) operand register r/m float)))
+ ((FLOAT)
+ ;; (FLOAT <scalar/packed> <single/double>)
+ (loop (cdr options) operand register r/m (cdar options)))
(else (error "Bad instruction prefix option:" (car options))))
- (let ((cons-prefix (close-syntax 'CONS-PREFIX environment)))
- `(,cons-prefix ,operand ,register ,r/m ,tail)))))
+ (if float
+ (let ((cons-float-prefix
+ (close-syntax 'CONS-FLOAT-PREFIX environment)))
+ (if operand
+ (error "Float instructions can't have operand size prefix:"
+ operand))
+ `(,cons-float-prefix ,register ,r/m ,(car float) ,(cadr float)
+ ,tail))
+ (let ((cons-prefix (close-syntax 'CONS-PREFIX environment)))
+ `(,cons-prefix ,operand ,register ,r/m ,tail))))))
(define (collect-ModR/M field tail environment)
(let ((digit-or-reg (car field))
|#
-;;;; Intel i387/i486 Instruction Set
+;;;; AMD x86-64 128-bit Media Instruction Set
;;; package: (compiler lap-syntaxer)
-(declare (usual-integrations))
+;;; The mnemonics here don't entirely match the ones in the AMD
+;;; manual, or in your typical x86-64 assembler. These mnemonics try
+;;; to adhere to a convention of treating operand sizes, precisions,
+;;; and packed/scalar choices as arguments to a common mnemonic, where
+;;; it is sensible for there to be a choice. Sometimes this is not
+;;; entirely clear, such as PSHUF (which works only with longword-size
+;;; (32-bit) operands) and PSHUFH/PSHUFL (which work only with
+;;; word-size (16-bit) operands). And sometimes this doesn't work
+;;; very well (e.g., MOVQ). Most instructions for floating-point
+;;; arithmetic have F suffixed to their names; e.g., rather than
+;;; ADDSS, ADDSD, ADDPS, and ADDPD, there's a single ADDF mnemonic, to
+;;; be used as (ADDF S S ...), (ADDF S D ...), &c.
+;;;
+;;; Would it have been better just to transcribe exactly the mnemonics
+;;; in the AMD manual? Perhaps, and it might have caused fewer errors
+;;; in transcription, since there would be fewer different formats
+;;; that way.
-#|
+(declare (usual-integrations))
\f
(let-syntax
- ((define-binary-flonum
- (sc-macro-transformer
- (lambda (form environment)
- environment
- (let ((mnemonic (list-ref form 1))
- (pmnemonic (list-ref form 2))
- (imnemonic (list-ref form 3))
- (digit (list-ref form 4))
- (opcode1 (list-ref form 5))
- (opcode2 (list-ref form 6)))
- `(begin
- (define-instruction ,mnemonic
- (((ST 0) (ST (? i)))
- (BYTE (8 #xd8)
- (8 (+ ,opcode1 i))))
-
- (((ST (? i)) (ST 0))
- (BYTE (8 #xdc)
- (8 (+ ,opcode2 i))))
-
- (()
- (BYTE (8 #xde)
- (8 (+ ,opcode2 1))))
-
- ((D (? source mW))
- (BYTE (8 #xdc))
- (ModR/M ,digit source))
-
- ((S (? source mW))
- (BYTE (8 #xd8))
- (ModR/M ,digit source)))
-
- (define-instruction ,pmnemonic
- (((ST (? i)) (ST 0))
- (BYTE (8 #xde)
- (8 (+ ,opcode2 i)))))
-
- (define-instruction ,imnemonic
- ((L (? source mW))
- (BYTE (8 #xda))
- (ModR/M ,digit source))
-
- ((H (? source mW))
- (BYTE (8 #xde))
- (ModR/M ,digit source)))))))))
-
- ;; The i486 book (and 387, etc.) has inconsistent instruction
- ;; descriptions and opcode assignments for FSUB and siblings,
- ;; and FDIV and siblings.
- ;; FSUB ST(i),ST is described as replacing ST(i) with ST-ST(i)
- ;; while the opcode described replaces ST(i) with ST(i)-ST.
-
- ;; In the following, the F% forms follow the descriptions in the
- ;; book, namely, F%SUB computes ST-ST(i) and F%SUBR computes
- ;; ST(i)-ST, storing into their destination (first) argument.
-
- ;; The %-less forms follow the opcodes and usual convention,
- ;; namely FSUB computes destination (first) argument - source
- ;; argument FSUBR computes source - destination.
-
- (define-binary-flonum FADD FADDP FIADD 0 #xc0 #xc0)
- (define-binary-flonum F%DIV F%DIVP F%IDIV 6 #xf0 #xf0)
- (define-binary-flonum F%DIVR F%DIVPR F%IDIVR 7 #xf8 #xf8)
- (define-binary-flonum FDIV FDIVP FIDIV 6 #xf0 #xf8)
- (define-binary-flonum FDIVR FDIVPR FIDIVR 7 #xf8 #xf0)
- (define-binary-flonum FMUL FMULP FIMUL 1 #xc8 #xc8)
- (define-binary-flonum F%SUB F%SUBP F%ISUB 4 #xe0 #xe0)
- (define-binary-flonum F%SUBR F%SUBPR F%ISUBR 5 #xe8 #xe8)
- (define-binary-flonum FSUB FSUBP FISUB 4 #xe0 #xe8)
- (define-binary-flonum FSUBR FSUBPR FISUBR 5 #xe8 #xe0))
+ ((define-flop-instruction
+ (sc-macro-transformer
+ (lambda (form environment)
+ environment ;ignore
+ (let ((mnemonic (cadr form))
+ (opcode (caddr form)))
+ `(define-instruction ,mnemonic
+ (((? p/s float-packed/scalar)
+ (? p float-precision)
+ (XMM (? target))
+ (? source xmm/m-ea))
+ (PREFIX (FLOAT p/s p) (ModR/M target source))
+ (BITS (8 #x0F)
+ (8 ,opcode))
+ (ModR/M target source))))))))
+ (define-flop-instruction ADDF #x58)
+ (define-flop-instruction DIVF #x5E)
+ (define-flop-instruction MAXF #x5F)
+ (define-flop-instruction MINF #x5D)
+ (define-flop-instruction MULF #x59)
+ (define-flop-instruction SQRTF #x51)
+ (define-flop-instruction SUBF #x5C))
+
+(let-syntax ((define-packed-flop-instruction
+ (sc-macro-transformer
+ (lambda (form environment)
+ environment ;ignore
+ (let ((mnemonic (cadr form))
+ (opcode (caddr form)))
+ `(define-instruction ,mnemonic
+ ((P D (XMM (? target)) (? source xmm/m-ea))
+ (PREFIX (ModR/M target source))
+ (BITS (8 #x66)
+ (8 #x0F)
+ (8 #xD0))
+ (ModR/M target source))
+
+ ((P S (XMM (? target)) (? source xmm/m-ea))
+ (PREFIX (ModR/M target source))
+ (BITS (8 #xF2)
+ (8 #x0F)
+ (8 #xD0))
+ (ModR/M target source))))))))
+ (define-packed-flop-instruction ADDSUBF #xD0)
+ (define-packed-flop-instruction HADDF #x7C)
+ (define-packed-flop-instruction HSUBF #x7D))
\f
-(define-trivial-instruction F2XM1 #xd9 #xf0)
-(define-trivial-instruction FABS #xd9 #xe1)
-
-(define-instruction FBLD
- (((? source mW))
- (BYTE (8 #xd8))
- (ModR/M 4 source)))
-
-(define-instruction FBSTP
- (((? target mW))
- (BYTE (8 #xdf))
- (ModR/M 6 target)))
-
-(define-trivial-instruction FCHS #xd9 #xe0)
-(define-trivial-instruction FCLEX #x9b #xdb #xe2) ; = (FWAIT) (FNCLEX)
-(define-trivial-instruction FNCLEX #xdb #xe2)
-
-(let-syntax
- ((define-flonum-comparison
- (sc-macro-transformer
- (lambda (form environment)
- environment
- (let ((mnemonic (cadr form))
- (digit (caddr form))
- (opcode (cadddr form)))
- `(define-instruction ,mnemonic
- (((ST 0) (ST (? i)))
- (BYTE (8 #xd8)
- (8 (+ ,opcode i))))
-
- (()
- (BYTE (8 #xd8)
- (8 (+ ,opcode 1))))
-
- ((D (? source mW))
- (BYTE (8 #xdc))
- (ModR/M ,digit source))
-
- ((S (? source mW))
- (BYTE (8 #xd8))
- (ModR/M ,digit source))))))))
-
- (define-flonum-comparison FCOM 2 #xd0)
- (define-flonum-comparison FCOMP 3 #xd8))
-
-(define-trivial-instruction FCOMPP #xde #xd9)
-(define-trivial-instruction FCOS #xd9 #xff)
-(define-trivial-instruction FDECSTP #xd9 #xf6)
-
-(define-instruction FFREE
- (((ST (? i)))
- (BYTE (8 #xdd)
- (8 (+ #xc0 i)))))
-
-(let-syntax
- ((define-flonum-integer-comparison
- (sc-macro-transformer
- (lambda (form environment)
- environment
- (let ((mnemonic (cadr form))
- (digit (caddr form)))
- `(define-instruction ,mnemonic
- ((L (? source mW))
- (BYTE (8 #xda))
- (ModR/M ,digit source))
-
- ((H (? source mW))
- (BYTE (8 #xde))
- (ModR/M ,digit source))))))))
-
- (define-flonum-integer-comparison FICOM 2)
- (define-flonum-integer-comparison FICOMP 3))
+(let-syntax ((define-packed-bitwise-instruction
+ (sc-macro-transformer
+ (lambda (form environment)
+ environment ;ignore
+ (let ((mnemonic (cadr form))
+ (opcode (caddr form)))
+ `(define-instruction ,mnemonic
+ ((P
+ (? p float-precision)
+ (XMM (? target))
+ (? source xmm/m-ea))
+ (PREFIX (FLOAT 'P p) (ModR/M target source))
+ (BITS (8 #x0F)
+ (8 ,opcode))
+ (ModR/M target source))))))))
+ (define-packed-bitwise-instruction ANDNF #x55)
+ (define-packed-bitwise-instruction ANDF #x54)
+ (define-packed-bitwise-instruction ORF #x56)
+ (define-packed-bitwise-instruction XORF #x57)
+ ;; Not really bitwise instruction, but these two fit the pattern.
+ (define-packed-bitwise-instruction UNPCKHF #x15)
+ (define-packed-bitwise-instruction UNPCKLF #x15))
+
+(define-instruction CMPF
+ (((? comparator float-comparator)
+ (? p/s float-packed/scalar)
+ (? p float-precision)
+ (XMM (? source1))
+ (? source2 xmm/m-ea))
+ (PREFIX (FLOAT p/s p) (ModR/M source1 source2))
+ (BITS (8 #x0F)
+ (8 #xC2))
+ (ModR/M source1 source2)
+ (BITS (8 comparator))))
+
+(let-syntax ((define-un/ordered-compare-instruction
+ (sc-macro-transformer
+ (lambda (form environment)
+ environment ;ignore
+ (let ((mnemonic (cadr form))
+ (opcode (caddr form)))
+ `(define-instruction ,mnemonic
+ ((D (XMM (? source1)) (? source2 xmm/m-ea))
+ (PREFIX (ModR/M source1 source2))
+ (BITS (8 #x66)
+ (8 #x0F)
+ (8 ,opcode))
+ (ModR/M source1 source2))
+
+ ((S (XMM (? source1)) (? source2 xmm/m-ea))
+ (PREFIX (ModR/M source1 source2))
+ (BITS (8 #x0F)
+ (8 ,opcode))
+ (ModR/M source1 source2))))))))
+ (define-un/ordered-compare-instruction COMISF #x2F)
+ (define-un/ordered-compare-instruction UCOMISF #x2E))
\f
-(let-syntax
- ((define-flonum-integer-memory
- (sc-macro-transformer
- (lambda (form environment)
- environment
- (let ((mnemonic (cadr form))
- (digit1 (caddr form))
- (digit2 (cadddr form)))
- `(define-instruction ,mnemonic
- ,@(if (not digit2)
- `()
- `(((Q (? source mW))
- (BYTE (8 #xdf))
- (ModR/M ,digit2 source))))
-
- ((L (? source mW))
- (BYTE (8 #xdb))
- (ModR/M ,digit1 source))
-
- ((H (? source mW))
- (BYTE (8 #xdf))
- (ModR/M ,digit1 source))))))))
-
- (define-flonum-integer-memory FILD 0 5)
- (define-flonum-integer-memory FIST 2 #f)
- (define-flonum-integer-memory FISTP 3 7))
-
-(define-trivial-instruction FINCSTP #xd9 #xf7)
-(define-trivial-instruction FINIT #x9b #xdb #xe3) ; = (FWAIT) (FNINT)
-(define-trivial-instruction FNINIT #xdb #xe3)
-
-(let-syntax
- ((define-flonum-memory
- (sc-macro-transformer
- (lambda (form environment)
- environment
- (let ((mnemonic (list-ref form 1))
- (digit1 (list-ref form 2))
- (digit2 (list-ref form 3))
- (opcode1 (list-ref form 4))
- (opcode2 (list-ref form 5)))
- `(define-instruction ,mnemonic
- (((ST (? i)))
- (BYTE (8 ,opcode1)
- (8 (+ ,opcode2 i))))
-
- ((D (? operand mW))
- (BYTE (8 #xdd))
- (ModR/M ,digit1 operand))
-
- ((S (? operand mW))
- (BYTE (8 #xd9))
- (ModR/M ,digit1 operand))
-
- ,@(if (not digit2)
- `()
- `(((X (? operand mW))
- (BYTE (8 #xdb))
- (ModR/M ,digit2 operand))))))))))
-
- (define-flonum-memory FLD 0 5 #xd9 #xc0)
- (define-flonum-memory FST 2 #f #xdd #xd0)
- (define-flonum-memory FSTP 3 7 #xdd #xd8))
-
-(define-trivial-instruction FLD1 #xd9 #xe8)
-(define-trivial-instruction FLDL2T #xd9 #xe9)
-(define-trivial-instruction FLDL2E #xd9 #xea)
-(define-trivial-instruction FLDPI #xd9 #xeb)
-(define-trivial-instruction FLDLG2 #xd9 #xec)
-(define-trivial-instruction FLDLN2 #xd9 #xed)
-(define-trivial-instruction FLDZ #xd9 #xee)
+(let-syntax ((define-conversion-instruction
+ (sc-macro-transformer
+ (lambda (form environment)
+ environment ;ignore
+ (let ((mnemonic (cadr form))
+ (rules-definitions (cddr form)))
+ `(define-instruction ,mnemonic
+ ,@(append-map
+ (lambda (rules-definition)
+ (if (not (eq? (car rules-definition) 'DEFINE-RULES))
+ (error "Malformed conversion rules definition:"
+ rules-definition))
+ (let ((pattern (cadr rules-definition))
+ (prefix-options (caddr rules-definition))
+ (rules (cdddr rules-definition)))
+ (map (lambda (rule)
+ (let ((conversion (car rule))
+ (bytes
+ (map (lambda (byte) `(8 ,byte))
+ (cdr rule))))
+ `((,conversion ,@pattern)
+ (PREFIX ,@prefix-options
+ (ModR/M reg ea))
+ (BITS ,@bytes)
+ (ModR/M reg ea))))
+ rules)))
+ rules-definitions)))))))
+
+ (define-conversion-instruction CVTF
+ (define-rules ((XMM (? reg)) (? ea xmm/m-ea))
+ ()
+ (DQ->PD #xF3 #x0F #xE6)
+ (DQ->PS #x0F #x5B)
+ (PD->DQ #xF2 #x0F #xE6)
+ (PD->PS #x66 #x0F #x5A)
+ (PS->DQ #x66 #x0F #x5B)
+ (PS->PD #x0F #x5A)
+ (SD->SS #xF2 #x0F #x5A)
+ (SS->SD #xF3 #x0F #x5A))
+
+ ;++ SIZE can be only L or Q, not W.
+ (define-rules ((? size operand-size) (R (? reg)) (? ea xmm/m-ea))
+ ((OPERAND size))
+ (SD->SI #xF2 #x0F #x2D)
+ (SS->SI #xF3 #x0F #x2D))
+
+ (define-rules ((? size operand-size) (XMM (? reg)) (? ea r/m-ea))
+ ((OPERAND size))
+ (SI->SD #xF2 #x0F #x2A)
+ (SI->SS #xF3 #x0F #x2A)))
+
+ (define-conversion-instruction CVTFT ;Convert Truncated
+ (define-rules ((XMM (? reg)) (? ea xmm/m-ea))
+ ()
+ (PD->DQ #x66 #x0F #xE6)
+ (PS->DQ #xF3 #x0F #x5B))
+ (define-rules ((? size operand-size) (R (? reg)) (? ea xmm/m-ea))
+ ()
+ (SD->SI #xF2 #x0F #x2C)
+ (SS->SI #xF3 #x0F #x2C))))
\f
-(let-syntax
- ((define-flonum-state
- (sc-macro-transformer
- (lambda (form environment)
- environment
- (let ((mnemonic (list-ref form 1))
- (opcode (list-ref form 2))
- (digit (list-ref form 3))
- (mnemonic2 (list-ref form 4)))
- `(begin
- ,@(if (not mnemonic2)
- `()
- `((define-instruction ,mnemonic2
- (((? source mW))
- (BYTE (8 #x9b) ; (FWAIT)
- (8 ,opcode))
- (ModR/M ,digit source)))))
-
- (define-instruction ,mnemonic
- (((? source mW))
- (BYTE (8 ,opcode))
- (ModR/M ,digit source)))))))))
-
- (define-flonum-state FNLDCW #xd9 5 FLDCW)
- (define-flonum-state FLDENV #xd9 4 #f)
- (define-flonum-state FNSTCW #xd9 7 FSTCW)
- (define-flonum-state FNSTENV #xd9 6 FSTENV)
- (define-flonum-state FRSTOR #xdb 4 #f)
- (define-flonum-state FNSAVE #xdd 6 FSAVE))
-
-(define-trivial-instruction FNOP #xd9 #xd0)
-(define-trivial-instruction FPATAN #xd9 #xf3)
-(define-trivial-instruction FPREM #xd9 #xf8) ; truncating remainder
-(define-trivial-instruction FPREM1 #xd9 #xf5) ; IEEE remainder
-(define-trivial-instruction FPTAN #xd9 #xf2)
-(define-trivial-instruction FRNDINT #xd9 #xfc)
-(define-trivial-instruction FSCALE #xd9 #xfd)
-(define-trivial-instruction FSIN #xd9 #xfe)
-(define-trivial-instruction FSINCOS #xd9 #xfb)
-(define-trivial-instruction FSQRT #xd9 #xfa)
-
-(define-instruction FSTSW
- (((? target mW))
- (BYTE (8 #x9b) ; (FWAIT)
- (8 #xdf))
- (ModR/M 7 target))
-
- (((R 0))
- (BYTE (8 #x9b) ; (FWAIT)
- (8 #xdf)
- (8 #xe0))))
-
-(define-instruction FNSTSW
- (((? target mW))
- (BYTE (8 #xdf))
- (ModR/M 7 target))
-
- (((R 0))
- (BYTE (8 #xdf)
- (8 #xe0))))
+(define-instruction EXTRQ ;SSE4A only
+ (((? target xmm-ea)
+ (&U (? size unsigned-5bit))
+ (&U (? position unsigned-5bit)))
+ (PREFIX (ModR/M target))
+ (BITS (8 #x66)
+ (8 #x0F)
+ (8 #x78))
+ (ModR/M 0 target)
+ (BITS (8 size UNSIGNED)
+ (8 position UNSIGNED)))
+
+ (((XMM (? target)) (? source xmm-ea))
+ (PREFIX (ModR/M target source))
+ (BITS (8 #x66)
+ (8 #x0F)
+ (8 #x79))
+ (ModR/M target source)))
+
+(define-instruction INSERTQ ;SSE4A only
+ (((XMM (? target))
+ (? source xmm-ea)
+ (&U (? size unsigned-5bit))
+ (&U (? position unsigned-5bit)))
+ (PREFIX (ModR/M target source))
+ (BITS (8 #xF2)
+ (8 #x0F)
+ (8 #x78))
+ (ModR/M target source)
+ (BITS (8 size UNSIGNED)
+ (8 position UNSIGNED)))
+
+ (((XMM (? target)) (? source xmm-ea))
+ (PREFIX (ModR/M target source))
+ (BITS (8 #xF2)
+ (8 #x0F)
+ (8 #x79))
+ (ModR/M target source)))
+
+(define-instruction FXRSTOR
+ (((? source m-ea))
+ (PREFIX (ModR/M source))
+ (BITS (8 #x0F)
+ (8 #xAE))
+ (ModR/M 1 source)))
+
+(define-instruction FXSAVE
+ (((? target m-ea))
+ (PREFIX (ModR/M target))
+ (BITS (8 #x0F)
+ (8 #xAE))
+ (ModR/M 0 target)))
+
+;;; How does LDDQU differ from MOVDQU?
+
+(define-instruction LDDQU ;Load Double Quadword Unaligned
+ (((XMM (? target)) (? source m-ea))
+ (PREFIX (ModR/M target source))
+ (BITS (8 #xF2)
+ (8 #x0F)
+ (8 #xF0))
+ (ModR/M target source)))
+
+(define-instruction LDMXCSR
+ (((? source m-ea))
+ (PREFIX (ModR/M source))
+ (BITS (8 #x0F)
+ (8 #xAE))
+ (ModR/M 2 source)))
\f
-(define-trivial-instruction FTST #xd9 #xe4)
-
-(let-syntax
- ((define-binary-flonum
- (sc-macro-transformer
- (lambda (form environment)
- environment
- (let ((mnemonic (cadr form))
- (opcode1 (caddr form))
- (opcode2 (cadddr form)))
- `(define-instruction ,mnemonic
- (((ST 0) (ST (? i)))
- (BYTE (8 ,opcode1)
- (8 (+ ,opcode2 i))))
-
- (()
- (BYTE (8 ,opcode1)
- (8 (+ ,opcode2 1))))))))))
-
- (define-binary-flonum FUCOM #xdd #xe0)
- (define-binary-flonum FUCOMP #xdd #xe8)
- (define-binary-flonum FXCH #xd9 #xc8))
-
-(define-trivial-instruction FUCOMPP #xda #xe9)
-(define-trivial-instruction FWAIT #x9b)
-(define-trivial-instruction FXAM #xd9 #xe5)
-(define-trivial-instruction FXTRACT #xd9 #xf4)
-(define-trivial-instruction FYL2X #xd9 #xf1)
-(define-trivial-instruction FYL2XP1 #xd9 #xf9)
-
-|#
\ No newline at end of file
+(define-instruction MASKMOVDQU
+ (((@R 7) (XMM (? source1)) (? source2 xmm-ea))
+ (PREFIX (ModR/M source1 source2))
+ (BITS (8 #x66)
+ (8 #x0F)
+ (8 #xF7))
+ (ModR/M source1 source2)))
+
+(define-instruction MOVAF ;Aligned
+ ((P (? p float-precision) (XMM (? target)) (? source xmm/m-ea))
+ (PREFIX (FLOAT 'P p) (ModR/M target source))
+ (BITS (8 #x0F)
+ (8 #x28))
+ (ModR/M target source))
+
+ ((P (? p float-precision) (? target xmm/m-ea) (XMM (? source)))
+ (PREFIX (FLOAT 'P p) (ModR/M source target))
+ (BITS (8 #x0F)
+ (8 #x29))
+ (ModR/M source target)))
+
+(define-instruction MOVD
+ ;++ SIZE can be only L or Q, not W.
+ (((? size operand-size) (XMM (? target)) (? source r/m-ea))
+ (PREFIX (OPERAND size) (ModR/M target source))
+ (BITS (8 #x66)
+ (8 #x0F)
+ (8 #x6E))
+ (ModR/M target source))
+
+ (((? size operand-size) (? target r/m-ea) (XMM (? source)))
+ (PREFIX (OPERAND size) (ModR/M source target))
+ (BITS (8 #x66)
+ (8 #x0F)
+ (8 #x7E))
+ (ModR/M source target)))
+
+(define-instruction MOVDDUP
+ (((XMM (? target)) (? source xmm/m-ea))
+ (PREFIX (ModR/M target source))
+ (BITS (8 #xF2)
+ (8 #x0F)
+ (8 #x12))
+ (ModR/M target source)))
+\f
+(let-syntax ((define-move-dq-instruction
+ (sc-macro-transformer
+ (lambda (form environment)
+ environment ;ignore
+ (let ((mnemonic (cadr form))
+ (prefix (caddr form)))
+
+ `(define-instruction ,mnemonic
+ (((XMM (? target)) (? source xmm/m-ea))
+ (PREFIX (ModR/M target source))
+ (BITS (8 ,prefix)
+ (8 #x0F)
+ (8 #x6F))
+ (ModR/M target source))
+
+ (((? target xmm/m-ea) (XMM (? source)))
+ (PREFIX (ModR/M source target))
+ (BITS (8 ,prefix)
+ (8 #x0F)
+ (8 #x7F))
+ (ModR/M source target))))))))
+ (define-move-dq-instruction MOVDQA #x66)
+ (define-move-dq-instruction MOVDQU #xF3))
+
+(let-syntax ((define-move-high/low-instructions
+ (sc-macro-transformer
+ (lambda (form environment)
+ (let ((MOVxy (cadr form))
+ (MOVx (caddr form))
+ (opcode1 (cadddr form))
+ (opcode2 (car (cddddr form))))
+
+ `(begin
+ (define-instruction ,MOVxy
+ ((P S (XMM (? target)) (? source xmm-ea))
+ (PREFIX (ModR/M target source))
+ (BITS (8 #x0F)
+ (8 ,opcode1))
+ (ModR/M target source)))
+
+ (define-instruction ,MOVx
+ ((P D (XMM (? target)) (? source m-ea))
+ (PREFIX (ModR/M target source))
+ (BITS (8 #x66)
+ (8 #x0F)
+ (8 ,opcode2))
+ (ModR/M target source))
+
+ ((P D (? target m-ea) (XMM (? source)))
+ (PREFIX (ModR/M source target))
+ (BITS (8 #x66)
+ (8 #x0F)
+ (8 ,(+ opcode2 1)))
+ (ModR/M source target))
+
+ ((P S (XMM (? target)) (? source m-ea))
+ (PREFIX (ModR/M target source))
+ (BITS (8 #x0F)
+ (8 ,opcode2))
+ (ModR/M target source))
+
+ ((P S (? target m-ea) (XMM (? source)))
+ (PREFIX (ModR/M source target))
+ (BITS (8 #x0F)
+ (8 ,(+ opcode2 1)))
+ (ModR/M source target)))))))))
+ ;; Note: (MOVL ...) is very different from (MOV L ...)!
+ (define-move-high/low-instructions MOVHL MOVH #x12 #x16)
+ (define-move-high/low-instructions MOVLH MOVL #x12 #x16))
+\f
+(define-instruction MOVMSKF
+ ((P (? p float-precision) (R (? target)) (? source xmm-ea))
+ (PREFIX (FLOAT 'P p) (ModR/M target source))
+ (BITS (8 #x0F)
+ (8 #x50))
+ (ModR/M target source)))
+
+(define-instruction MOVNTDQ
+ (((? target m-ea) (XMM (? source)))
+ (PREFIX (FLOAT 'P 'D) (ModR/M source target))
+ (BITS (8 #x0F)
+ (8 #xE7))
+ (ModR/M source target)))
+
+(define-instruction MOVNTF
+ (((? p/s float-packed/scalar)
+ (? p float-precision)
+ (? target m-ea)
+ (XMM (? source)))
+ (PREFIX (FLOAT p/s p) (ModR/M source target))
+ (BITS (8 #x0F)
+ (8 #x2B))
+ (ModR/M source target)))
+
+;; Note: (MOVQ ...) is very different from (MOV Q ...)!
+(define-instruction MOVQ
+ (((XMM (? target)) (? source xmm/m-ea))
+ (PREFIX (ModR/M target source))
+ (BITS (8 #xF3)
+ (8 #x0F)
+ (8 #x7E))
+ (ModR/M target source))
+
+ (((? target xmm/m-ea) (XMM (? source)))
+ (PREFIX (ModR/M source target))
+ (BITS (8 #x66)
+ (8 #x0F)
+ (8 #xD6))
+ (ModR/M source target)))
+
+;;; Using the mnemonic MOVF avoids conflict with the general MOVS
+;;; instruction.
+
+(define-instruction MOVF
+ ((S (? p float-precision) (XMM (? target)) (? source xmm/m-ea))
+ (PREFIX (FLOAT 'S p) (ModR/M target source))
+ (BITS (8 #x0F)
+ (8 #x10))
+ (ModR/M target source))
+
+ ((S (? p float-precision) (? target xmm/m-ea) (XMM (? source)))
+ (PREFIX (FLOAT 'S p) (ModR/M source target))
+ (BITS (8 #x0F)
+ (8 #x11))
+ (ModR/M source target)))
+\f
+(let-syntax ((define-mov/dup-instruction
+ (sc-macro-transformer
+ (lambda (form environment)
+ environment ;ignore
+ (let ((mnemonic (cadr form))
+ (opcode (caddr form)))
+ `(define-instruction ,mnemonic
+ (((XMM (? target)) (? source xmm/m-ea))
+ (PREFIX (ModR/M target source))
+ (BITS (8 #xF3)
+ (8 #x0F)
+ (8 ,opcode))
+ (ModR/M target source))))))))
+ (define-mov/dup-instruction MOVSHDUP #x16)
+ (define-mov/dup-instruction MOVSLDUP #x12))
+
+(define-instruction MOVUF ;Unaligned
+ ((P (? p float-precision) (XMM (? target)) (? source xmm/m-ea))
+ (PREFIX (FLOAT 'P p) (ModR/M target source))
+ (BITS (8 #x0F)
+ (8 #x10))
+ (ModR/M target source))
+ ((P (? p float-precision) (? target xmm/m-ea) (XMM (? source)))
+ (PREFIX (FLOAT 'P p) (ModR/M source target))
+ (BITS (8 #x0F)
+ (8 #x11))
+ (ModR/M source target)))
+
+(let-syntax ((define-packed-sized-instruction
+ (sc-macro-transformer
+ (lambda (form environment)
+ environment ;ignore
+ (let ((mnemonic (cadr form))
+ (size/opcode-list (cddr form)))
+ `(define-instruction ,mnemonic
+ ,@(map (lambda (size/opcode)
+ (let ((size (car size/opcode))
+ (opcode (cadr size/opcode)))
+ `((,size (XMM (? target)) (? source xmm/m-ea))
+ (PREFIX (ModR/M target source))
+ (BITS (8 #x66)
+ (8 #x0F)
+ (8 ,opcode))
+ (ModR/M target source))))
+ size/opcode-list)))))))
+ (define-packed-sized-instruction PACKS (B #x63) (UB #x67) (W #x6B))
+ (define-packed-sized-instruction PADD (B #xFC) (W #xFD) (L #xFE) (Q #xD4))
+ ;++ Should PADDU be considered a separate instruction from PADDS?
+ (define-packed-sized-instruction PADDS (B #xEC) (W #xED) (UB #xDC) (UW #xDD))
+ (define-packed-sized-instruction PAVG (B #xE0) (W #xE3))
+ (define-packed-sized-instruction PCMPEQ (B #x74) (W #x75) (L #x76))
+ (define-packed-sized-instruction PCMPGT (B #x64) (W #x65) (L #x66))
+ (define-packed-sized-instruction PMAX (W #xEE) (UB #xDE))
+ (define-packed-sized-instruction PMIN (W #xEA) (UB #xDA))
+ (define-packed-sized-instruction PSUB (B #xF8) (W #xF9) (L #xFA) (Q #xFB))
+ ;++ Should PSUBSU be considered a separate instruction from PSUBS?
+ (define-packed-sized-instruction PSUBS (B #xE8) (W #xE9) (UB #xD8) (UW #xD9))
+ ;++ Should the size indicate the source or target size? Right now
+ ;++ it indicates the source size.
+ (define-packed-sized-instruction PUNPCKH (B #x68) (W #x69) (L #x6A) (Q #x6D))
+ (define-packed-sized-instruction PUNPCKL (B #x60) (W #x61) (L #x62) (Q #x6C))
+ )
+\f
+(let-syntax ((define-packed-instruction
+ (sc-macro-transformer
+ (lambda (form environment)
+ environment ;ignore
+ (let ((mnemonic (cadr form))
+ (opcode (caddr form)))
+ `(define-instruction ,mnemonic
+ (((XMM (? target)) (? source xmm/m-ea))
+ (PREFIX (ModR/M target source))
+ (BITS (8 #x66)
+ (8 #x0F)
+ (8 ,opcode))
+ (ModR/M target source))))))))
+ (define-packed-instruction PAND #xDB)
+ (define-packed-instruction PANDN #xDF)
+ (define-packed-instruction PMADDWL #xF5)
+ (define-packed-instruction PMULUDQ #xF4)
+ (define-packed-instruction POR #xEB)
+ (define-packed-instruction PSADBW #xF6)
+ (define-packed-instruction PXOR #xEF))
+
+(define-instruction PEXTR
+ ((W (R (? target)) (? source xmm-ea) (&U (? position unsigned-3bit)))
+ (PREFIX (ModR/M target source))
+ (BITS (8 #x66)
+ (8 #x0F)
+ (8 #xC5))
+ (ModR/M target source)
+ (BITS (8 position UNSIGNED))))
+
+(define-instruction PINSR
+ ((W (XMM (? target)) (? source r-ea) (&U (? position unsigned-3bit)))
+ (PREFIX (ModR/M target source))
+ (BITS (8 #x66)
+ (8 #x0F)
+ (8 #xC4))
+ (ModR/M target source)
+ (BITS (8 position UNSIGNED))))
+
+(define-instruction PMOVMSKB
+ (((R (? target)) (? source xmm-ea))
+ (PREFIX (ModR/M target source))
+ (BITS (8 #x66)
+ (8 #x0F)
+ (8 #xD7))
+ (ModR/M target source)))
+\f
+(define-instruction PMULH
+ ((W (XMM (? target)) (? source xmm/m-ea))
+ (PREFIX (ModR/M target source))
+ (BITS (8 #x66)
+ (8 #x0F)
+ (8 #xE5))
+ (ModR/M target source))
+
+ ((UW (XMM (? target)) (? source xmm/m-ea))
+ (PREFIX (ModR/M target source))
+ (BITS (8 #x66)
+ (8 #x0F)
+ (8 #xE4))
+ (ModR/M target source)))
+
+(define-instruction PMULL
+ ((W (XMM (? target)) (? source xmm/m-ea))
+ (PREFIX (ModR/M target source))
+ (BITS (8 #x66)
+ (8 #x0F)
+ (8 #xD5))
+ (ModR/M target source)))
+
+(define-instruction PSHUF
+ ((L (XMM (? target)) (? source xmm/m-ea) (&U (? wibblethwop unsigned-byte)))
+ (PREFIX (ModR/M target source))
+ (BITS (8 #x66)
+ (8 #x0F)
+ (8 #x70))
+ (ModR/M target source)
+ (BITS (8 wibblethwop))))
+
+(define-instruction PSHUFH
+ ((W (XMM (? target)) (? source xmm/m-ea) (&U (? zob unsigned-byte)))
+ (PREFIX (ModR/M target source))
+ (BITS (8 #xF3)
+ (8 #x0F)
+ (8 #x70))
+ (ModR/M target source)
+ (BITS (8 zob))))
+
+;;; Note: (PSHUF L ...) is very different from (PSHUFL ...)! (The
+;;; latter must be (PSHUFL W ...) in any case.)
+
+(define-instruction PSHUFL
+ ((W (XMM (? target)) (? source xmm/m-ea) (&U (? veeblefitzer unsigned-byte)))
+ (PREFIX (ModR/M target source))
+ (BITS (8 #xF3)
+ (8 #x0F)
+ (8 #x70))
+ (ModR/M target source)
+ (BITS (8 veeblefitzer))))
+\f
+(let-syntax ((define-shift-instruction
+ (sc-macro-transformer
+ (lambda (form environment)
+ environment ;ignore
+ (let ((mnemonic (cadr form))
+ (digit (caddr form))
+ (dq-digit (cadddr form))
+ (opcode (car (cddddr form)))
+ (size/opcode-list (cddddr form)))
+ `(define-instruction ,mnemonic
+ ((DQ (? target xmm-ea) (&U count unsigned-byte))
+ (PREFIX (ModR/M target))
+ (BITS (8 #x66)
+ (8 #x0F)
+ (8 #x73))
+ (ModR/M ,dq-digit target)
+ (BITS (8 count UNSIGNED)))
+
+ (((? size operand-size)
+ (? target xmm-ea)
+ (&U (? count unsigned-byte)))
+ (PREFIX (ModR/M target))
+ (BITS (8 #x66)
+ (8 #x0F)
+ (8 (case size ((Q) #x73) ((L) #x72) ((W) #x71))))
+ (ModR/M ,digit target)
+ (BITS (8 count UNSIGNED)))
+
+ (((? size operand-size)
+ (XMM (? target))
+ (? source xmm/m-ea))
+ (PREFIX (ModR/M target source))
+ (BITS (8 #x66)
+ (8 #x0F)
+ (8 (case size
+ ((Q) ,(+ opcode 1))
+ ((L) ,(+ opcode 2))
+ ((W) ,(+ opcode 3)))))
+ (ModR/M target source))))))))
+ (define-shift-instruction PSLL 6 7 #xF0)
+ (define-shift-instruction PSRL 2 3 #xD0))
+\f
+(define-instruction PSRA
+ ;++ This does not admit an operand size of Q.
+ (((? size operand-size) (? target xmm-ea) (&U (? count unsigned-byte)))
+ (PREFIX (ModR/M target))
+ (BITS (8 #x66)
+ (8 #x0F)
+ (8 (case size ((L) #x72) ((W) #x71))))
+ (ModR/M 4 target)
+ (BITS (8 count UNSIGNED)))
+
+ (((? size operand-size) (XMM (? target)) (? source xmm/m-ea))
+ (PREFIX (ModR/M target source))
+ (BITS (8 #x66)
+ (8 #x0F)
+ (8 (case size ((L) #xE2) ((W) #xE1))))
+ (ModR/M target source)))
+
+(let-syntax ((define-reciprocal-instruction
+ (sc-macro-transformer
+ (lambda (form environment)
+ (let ((mnemonic (cadr form))
+ (opcode (caddr form)))
+ `(define-instruction ,mnemonic
+ (((? p/s float-packed/scalar)
+ S ;Single-precision only.
+ (XMM (? target))
+ (? source xmm/m-ea))
+ (PREFIX (FLOAT p/s 'S) (ModR/M target source))
+ (BITS (8 #x0F)
+ (8 ,opcode))
+ (ModR/M target source))))))))
+ (define-reciprocal-instruction RCPF #x53)
+ (define-reciprocal-instruction RSQRTF #x52))
+
+(define-instruction SHUF
+ ((P (? p float-precision)
+ (XMM (? target))
+ (? source xmm/m-ea)
+ (&U (? command unsigned-2bit)))
+ (PREFIX (FLOAT 'P p) (ModR/M target source))
+ (BITS (8 #x0F)
+ (8 #xC6))
+ (ModR/M target source)
+ (BITS (8 command UNSIGNED))))
+
+(define-instruction STMXCSR
+ (((? target m-ea))
+ (PREFIX (ModR/M target))
+ (BITS (8 #x0F)
+ (8 #xAE))
+ (ModR/M 3 target)))
(MODE #b11)
(R/M (register-bits r)))
+ ((XMM (? r))
+ (CATEGORIES XMM)
+ (REX (B r))
+ (MODE #b11)
+ (R/M (register-bits r)))
+
;;;; Register-indirect
((@R (? r indirect-reg))
(R/M 5)
(BITS (32 offset SIGNED))))
\f
-(define-ea-transformer r/m-ea)
+(define-ea-transformer r-ea REGISTER)
+(define-ea-transformer xmm-ea XMM)
(define-ea-transformer m-ea MEMORY)
+(define-ea-transformer r/m-ea REGISTER MEMORY)
+(define-ea-transformer xmm/m-ea XMM MEMORY)
(define-structure (effective-address
(conc-name ea/)
(declare (integrate-operator register-rex))
(define-integrable (register-rex register rex)
(declare (integrate register))
- (if (>= register r8)
+ (if (>= register 8)
rex
0))
+(define (cons-ModR/M digit ea tail)
+ (cons-syntax (ea/register ea)
+ (cons-syntax digit
+ (cons-syntax (ea/mode ea)
+ (append-syntax! (ea/extra ea) tail)))))
+
+(declare (integrate-operator opcode-register))
+(define (opcode-register opcode register)
+ (declare (integrate opcode))
+ (+ opcode (if (>= register 8) (- register 8) register)))
+
+(declare (integrate-operator float-comparator))
+(define (float-comparator comparator)
+ (case comparator
+ ((=) 0)
+ ((<) 1)
+ ((<=) 2)
+ ((UNORDERED) 3)
+ ((/=) 4)
+ ((>=) 5)
+ ((>) 6)
+ ((ORDERED) 7)
+ (else (error "Bad float comparator:" comparator))))
+
+(declare (integrate-operator operand-size))
+(define (operand-size s)
+ ;; B must be handled separately in general.
+ (case s
+ ((W L Q) s)
+ (else #f)))
+
+(declare (integrate-operator float-packed/scalar))
+(define (float-packed/scalar s)
+ (case s
+ ((S P) s)
+ (else #f)))
+
+(declare (integrate-operator float-precision))
+(define (float-precision s)
+ (case s
+ ((D S) s)
+ (else #f)))
+\f
(define (cons-prefix operand-size register ea tail)
(let ((tail
(if (eq? operand-size 'W)
(else (error "Invalid operand size:" operand-size)))
(let ((extended-register?
(or (eqv? register #t)
- (and register (>= register r8)))))
+ (and register (>= register 8)))))
(if ea
(fix:or (if extended-register? #x44 0) (ea/rex-prefix ea))
(if extended-register? #x41 0)))))))
-(define (cons-ModR/M digit ea tail)
- (cons-syntax (ea/register ea)
- (cons-syntax digit
- (cons-syntax (ea/mode ea)
- (append-syntax! (ea/extra ea) tail)))))
-
-(declare (integrate-operator opcode-register))
-(define (opcode-register opcode register)
- (declare (integrate opcode))
- (+ opcode (if (>= register r8) (- register r8) register)))
+;;; The SSE instructions don't consistently use this pattern for their
+;;; opcodes, but enough of them do that this approximate abstraction
+;;; helps to clarify the instruction syntax.
+
+(define (cons-float-prefix register ea packed/scalar precision tail)
+ (let* ((tail
+ (let ((float (list packed/scalar precision)))
+ (if (equal? float '(P S))
+ tail
+ (cons
+ (syntax-evaluation
+ (cond ((equal? float '(P D)) #x66)
+ ((equal? float '(S D)) #xF2)
+ ((equal? float '(S S)) #xF3)
+ (else (error "Bad float type:" float)))
+ coerce-8-bit-unsigned)
+ tail))))
+ (rex-prefix
+ (let ((extended-register?
+ (or (eqv? register #t)
+ (and register (>= register 8)))))
+ (if ea
+ (fix:or (if extended-register? #x44 0) (ea/rex-prefix ea))
+ (if extended-register? #x41 0)))))
+ (if (zero? rex-prefix)
+ tail
+ (cons-syntax (syntax-evaluation rex-prefix coerce-8-bit-unsigned)
+ tail))))
\f
-(define (operand-size s)
- ;; B must be handled separately in general.
- (case s
- ((W L Q) s)
- (else #f)))
-
(define-integrable (register-bits r)
(fix:and r #b111))
((8) #b11)
(else false)))
\f
+(declare (integrate-operator unsigned-2bit))
+(define (unsigned-2bit value)
+ (and (<= 0 value #b11) value))
+
+(declare (integrate-operator unsigned-3bit))
+(define (unsigned-3bit value)
+ (and (<= 0 value #b111) value))
+
+(declare (integrate-operator unsigned-5bit))
+(define (unsigned-5bit value)
+ (and (<= 0 value #b11111) value))
+
(define (signed-byte value)
(and (fits-in-signed-byte? value)
value))
(define (unsigned-quad value)
(and (fits-in-unsigned-quad? value)
value))
-
+\f
(define (sign-extended-byte value)
(and (fits-in-signed-byte? value)
value))
;; rbp holds the pointer mask
;; rsi holds the register array pointer
;; rdi holds the free pointer
- ;++ float
- ;; fr7 is not used so that we can always push on the stack once.
(list rax rcx rdx rbx r8 r9 r10 r11 r12 r13 r14 r15
- ;++ float
- ;; fr0 fr1 fr2 fr3 fr4 fr5 fr6
- ;; mmx0 mmx1 mmx2 mmx3 mmx4 mmx5 mmx6 mmx7
- ;; xmm0 xmm1 xmm2 xmm3 xmm4 xmm5 xmm6 xmm7
- ;; xmm8 xmm9 xmm10 xmm11 xmm12 xmm13 xmm14 xmm15
- ))
+ xmm0 xmm1 xmm2 xmm3 xmm4 xmm5 xmm6 xmm7
+ xmm8 xmm9 xmm10 xmm11 xmm12 xmm13 xmm14 xmm15))
(define (sort-machine-registers registers)
registers)
-;++ float
-
-#;
-(define (sort-machine-registers registers)
- ;; FR0 is preferable to other FPU regs. We promote it to the front
- ;; if we find another FPU reg in front of it.
- (let loop ((regs registers))
- (cond ((null? regs) registers) ; no float regs at all
- ((general-register? (car regs)); ignore general regs
- (loop (cdr regs)))
- ((= (car regs) fr0) ; found FR0 first
- registers)
- ((memq fr0 regs) ; FR0 not first, is it present?
- (cons fr0 (delq fr0 registers)) ; move to front
- registers)
- (else ; FR0 absent
- registers))))
-
(define (register-type register)
(cond ((machine-register? register)
(vector-ref
'#(GENERAL GENERAL GENERAL GENERAL GENERAL GENERAL GENERAL GENERAL
GENERAL GENERAL GENERAL GENERAL GENERAL GENERAL GENERAL GENERAL
- ;++ float
- ;; FLOAT FLOAT FLOAT FLOAT FLOAT FLOAT FLOAT FLOAT ;x87 fp
- ;; FLOAT FLOAT FLOAT FLOAT FLOAT FLOAT FLOAT FLOAT ;MMX 64bit
- ;; MEDIA MEDIA MEDIA MEDIA MEDIA MEDIA MEDIA MEDIA ;XMM 128bit
- ;; MEDIA MEDIA MEDIA MEDIA MEDIA MEDIA MEDIA MEDIA
- )
+ FLOAT FLOAT FLOAT FLOAT FLOAT FLOAT FLOAT FLOAT
+ FLOAT FLOAT FLOAT FLOAT FLOAT FLOAT FLOAT FLOAT)
register))
((register-value-class=word? register)
'GENERAL)
\f
(define register-reference
(let ((references (make-vector number-of-machine-registers)))
- (do ((i rax (+ i 1)))
- ((> i r15))
- (vector-set! references i (INST-EA (R ,i))))
- ;++ float
- ;; (do ((i fr0 (+ i 1)))
- ;; ((>= i fr7))
- ;; (vector-set! references i (INST-EA (ST ,(floreg->sti i)))))
- ;; (do ((i mmx0 (+ i 1)))
- ;; ((>= i mmx7))
- ;; (vector-set! references i (INST-EA (MMX ...))))
- ;; (do ((i xmm0 (+ i 1)))
- ;; ((>= i xmm15))
- ;; (vector-set! references i (INST-EA (XMM ...))))
+ (do ((r rax (+ r 1))
+ (i 0 (+ i 1)))
+ ((> r r15))
+ (vector-set! references r (INST-EA (R ,i))))
+ (do ((r xmm0 (+ r 1))
+ (i 0 (+ i 1)))
+ ((> r xmm15))
+ (vector-set! references r (INST-EA (XMM ,i))))
(lambda (register)
(vector-ref references register))))
(define (reference->register-transfer source target)
(cond ((equal? (register-reference target) source)
(LAP))
- ;++ float
((float-register-reference? source)
;; Assume target is a float register
- (LAP (FLD ,source)))
+ (LAP (MOVF S D ,(register-reference target) ,source)))
(else
(memory->machine-register source target))))
(define (register->home-transfer source target)
(machine->pseudo-register source target))
-;++ float
-
(define-integrable (float-register-reference? ea)
- ea
- #f
- #;
(and (pair? ea)
- (eq? (car ea) 'ST)))
+ (eq? (car ea) 'XMM)))
\f
;;;; Linearizer interface
\f
;;;; Utilities for the register allocator interface
-(define-integrable (machine->machine-register source target)
+(define (generate-move register source-ref target-ref)
+ (if (float-register? register)
+ (LAP (MOVF S D ,target-ref ,source-ref))
+ (LAP (MOV Q ,target-ref ,source-ref))))
+
+(define (machine->machine-register source target)
(guarantee-registers-compatible source target)
- ;++ float
- (if (not (float-register? source))
- (LAP (MOV Q ,(register-reference target) ,(register-reference source)))
- (let ((ssti (floreg->sti source))
- (tsti (floreg->sti target)))
- (if (zero? ssti)
- (LAP (FST (ST ,tsti)))
- (LAP (FLD (ST ,ssti))
- (FSTP (ST ,(1+ tsti))))))))
+ (generate-move source
+ (register-reference source)
+ (register-reference target)))
(define (machine-register->memory source target)
- ;++ float
- (if (not (float-register? source))
- (LAP (MOV Q ,target ,(register-reference source)))
- (let ((ssti (floreg->sti source)))
- (if (zero? ssti)
- (LAP (FST D ,target))
- (LAP (FLD (ST ,ssti))
- (FSTP D ,target))))))
+ (generate-move source (register-reference source) target))
(define (memory->machine-register source target)
- ;++ float
- (if (not (float-register? target))
- (LAP (MOV Q ,(register-reference target) ,source))
- (LAP (FLD D ,source)
- (FSTP (ST ,(1+ (floreg->sti target)))))))
+ (generate-move target source (register-reference target)))
(define-integrable (offset-referenceable? offset)
(byte-offset-referenceable? (* address-units-per-object offset)))
(error "Negative unsigned offset:" offset))
;; We don't have unsigned addressing modes.
(byte-offset-reference register offset))
-\f
+
;;; This returns an offset in objects, not bytes.
(define-integrable (pseudo-register-offset register)
(define-integrable (machine->pseudo-register source target)
(machine-register->memory source (pseudo-register-home target)))
-;++ float
-
-(define (general-register? register)
- register
- #t)
-
-(define (float-register? register)
- register
- #f)
-
-(define (floreg->sti reg)
- (error "x87 floating-point not supported:" `(FLOREG->STI ,reg)))
-
-#|
-(define-integrable (floreg->sti reg)
- (- reg fr0))
-
(define-integrable (general-register? register)
- (< register fr0))
+ (< register xmm0))
(define-integrable (float-register? register)
- (<= fr0 register fr7))
-|#
+ (>= register xmm0))
\f
;;;; Utilities for the rules
(flush-register! machine-reg)
(add-pseudo-register-alias! rtl-reg machine-reg))))
+;;; OBJECT->MACHINE-REGISTER! takes only general registers, not float
+;;; registers. Otherwise, (INST-EA (R ,mreg)) would need to be
+;;; (register-reference mreg).
+
(define (object->machine-register! object mreg)
;; This ordering allows LOAD-CONSTANT to use MREG as a temporary.
(let ((code (load-constant (INST-EA (R ,mreg)) object)))
temp ;ignore
(LAP ,@prefix
,@(receiver operand))))
-
+\f
;;; SIGNED-IMMEDIATE-OPERAND and UNSIGNED-IMMEDIATE-OPERAND abstract
;;; the pattern of performing an operation with an instruction that
;;; takes an immediate operand of 32 bits, but using a value that may
(cond ((fits-in-signed-long? value)
(values #f (LAP) operand))
((fits-in-signed-quad? value)
+ ;; (values #f
+ ;; (LAP)
+ ;; (INST-EA (@PCR ,(allocate-signed-quad-label value))))
(let ((temp (temporary-reference)))
(values temp (LAP (MOV Q ,temp ,operand)) temp)))
(else
(cond ((fits-in-unsigned-long? value)
(values #f (LAP) operand))
((fits-in-unsigned-quad? value)
+ ;; (values #f
+ ;; (LAP)
+ ;; (INST-EA (@PCR ,(allocate-unsigned-quad-label value))))
(let ((temp (temporary-reference)))
(values temp (LAP (MOV Q ,temp ,operand)) temp)))
(else
(error "Unsigned immediate value too large:" value)))))
+
+(define (allocate-data-label datum block-name offset alignment data)
+ (let* ((block
+ (or (find-extra-code-block block-name)
+ (let ((block
+ (declare-extra-code-block! block-name 'ANYWHERE '())))
+ (add-extra-code!
+ block
+ (LAP (PADDING ,offset ,alignment ,padding-string)))
+ block)))
+ (pairs (extra-code-block/xtra block))
+ (place (assoc datum pairs)))
+ (if place
+ (cdr place)
+ (let ((label (generate-label block-name)))
+ (set-extra-code-block/xtra!
+ block
+ (cons (cons datum label) pairs))
+ (add-extra-code! block (LAP (LABEL ,label) ,@data))
+ label))))
+
+(define (allocate-unsigned-quad-label quad)
+ (allocate-data-label quad 'QUADS 0 8 (LAP (QUAD U ,quad))))
+
+(define (allocate-signed-quad-label quad)
+ (allocate-data-label quad 'QUADS 0 8 (LAP (QUAD S ,quad))))
\f
(define (target-register target)
(delete-dead-registers!)
(define-integrable (allocate-indirection-register! register)
(load-alias-register! register 'GENERAL))
+
+(define (binary-register-operation operate commutative? type move
+ target source1 source2)
+ (let* ((worst-case
+ (lambda (target source1 source2)
+ (LAP ,@(move target source1)
+ ,@(operate target source2))))
+ (new-target-alias!
+ (lambda ()
+ (let ((source1 (standard-register-reference source1 type #f))
+ (source2 (standard-register-reference source2 type #f)))
+ (delete-dead-registers!)
+ (worst-case
+ (register-reference
+ (or (register-alias target type)
+ (allocate-alias-register! target type)))
+ source1
+ source2)))))
+ (cond ((not (pseudo-register? target))
+ (if (not (eq? (register-type target) type))
+ (error "binary-register-operation: Wrong type register"
+ target
+ type)
+ (worst-case (register-reference target)
+ (standard-register-reference source1 type #f)
+ (standard-register-reference source2 type #f))))
+ ((register-copy-if-available source1 type target)
+ => (lambda (get-alias-ref)
+ (if (= source2 source1)
+ (let ((ref (get-alias-ref)))
+ (operate ref ref))
+ (let ((source2
+ (standard-register-reference source2 type #f)))
+ (operate (get-alias-ref) source2)))))
+ ((not commutative?)
+ (new-target-alias!))
+ ((register-copy-if-available source2 type target)
+ => (lambda (get-alias-ref)
+ (let ((source1 (standard-register-reference source1 type #f)))
+ (operate (get-alias-ref) source1))))
+ (else
+ (new-target-alias!)))))
\f
(define (with-indexed-address base* index* scale b-offset protect recvr)
(let* ((base (allocate-indirection-register! base*))
(define r14 14)
(define r15 15)
-;;; x87 floating-point stack locations, allocated as if registers.
-
-(define fr0 16)
-(define fr1 17)
-(define fr2 18)
-(define fr3 19)
-(define fr4 20)
-(define fr5 21)
-(define fr6 22)
-(define fr7 23)
-
-;;; 64-bit media registers (deprecated).
-
-(define mmx0 24)
-(define mmx1 25)
-(define mmx2 26)
-(define mmx3 27)
-(define mmx4 28)
-(define mmx5 29)
-(define mmx6 30)
-(define mmx7 31)
-
;;; 128-bit media registers.
-(define xmm0 32)
-(define xmm1 33)
-(define xmm2 34)
-(define xmm3 35)
-(define xmm4 36)
-(define xmm5 37)
-(define xmm6 38)
-(define xmm7 39)
-(define xmm8 40)
-(define xmm9 41)
-(define xmm10 42)
-(define xmm11 43)
-(define xmm12 44)
-(define xmm13 45)
-(define xmm14 46)
-(define xmm15 47)
+(define xmm0 16)
+(define xmm1 17)
+(define xmm2 18)
+(define xmm3 19)
+(define xmm4 20)
+(define xmm5 21)
+(define xmm6 22)
+(define xmm7 23)
+(define xmm8 24)
+(define xmm9 25)
+(define xmm10 26)
+(define xmm11 27)
+(define xmm12 28)
+(define xmm13 29)
+(define xmm14 30)
+(define xmm15 31)
\f
-(define number-of-machine-registers 16)
+(define number-of-machine-registers 32)
(define number-of-temporary-registers 256)
(define-integrable regnum:stack-pointer rsp)
value-class=address)
((<= r8 register r15)
value-class=object)
- ((<= fr0 register fr7)
- value-class=float)
- ((<= mmx0 register mmx7)
- (error "MMX media registers not allocated:" register))
((<= xmm0 register xmm15)
- (error "XMM media registers not allocated:" register))
+ value-class=float)
(else
(error "Invalid machine register:" register))))
(error "Unknown register type" locative)))
\f
(define (rtl:constant-cost expression)
- ;; i486 clock count for instruction to construct/fetch into register.
- (let ((if-integer
- (lambda (value)
- value ; ignored
- ;; Can this be done in fewer bytes for suitably small values?
- 1)) ; MOV immediate
- (get-pc-cost
- (+ 3 ; CALL
- 4)) ; POP
- (based-reference-cost
- 1) ; MOV r/m
- (address-offset-cost
- 1)) ; LEA instruction
-
- (define (if-synthesized-constant type datum)
- (if-integer (make-non-pointer-literal type datum)))
-
+ ;; Counts derived from the AMD64 Software Optimization Guide, Rev
+ ;; 3.06, from September 2005. Scaled by two because LEA costs 1/2!
+ ;; This is pretty silly, but probably better than using i486 clock
+ ;; counts.
+ (let ((cost:lea 1)
+ (cost:mov-mem 6)
+ (cost:mov-imm 2)
+ (cost:or 2))
(case (rtl:expression-type expression)
((CONSTANT)
(let ((value (rtl:constant-value expression)))
(if (non-pointer-object? value)
- (if-synthesized-constant (object-type value)
- (careful-object-datum value))
- (+ get-pc-cost based-reference-cost))))
+ cost:mov-imm
+ cost:mov-mem)))
((MACHINE-CONSTANT)
- (if-integer (rtl:machine-constant-value expression)))
- ((ENTRY:PROCEDURE
- ENTRY:CONTINUATION)
- (+ get-pc-cost address-offset-cost))
- ((ASSIGNMENT-CACHE
- VARIABLE-CACHE)
- (+ get-pc-cost based-reference-cost))
- ((OFFSET-ADDRESS
- BYTE-OFFSET-ADDRESS
- FLOAT-OFFSET-ADDRESS)
- address-offset-cost)
+ cost:mov-imm)
+ ((ENTRY:PROCEDURE ENTRY:CONTINUATION)
+ (+ cost:mov-imm cost:lea cost:or))
+ ((OFFSET-ADDRESS BYTE-OFFSET-ADDRESS FLOAT-OFFSET-ADDRESS)
+ (receive (offset-selector scale)
+ (case (rtl:expression-type expression)
+ ((OFFSET-ADDRESS)
+ (values rtl:offset-address-offset address-units-per-object))
+ ((BYTE-OFFSET-ADDRESS)
+ (values rtl:byte-offset-address-offset 1))
+ ((FLOAT-OFFSET-ADDRESS)
+ (values rtl:float-offset-address-offset
+ address-units-per-float)))
+ (let ((offset (offset-selector expression)))
+ (if (and (rtl:machine-constant? offset)
+ (not
+ (fits-in-signed-long?
+ (* scale (rtl:machine-constant-value offset)))))
+ (+ cost:mov-imm cost:lea)
+ cost:lea))))
((CONS-POINTER)
(and (rtl:machine-constant? (rtl:cons-pointer-type expression))
(rtl:machine-constant? (rtl:cons-pointer-datum expression))
- (if-synthesized-constant
- (rtl:machine-constant-value (rtl:cons-pointer-type expression))
- (rtl:machine-constant-value
- (rtl:cons-pointer-datum expression)))))
- (else
- false))))
+ cost:mov-imm))
+ (else #f))))
(define compiler:open-code-floating-point-arithmetic?
- false)
+ #t)
(define compiler:primitives-with-no-open-coding
'(DIVIDE-FIXNUM
&/
- FLOATING-VECTOR-CONS FLOATING-VECTOR-LENGTH
- FLOATING-VECTOR-REF FLOATING-VECTOR-SET! FLONUM-ABS
- FLONUM-ACOS FLONUM-ADD FLONUM-ASIN FLONUM-ATAN FLONUM-ATAN2
- FLONUM-CEILING FLONUM-COS FLONUM-DIVIDE FLONUM-EQUAL?
- FLONUM-EXP FLONUM-FLOOR FLONUM-GREATER? FLONUM-LESS?
- FLONUM-LOG FLONUM-MULTIPLY FLONUM-NEGATE FLONUM-NEGATIVE?
- FLONUM-POSITIVE? FLONUM-ROUND FLONUM-SIN FLONUM-SQRT
- FLONUM-SUBTRACT FLONUM-TAN FLONUM-TRUNCATE FLONUM-ZERO?
- FLONUM? GCD-FIXNUM STRING-ALLOCATE VECTOR-CONS))
\ No newline at end of file
+ FLOATING-VECTOR-CONS FLONUM-ACOS FLONUM-ASIN FLONUM-ATAN
+ FLONUM-ATAN2 FLONUM-CEILING FLONUM-COS FLONUM-EXP FLONUM-FLOOR
+ FLONUM-LOG FLONUM-ROUND FLONUM-SIN FLONUM-TAN FLONUM-TRUNCATE
+ GCD-FIXNUM STRING-ALLOCATE VECTOR-CONS))
\ No newline at end of file
,(if signed?
(INST-EA (& ,n))
(INST-EA (&U ,n))))
+ ;++ Check that SCALE is a valid SIB scale.
(LEA Q ,target (@RI ,source ,temp ,scale)))))))))))
(define-integrable (load-displaced-register target source n scale)
(define (load-pc-relative-address/typed target type label)
;++ This is pretty horrid, especially since it happens for every
- ;++ continuation pushed! Neither alternative is much good.
- ;; Twenty bytes.
+ ;++ continuation pushed! None of the alternatives is much good.
+ ;; Twenty bytes, but only three instructions and no extra memory.
(let ((temp (temporary-register-reference)))
(LAP (MOV Q ,temp (&U ,(make-non-pointer-literal type 0)))
(LEA Q ,target (@PCR ,label))
(OR Q ,target ,temp)))
#|
- ;; Nineteen bytes.
+ ;; Nineteen bytes, but rather complicated (and needs syntax for an
+ ;; addressing mode not presently supported).
+ (cond ((zero? type)
+ (LAP (LEA Q ,target (@PCR ,label))))
+ ((zero? (remainder type 8))
+ (receive (type-divisor scale scale-log)
+ (cond ((not (zero? (remainder type #x10))) (values 8 8 3))
+ ((not (zero? (remainder type #x20))) (values #x10 4 2))
+ ((not (zero? (remainder type #x40))) (values #x20 2 1))
+ (else (error "Type too large:" type)))
+ (let ((offset (quotient type type-divisor)))
+ (LAP (LEA Q ,target (@PCR ,label))
+ (LEA Q ,target (@OI ,offset ,target ,scale))
+ (ROR Q ,target (&U ,scale-log))))))
+ (else ...))
+ |#
+ #|
+ ;; This would be brilliant, except that it needs (PC * 2^6)-relative
+ ;; addressing, rather than PC-relative addressing.
+ (let* ((reference-point (generate-label 'PC))
+ (offset
+ `(+ ,type
+ (* ,(expt 2 scheme-type-width) (- ,label ,reference-point)))))
+ (LAP (LEA Q ,target (@PCO ,offset))
+ (LABEL ,reference-point)
+ (ROR Q ,target (&U ,scheme-type-width))))
+ |#
+ #|
+ ;; Nineteen bytes and no temporaries, but four instructions.
(LAP (LEA Q ,target (@PCR ,label))
(SHL Q ,target (&U ,scheme-type-width))
(OR Q ,target (&U ,type))
(ROR Q ,target (&U ,scheme-type-width)))
|#
- ;++ This doesn't work because CONSTANT->LABEL will give us a label
- ;++ for the Scheme number object, not for the machine bit string.
#|
- ;; Seventeen bytes -- but we need the label to work.
+ ;; Seventeen bytes, but this requires reading eight bytes of memory.
(let ((temp (temporary-register-reference))
(literal (make-non-pointer-literal type 0)))
- (LAP (MOV Q ,temp (@PCR ,(constant->label literal)))
+ (LAP (MOV Q ,temp (@PCR ,(allocate-unsigned-quad-label literal)))
(LEA Q ,target (@PCR ,label))
(OR Q ,target ,temp)))
+ |#
+ #|
+ ;; Fourteen bytes and no temporaries, but this requires reading
+ ;; eight bytes of memory.
+ (let* ((reference-point (generate-label 'REFERENCE-POINT))
+ (expression
+ `(+ ,(make-non-pointer-literal type 0) (- ,label ,reference-point))))
+ (LAP (LABEL ,reference-point)
+ (LEA Q ,target (@PCR ,reference-point))
+ (ADD Q ,target (@PCR ,(allocate-unsigned-quad-label expression)))))
|#)
(define (load-char-into-register type source target)
(rtl:offset-address-offset base))))
expression))
-(define (with-decoded-detagged-offset expression recvr)
+(define (with-decoded-detagged-offset expression receiver)
(let ((base (rtl:offset-base expression)))
(let ((base* (rtl:offset-address-base base))
(index (rtl:offset-address-offset base)))
- (recvr (rtl:register-number (if (rtl:register? base*)
- base*
- (rtl:object->address-expression base*)))
- (rtl:register-number (if (rtl:register? index)
- index
- (rtl:object->datum-expression index)))
- (rtl:machine-constant-value (rtl:offset-offset expression))))))
+ (receiver
+ (rtl:register-number (if (rtl:register? base*)
+ base*
+ (rtl:object->address-expression base*)))
+ (rtl:register-number (if (rtl:register? index)
+ index
+ (rtl:object->datum-expression index)))
+ (rtl:machine-constant-value (rtl:offset-offset expression))))))
\f
;;;; Improved string references
FIXNUM-AND
FIXNUM-OR
FIXNUM-XOR)))
-\f
+
(define ((fixnum-2-args/standard commutative? operate) target source1
source2 overflow?)
overflow? ; ignored
- (two-arg-register-operation operate
- commutative?
- target
- source1
- source2))
-
-(define (two-arg-register-operation operate commutative?
- target source1 source2)
- (let* ((worst-case
- (lambda (target source1 source2)
- (LAP (MOV Q ,target ,source1)
- ,@(operate target source2))))
- (new-target-alias!
- (lambda ()
- (let ((source1 (any-reference source1))
- (source2 (any-reference source2)))
- (delete-dead-registers!)
- (worst-case (target-register-reference target)
- source1
- source2)))))
- (cond ((not (pseudo-register? target))
- (if (not (eq? (register-type target) 'GENERAL))
- (error "two-arg-register-operation: Wrong type register"
- target 'GENERAL)
- (worst-case (register-reference target)
- (any-reference source1)
- (any-reference source2))))
- ((register-copy-if-available source1 'GENERAL target)
- =>
- (lambda (get-alias-ref)
- (if (= source2 source1)
- (let ((ref (get-alias-ref)))
- (operate ref ref))
- (let ((source2 (any-reference source2)))
- (operate (get-alias-ref) source2)))))
- ((not commutative?)
- (new-target-alias!))
- ((register-copy-if-available source2 'GENERAL target)
- =>
- (lambda (get-alias-ref)
- (let ((source1 (any-reference source1)))
- (operate (get-alias-ref) source1))))
- (else
- (new-target-alias!)))))
+ (binary-register-operation operate commutative? 'GENERAL
+ (lambda (target source)
+ (LAP (MOV Q ,target ,source)))
+ target source1 source2))
(define (fixnum-2-args/register*constant operator target
source constant overflow?)
(lambda (target source1 source2 overflow?)
overflow? ; ignored
(require-register! rcx)
- (two-arg-register-operation operate
- #f
- target
- source1
- source2))))
+ (binary-register-operation operate #f 'GENERAL
+ (lambda (target source)
+ (LAP (MOV Q ,target ,source)))
+ target source1 source2))))
\f
(define (do-division target source1 source2 result-reg)
(prefix-instructions! (load-machine-register! source1 rax))
;;; package: (compiler lap-syntaxer)
(declare (usual-integrations))
-
-#|
\f
-;; ****
-;; Missing: 2 argument operations and predicates with non-trivial
-;; constant arguments.
-;; Also missing with (OBJECT->FLOAT (REGISTER ...)) operands.
-;; ****
+(define (flonum-source! source)
+ (or (register-alias source 'FLOAT)
+ (load-alias-register! source 'FLOAT)))
-(define (flonum-source! register)
- (floreg->sti (load-alias-register! register 'FLOAT)))
+(define-integrable (flonum-source-reference! source)
+ (register-reference (flonum-source! source)))
-(define (flonum-target! pseudo-register)
+(define (flonum-target! target)
(delete-dead-registers!)
- (floreg->sti (allocate-alias-register! pseudo-register 'FLOAT)))
+ (or (register-alias target 'FLOAT)
+ (allocate-alias-register! target 'FLOAT)))
+
+(define-integrable (flonum-target-reference! target)
+ (register-reference (flonum-target! target)))
+
+;;; FLONUM-DATA-OFFSET is the number of bytes after the location
+;;; addressed by a FLONUM-tagged pointer before the actual flonum data
+;;; begin.
-(define (flonum-temporary!)
- (allocate-temporary-register! 'FLOAT))
+(define-integrable flonum-data-offset address-units-per-object)
(define-rule statement
- ;; convert a floating-point number to a flonum object
- (ASSIGN (REGISTER (? target))
- (FLOAT->OBJECT (REGISTER (? source))))
- (let* ((source (register-alias source 'FLOAT))
- (target (target-register-reference target)))
- (LAP (MOV W (@R ,regnum:free-pointer)
- (&U ,(make-non-pointer-literal
- (ucode-type manifest-nm-vector)
- 2)))
- ,@(if (not source)
- ;; Value is in memory home
- (let ((off (pseudo-register-offset source))
- (temp (temporary-register-reference)))
- (LAP (MOV W ,target
- ,(offset-reference regnum:regs-pointer off))
- (MOV W ,temp
- ,(offset-reference regnum:regs-pointer (1+ off)))
- (MOV W (@RO B ,regnum:free-pointer 4) ,target)
- (MOV W (@RO B ,regnum:free-pointer 8) ,temp)))
- (store-float (floreg->sti source)
- (INST-EA (@RO B ,regnum:free-pointer 4))))
- (LEA ,target
- (@RO UW ,regnum:free-pointer
- ,(make-non-pointer-literal (ucode-type flonum) 0)))
- (ADD W (R ,regnum:free-pointer) (& 12)))))
-
-#|
+ (ASSIGN (REGISTER (? target)) (FLOAT->OBJECT (REGISTER (? source))))
+ (let* ((source (flonum-source-reference! source))
+ (target (target-register-reference target)))
+ (LAP ,@(with-unsigned-immediate-operand
+ (make-non-pointer-literal (ucode-type MANIFEST-NM-VECTOR) 1)
+ (lambda (operand)
+ (LAP (MOV Q (@R ,regnum:free-pointer) ,operand))))
+ (MOVF S D (@RO ,regnum:free-pointer ,flonum-data-offset) ,source)
+ (MOV Q ,target (&U ,(make-non-pointer-literal (ucode-type FLONUM) 0)))
+ (OR Q ,target (R ,regnum:free-pointer))
+ (LEA Q (R ,regnum:free-pointer)
+ (@RO ,regnum:free-pointer
+ ,(+ flonum-data-offset address-units-per-float))))))
+
(define-rule statement
- ;; convert a flonum object to a floating-point number
(ASSIGN (REGISTER (? target)) (OBJECT->FLOAT (REGISTER (? source))))
(let* ((source (move-to-temporary-register! source 'GENERAL))
- (target (flonum-target! target)))
+ (target (flonum-target-reference! target)))
(LAP ,@(object->address (register-reference source))
- ,@(load-float (INST-EA (@RO B ,source 4)) target))))
-|#
-
-(define-rule statement
- ;; Convert a flonum object to a floating-point number. Unlike the
- ;; version above which has an implicits OBJECT->ADDRESS, this one
- ;; uses the addressing mode to remove the type-code. Saves a cycle
- ;; and maybe a register spill if SOURCE is live after instruction.
- (ASSIGN (REGISTER (? target)) (OBJECT->FLOAT (REGISTER (? source))))
- (let* ((source (source-register source))
- (target (flonum-target! target)))
- (object->float source target)))
-
-(define (object->float source-register target)
- (let ((untagging+offset
- (- 4 (make-non-pointer-literal (ucode-type flonum) 0))))
- (load-float (INST-EA (@RO W ,source-register ,untagging+offset)) target)))
+ (MOVF S D ,target (@RO ,source ,flonum-data-offset)))))
\f
-;;;; Floating-point vector support.
-
(define-rule statement
(ASSIGN (REGISTER (? target)) (? expression rtl:simple-float-offset?))
(let* ((source (float-offset->reference! expression))
- (target (flonum-target! target)))
- (load-float source target)))
+ (target (flonum-target-reference! target)))
+ (LAP (MOVF S D ,target ,source))))
(define-rule statement
(ASSIGN (? expression rtl:simple-float-offset?) (REGISTER (? source)))
- (let ((source (flonum-source! source))
- (target (float-offset->reference! expression)))
- (store-float source target)))
+ (let ((source (flonum-source-reference! source))
+ (target (float-offset->reference! expression)))
+ (LAP (MOVF S D ,target ,source))))
(define-rule statement
- (ASSIGN (REGISTER (? target))
- (? expression rtl:detagged-float-offset?))
+ (ASSIGN (REGISTER (? target)) (? expression rtl:detagged-float-offset?))
(with-detagged-float-location expression
- (lambda (temp)
- (load-float temp target))))
+ (lambda (source)
+ (LAP (MOVF S D ,(flonum-target-reference! target) ,source)))))
(define-rule statement
- (ASSIGN (? expression rtl:detagged-float-offset?)
- (REGISTER (? source)))
+ (ASSIGN (? expression rtl:detagged-float-offset?) (REGISTER (? source)))
(with-detagged-float-location expression
- (lambda (temp)
- (store-float (flonum-source! source) temp))))
+ (lambda (target)
+ (LAP (MOVF S D ,target ,(flonum-source-reference! source))))))
-(define (with-detagged-float-location rtl-expression recvr)
- ;; Never needs to protect a register because it is a float register!
+(define (with-detagged-float-location rtl-expression receiver)
(with-decoded-detagged-float-offset rtl-expression
- (lambda (base index w-offset)
- (with-indexed-address base index 8 (* 4 w-offset) false recvr))))
+ (lambda (base float-index object-offset)
+ (with-indexed-address base float-index address-units-per-float
+ (* address-units-per-object object-offset)
+ ;; No general registers to protect -- the target and source
+ ;; will always be float registers.
+ #f
+ receiver))))
+
+;;; These are nearly identical copies of RTL:DETAGGED-OFFSET? and
+;;; WITH-DECODED-DETAGGED-OFFSET, with FLOAT-OFFSET substituted for
+;;; OFFSET. It is unfortunate that the RTL doesn't have a clearer
+;;; abstraction of offsets and addresses with arbitrary data.
(define (rtl:detagged-float-offset? expression)
(and (rtl:float-offset? expression)
- (let ((base (rtl:float-offset-base expression))
- (offset (rtl:float-offset-offset expression)))
- (and (rtl:offset-address? base)
- (rtl:machine-constant? (rtl:offset-address-offset base))
- (rtl:detagged-index? (rtl:offset-address-base base)
- offset)))
+ (rtl:machine-constant? (rtl:float-offset-offset expression))
+ (let ((base (rtl:float-offset-base expression)))
+ (and (rtl:float-offset-address? base)
+ (rtl:detagged-index? (rtl:float-offset-address-base base)
+ (rtl:float-offset-address-offset base))))
expression))
-(define (with-decoded-detagged-float-offset expression recvr)
- (let ((base (rtl:float-offset-base expression))
- (index (rtl:float-offset-offset expression)))
- (let ((base* (rtl:offset-address-base base)))
- (recvr (rtl:register-number (if (rtl:register? base*)
- base*
- (rtl:object->address-expression base*)))
- (rtl:register-number (if (rtl:register? index)
- index
- (rtl:object->datum-expression index)))
- (rtl:machine-constant-value (rtl:offset-address-offset base))))))
-
-(define (load-float ea sti)
- (LAP (FLD D ,ea)
- (FSTP (ST ,(1+ sti)))))
-
-(define (store-float sti ea)
- (if (zero? sti)
- (LAP (FST D ,ea))
- (LAP (FLD (ST ,sti))
- (FSTP D ,ea))))
+(define (with-decoded-detagged-float-offset expression receiver)
+ (let ((base (rtl:float-offset-base expression)))
+ (let ((base* (rtl:float-offset-address-base base))
+ (index (rtl:float-offset-address-offset base)))
+ (receiver
+ (rtl:register-number (if (rtl:register? base*)
+ base*
+ (rtl:object->address-expression base*)))
+ (rtl:register-number (if (rtl:register? index)
+ index
+ (rtl:object->datum-expression index)))
+ (rtl:machine-constant-value (rtl:float-offset-offset expression))))))
\f
;;;; Flonum Arithmetic
(define-rule statement
(ASSIGN (REGISTER (? target))
- (FLONUM-1-ARG (? operation) (REGISTER (? source)) (? overflow?)))
- overflow? ;ignore
- ((flonum-1-arg/operator operation) target source))
-
-(define ((flonum-unary-operation/general operate) target source)
- (define (default)
- (let* ((source (flonum-source! source))
- (target (flonum-target! target)))
- (operate target source)))
- ;; Attempt to reuse source for target if it is in ST(0).
- ;; Otherwise we will target ST(0) by sorting the machine registers.
- (cond ((and (pseudo-register? target) (pseudo-register? source)
- (eqv? fr0 (pseudo-register-alias *register-map* 'FLOAT source)))
- (reuse-pseudo-register-alias
- source 'FLOAT
- (lambda (alias)
- (let* ((sti (floreg->sti alias)))
- (delete-register! alias)
- (delete-dead-registers!)
- (add-pseudo-register-alias! target alias)
- (operate sti sti)))
- default))
- (else (default))))
-
-'(define ((flonum-unary-operation/general operate) target source)
- (define (default)
- (let* ((source (flonum-source! source))
- (target (flonum-target! target)))
- (operate target source)))
- ;; Attempt to reuse source for target. This works well when the
- ;; source is ST(0). We try to arrange this by sorting the registers
- ;; to give allocation preference to ST(0).
- (cond ((pseudo-register? target)
- (reuse-pseudo-register-alias
- source 'FLOAT
- (lambda (alias)
- (let* ((sti (floreg->sti alias)))
- (delete-register! alias)
- (delete-dead-registers!)
- (add-pseudo-register-alias! target alias)
- (operate sti sti)))
- default))
- (else (default))))
-
-'(define ((flonum-unary-operation/general operate) target source)
- (define (default)
- (let* ((source (flonum-source! source))
- (target (flonum-target! target)))
- (operate target source)))
- ;; Attempt to reuse source for target. This works well when the
- ;; source is ST(0). We try to arrange this by sorting the registers
- ;; to give allocation preference to ST(0).
- (cond ((pseudo-register? target)
- (let ((alias
- (and (dead-register? source)
- (pseudo-register-alias *register-map* 'FLOAT source))))
- (if alias
- (default)))
-
- (reuse-pseudo-register-alias
- source 'FLOAT
- (lambda (alias)
- (let* ((sti (floreg->sti alias)))
- (delete-register! alias)
- (delete-dead-registers!)
- (add-pseudo-register-alias! target alias)
- (operate sti sti)))
- default))
- (else (default))))
-
-(define (flonum-1-arg/operator operation)
+ (FLONUM-1-ARG (? operator) (REGISTER (? source)) (? overflow?)))
+ overflow? ;ignore
+ (let* ((source (flonum-source-reference! source))
+ (target (flonum-target-reference! target)))
+ ((flonum-1-arg/operator operator) target source)))
+
+(define-integrable (flonum-1-arg/operator operation)
(lookup-arithmetic-method operation flonum-methods/1-arg))
(define flonum-methods/1-arg
(list 'FLONUM-METHODS/1-ARG))
-\f
-;;; Notice the weird ,', syntax here.
-;;; If LAP changes, this may also have to change.
-(let-syntax
- ((define-flonum-operation
- (sc-macro-transformer
- (lambda (form environment)
- environment
- (let ((primitive-name (cadr form))
- (opcode (caddr form)))
- `(define-arithmetic-method ',primitive-name flonum-methods/1-arg
- (flonum-unary-operation/general
- (lambda (target source)
- (if (and (zero? target) (zero? source))
- (LAP (,opcode))
- (LAP (FLD (ST ,', source))
- (,opcode)
- (FSTP (ST ,',(1+ target)))))))))))))
- (define-flonum-operation FLONUM-NEGATE FCHS)
- (define-flonum-operation FLONUM-ABS FABS)
- ;; Disabled: FSIN and FCOS limited to pi * 2^62.
- ;;(define-flonum-operation FLONUM-SIN FSIN)
- ;;(define-flonum-operation FLONUM-COS FCOS)
- (define-flonum-operation FLONUM-SQRT FSQRT)
- (define-flonum-operation FLONUM-ROUND FRNDINT))
-
-;; These (and FLONUM-ROUND above) presume that the default rounding mode
-;; is round-to-nearest/even
-
-(define (define-rounding prim-name mode)
- (define-arithmetic-method prim-name flonum-methods/1-arg
- (flonum-unary-operation/general
- (lambda (target source)
- (let ((temp (temporary-register-reference)))
- (LAP (FSTCW (@R ,regnum:free-pointer))
- ,@(if (and (zero? target) (zero? source))
- (LAP)
- (LAP (FLD (ST ,source))))
- (MOV B ,temp (@RO B ,regnum:free-pointer 1))
- (OR B (@RO B ,regnum:free-pointer 1) (&U ,mode))
- (FNLDCW (@R ,regnum:free-pointer))
- (FRNDINT)
- (MOV B (@RO B ,regnum:free-pointer 1) ,temp)
- ,@(if (and (zero? target) (zero? source))
- (LAP)
- (LAP (FSTP (ST ,(1+ target)))))
- (FNLDCW (@R ,regnum:free-pointer))))))))
-
-(define-rounding 'FLONUM-CEILING #x08)
-(define-rounding 'FLONUM-FLOOR #x04)
-(define-rounding 'FLONUM-TRUNCATE #x0c)
-\f
-;; This is used in order to avoid using two stack locations for
-;; the remainder unary operations.
-
-(define ((flonum-unary-operation/stack-top operate) target source)
- (define (finish source->top)
- ;; Perhaps this can be improved?
- (rtl-target:=machine-register! target fr0)
- (LAP ,@source->top
- ,@(operate)))
-
- (if (or (machine-register? source)
- (not (is-alias-for-register? fr0 source))
- (not (dead-register? source)))
- (finish (load-machine-register! source fr0))
- (begin
- (delete-dead-registers!)
- (finish (LAP)))))
-
-(define-arithmetic-method 'FLONUM-LOG flonum-methods/1-arg
- (flonum-unary-operation/stack-top
- (lambda ()
- (LAP (FLDLN2)
- (FXCH (ST 0) (ST 1))
- (FYL2X)))))
-
-(define-arithmetic-method 'FLONUM-EXP flonum-methods/1-arg
- (flonum-unary-operation/stack-top
- (lambda ()
- ;; Hair to avoid arithmetic for non-finite inputs: exp(-inf) = 0,
- ;; but exp(x) = x for any other non-finite x. We use the first
- ;; free slot (1) to pick apart the double format to check for
- ;; non-finite inputs, and (2) to avoid using two stack slots.
- (let ((temp (temporary-register-reference))
- (infinity-or-nan (generate-label 'INFINITY-OR-NAN))
- (join (generate-label 'JOIN))
- (temp-pointer regnum:free-pointer))
- (LAP (FST D (@R ,temp-pointer))
- (MOV W ,temp (@RO W ,temp-pointer 4))
- (AND W ,temp (&U #x7FFFFFFF))
- (CMP W ,temp (&U #x7FF00000))
- (JAE B (@PCR ,infinity-or-nan))
- ;; Compute 2^(x log_2 e) with F2XM1 and FSCALE.
- (FLDL2E) ;st0 = lg e, st1 = x
- (FMULP (ST 1) (ST 0)) ;st0 = x lg e
- (FLD (ST 0)) ;st0 = x lg e, st1 = x lg e
- (FRNDINT) ;st0 = I(x lg e), st1 = x lg e
- (FSUB (ST 1) (ST 0)) ;st0 = I(x lg e), st1 = F(x lg e)
- (FSTP D (@R ,temp-pointer)) ;st0 = F(x lg e), save I(x lg e)
- (F2XM1) ;st0 = 2^F(x lg e) - 1
- (FLD1) ;st0 = 1, st1 = 2^F(x lg e) - 1
- (FADD) ;st0 = 2^F(x lg e)
- (FLD D (@R ,temp-pointer)) ;st0 = I(x lg e), st1 = 2^F(x lg e)
- (FXCH (ST 0) (ST 1)) ;st0 = 2^F(x lg e), st1 = I(x lg e)
- (FSCALE) ;st0 = 2^F(x lg e) * 2^I(x lg e),
- ;st1 = I(x lg e)
- (FSTP (ST 1)) ;Drop st1, leaving in st0 the value
- (JMP B (@PCR ,join)) ; 2^(F(x lg e) + I(x lg e)) = e^x.
- (LABEL ,infinity-or-nan)
- (CMP W (@RO W ,temp-pointer 4) (&U #xFFF00000))
- (JNE B (@PCR ,join))
- (CMP W (@RO W ,temp-pointer 0) (& 0))
- (JNE B (@PCR ,join))
- (FSTP (ST 0)) ;Pop argument.
- (FLDZ) ;Return zero.
- (LABEL ,join))))))
+(define ((flonum-unary-operation/target-bits bit-string operate) target source)
+ (LAP (MOVF S D ,target (@PCR ,(allocate-double-float-bits-label bit-string)))
+ ,@(operate target source)))
+
+(define double-flobits:negative-zero
+ (let ((bit-string (make-bit-string 64 #f)))
+ (bit-string-set! bit-string 63)
+ bit-string))
+
+(define-arithmetic-method 'FLONUM-ABS flonum-methods/1-arg
+ (flonum-unary-operation/target-bits
+ (bit-string-not double-flobits:negative-zero)
+ (lambda (target source)
+ ;; No scalar version, but doing this packed is harmless.
+ (LAP (ANDF P D ,target ,source)))))
+
+(define-arithmetic-method 'FLONUM-NEGATE flonum-methods/1-arg
+ (flonum-unary-operation/target-bits
+ double-flobits:negative-zero
+ (lambda (target source)
+ ;; No scalar version, but doing this packed is harmless.
+ (LAP (XORF P D ,target ,source)))))
+
+(define-arithmetic-method 'FLONUM-SQRT flonum-methods/1-arg
+ (lambda (target source)
+ (LAP (SQRTF S D ,target ,source))))
\f
-#|
-;; Disabled: FPTAN limited to pi * 2^62.
-(define-arithmetic-method 'FLONUM-TAN flonum-methods/1-arg
- (flonum-unary-operation/stack-top
- (lambda ()
- (LAP (FPTAN)
- (FSTP (ST 0)) ; FPOP
- ))))
-|#
+(define-rule statement
+ (ASSIGN (REGISTER (? target))
+ (FLONUM-2-ARGS (? operator)
+ (REGISTER (? source1))
+ (REGISTER (? source2))
+ (? overflow?)))
+ overflow? ;ignore
+ ((flonum-2-args/operator operator) target source1 source2))
-(define-arithmetic-method 'FLONUM-ATAN flonum-methods/1-arg
- (flonum-unary-operation/stack-top
- (lambda ()
- (LAP (FLD1)
- (FPATAN)))))
-
-;; For now, these preserve values in memory
-;; in order to avoid flushing a stack location.
-
-(define-arithmetic-method 'FLONUM-ACOS flonum-methods/1-arg
- (flonum-unary-operation/stack-top
- (lambda ()
- (LAP (FST D (@R ,regnum:free-pointer))
- (FMUL (ST 0) (ST 0))
- (FLD1)
- (F%SUBP (ST 1) (ST 0))
- (FSQRT)
- (FLD D (@R ,regnum:free-pointer))
- (FPATAN)))))
-
-(define-arithmetic-method 'FLONUM-ASIN flonum-methods/1-arg
- (flonum-unary-operation/stack-top
- (lambda ()
- (LAP (FST D (@R ,regnum:free-pointer))
- (FMUL (ST 0) (ST 0))
- (FLD1)
- (F%SUBP (ST 1) (ST 0))
- (FSQRT)
- (FLD D (@R ,regnum:free-pointer))
- (FXCH (ST 0) (ST 1))
- (FPATAN)))))
-\f
(define-rule statement
(ASSIGN (REGISTER (? target))
- (FLONUM-2-ARGS (? operation)
- (REGISTER (? source1))
- (REGISTER (? source2))
- (? overflow?)))
- overflow? ;ignore
- ((flonum-2-args/operator operation) target source1 source2))
-
-;; Binary instructions all use ST(0), and are of the forms
-;; Fop ST(0),ST(i)
-;; Fop ST(i),ST(0)
-;; FopP ST(i),ST(0)
-;; Fop ST(0),memory
-;;
-;; If possible, we like to target ST(0) since it is likely to be the
-;; source of a subsequent operation. Failing that, it is good to
-;; reuse one of the source aliases.
-
-(define ((flonum-binary-operation operate) target source1 source2)
- (define (default)
- (let* ((sti1 (flonum-source! source1))
- (sti2 (flonum-source! source2)))
- (operate (flonum-target! target) sti1 sti2)))
- (define (try-reuse-1 if-cannot)
- (reuse-pseudo-register-alias
- source1 'FLOAT
- (lambda (alias1)
- (let* ((sti1 (floreg->sti alias1))
- (sti2 (if (= source1 source2)
- sti1
- (flonum-source! source2))))
- (delete-register! alias1)
- (delete-dead-registers!)
- (add-pseudo-register-alias! target alias1)
- (operate sti1 sti1 sti2)))
- if-cannot))
- (define (try-reuse-2 if-cannot)
- (reuse-pseudo-register-alias
- source2 'FLOAT
- (lambda (alias2)
- (let* ((sti2 (floreg->sti alias2))
- (sti1 (if (= source1 source2)
- sti2
- (flonum-source! source1))))
- (delete-register! alias2)
- (delete-dead-registers!)
- (add-pseudo-register-alias! target alias2)
- (operate sti2 sti1 sti2)))
- if-cannot))
- (cond ((pseudo-register? target)
- (if (is-alias-for-register? fr0 source1)
- (try-reuse-1 (lambda () (try-reuse-2 default)))
- (try-reuse-2 (lambda () (try-reuse-1 default)))))
- ((not (eq? (register-type target) 'FLOAT))
- (error "flonum-2-args: Wrong type register" target 'FLOAT))
- (else (default))))
-
-(define (flonum-2-args/operator operation)
- (lookup-arithmetic-method operation flonum-methods/2-args))
+ (FLONUM-2-ARGS (? operator)
+ (REGISTER (? source))
+ (OBJECT->FLOAT (CONSTANT (? constant)))
+ (? overflow?)))
+ overflow? ;ignore
+ ((flonum-register*constant/operator operator) target source constant))
+
+(define-rule statement
+ (ASSIGN (REGISTER (? target))
+ (FLONUM-2-ARGS (? operator)
+ (OBJECT->FLOAT (CONSTANT (? constant)))
+ (REGISTER (? source))
+ (? overflow?)))
+ overflow? ;ignore
+ ((flonum-constant*register/operator operator) target constant source))
(define flonum-methods/2-args
(list 'FLONUM-METHODS/2-ARGS))
-(define (flonum-1-arg%1/operator operation)
- (lookup-arithmetic-method operation flonum-methods/1-arg%1))
+(define flonum-methods/register*constant
+ (list 'FLONUM-METHODS/REGISTER*CONSTANT))
-(define flonum-methods/1-arg%1
- (list 'FLONUM-METHODS/1-ARG%1))
+(define flonum-methods/constant*register
+ (list 'FLONUM-METHODS/CONSTANT*REGISTER))
-(define (flonum-1%1-arg/operator operation)
- (lookup-arithmetic-method operation flonum-methods/1%1-arg))
+(define-integrable (flonum-2-args/operator operator)
+ (lookup-arithmetic-method operator flonum-methods/2-args))
-(define flonum-methods/1%1-arg
- (list 'FLONUM-METHODS/1%1-ARG))
+(define-integrable (flonum-register*constant/operator operator)
+ (lookup-arithmetic-method operator flonum-methods/register*constant))
-(define (binary-flonum-arithmetic? operation)
- (memq operation '(FLONUM-ADD FLONUM-SUBTRACT FLONUM-MULTIPLY FLONUM-DIVIDE)))
+(define-integrable (flonum-constant*register/operator operator)
+ (lookup-arithmetic-method operator flonum-methods/constant*register))
\f
-(let-syntax
- ((define-flonum-operation
- (sc-macro-transformer
- (lambda (form environment)
- environment
- (let ((primitive-name (list-ref form 1))
- (op1%2 (list-ref form 2))
- (op1%2p (list-ref form 3))
- (op2%1 (list-ref form 4))
- (op2%1p (list-ref form 5)))
- `(begin
- (define-arithmetic-method ',primitive-name flonum-methods/2-args
- (flonum-binary-operation
- (lambda (target source1 source2)
- (cond ((= target source1)
- (cond ((zero? target)
- (LAP (,op1%2 (ST 0) (ST ,',source2))))
- ((zero? source2)
- (LAP (,op2%1 (ST ,',target) (ST 0))))
- (else
- (LAP (FLD (ST ,',source2))
- (,op2%1p (ST ,',(1+ target)) (ST 0))))))
- ((= target source2)
- (cond ((zero? target)
- (LAP (,op2%1 (ST 0) (ST ,',source1))))
- ((zero? source1)
- (LAP (,op1%2 (ST ,',target) (ST 0))))
- (else
- (LAP (FLD (ST ,',source1))
- (,op1%2p (ST ,',(1+ target)) (ST 0))))))
- (else
- (LAP (FLD (ST ,',source1))
- (,op1%2 (ST 0) (ST ,',(1+ source2)))
- (FSTP (ST ,',(1+ target)))))))))
-
- (define-arithmetic-method ',primitive-name
- flonum-methods/1%1-arg
- (flonum-unary-operation/general
- (lambda (target source)
- (if (= source target)
- (LAP (FLD1)
- (,op1%2p (ST ,',(1+ target)) (ST 0)))
- (LAP (FLD1)
- (,op1%2 (ST 0) (ST ,',(1+ source)))
- (FSTP (ST ,',(1+ target))))))))
-
- (define-arithmetic-method ',primitive-name
- flonum-methods/1-arg%1
- (flonum-unary-operation/general
- (lambda (target source)
- (if (= source target)
- (LAP (FLD1)
- (,op2%1p (ST ,',(1+ target)) (ST 0)))
- (LAP (FLD1)
- (,op2%1 (ST 0) (ST ,',(1+ source)))
- (FSTP (ST ,',(1+ target))))))))))))))
-
- (define-flonum-operation FLONUM-ADD FADD FADDP FADD FADDP)
- (define-flonum-operation FLONUM-SUBTRACT F%SUB F%SUBP F%SUBR F%SUBPR)
- (define-flonum-operation FLONUM-MULTIPLY FMUL FMULP FMUL FMULP)
- (define-flonum-operation FLONUM-DIVIDE F%DIV F%DIVP F%DIVR F%DIVPR))
-\f
-(define-arithmetic-method 'FLONUM-ATAN2 flonum-methods/2-args
- (lambda (target source1 source2)
- (if (and (not (machine-register? source1))
- (is-alias-for-register? fr0 source1)
- (dead-register? source1))
- (let ((source2 (flonum-source! source2)))
- (delete-dead-registers!)
- (rtl-target:=machine-register! target fr0)
- (LAP (FLD (ST ,source2))
- (FPATAN)))
- (begin
- (prefix-instructions! (load-machine-register! source1 fr0))
- (need-register! fr0)
- (let ((source2
- (if (= source2 source1) fr0 (flonum-source! source2))))
- (delete-dead-registers!)
- (rtl-target:=machine-register! target fr0)
- (LAP (FLD (ST ,source2))
- (FPATAN)))))))
-
-(define-arithmetic-method 'FLONUM-REMAINDER flonum-methods/2-args
- (flonum-binary-operation
- (lambda (target source1 source2)
- (if (zero? source2)
- (LAP (FLD (ST ,source1))
- (FPREM1)
- (FSTP (ST ,(1+ target))))
- #|
- ;; This sequence is one cycle shorter than the one below,
- ;; but needs two spare stack locations instead of one.
- ;; Since FPREM1 is a variable, very slow instruction,
- ;; the difference in time will hardly be noticeable
- ;; but the availability of an extra "register" may be.
- (LAP (FLD (ST ,source2))
- (FLD (ST ,source1))
- (FPREM1)
- (FSTP (ST ,(+ target 2)))
- (FSTP (ST 0))) ; FPOP
- |#
- (LAP (FXCH (ST 0) (ST ,source2))
- (FLD (ST ,(if (zero? source1) source2 source1)))
- (FPREM1)
- (FSTP (ST ,(1+ (if (= target source2)
- 0
- target))))
- (FXCH (ST 0) (ST ,source2)))))))
-\f
-(define-rule statement
- (ASSIGN (REGISTER (? target))
- (FLONUM-2-ARGS FLONUM-SUBTRACT
- (OBJECT->FLOAT (CONSTANT 0.))
- (REGISTER (? source))
- (? overflow?)))
- overflow? ;ignore
- ((flonum-unary-operation/general
- (lambda (target source)
- (if (and (zero? target) (zero? source))
- (LAP (FCHS))
- (LAP (FLD (ST ,source))
- (FCHS)
- (FSTP (ST ,(1+ target)))))))
- target source))
+(define ((flonum-2-args/standard commutative? operate) target source1 source2)
+ (binary-register-operation operate commutative? 'FLOAT
+ (lambda (target source)
+ (LAP (MOVF S D ,target ,source)))
+ target source1 source2))
+
+(define ((flonum-register*constant/standard operate) target source constant)
+ (with-float-operand constant
+ (lambda (operand)
+ (operate
+ (register-reference (move-to-alias-register! source 'FLOAT target))
+ operand))))
+
+;++ Possible improvement, not currently easy with the generic register
+;++ allocator operations provided: if the constant is zero and we have
+;++ a temporary register available, we can zero that with XOR and use
+;++ it in the place of loading a PC-relative double in memory.
+
+(define ((flonum-constant*register/commutative operate) target constant source)
+ (with-float-operand constant
+ (lambda (operand)
+ (operate
+ (register-reference (move-to-alias-register! source 'FLOAT target))
+ operand))))
+
+(define ((flonum-constant*register/noncommutative operate)
+ target constant source)
+ (let* ((source (flonum-source-reference! source))
+ (target (flonum-target-reference! target)))
+ (with-float-operand constant
+ (lambda (operand)
+ (LAP (MOVF S D ,target ,operand)
+ ,@(operate target source))))))
-(define-rule statement
- (ASSIGN (REGISTER (? target))
- (FLONUM-2-ARGS (? operation)
- (REGISTER (? source))
- (OBJECT->FLOAT (CONSTANT 1.))
- (? overflow?)))
- (QUALIFIER (binary-flonum-arithmetic? operation))
- overflow? ;ignore
- ((flonum-1-arg%1/operator operation) target source))
-
-(define-rule statement
- (ASSIGN (REGISTER (? target))
- (FLONUM-2-ARGS (? operation)
- (OBJECT->FLOAT (CONSTANT 1.))
- (REGISTER (? source))
- (? overflow?)))
- (QUALIFIER (binary-flonum-arithmetic? operation))
- overflow? ;ignore
- ((flonum-1%1-arg/operator operation) target source))
+(let-syntax
+ ((binary-operation
+ (sc-macro-transformer
+ (lambda (form environment)
+ environment ;ignore
+ (let ((name (cadr form))
+ (op (caddr form))
+ (commutative? (cadddr form)))
+ `(let ((operate
+ (lambda (target source)
+ (LAP (,op S D ,',target ,',source)))))
+ (define-arithmetic-method ',name flonum-methods/2-args
+ (flonum-2-args/standard ,commutative? operate))
+ (define-arithmetic-method ',name flonum-methods/register*constant
+ (flonum-register*constant/standard operate))
+ (define-arithmetic-method ',name flonum-methods/constant*register
+ (,(if commutative?
+ 'flonum-constant*register/commutative
+ 'flonum-constant*register/noncommutative)
+ operate))))))))
+ (binary-operation FLONUM-ADD ADDF #t)
+ (binary-operation FLONUM-DIVIDE DIVF #f)
+ (binary-operation FLONUM-MULTIPLY MULF #t)
+ (binary-operation FLONUM-SUBTRACT SUBF #f))
\f
;;;; Flonum Predicates
-(define-rule predicate
- (FLONUM-PRED-1-ARG (? predicate) (REGISTER (? source)))
- (flonum-compare-zero predicate source))
-
-(define-rule predicate
- (FLONUM-PRED-2-ARGS (? predicate)
- (REGISTER (? source1))
- (REGISTER (? source2)))
- (let* ((st1 (flonum-source! source1))
- (st2 (flonum-source! source2)))
- (cond ((zero? st1)
- (flonum-branch! predicate
- (LAP (FCOM (ST 0) (ST ,st2)))))
- ((zero? st2)
- (flonum-branch! (commute-flonum-predicate predicate)
- (LAP (FCOM (ST 0) (ST ,st1)))))
- (else
- (flonum-branch! predicate
- (LAP (FLD (ST ,st1))
- (FCOMP (ST 0) (ST ,(1+ st2)))))))))
+(define double-flobits:zero
+ (make-bit-string 64 #f))
(define-rule predicate
- (FLONUM-PRED-2-ARGS (? predicate)
- (REGISTER (? source))
- (OBJECT->FLOAT (CONSTANT 0.)))
- (flonum-compare-zero predicate source))
+ (FLONUM-PRED-1-ARG (? predicate) (REGISTER (? source)))
+ (flonum-branch!
+ predicate
+ (flonum-source-reference! source)
+ (INST-EA (@PCR ,(allocate-double-float-bits-label double-flobits:zero)))))
(define-rule predicate
(FLONUM-PRED-2-ARGS (? predicate)
- (OBJECT->FLOAT (CONSTANT 0.))
- (REGISTER (? source)))
- (flonum-compare-zero (commute-flonum-predicate predicate) source))
+ (REGISTER (? source1))
+ (REGISTER (? source2)))
+ (flonum-branch! predicate
+ (flonum-source-reference! source1)
+ (flonum-source-reference! source2)))
(define-rule predicate
(FLONUM-PRED-2-ARGS (? predicate)
- (REGISTER (? source))
- (OBJECT->FLOAT (CONSTANT 1.)))
- (flonum-compare-one predicate source))
+ (REGISTER (? source))
+ (OBJECT->FLOAT (CONSTANT (? constant))))
+ (with-float-operand constant
+ (lambda (operand)
+ (flonum-branch! predicate (flonum-source-reference! source) operand))))
(define-rule predicate
(FLONUM-PRED-2-ARGS (? predicate)
- (OBJECT->FLOAT (CONSTANT 1.))
- (REGISTER (? source)))
- (flonum-compare-one (commute-flonum-predicate predicate) source))
-\f
-(define (flonum-compare-zero predicate source)
- (let ((sti (flonum-source! source)))
- (if (zero? sti)
- (flonum-branch! predicate
- (LAP (FTST)))
- (flonum-branch! (commute-flonum-predicate predicate)
- (LAP (FLDZ)
- (FCOMP (ST 0) (ST ,(1+ sti))))))))
-
-(define (flonum-compare-one predicate source)
- (let ((sti (flonum-source! source)))
- (flonum-branch! (commute-flonum-predicate predicate)
- (LAP (FLD1)
- (FCOMP (ST 0) (ST ,(1+ sti)))))))
-
-(define (commute-flonum-predicate pred)
- (case pred
+ (OBJECT->FLOAT (CONSTANT (? constant)))
+ (REGISTER (? source)))
+ (with-float-operand constant
+ (lambda (operand)
+ (flonum-branch! (commute-flonum-predicate predicate)
+ (flonum-source-reference! source)
+ operand))))
+
+(define (commute-flonum-predicate predicate)
+ (case predicate
((FLONUM-EQUAL? FLONUM-ZERO?) 'FLONUM-EQUAL?)
((FLONUM-LESS? FLONUM-NEGATIVE?) 'FLONUM-GREATER?)
((FLONUM-GREATER? FLONUM-POSITIVE?) 'FLONUM-LESS?)
- (else
- (error "commute-flonum-predicate: Unknown predicate" pred))))
+ (else (error "commute-flonum-predicate: Unknown predicate" predicate))))
-(define (flonum-branch! predicate prefix)
+(define (flonum-branch! predicate source1 source2)
(case predicate
((FLONUM-EQUAL? FLONUM-ZERO?)
(set-current-branches! (lambda (label)
- (let ((unordered (generate-label 'UNORDERED)))
- (LAP (JP (@PCR ,unordered))
- (JE (@PCR ,label))
- (LABEL ,unordered))))
- (lambda (label)
- (LAP (JNE (@PCR ,label))
- (JP (@PCR ,label))))))
+ (let ((unordered (generate-label 'UNORDERED)))
+ (LAP (JP (@PCR ,unordered))
+ (JE (@PCR ,label))
+ (LABEL ,unordered))))
+ (lambda (label)
+ (LAP (JNE (@PCR ,label))
+ (JP (@PCR ,label))))))
((FLONUM-LESS? FLONUM-NEGATIVE?)
(set-current-branches! (lambda (label)
- (let ((unordered (generate-label 'UNORDERED)))
- (LAP (JP (@PCR ,unordered))
- (JB (@PCR ,label))
- (LABEL ,unordered))))
- (lambda (label)
- (LAP (JAE (@PCR ,label))
- (JP (@PCR ,label))))))
+ (let ((unordered (generate-label 'UNORDERED)))
+ (LAP (JP (@PCR ,unordered))
+ (JB (@PCR ,label))
+ (LABEL ,unordered))))
+ (lambda (label)
+ (LAP (JAE (@PCR ,label))
+ (JP (@PCR ,label))))))
((FLONUM-GREATER? FLONUM-POSITIVE?)
(set-current-branches! (lambda (label)
- (LAP (JA (@PCR ,label))))
- (lambda (label)
- (LAP (JBE (@PCR ,label))))))
+ (LAP (JA (@PCR ,label))))
+ (lambda (label)
+ (LAP (JBE (@PCR ,label))))))
(else
(error "flonum-branch!: Unknown predicate" predicate)))
- (flush-register! eax)
- (LAP ,@prefix
- (FSTSW (R ,eax))
- (SAHF)))
-\f
-;; This is endianness dependent!
-
-(define (flonum-value->data-decl value)
- (let ((high (make-bit-string 32 false))
- (low (make-bit-string 32 false)))
- (read-bits! value 32 high)
- (read-bits! value 64 low)
- (LAP ,@(lap:comment `(FLOAT ,value))
- (LONG U ,(bit-string->unsigned-integer high))
- (LONG U ,(bit-string->unsigned-integer low)))))
-
-(define (flo:32-bit-representation-exact? value)
- ;; Returns unsigned long representation if 32 bit representation
- ;; exists, i.e. if all `1' significant mantissa bits fit in the 32
- ;; bit format and the exponent is within range.
- (let ((mant-diff (make-bit-string (- 52 23) false)))
- (read-bits! value (+ 32 0) mant-diff)
- (and (bit-string-zero? mant-diff)
- (let ((expt64 (make-bit-string 11 false)))
- (read-bits! value (+ 32 52) expt64)
- (let ((expt (- (bit-string->unsigned-integer expt64) 1022)))
- (and (<= -127 expt 127)
- (let ((sign (make-bit-string 1 false))
- (mant32 (make-bit-string 23 false)))
- (read-bits! value (+ 32 52 11) sign)
- (read-bits! value (+ 32 52 -23) mant32)
- (bit-string->unsigned-integer
- (bit-string-append
- (bit-string-append
- mant32
- (unsigned-integer->bit-string 8 (+ 126 expt)))
- sign)))))))))
-
-(define (flonum->label value block-name alignment offset data)
- (let* ((block
- (or (find-extra-code-block block-name)
- (let ((block (declare-extra-code-block! block-name
- 'ANYWHERE
- '())))
- (add-extra-code!
- block
- (LAP (PADDING ,offset ,alignment ,padding-string)))
- block)))
- (pairs (extra-code-block/xtra block))
- (place (assoc value pairs)))
- (if place
- (cdr place)
- (let ((label (generate-label block-name)))
- (set-extra-code-block/xtra!
- block
- (cons (cons value label) pairs))
- (add-extra-code! block
- (LAP (LABEL ,label)
- ,@data))
- label))))
-
-(define (double-flonum->label fp-value)
- (flonum->label fp-value 'DOUBLE-FLOATS 8 0
- (flonum-value->data-decl fp-value)))
-
-(define (single-flonum->label fp-value)
- (flonum->label fp-value 'SINGLE-FLOATS 4 0
- (LAP ,@(lap:comment `(SINGLE-FLOAT ,fp-value))
- (LONG U ,(flo:32-bit-representation-exact? fp-value)))))
-\f
+ (LAP (UCOMISF D ,source1 ,source2)))
+\f
(define-rule statement
(ASSIGN (REGISTER (? target)) (OBJECT->FLOAT (CONSTANT (? fp-value))))
(cond ((not (flo:flonum? fp-value))
- (error "OBJECT->FLOAT: Not a floating-point value" fp-value))
- ((flo:= fp-value 0.0)
- (let ((target (flonum-target! target)))
- (LAP (FLDZ)
- (FSTP (ST ,(1+ target))))))
- ((flo:= fp-value 1.0)
- (let ((target (flonum-target! target)))
- (LAP (FLD1)
- (FSTP (ST ,(1+ target))))))
- (compiler:cross-compiling?
- (let* ((temp (allocate-temporary-register! 'GENERAL))
- (target (flonum-target! target)))
- (LAP ,@(load-constant (register-reference temp) fp-value)
- ,@(object->float temp target))))
- (else
- (let ((target (flonum-target! target)))
- (with-pcr-float fp-value
- (lambda (ea size)
- (LAP (FLD ,size ,ea)
- (FSTP (ST ,(1+ target))))))))))
-
-(define (with-pcr-float fp-value receiver)
- (define (generate-ea label-expr size)
- (with-pc
- (lambda (pc-label pc-register)
- (receiver (INST-EA (@RO W ,pc-register (- ,label-expr ,pc-label)))
- size))))
- (if (flo:32-bit-representation-exact? fp-value)
- (generate-ea (single-flonum->label fp-value) 'S)
- (generate-ea (double-flonum->label fp-value) 'D)))
-|#
+ (error "OBJECT->FLOAT: Not a floating-point value" fp-value))
+ ((flo:= fp-value 0.0)
+ (let ((target (flonum-target-reference! target)))
+ (LAP (XORF P D ,target ,target))))
+ (else
+ (with-float-operand fp-value
+ (lambda (operand)
+ (LAP (MOVF S D ,(flonum-target-reference! target) ,operand)))))))
+
+(define (with-float-operand fp-value receiver)
+ (if (not (flo:flonum? fp-value))
+ (error "Invalid constant flonum operand:" fp-value))
+ (if compiler:cross-compiling?
+ (let ((temp (allocate-temporary-register! 'GENERAL)))
+ (LAP ,@(load-constant (register-reference temp) fp-value)
+ ,@(object->address (register-reference temp))
+ ,@(receiver (INST-EA (@RO ,temp ,flonum-data-offset)))))
+ (receiver (INST-EA (@PCR ,(allocate-double-float-label fp-value))))))
+
+(define (allocate-double-float-bits-label bit-string)
+ (allocate-data-label bit-string 'DOUBLE-FLOATS 0 8
+ (LAP (QUAD U ,(bit-string->unsigned-integer bit-string)))))
+
+(define (allocate-single-float-bits-label bit-string)
+ (allocate-data-label bit-string 'SINGLE-FLOATS 0 4
+ (LAP (LONG U ,(bit-string->unsigned-integer bit-string)))))
+
+(define (allocate-double-float-label flonum)
+ (allocate-double-float-bits-label
+ (let ((bit-string (make-bit-string 64 #f)))
+ ;; Skip the manifest preceding the flonum data. Is there a
+ ;; better way to express this?
+ (let* ((bytes-per-object (vector-ref (gc-space-status) 0))
+ (bits-per-object (* 8 bytes-per-object))
+ (flonum-data-offset-in-bits bits-per-object))
+ (read-bits! flonum flonum-data-offset-in-bits bit-string))
+ bit-string)))
\f
(define-rule rewriting
(OBJECT->FLOAT (REGISTER (? operand register-known-value)))
- (QUALIFIER
- (rtl:constant-flonum-test operand (lambda (v) v #T)))
+ ;; This is not quite what we want. We really want to rewrite all
+ ;; OBJECT->FLOAT expressions with known constant operands, not just
+ ;; the nonzero ones, and then decide later whether to put it in
+ ;; memory based on whether there is a temporary register that we can
+ ;; zero with XOR. By not rewriting this case when the constant is
+ ;; zero, using a temporary may cause some other register to be
+ ;; written to memory, which defeats the purpose of using XOR to
+ ;; avoid memory access.
+ (QUALIFIER (rtl:constant-flonum-test operand flo:nonzero?))
(rtl:make-object->float operand))
(define-rule rewriting
(FLONUM-2-ARGS FLONUM-SUBTRACT
- (REGISTER (? operand-1 register-known-value))
- (? operand-2)
- (? overflow?))
- (QUALIFIER (rtl:constant-flonum-test operand-1 flo:zero?))
- (rtl:make-flonum-2-args 'FLONUM-SUBTRACT operand-1 operand-2 overflow?))
-
-(define-rule rewriting
- (FLONUM-2-ARGS (? operation)
- (REGISTER (? operand-1 register-known-value))
- (? operand-2)
- (? overflow?))
- (QUALIFIER
- (and (memq operation
- '(FLONUM-ADD FLONUM-SUBTRACT FLONUM-MULTIPLY FLONUM-DIVIDE))
- (rtl:constant-flonum-test operand-1 flo:one?)))
- (rtl:make-flonum-2-args operation operand-1 operand-2 overflow?))
-
-(define-rule rewriting
- (FLONUM-2-ARGS (? operation)
- (? operand-1)
- (REGISTER (? operand-2 register-known-value))
- (? overflow?))
- (QUALIFIER
- (and (memq operation
- '(FLONUM-ADD FLONUM-SUBTRACT FLONUM-MULTIPLY FLONUM-DIVIDE))
- (rtl:constant-flonum-test operand-2 flo:one?)))
- (rtl:make-flonum-2-args operation operand-1 operand-2 overflow?))
-
-(define-rule rewriting
- (FLONUM-PRED-2-ARGS (? predicate)
- (? operand-1)
- (REGISTER (? operand-2 register-known-value)))
- (QUALIFIER (rtl:constant-flonum-test operand-2 flo:zero?))
- (list 'FLONUM-PRED-2-ARGS predicate operand-1 operand-2))
-
-(define-rule rewriting
- (FLONUM-PRED-2-ARGS (? predicate)
- (REGISTER (? operand-1 register-known-value))
- (? operand-2))
- (QUALIFIER (rtl:constant-flonum-test operand-1 flo:zero?))
- (list 'FLONUM-PRED-2-ARGS predicate operand-1 operand-2))
-\f
-#|
-;; These don't work as written. They are not simplified and are
-;; therefore passed whole to the back end, and there is no way to
-;; construct the graph at this level.
-
-;; acos (x) = atan ((sqrt (1 - x^2)) / x)
-
-(define-rule pre-cse-rewriting
- (FLONUM-1-ARG FLONUM-ACOS (? operand) #f)
- (rtl:make-flonum-2-args
- 'FLONUM-ATAN2
- (rtl:make-flonum-1-arg
- 'FLONUM-SQRT
- (rtl:make-flonum-2-args
- 'FLONUM-SUBTRACT
- (rtl:make-object->float (rtl:make-constant 1.))
- (rtl:make-flonum-2-args 'FLONUM-MULTIPLY operand operand false)
- false)
- false)
- operand
- false))
-
-;; asin (x) = atan (x / (sqrt (1 - x^2)))
-
-(define-rule pre-cse-rewriting
- (FLONUM-1-ARG FLONUM-ASIN (? operand) #f)
- (rtl:make-flonum-2-args
- 'FLONUM-ATAN2
- operand
- (rtl:make-flonum-1-arg
- 'FLONUM-SQRT
- (rtl:make-flonum-2-args
- 'FLONUM-SUBTRACT
- (rtl:make-object->float (rtl:make-constant 1.))
- (rtl:make-flonum-2-args 'FLONUM-MULTIPLY operand operand false)
- false)
- false)
- false))
-
-|#
+ (REGISTER (? operand1 register-known-value))
+ (? operand2)
+ (? overflow?))
+ (QUALIFIER (rtl:constant-flonum-test operand1 flo:zero?))
+ (rtl:make-flonum-1-arg 'FLONUM-NEGATE operand2 overflow?))
(define (rtl:constant-flonum-test expression predicate)
(and (rtl:object->float? expression)
(and (flo:flonum? n)
(predicate n)))))))
-(define (flo:one? value)
- (flo:= value 1.))
+(define-integrable (flo:nonzero? value)
+ (not (flo:= value 0.)))
\f
;;;; Indexed addressing modes
(MACHINE-CONSTANT (? value)))
(QUALIFIER (and (rtl:float-offset-address? base)
(rtl:simple-subexpressions? base)))
- (if (zero? value)
- (rtl:make-float-offset
- (rtl:float-offset-address-base base)
- (rtl:float-offset-address-offset base))
- (rtl:make-float-offset base (rtl:make-machine-constant value))))
-
-(define-rule rewriting
- (FLOAT-OFFSET (REGISTER (? base register-known-value))
- (MACHINE-CONSTANT (? value)))
- (QUALIFIER
- (and (rtl:offset-address? base)
- (rtl:simple-subexpressions? base)
- (rtl:machine-constant? (rtl:offset-address-offset base))))
(rtl:make-float-offset base (rtl:make-machine-constant value)))
;; This is here to avoid generating things like
ifdef(`WCC386R',
`define(EFR,`$1_')',
`define(EFR,`EPFR($1)')')
-
+\f
define(hook_reference,`EFR(asm_$1)')
define(define_data,`export_label(EVR($1))')
`define(allocate_space,`EVR($1) db $2 dup (0)')',
`define(allocate_space,`EVR($1):
.space $2')')
+
+ifdef(`DASM',
+ `define(define_double,`EVR($1) dq $2')',
+ `define(define_double,`EVR($1): .double $2')')
\f
ifdef(`DASM',
`define(HEX, `0$1H')',
`define(LOF,`$1($2)')')
ifdef(`DASM',
- `define(DOF,`qword ptr $1[$2]')',
- `define(DOF,`$1($2)')')
+ `define(QOF,`qword ptr $1[$2]')',
+ `define(QOF,`$1($2)')')
ifdef(`DASM',
`define(IDX,`dword ptr [$1] [$2]')',
# TAG doesn't work due to m4 stupidity, so define these magic
# constants here. These are computed in terms of the parameters
-# above.
+# above, and ordered lexicographically.
+define(IMM_FALSE, `IMM(HEX(0000000000000000))')
+define(IMM_FIXNUM_0, `IMM(HEX(6800000000000000))')
+define(IMM_FLONUM_0, `IMM(HEX(1800000000000000))')
define(IMM_MANIFEST_NM_VECTOR_1, `IMM(HEX(9c00000000000001))')
define(IMM_TRUE, `IMM(HEX(2000000000000000))')
-define(IMM_FALSE, `IMM(HEX(0000000000000000))')
+
+# Flonums are represented by tagged pointers to the first of two
+# quadwords (sixteen bytes) in memory, the first of which is a
+# non-marked vector manifest of length 1, so that the GC will not
+# trace the other one, which is an IEEE 754 double-precision format
+# value.
+define(FLONUM_DATA_OFFSET,8)
+define(FLONUM_STORAGE_SIZE,16)
define(REGBLOCK_VAL,16)
define(REGBLOCK_COMPILER_TEMP,32)
define_data(C_Frame_Pointer)
allocate_quadword(C_Frame_Pointer)
+
+declare_alignment(8)
+define_double(flonum_zero,0.0)
+define_double(flonum_one,1.0)
\f
DECLARE_CODE_SEGMENT()
declare_alignment(2)
` OP(mov,q) TW(ABS(EVR(RegistersPtr)),regs)',
` OP(lea,q) TW(ABS(EVR(Registers)),regs)')
OP(mov,q) TW(ABS(EVR(Free)),rfree) # Free pointer = %rdi
- OP(mov,q) TW(DOF(REGBLOCK_VAL(),regs),REG(rax)) # Value/dynamic link
+ OP(mov,q) TW(QOF(REGBLOCK_VAL(),regs),REG(rax)) # Value/dynamic link
OP(mov,q) TW(IMM(ADDRESS_MASK),rmask) # = %rbp
OP(mov,q) TW(ABS(EVR(stack_pointer)),REG(rsp))
OP(mov,q) TW(REG(rax),REG(rcx)) # Preserve if used
OP(and,q) TW(rmask,REG(rcx)) # Restore potential dynamic link
- OP(mov,q) TW(REG(rcx),DOF(REGBLOCK_DLINK(),regs))
+ OP(mov,q) TW(REG(rcx),QOF(REGBLOCK_DLINK(),regs))
jmp IJMP(REG(rdx))
IF_WIN32(`
define_jump_indirection(interrupt_continuation_2,3b)
define_hook_label(interrupt_dlink)
- OP(mov,q) TW(DOF(REGBLOCK_DLINK(),regs),REG(rdx))
+ OP(mov,q) TW(QOF(REGBLOCK_DLINK(),regs),REG(rdx))
OP(mov,b) TW(IMM(HEX(19)),REG(al))
jmp scheme_to_interface_call
OP(and,q) TW(rmask,IND(REG(rsp)))
OP(or,b) TW(IMM(TC_FIXNUM),REG(al))
OP(ror,q) TW(IMM(TC_LENGTH),REG(rax))
- OP(mov,q) TW(REG(rax),LOF(REGBLOCK_VAL(),regs))
+ OP(mov,q) TW(REG(rax),QOF(REGBLOCK_VAL(),regs))
+ ret
+
+declare_alignment(2)
+asm_generic_flonum_result:
+ OP(and,q) TW(rmask,IND(REG(rsp)))
+ OP(mov,q) TW(IMM_MANIFEST_NM_VECTOR_1,REG(rcx))
+ OP(mov,q) TW(REG(rcx),IND(rfree))
+ movsd TW(REG(xmm0),QOF(FLONUM_DATA_OFFSET,rfree))
+ OP(mov,q) TW(IMM_FLONUM_0,REG(rax))
+ OP(or,q) TW(rfree,REG(rax))
+ OP(lea,q) TW(QOF(FLONUM_STORAGE_SIZE,rfree),rfree)
+ OP(mov,q) TW(REG(rax),QOF(REGBLOCK_VAL(),regs))
ret
declare_alignment(2)
asm_generic_return_sharp_t:
OP(and,q) TW(rmask,IND(REG(rsp)))
OP(mov,q) TW(IMM_TRUE,REG(rax))
- OP(mov,q) TW(REG(rax),LOF(REGBLOCK_VAL(),regs))
+ OP(mov,q) TW(REG(rax),QOF(REGBLOCK_VAL(),regs))
ret
declare_alignment(2)
asm_generic_return_sharp_f:
OP(and,q) TW(rmask,IND(REG(rsp)))
OP(mov,q) TW(IMM_FALSE,REG(rax))
- OP(mov,q) TW(REG(rax),LOF(REGBLOCK_VAL(),regs))
+ OP(mov,q) TW(REG(rax),QOF(REGBLOCK_VAL(),regs))
ret
\f
define(define_unary_operation,
OP(mov,q) TW(REG(rdx),REG(rax))
OP(shr,q) TW(IMM(DATUM_LENGTH),REG(rax))
OP(cmp,b) TW(IMM(TC_FIXNUM),REG(al))
+ je asm_generic_$1_fix
+ OP(cmp,b) TW(IMM(TC_FLONUM),REG(al))
jne asm_generic_$1_fail
+asm_generic_$1_flo:
+ OP(and,q) TW(rmask,REG(rdx))
+ movsd TW(QOF(FLONUM_DATA_OFFSET,REG(rdx)),REG(xmm0))
+ $4 TW(ABS(flonum_one),REG(xmm0))
+ jmp asm_generic_flonum_result
+
asm_generic_$1_fix:
OP(mov,q) TW(REG(rdx),REG(rax))
OP(shl,q) TW(IMM(TC_LENGTH),REG(rax))
OP(mov,q) TW(REG(rdx),REG(rax))
OP(shr,q) TW(IMM(DATUM_LENGTH),REG(rax))
OP(cmp,b) TW(IMM(TC_FIXNUM),REG(al))
+ je asm_generic_$1_fix
+ OP(cmp,b) TW(IMM(TC_FLONUM),REG(al))
jne asm_generic_$1_fail
+asm_generic_$1_flo:
+ OP(and,q) TW(rmask,REG(rdx))
+ movsd TW(QOF(FLONUM_DATA_OFFSET,REG(rdx)),REG(xmm0))
+ ucomisd TW(ABS(flonum_zero),REG(xmm0))
+ $3 asm_generic_return_sharp_t
+ jmp asm_generic_return_sharp_f
+
asm_generic_$1_fix:
OP(mov,q) TW(REG(rdx),REG(rax))
OP(shl,q) TW(IMM(TC_LENGTH),REG(rax))
jmp scheme_to_interface')
\f
define(define_binary_operation,
-`define_binary_operation_with_fixup($1,$2,$3,
+`define_binary_operation_with_fixup($1,$2,$3,$4,
`OP(shl,q) TW(IMM(TC_LENGTH),REG(rax))')')
define(define_binary_operation_with_fixup,
OP(cmp,b) TW(REG(al),REG(cl))
jne asm_generic_$1_fail
OP(cmp,b) TW(IMM(TC_FIXNUM),REG(al))
+ je asm_generic_$1_fix
+ OP(cmp,b) TW(IMM(TC_FLONUM),REG(al))
jne asm_generic_$1_fail
+asm_generic_$1_flo:
+ OP(and,q) TW(rmask,REG(rdx))
+ OP(and,q) TW(rmask,REG(rbx))
+ movsd TW(QOF(FLONUM_DATA_OFFSET,REG(rdx)),REG(xmm0))
+ $4 TW(QOF(FLONUM_DATA_OFFSET,REG(rbx)),REG(xmm0))
+ jmp asm_generic_flonum_result
+
asm_generic_$1_fix:
OP(mov,q) TW(REG(rdx),REG(rax))
OP(mov,q) TW(REG(rbx),REG(rcx))
- $4 # Set up rax.
+ $5 # Set up rax.
OP(shl,q) TW(IMM(TC_LENGTH),REG(rcx))
OP($3,q) TW(REG(rcx),REG(rax)) # subq
jno asm_generic_fixnum_result
OP(cmp,b) TW(REG(al),REG(cl))
jne asm_generic_$1_fail
OP(cmp,b) TW(IMM(TC_FIXNUM),REG(al))
+ je asm_generic_$1_fix
+ OP(cmp,b) TW(IMM(TC_FLONUM),REG(al))
jne asm_generic_$1_fail
+asm_generic_$1_flo:
+ OP(and,q) TW(rmask,REG(rdx))
+ OP(and,q) TW(rmask,REG(rbx))
+ movsd TW(QOF(FLONUM_DATA_OFFSET,REG(rdx)),REG(xmm0))
+ ucomisd TW(QOF(FLONUM_DATA_OFFSET,REG(rbx)),REG(xmm0))
+ $3 asm_generic_return_sharp_t
+ jmp asm_generic_return_sharp_f
+
asm_generic_$1_fix:
OP(shl,q) TW(IMM(TC_LENGTH),REG(rdx))
OP(shl,q) TW(IMM(TC_LENGTH),REG(rbx))
OP(mov,b) TW(IMM(HEX($2)),REG(al))
jmp scheme_to_interface')
\f
-#define_unary_operation(decrement,22,sub)
-#define_unary_operation(increment,26,add)
+# Division is hairy. I'm not sure whether this will do the right
+# thing for infinities and NaNs.
+
+define_hook_label(generic_divide)
+ OP(pop,q) REG(rdx)
+ OP(pop,q) REG(rbx)
+ # We want to divide rdx by rbx. First put the numerator's tag
+ # in al and the denominator's tag in cl.
+ OP(mov,q) TW(REG(rdx),REG(rax))
+ OP(mov,q) TW(REG(rbx),REG(rcx))
+ OP(shr,q) TW(IMM(DATUM_LENGTH),REG(rax))
+ OP(shr,q) TW(IMM(DATUM_LENGTH),REG(rcx))
+ OP(cmp,b) TW(IMM(TC_FIXNUM),REG(al))
+ je asm_generic_divide_fix
+ OP(cmp,b) TW(IMM(TC_FLONUM),REG(al))
+ jne asm_generic_divide_fail
+ OP(cmp,b) TW(IMM(TC_FLONUM),REG(cl))
+ je asm_generic_divide_flo_by_flo
+ OP(cmp,b) TW(IMM(TC_FIXNUM),REG(cl))
+ jne asm_generic_divide_fail
+
+asm_generic_divide_flo_by_fix:
+ # Numerator (rdx) is a flonum, denominator (rbx) is a fixnum.
+ OP(mov,q) TW(REG(rbx),REG(rcx))
+ OP(shl,q) TW(IMM(TC_LENGTH),REG(rcx))
+ # Division by zero -- bail.
+ jz asm_generic_divide_fail
+ OP(and,q) TW(rmask,REG(rdx))
+ OP(sar,q) TW(IMM(TC_LENGTH),REG(rcx))
+ movsd TW(QOF(FLONUM_DATA_OFFSET,REG(rdx)),REG(xmm0))
+ OP(cvtsi2sd,q) TW(REG(rcx),REG(xmm1))
+ divsd TW(REG(xmm1),REG(xmm0))
+ jmp asm_generic_flonum_result
+
+asm_generic_divide_fix:
+ OP(cmp,b) TW(IMM(TC_FLONUM),REG(cl))
+ jne asm_generic_divide_fail
+\f
+asm_generic_divide_fix_by_flo:
+ # Numerator (rdx) is a fixnum, denominator (rbx) is a flonum.
+ OP(mov,q) TW(REG(rbx),REG(rax))
+ OP(and,q) TW(rmask,REG(rax))
+ OP(mov,q) TW(REG(rdx),REG(rcx))
+ movsd TW(QOF(FLONUM_DATA_OFFSET,REG(rax)),REG(xmm1))
+ OP(shl,q) TW(IMM(TC_LENGTH),REG(rcx))
+ jz asm_generic_divide_zero_by_flo
+ OP(sar,q) TW(IMM(TC_LENGTH),REG(rcx))
+ OP(cvtsi2sd,q) TW(REG(rcx),REG(xmm0))
+ divsd TW(REG(xmm1),REG(xmm0))
+ jmp asm_generic_flonum_result
+
+asm_generic_divide_zero_by_flo:
+ # rcx contains zero, representing a numerator exactly zero.
+ # Defer division of 0 by 0.0; otherwise, yield exactly zero.
+ OP(cvtsi2sd,q) TW(REG(rcx),REG(xmm0))
+ ucomisd TW(REG(xmm1),REG(xmm0))
+ je asm_generic_divide_fail
+ OP(and,q) TW(rmask,IND(REG(rsp)))
+ OP(mov,q) TW(IMM_FIXNUM_0,REG(rax))
+ OP(mov,q) TW(REG(rax),QOF(REGBLOCK_VAL(),regs))
+ ret
+
+asm_generic_divide_flo_by_flo:
+ # Numerator (rdx) and denominator (rbx) are both flonums.
+ OP(mov,q) TW(REG(rdx),REG(rax))
+ OP(mov,q) TW(REG(rbx),REG(rcx))
+ OP(and,q) TW(rmask,REG(rax))
+ OP(and,q) TW(rmask,REG(rcx))
+ movsd TW(QOF(FLONUM_DATA_OFFSET,REG(rax)),REG(xmm0))
+ movsd TW(QOF(FLONUM_DATA_OFFSET,REG(rcx)),REG(xmm1))
+ ucomisd TW(ABS(flonum_zero),REG(xmm1))
+ je asm_generic_divide_fail
+ divsd TW(REG(xmm1),REG(xmm0))
+ jmp asm_generic_flonum_result
+
+asm_generic_divide_fail:
+ OP(push,q) REG(rbx)
+ OP(push,q) REG(rdx)
+ OP(mov,b) TW(IMM(HEX(23)),REG(al))
+ jmp scheme_to_interface
+\f
+define_unary_operation(decrement,22,sub,subsd)
+define_unary_operation(increment,26,add,addsd)
-#define_unary_predicate(negative,2a,jl)
-#define_unary_predicate(positive,2c,jg)
-#define_unary_predicate(zero,2d,je)
+# define_unary_predicate(name,index,jcc)
+# define_unary_predicate( $1, $2, $3)
+define_unary_predicate(negative,2a,jl)
+define_unary_predicate(positive,2c,jg)
+define_unary_predicate(zero,2d,je)
-# define_binary_operation(name,index,op)
-# define_binary_operation( $1, $2,$3)
-#define_binary_operation(add,2b,add)
-#define_binary_operation(subtract,28,sub)
+# define_binary_operation(name,index,fxop,flop)
+# define_binary_operation( $1, $2, $3, $4)
+define_binary_operation(add,2b,add,addsd)
+define_binary_operation(subtract,28,sub,subsd)
# No fixup -- leave it unshifted.
-#define_binary_operation_with_fixup(multiply,29,imul)
+define_binary_operation_with_fixup(multiply,29,imul,mulsd)
# define_binary_predicate(name,index,jcc)
-#define_binary_predicate(equal,24,je)
-#define_binary_predicate(greater,25,jg)
-#define_binary_predicate(less,27,jl)
-
-# At the moment, there is no advantage to using the above code, and in
-# fact using it is a waste, since the compiler open-codes the fixnum
-# case already. Later, the above code will also handle floating-point
-# arguments, which the compiler does not open-code.
-
-define_jump_indirection(generic_decrement,22)
-define_jump_indirection(generic_divide,23)
-define_jump_indirection(generic_equal,24)
-define_jump_indirection(generic_greater,25)
-define_jump_indirection(generic_increment,26)
-define_jump_indirection(generic_less,27)
-define_jump_indirection(generic_subtract,28)
-define_jump_indirection(generic_multiply,29)
-define_jump_indirection(generic_negative,2a)
-define_jump_indirection(generic_add,2b)
-define_jump_indirection(generic_positive,2c)
-define_jump_indirection(generic_zero,2d)
+# define_binary_predicate( $1, $2, $3)
+define_binary_predicate(equal,24,je)
+define_binary_predicate(greater,25,jg)
+define_binary_predicate(less,27,jl)
+
+#define_jump_indirection(generic_decrement,22)
+#define_jump_indirection(generic_divide,23)
+#define_jump_indirection(generic_equal,24)
+#define_jump_indirection(generic_greater,25)
+#define_jump_indirection(generic_increment,26)
+#define_jump_indirection(generic_less,27)
+#define_jump_indirection(generic_subtract,28)
+#define_jump_indirection(generic_multiply,29)
+#define_jump_indirection(generic_negative,2a)
+#define_jump_indirection(generic_add,2b)
+#define_jump_indirection(generic_positive,2c)
+#define_jump_indirection(generic_zero,2d)
define_jump_indirection(generic_quotient,37)
define_jump_indirection(generic_remainder,38)
define_jump_indirection(generic_modulo,39)