From: Taylor R Campbell Date: Wed, 11 Nov 2009 20:24:13 +0000 (-0500) Subject: Open-code floating-point primitives on AMD x86-64. X-Git-Tag: 20100708-Gtk~251 X-Git-Url: https://birchwood-abbey.net/git?a=commitdiff_plain;h=77873d2cea82ae189b2f2a7cc996b62e1d798974;p=mit-scheme.git Open-code floating-point primitives on AMD x86-64. Only lightly tested. Use with caution. Slippery when wet. Keep out of reach of children. --- diff --git a/src/compiler/machines/x86-64/insmac.scm b/src/compiler/machines/x86-64/insmac.scm index 9f8dede03..559354bdd 100644 --- a/src/compiler/machines/x86-64/insmac.scm +++ b/src/compiler/machines/x86-64/insmac.scm @@ -74,15 +74,20 @@ USA. (sc-macro-transformer (lambda (form environment) environment - (if (syntax-match? '(IDENTIFIER ? SYMBOL) (cdr form)) + (if (syntax-match? '(IDENTIFIER * SYMBOL) (cdr form)) `(DEFINE (,(cadr form) EXPRESSION) (LET ((MATCH-RESULT (PATTERN-LOOKUP ,ea-database-name EXPRESSION))) (AND MATCH-RESULT - ,(if (pair? (cddr form)) - `(LET ((EA (MATCH-RESULT))) - (AND (MEMQ ',(caddr form) (EA/CATEGORIES EA)) - EA)) - `(MATCH-RESULT))))) + ,(let ((categories (cddr form))) + (if (pair? categories) + `(LET ((EA (MATCH-RESULT))) + (AND + (OR + ,@(map (lambda (category) + `(MEMQ ',category (EA/CATEGORIES EA))) + categories)) + EA)) + `(MATCH-RESULT)))))) (ill-formed-syntax form))))) (define (parse-categories categories environment context) @@ -90,7 +95,7 @@ USA. (if (not (and (pair? categories) (eq? 'CATEGORIES (car categories)) (pair? (cdr categories)) - (memq (cadr categories) '(REGISTER MEMORY)) + (memq (cadr categories) '(REGISTER MEMORY XMM)) (null? (cddr categories)))) (error "Malformed CATEGORIES for effective address rule:" categories @@ -215,24 +220,38 @@ USA. (values tail 0)))) (define (collect-prefix options tail environment) - (let loop ((options options) (operand #f) (register #f) (r/m #f)) + (let loop ((options options) (operand #f) (register #f) (r/m #f) (float #f)) (if (pair? options) (case (caar options) - ((OPERAND) (loop (cdr options) (cadar options) register r/m)) + ((OPERAND) (loop (cdr options) (cadar options) register r/m float)) ((OPCODE-REGISTER) (loop (cdr options) operand (or (not (pair? (cdar options))) (cadar options)) - r/m)) + r/m + float)) ((ModR/M) ;; (ModR/M ), for fixed digits ;; (ModR/M ), for registers - (if (pair? (cddar options)) - (loop (cdr options) operand (cadar options) (caddar options)) - (loop (cdr options) operand #f (cadar options)))) + (receive (register r/m) + (if (pair? (cddar options)) + (values (cadar options) (caddar options)) + (values #f (cadar options))) + (loop (cdr options) operand register r/m float))) + ((FLOAT) + ;; (FLOAT ) + (loop (cdr options) operand register r/m (cdar options))) (else (error "Bad instruction prefix option:" (car options)))) - (let ((cons-prefix (close-syntax 'CONS-PREFIX environment))) - `(,cons-prefix ,operand ,register ,r/m ,tail))))) + (if float + (let ((cons-float-prefix + (close-syntax 'CONS-FLOAT-PREFIX environment))) + (if operand + (error "Float instructions can't have operand size prefix:" + operand)) + `(,cons-float-prefix ,register ,r/m ,(car float) ,(cadr float) + ,tail)) + (let ((cons-prefix (close-syntax 'CONS-PREFIX environment))) + `(,cons-prefix ,operand ,register ,r/m ,tail)))))) (define (collect-ModR/M field tail environment) (let ((digit-or-reg (car field)) diff --git a/src/compiler/machines/x86-64/instrf.scm b/src/compiler/machines/x86-64/instrf.scm index bbad64cf6..7e3021d0b 100644 --- a/src/compiler/machines/x86-64/instrf.scm +++ b/src/compiler/machines/x86-64/instrf.scm @@ -23,319 +23,689 @@ USA. |# -;;;; Intel i387/i486 Instruction Set +;;;; AMD x86-64 128-bit Media Instruction Set ;;; package: (compiler lap-syntaxer) -(declare (usual-integrations)) +;;; The mnemonics here don't entirely match the ones in the AMD +;;; manual, or in your typical x86-64 assembler. These mnemonics try +;;; to adhere to a convention of treating operand sizes, precisions, +;;; and packed/scalar choices as arguments to a common mnemonic, where +;;; it is sensible for there to be a choice. Sometimes this is not +;;; entirely clear, such as PSHUF (which works only with longword-size +;;; (32-bit) operands) and PSHUFH/PSHUFL (which work only with +;;; word-size (16-bit) operands). And sometimes this doesn't work +;;; very well (e.g., MOVQ). Most instructions for floating-point +;;; arithmetic have F suffixed to their names; e.g., rather than +;;; ADDSS, ADDSD, ADDPS, and ADDPD, there's a single ADDF mnemonic, to +;;; be used as (ADDF S S ...), (ADDF S D ...), &c. +;;; +;;; Would it have been better just to transcribe exactly the mnemonics +;;; in the AMD manual? Perhaps, and it might have caused fewer errors +;;; in transcription, since there would be fewer different formats +;;; that way. -#| +(declare (usual-integrations)) (let-syntax - ((define-binary-flonum - (sc-macro-transformer - (lambda (form environment) - environment - (let ((mnemonic (list-ref form 1)) - (pmnemonic (list-ref form 2)) - (imnemonic (list-ref form 3)) - (digit (list-ref form 4)) - (opcode1 (list-ref form 5)) - (opcode2 (list-ref form 6))) - `(begin - (define-instruction ,mnemonic - (((ST 0) (ST (? i))) - (BYTE (8 #xd8) - (8 (+ ,opcode1 i)))) - - (((ST (? i)) (ST 0)) - (BYTE (8 #xdc) - (8 (+ ,opcode2 i)))) - - (() - (BYTE (8 #xde) - (8 (+ ,opcode2 1)))) - - ((D (? source mW)) - (BYTE (8 #xdc)) - (ModR/M ,digit source)) - - ((S (? source mW)) - (BYTE (8 #xd8)) - (ModR/M ,digit source))) - - (define-instruction ,pmnemonic - (((ST (? i)) (ST 0)) - (BYTE (8 #xde) - (8 (+ ,opcode2 i))))) - - (define-instruction ,imnemonic - ((L (? source mW)) - (BYTE (8 #xda)) - (ModR/M ,digit source)) - - ((H (? source mW)) - (BYTE (8 #xde)) - (ModR/M ,digit source))))))))) - - ;; The i486 book (and 387, etc.) has inconsistent instruction - ;; descriptions and opcode assignments for FSUB and siblings, - ;; and FDIV and siblings. - ;; FSUB ST(i),ST is described as replacing ST(i) with ST-ST(i) - ;; while the opcode described replaces ST(i) with ST(i)-ST. - - ;; In the following, the F% forms follow the descriptions in the - ;; book, namely, F%SUB computes ST-ST(i) and F%SUBR computes - ;; ST(i)-ST, storing into their destination (first) argument. - - ;; The %-less forms follow the opcodes and usual convention, - ;; namely FSUB computes destination (first) argument - source - ;; argument FSUBR computes source - destination. - - (define-binary-flonum FADD FADDP FIADD 0 #xc0 #xc0) - (define-binary-flonum F%DIV F%DIVP F%IDIV 6 #xf0 #xf0) - (define-binary-flonum F%DIVR F%DIVPR F%IDIVR 7 #xf8 #xf8) - (define-binary-flonum FDIV FDIVP FIDIV 6 #xf0 #xf8) - (define-binary-flonum FDIVR FDIVPR FIDIVR 7 #xf8 #xf0) - (define-binary-flonum FMUL FMULP FIMUL 1 #xc8 #xc8) - (define-binary-flonum F%SUB F%SUBP F%ISUB 4 #xe0 #xe0) - (define-binary-flonum F%SUBR F%SUBPR F%ISUBR 5 #xe8 #xe8) - (define-binary-flonum FSUB FSUBP FISUB 4 #xe0 #xe8) - (define-binary-flonum FSUBR FSUBPR FISUBR 5 #xe8 #xe0)) + ((define-flop-instruction + (sc-macro-transformer + (lambda (form environment) + environment ;ignore + (let ((mnemonic (cadr form)) + (opcode (caddr form))) + `(define-instruction ,mnemonic + (((? p/s float-packed/scalar) + (? p float-precision) + (XMM (? target)) + (? source xmm/m-ea)) + (PREFIX (FLOAT p/s p) (ModR/M target source)) + (BITS (8 #x0F) + (8 ,opcode)) + (ModR/M target source)))))))) + (define-flop-instruction ADDF #x58) + (define-flop-instruction DIVF #x5E) + (define-flop-instruction MAXF #x5F) + (define-flop-instruction MINF #x5D) + (define-flop-instruction MULF #x59) + (define-flop-instruction SQRTF #x51) + (define-flop-instruction SUBF #x5C)) + +(let-syntax ((define-packed-flop-instruction + (sc-macro-transformer + (lambda (form environment) + environment ;ignore + (let ((mnemonic (cadr form)) + (opcode (caddr form))) + `(define-instruction ,mnemonic + ((P D (XMM (? target)) (? source xmm/m-ea)) + (PREFIX (ModR/M target source)) + (BITS (8 #x66) + (8 #x0F) + (8 #xD0)) + (ModR/M target source)) + + ((P S (XMM (? target)) (? source xmm/m-ea)) + (PREFIX (ModR/M target source)) + (BITS (8 #xF2) + (8 #x0F) + (8 #xD0)) + (ModR/M target source)))))))) + (define-packed-flop-instruction ADDSUBF #xD0) + (define-packed-flop-instruction HADDF #x7C) + (define-packed-flop-instruction HSUBF #x7D)) -(define-trivial-instruction F2XM1 #xd9 #xf0) -(define-trivial-instruction FABS #xd9 #xe1) - -(define-instruction FBLD - (((? source mW)) - (BYTE (8 #xd8)) - (ModR/M 4 source))) - -(define-instruction FBSTP - (((? target mW)) - (BYTE (8 #xdf)) - (ModR/M 6 target))) - -(define-trivial-instruction FCHS #xd9 #xe0) -(define-trivial-instruction FCLEX #x9b #xdb #xe2) ; = (FWAIT) (FNCLEX) -(define-trivial-instruction FNCLEX #xdb #xe2) - -(let-syntax - ((define-flonum-comparison - (sc-macro-transformer - (lambda (form environment) - environment - (let ((mnemonic (cadr form)) - (digit (caddr form)) - (opcode (cadddr form))) - `(define-instruction ,mnemonic - (((ST 0) (ST (? i))) - (BYTE (8 #xd8) - (8 (+ ,opcode i)))) - - (() - (BYTE (8 #xd8) - (8 (+ ,opcode 1)))) - - ((D (? source mW)) - (BYTE (8 #xdc)) - (ModR/M ,digit source)) - - ((S (? source mW)) - (BYTE (8 #xd8)) - (ModR/M ,digit source)))))))) - - (define-flonum-comparison FCOM 2 #xd0) - (define-flonum-comparison FCOMP 3 #xd8)) - -(define-trivial-instruction FCOMPP #xde #xd9) -(define-trivial-instruction FCOS #xd9 #xff) -(define-trivial-instruction FDECSTP #xd9 #xf6) - -(define-instruction FFREE - (((ST (? i))) - (BYTE (8 #xdd) - (8 (+ #xc0 i))))) - -(let-syntax - ((define-flonum-integer-comparison - (sc-macro-transformer - (lambda (form environment) - environment - (let ((mnemonic (cadr form)) - (digit (caddr form))) - `(define-instruction ,mnemonic - ((L (? source mW)) - (BYTE (8 #xda)) - (ModR/M ,digit source)) - - ((H (? source mW)) - (BYTE (8 #xde)) - (ModR/M ,digit source)))))))) - - (define-flonum-integer-comparison FICOM 2) - (define-flonum-integer-comparison FICOMP 3)) +(let-syntax ((define-packed-bitwise-instruction + (sc-macro-transformer + (lambda (form environment) + environment ;ignore + (let ((mnemonic (cadr form)) + (opcode (caddr form))) + `(define-instruction ,mnemonic + ((P + (? p float-precision) + (XMM (? target)) + (? source xmm/m-ea)) + (PREFIX (FLOAT 'P p) (ModR/M target source)) + (BITS (8 #x0F) + (8 ,opcode)) + (ModR/M target source)))))))) + (define-packed-bitwise-instruction ANDNF #x55) + (define-packed-bitwise-instruction ANDF #x54) + (define-packed-bitwise-instruction ORF #x56) + (define-packed-bitwise-instruction XORF #x57) + ;; Not really bitwise instruction, but these two fit the pattern. + (define-packed-bitwise-instruction UNPCKHF #x15) + (define-packed-bitwise-instruction UNPCKLF #x15)) + +(define-instruction CMPF + (((? comparator float-comparator) + (? p/s float-packed/scalar) + (? p float-precision) + (XMM (? source1)) + (? source2 xmm/m-ea)) + (PREFIX (FLOAT p/s p) (ModR/M source1 source2)) + (BITS (8 #x0F) + (8 #xC2)) + (ModR/M source1 source2) + (BITS (8 comparator)))) + +(let-syntax ((define-un/ordered-compare-instruction + (sc-macro-transformer + (lambda (form environment) + environment ;ignore + (let ((mnemonic (cadr form)) + (opcode (caddr form))) + `(define-instruction ,mnemonic + ((D (XMM (? source1)) (? source2 xmm/m-ea)) + (PREFIX (ModR/M source1 source2)) + (BITS (8 #x66) + (8 #x0F) + (8 ,opcode)) + (ModR/M source1 source2)) + + ((S (XMM (? source1)) (? source2 xmm/m-ea)) + (PREFIX (ModR/M source1 source2)) + (BITS (8 #x0F) + (8 ,opcode)) + (ModR/M source1 source2)))))))) + (define-un/ordered-compare-instruction COMISF #x2F) + (define-un/ordered-compare-instruction UCOMISF #x2E)) -(let-syntax - ((define-flonum-integer-memory - (sc-macro-transformer - (lambda (form environment) - environment - (let ((mnemonic (cadr form)) - (digit1 (caddr form)) - (digit2 (cadddr form))) - `(define-instruction ,mnemonic - ,@(if (not digit2) - `() - `(((Q (? source mW)) - (BYTE (8 #xdf)) - (ModR/M ,digit2 source)))) - - ((L (? source mW)) - (BYTE (8 #xdb)) - (ModR/M ,digit1 source)) - - ((H (? source mW)) - (BYTE (8 #xdf)) - (ModR/M ,digit1 source)))))))) - - (define-flonum-integer-memory FILD 0 5) - (define-flonum-integer-memory FIST 2 #f) - (define-flonum-integer-memory FISTP 3 7)) - -(define-trivial-instruction FINCSTP #xd9 #xf7) -(define-trivial-instruction FINIT #x9b #xdb #xe3) ; = (FWAIT) (FNINT) -(define-trivial-instruction FNINIT #xdb #xe3) - -(let-syntax - ((define-flonum-memory - (sc-macro-transformer - (lambda (form environment) - environment - (let ((mnemonic (list-ref form 1)) - (digit1 (list-ref form 2)) - (digit2 (list-ref form 3)) - (opcode1 (list-ref form 4)) - (opcode2 (list-ref form 5))) - `(define-instruction ,mnemonic - (((ST (? i))) - (BYTE (8 ,opcode1) - (8 (+ ,opcode2 i)))) - - ((D (? operand mW)) - (BYTE (8 #xdd)) - (ModR/M ,digit1 operand)) - - ((S (? operand mW)) - (BYTE (8 #xd9)) - (ModR/M ,digit1 operand)) - - ,@(if (not digit2) - `() - `(((X (? operand mW)) - (BYTE (8 #xdb)) - (ModR/M ,digit2 operand)))))))))) - - (define-flonum-memory FLD 0 5 #xd9 #xc0) - (define-flonum-memory FST 2 #f #xdd #xd0) - (define-flonum-memory FSTP 3 7 #xdd #xd8)) - -(define-trivial-instruction FLD1 #xd9 #xe8) -(define-trivial-instruction FLDL2T #xd9 #xe9) -(define-trivial-instruction FLDL2E #xd9 #xea) -(define-trivial-instruction FLDPI #xd9 #xeb) -(define-trivial-instruction FLDLG2 #xd9 #xec) -(define-trivial-instruction FLDLN2 #xd9 #xed) -(define-trivial-instruction FLDZ #xd9 #xee) +(let-syntax ((define-conversion-instruction + (sc-macro-transformer + (lambda (form environment) + environment ;ignore + (let ((mnemonic (cadr form)) + (rules-definitions (cddr form))) + `(define-instruction ,mnemonic + ,@(append-map + (lambda (rules-definition) + (if (not (eq? (car rules-definition) 'DEFINE-RULES)) + (error "Malformed conversion rules definition:" + rules-definition)) + (let ((pattern (cadr rules-definition)) + (prefix-options (caddr rules-definition)) + (rules (cdddr rules-definition))) + (map (lambda (rule) + (let ((conversion (car rule)) + (bytes + (map (lambda (byte) `(8 ,byte)) + (cdr rule)))) + `((,conversion ,@pattern) + (PREFIX ,@prefix-options + (ModR/M reg ea)) + (BITS ,@bytes) + (ModR/M reg ea)))) + rules))) + rules-definitions))))))) + + (define-conversion-instruction CVTF + (define-rules ((XMM (? reg)) (? ea xmm/m-ea)) + () + (DQ->PD #xF3 #x0F #xE6) + (DQ->PS #x0F #x5B) + (PD->DQ #xF2 #x0F #xE6) + (PD->PS #x66 #x0F #x5A) + (PS->DQ #x66 #x0F #x5B) + (PS->PD #x0F #x5A) + (SD->SS #xF2 #x0F #x5A) + (SS->SD #xF3 #x0F #x5A)) + + ;++ SIZE can be only L or Q, not W. + (define-rules ((? size operand-size) (R (? reg)) (? ea xmm/m-ea)) + ((OPERAND size)) + (SD->SI #xF2 #x0F #x2D) + (SS->SI #xF3 #x0F #x2D)) + + (define-rules ((? size operand-size) (XMM (? reg)) (? ea r/m-ea)) + ((OPERAND size)) + (SI->SD #xF2 #x0F #x2A) + (SI->SS #xF3 #x0F #x2A))) + + (define-conversion-instruction CVTFT ;Convert Truncated + (define-rules ((XMM (? reg)) (? ea xmm/m-ea)) + () + (PD->DQ #x66 #x0F #xE6) + (PS->DQ #xF3 #x0F #x5B)) + (define-rules ((? size operand-size) (R (? reg)) (? ea xmm/m-ea)) + () + (SD->SI #xF2 #x0F #x2C) + (SS->SI #xF3 #x0F #x2C)))) -(let-syntax - ((define-flonum-state - (sc-macro-transformer - (lambda (form environment) - environment - (let ((mnemonic (list-ref form 1)) - (opcode (list-ref form 2)) - (digit (list-ref form 3)) - (mnemonic2 (list-ref form 4))) - `(begin - ,@(if (not mnemonic2) - `() - `((define-instruction ,mnemonic2 - (((? source mW)) - (BYTE (8 #x9b) ; (FWAIT) - (8 ,opcode)) - (ModR/M ,digit source))))) - - (define-instruction ,mnemonic - (((? source mW)) - (BYTE (8 ,opcode)) - (ModR/M ,digit source))))))))) - - (define-flonum-state FNLDCW #xd9 5 FLDCW) - (define-flonum-state FLDENV #xd9 4 #f) - (define-flonum-state FNSTCW #xd9 7 FSTCW) - (define-flonum-state FNSTENV #xd9 6 FSTENV) - (define-flonum-state FRSTOR #xdb 4 #f) - (define-flonum-state FNSAVE #xdd 6 FSAVE)) - -(define-trivial-instruction FNOP #xd9 #xd0) -(define-trivial-instruction FPATAN #xd9 #xf3) -(define-trivial-instruction FPREM #xd9 #xf8) ; truncating remainder -(define-trivial-instruction FPREM1 #xd9 #xf5) ; IEEE remainder -(define-trivial-instruction FPTAN #xd9 #xf2) -(define-trivial-instruction FRNDINT #xd9 #xfc) -(define-trivial-instruction FSCALE #xd9 #xfd) -(define-trivial-instruction FSIN #xd9 #xfe) -(define-trivial-instruction FSINCOS #xd9 #xfb) -(define-trivial-instruction FSQRT #xd9 #xfa) - -(define-instruction FSTSW - (((? target mW)) - (BYTE (8 #x9b) ; (FWAIT) - (8 #xdf)) - (ModR/M 7 target)) - - (((R 0)) - (BYTE (8 #x9b) ; (FWAIT) - (8 #xdf) - (8 #xe0)))) - -(define-instruction FNSTSW - (((? target mW)) - (BYTE (8 #xdf)) - (ModR/M 7 target)) - - (((R 0)) - (BYTE (8 #xdf) - (8 #xe0)))) +(define-instruction EXTRQ ;SSE4A only + (((? target xmm-ea) + (&U (? size unsigned-5bit)) + (&U (? position unsigned-5bit))) + (PREFIX (ModR/M target)) + (BITS (8 #x66) + (8 #x0F) + (8 #x78)) + (ModR/M 0 target) + (BITS (8 size UNSIGNED) + (8 position UNSIGNED))) + + (((XMM (? target)) (? source xmm-ea)) + (PREFIX (ModR/M target source)) + (BITS (8 #x66) + (8 #x0F) + (8 #x79)) + (ModR/M target source))) + +(define-instruction INSERTQ ;SSE4A only + (((XMM (? target)) + (? source xmm-ea) + (&U (? size unsigned-5bit)) + (&U (? position unsigned-5bit))) + (PREFIX (ModR/M target source)) + (BITS (8 #xF2) + (8 #x0F) + (8 #x78)) + (ModR/M target source) + (BITS (8 size UNSIGNED) + (8 position UNSIGNED))) + + (((XMM (? target)) (? source xmm-ea)) + (PREFIX (ModR/M target source)) + (BITS (8 #xF2) + (8 #x0F) + (8 #x79)) + (ModR/M target source))) + +(define-instruction FXRSTOR + (((? source m-ea)) + (PREFIX (ModR/M source)) + (BITS (8 #x0F) + (8 #xAE)) + (ModR/M 1 source))) + +(define-instruction FXSAVE + (((? target m-ea)) + (PREFIX (ModR/M target)) + (BITS (8 #x0F) + (8 #xAE)) + (ModR/M 0 target))) + +;;; How does LDDQU differ from MOVDQU? + +(define-instruction LDDQU ;Load Double Quadword Unaligned + (((XMM (? target)) (? source m-ea)) + (PREFIX (ModR/M target source)) + (BITS (8 #xF2) + (8 #x0F) + (8 #xF0)) + (ModR/M target source))) + +(define-instruction LDMXCSR + (((? source m-ea)) + (PREFIX (ModR/M source)) + (BITS (8 #x0F) + (8 #xAE)) + (ModR/M 2 source))) -(define-trivial-instruction FTST #xd9 #xe4) - -(let-syntax - ((define-binary-flonum - (sc-macro-transformer - (lambda (form environment) - environment - (let ((mnemonic (cadr form)) - (opcode1 (caddr form)) - (opcode2 (cadddr form))) - `(define-instruction ,mnemonic - (((ST 0) (ST (? i))) - (BYTE (8 ,opcode1) - (8 (+ ,opcode2 i)))) - - (() - (BYTE (8 ,opcode1) - (8 (+ ,opcode2 1)))))))))) - - (define-binary-flonum FUCOM #xdd #xe0) - (define-binary-flonum FUCOMP #xdd #xe8) - (define-binary-flonum FXCH #xd9 #xc8)) - -(define-trivial-instruction FUCOMPP #xda #xe9) -(define-trivial-instruction FWAIT #x9b) -(define-trivial-instruction FXAM #xd9 #xe5) -(define-trivial-instruction FXTRACT #xd9 #xf4) -(define-trivial-instruction FYL2X #xd9 #xf1) -(define-trivial-instruction FYL2XP1 #xd9 #xf9) - -|# \ No newline at end of file +(define-instruction MASKMOVDQU + (((@R 7) (XMM (? source1)) (? source2 xmm-ea)) + (PREFIX (ModR/M source1 source2)) + (BITS (8 #x66) + (8 #x0F) + (8 #xF7)) + (ModR/M source1 source2))) + +(define-instruction MOVAF ;Aligned + ((P (? p float-precision) (XMM (? target)) (? source xmm/m-ea)) + (PREFIX (FLOAT 'P p) (ModR/M target source)) + (BITS (8 #x0F) + (8 #x28)) + (ModR/M target source)) + + ((P (? p float-precision) (? target xmm/m-ea) (XMM (? source))) + (PREFIX (FLOAT 'P p) (ModR/M source target)) + (BITS (8 #x0F) + (8 #x29)) + (ModR/M source target))) + +(define-instruction MOVD + ;++ SIZE can be only L or Q, not W. + (((? size operand-size) (XMM (? target)) (? source r/m-ea)) + (PREFIX (OPERAND size) (ModR/M target source)) + (BITS (8 #x66) + (8 #x0F) + (8 #x6E)) + (ModR/M target source)) + + (((? size operand-size) (? target r/m-ea) (XMM (? source))) + (PREFIX (OPERAND size) (ModR/M source target)) + (BITS (8 #x66) + (8 #x0F) + (8 #x7E)) + (ModR/M source target))) + +(define-instruction MOVDDUP + (((XMM (? target)) (? source xmm/m-ea)) + (PREFIX (ModR/M target source)) + (BITS (8 #xF2) + (8 #x0F) + (8 #x12)) + (ModR/M target source))) + +(let-syntax ((define-move-dq-instruction + (sc-macro-transformer + (lambda (form environment) + environment ;ignore + (let ((mnemonic (cadr form)) + (prefix (caddr form))) + + `(define-instruction ,mnemonic + (((XMM (? target)) (? source xmm/m-ea)) + (PREFIX (ModR/M target source)) + (BITS (8 ,prefix) + (8 #x0F) + (8 #x6F)) + (ModR/M target source)) + + (((? target xmm/m-ea) (XMM (? source))) + (PREFIX (ModR/M source target)) + (BITS (8 ,prefix) + (8 #x0F) + (8 #x7F)) + (ModR/M source target)))))))) + (define-move-dq-instruction MOVDQA #x66) + (define-move-dq-instruction MOVDQU #xF3)) + +(let-syntax ((define-move-high/low-instructions + (sc-macro-transformer + (lambda (form environment) + (let ((MOVxy (cadr form)) + (MOVx (caddr form)) + (opcode1 (cadddr form)) + (opcode2 (car (cddddr form)))) + + `(begin + (define-instruction ,MOVxy + ((P S (XMM (? target)) (? source xmm-ea)) + (PREFIX (ModR/M target source)) + (BITS (8 #x0F) + (8 ,opcode1)) + (ModR/M target source))) + + (define-instruction ,MOVx + ((P D (XMM (? target)) (? source m-ea)) + (PREFIX (ModR/M target source)) + (BITS (8 #x66) + (8 #x0F) + (8 ,opcode2)) + (ModR/M target source)) + + ((P D (? target m-ea) (XMM (? source))) + (PREFIX (ModR/M source target)) + (BITS (8 #x66) + (8 #x0F) + (8 ,(+ opcode2 1))) + (ModR/M source target)) + + ((P S (XMM (? target)) (? source m-ea)) + (PREFIX (ModR/M target source)) + (BITS (8 #x0F) + (8 ,opcode2)) + (ModR/M target source)) + + ((P S (? target m-ea) (XMM (? source))) + (PREFIX (ModR/M source target)) + (BITS (8 #x0F) + (8 ,(+ opcode2 1))) + (ModR/M source target))))))))) + ;; Note: (MOVL ...) is very different from (MOV L ...)! + (define-move-high/low-instructions MOVHL MOVH #x12 #x16) + (define-move-high/low-instructions MOVLH MOVL #x12 #x16)) + +(define-instruction MOVMSKF + ((P (? p float-precision) (R (? target)) (? source xmm-ea)) + (PREFIX (FLOAT 'P p) (ModR/M target source)) + (BITS (8 #x0F) + (8 #x50)) + (ModR/M target source))) + +(define-instruction MOVNTDQ + (((? target m-ea) (XMM (? source))) + (PREFIX (FLOAT 'P 'D) (ModR/M source target)) + (BITS (8 #x0F) + (8 #xE7)) + (ModR/M source target))) + +(define-instruction MOVNTF + (((? p/s float-packed/scalar) + (? p float-precision) + (? target m-ea) + (XMM (? source))) + (PREFIX (FLOAT p/s p) (ModR/M source target)) + (BITS (8 #x0F) + (8 #x2B)) + (ModR/M source target))) + +;; Note: (MOVQ ...) is very different from (MOV Q ...)! +(define-instruction MOVQ + (((XMM (? target)) (? source xmm/m-ea)) + (PREFIX (ModR/M target source)) + (BITS (8 #xF3) + (8 #x0F) + (8 #x7E)) + (ModR/M target source)) + + (((? target xmm/m-ea) (XMM (? source))) + (PREFIX (ModR/M source target)) + (BITS (8 #x66) + (8 #x0F) + (8 #xD6)) + (ModR/M source target))) + +;;; Using the mnemonic MOVF avoids conflict with the general MOVS +;;; instruction. + +(define-instruction MOVF + ((S (? p float-precision) (XMM (? target)) (? source xmm/m-ea)) + (PREFIX (FLOAT 'S p) (ModR/M target source)) + (BITS (8 #x0F) + (8 #x10)) + (ModR/M target source)) + + ((S (? p float-precision) (? target xmm/m-ea) (XMM (? source))) + (PREFIX (FLOAT 'S p) (ModR/M source target)) + (BITS (8 #x0F) + (8 #x11)) + (ModR/M source target))) + +(let-syntax ((define-mov/dup-instruction + (sc-macro-transformer + (lambda (form environment) + environment ;ignore + (let ((mnemonic (cadr form)) + (opcode (caddr form))) + `(define-instruction ,mnemonic + (((XMM (? target)) (? source xmm/m-ea)) + (PREFIX (ModR/M target source)) + (BITS (8 #xF3) + (8 #x0F) + (8 ,opcode)) + (ModR/M target source)))))))) + (define-mov/dup-instruction MOVSHDUP #x16) + (define-mov/dup-instruction MOVSLDUP #x12)) + +(define-instruction MOVUF ;Unaligned + ((P (? p float-precision) (XMM (? target)) (? source xmm/m-ea)) + (PREFIX (FLOAT 'P p) (ModR/M target source)) + (BITS (8 #x0F) + (8 #x10)) + (ModR/M target source)) + ((P (? p float-precision) (? target xmm/m-ea) (XMM (? source))) + (PREFIX (FLOAT 'P p) (ModR/M source target)) + (BITS (8 #x0F) + (8 #x11)) + (ModR/M source target))) + +(let-syntax ((define-packed-sized-instruction + (sc-macro-transformer + (lambda (form environment) + environment ;ignore + (let ((mnemonic (cadr form)) + (size/opcode-list (cddr form))) + `(define-instruction ,mnemonic + ,@(map (lambda (size/opcode) + (let ((size (car size/opcode)) + (opcode (cadr size/opcode))) + `((,size (XMM (? target)) (? source xmm/m-ea)) + (PREFIX (ModR/M target source)) + (BITS (8 #x66) + (8 #x0F) + (8 ,opcode)) + (ModR/M target source)))) + size/opcode-list))))))) + (define-packed-sized-instruction PACKS (B #x63) (UB #x67) (W #x6B)) + (define-packed-sized-instruction PADD (B #xFC) (W #xFD) (L #xFE) (Q #xD4)) + ;++ Should PADDU be considered a separate instruction from PADDS? + (define-packed-sized-instruction PADDS (B #xEC) (W #xED) (UB #xDC) (UW #xDD)) + (define-packed-sized-instruction PAVG (B #xE0) (W #xE3)) + (define-packed-sized-instruction PCMPEQ (B #x74) (W #x75) (L #x76)) + (define-packed-sized-instruction PCMPGT (B #x64) (W #x65) (L #x66)) + (define-packed-sized-instruction PMAX (W #xEE) (UB #xDE)) + (define-packed-sized-instruction PMIN (W #xEA) (UB #xDA)) + (define-packed-sized-instruction PSUB (B #xF8) (W #xF9) (L #xFA) (Q #xFB)) + ;++ Should PSUBSU be considered a separate instruction from PSUBS? + (define-packed-sized-instruction PSUBS (B #xE8) (W #xE9) (UB #xD8) (UW #xD9)) + ;++ Should the size indicate the source or target size? Right now + ;++ it indicates the source size. + (define-packed-sized-instruction PUNPCKH (B #x68) (W #x69) (L #x6A) (Q #x6D)) + (define-packed-sized-instruction PUNPCKL (B #x60) (W #x61) (L #x62) (Q #x6C)) + ) + +(let-syntax ((define-packed-instruction + (sc-macro-transformer + (lambda (form environment) + environment ;ignore + (let ((mnemonic (cadr form)) + (opcode (caddr form))) + `(define-instruction ,mnemonic + (((XMM (? target)) (? source xmm/m-ea)) + (PREFIX (ModR/M target source)) + (BITS (8 #x66) + (8 #x0F) + (8 ,opcode)) + (ModR/M target source)))))))) + (define-packed-instruction PAND #xDB) + (define-packed-instruction PANDN #xDF) + (define-packed-instruction PMADDWL #xF5) + (define-packed-instruction PMULUDQ #xF4) + (define-packed-instruction POR #xEB) + (define-packed-instruction PSADBW #xF6) + (define-packed-instruction PXOR #xEF)) + +(define-instruction PEXTR + ((W (R (? target)) (? source xmm-ea) (&U (? position unsigned-3bit))) + (PREFIX (ModR/M target source)) + (BITS (8 #x66) + (8 #x0F) + (8 #xC5)) + (ModR/M target source) + (BITS (8 position UNSIGNED)))) + +(define-instruction PINSR + ((W (XMM (? target)) (? source r-ea) (&U (? position unsigned-3bit))) + (PREFIX (ModR/M target source)) + (BITS (8 #x66) + (8 #x0F) + (8 #xC4)) + (ModR/M target source) + (BITS (8 position UNSIGNED)))) + +(define-instruction PMOVMSKB + (((R (? target)) (? source xmm-ea)) + (PREFIX (ModR/M target source)) + (BITS (8 #x66) + (8 #x0F) + (8 #xD7)) + (ModR/M target source))) + +(define-instruction PMULH + ((W (XMM (? target)) (? source xmm/m-ea)) + (PREFIX (ModR/M target source)) + (BITS (8 #x66) + (8 #x0F) + (8 #xE5)) + (ModR/M target source)) + + ((UW (XMM (? target)) (? source xmm/m-ea)) + (PREFIX (ModR/M target source)) + (BITS (8 #x66) + (8 #x0F) + (8 #xE4)) + (ModR/M target source))) + +(define-instruction PMULL + ((W (XMM (? target)) (? source xmm/m-ea)) + (PREFIX (ModR/M target source)) + (BITS (8 #x66) + (8 #x0F) + (8 #xD5)) + (ModR/M target source))) + +(define-instruction PSHUF + ((L (XMM (? target)) (? source xmm/m-ea) (&U (? wibblethwop unsigned-byte))) + (PREFIX (ModR/M target source)) + (BITS (8 #x66) + (8 #x0F) + (8 #x70)) + (ModR/M target source) + (BITS (8 wibblethwop)))) + +(define-instruction PSHUFH + ((W (XMM (? target)) (? source xmm/m-ea) (&U (? zob unsigned-byte))) + (PREFIX (ModR/M target source)) + (BITS (8 #xF3) + (8 #x0F) + (8 #x70)) + (ModR/M target source) + (BITS (8 zob)))) + +;;; Note: (PSHUF L ...) is very different from (PSHUFL ...)! (The +;;; latter must be (PSHUFL W ...) in any case.) + +(define-instruction PSHUFL + ((W (XMM (? target)) (? source xmm/m-ea) (&U (? veeblefitzer unsigned-byte))) + (PREFIX (ModR/M target source)) + (BITS (8 #xF3) + (8 #x0F) + (8 #x70)) + (ModR/M target source) + (BITS (8 veeblefitzer)))) + +(let-syntax ((define-shift-instruction + (sc-macro-transformer + (lambda (form environment) + environment ;ignore + (let ((mnemonic (cadr form)) + (digit (caddr form)) + (dq-digit (cadddr form)) + (opcode (car (cddddr form))) + (size/opcode-list (cddddr form))) + `(define-instruction ,mnemonic + ((DQ (? target xmm-ea) (&U count unsigned-byte)) + (PREFIX (ModR/M target)) + (BITS (8 #x66) + (8 #x0F) + (8 #x73)) + (ModR/M ,dq-digit target) + (BITS (8 count UNSIGNED))) + + (((? size operand-size) + (? target xmm-ea) + (&U (? count unsigned-byte))) + (PREFIX (ModR/M target)) + (BITS (8 #x66) + (8 #x0F) + (8 (case size ((Q) #x73) ((L) #x72) ((W) #x71)))) + (ModR/M ,digit target) + (BITS (8 count UNSIGNED))) + + (((? size operand-size) + (XMM (? target)) + (? source xmm/m-ea)) + (PREFIX (ModR/M target source)) + (BITS (8 #x66) + (8 #x0F) + (8 (case size + ((Q) ,(+ opcode 1)) + ((L) ,(+ opcode 2)) + ((W) ,(+ opcode 3))))) + (ModR/M target source)))))))) + (define-shift-instruction PSLL 6 7 #xF0) + (define-shift-instruction PSRL 2 3 #xD0)) + +(define-instruction PSRA + ;++ This does not admit an operand size of Q. + (((? size operand-size) (? target xmm-ea) (&U (? count unsigned-byte))) + (PREFIX (ModR/M target)) + (BITS (8 #x66) + (8 #x0F) + (8 (case size ((L) #x72) ((W) #x71)))) + (ModR/M 4 target) + (BITS (8 count UNSIGNED))) + + (((? size operand-size) (XMM (? target)) (? source xmm/m-ea)) + (PREFIX (ModR/M target source)) + (BITS (8 #x66) + (8 #x0F) + (8 (case size ((L) #xE2) ((W) #xE1)))) + (ModR/M target source))) + +(let-syntax ((define-reciprocal-instruction + (sc-macro-transformer + (lambda (form environment) + (let ((mnemonic (cadr form)) + (opcode (caddr form))) + `(define-instruction ,mnemonic + (((? p/s float-packed/scalar) + S ;Single-precision only. + (XMM (? target)) + (? source xmm/m-ea)) + (PREFIX (FLOAT p/s 'S) (ModR/M target source)) + (BITS (8 #x0F) + (8 ,opcode)) + (ModR/M target source)))))))) + (define-reciprocal-instruction RCPF #x53) + (define-reciprocal-instruction RSQRTF #x52)) + +(define-instruction SHUF + ((P (? p float-precision) + (XMM (? target)) + (? source xmm/m-ea) + (&U (? command unsigned-2bit))) + (PREFIX (FLOAT 'P p) (ModR/M target source)) + (BITS (8 #x0F) + (8 #xC6)) + (ModR/M target source) + (BITS (8 command UNSIGNED)))) + +(define-instruction STMXCSR + (((? target m-ea)) + (PREFIX (ModR/M target)) + (BITS (8 #x0F) + (8 #xAE)) + (ModR/M 3 target))) diff --git a/src/compiler/machines/x86-64/insutl.scm b/src/compiler/machines/x86-64/insutl.scm index 752816b21..fa3ab3b2a 100644 --- a/src/compiler/machines/x86-64/insutl.scm +++ b/src/compiler/machines/x86-64/insutl.scm @@ -42,6 +42,12 @@ USA. (MODE #b11) (R/M (register-bits r))) + ((XMM (? r)) + (CATEGORIES XMM) + (REX (B r)) + (MODE #b11) + (R/M (register-bits r))) + ;;;; Register-indirect ((@R (? r indirect-reg)) @@ -181,8 +187,11 @@ USA. (R/M 5) (BITS (32 offset SIGNED)))) -(define-ea-transformer r/m-ea) +(define-ea-transformer r-ea REGISTER) +(define-ea-transformer xmm-ea XMM) (define-ea-transformer m-ea MEMORY) +(define-ea-transformer r/m-ea REGISTER MEMORY) +(define-ea-transformer xmm/m-ea XMM MEMORY) (define-structure (effective-address (conc-name ea/) @@ -197,10 +206,53 @@ USA. (declare (integrate-operator register-rex)) (define-integrable (register-rex register rex) (declare (integrate register)) - (if (>= register r8) + (if (>= register 8) rex 0)) +(define (cons-ModR/M digit ea tail) + (cons-syntax (ea/register ea) + (cons-syntax digit + (cons-syntax (ea/mode ea) + (append-syntax! (ea/extra ea) tail))))) + +(declare (integrate-operator opcode-register)) +(define (opcode-register opcode register) + (declare (integrate opcode)) + (+ opcode (if (>= register 8) (- register 8) register))) + +(declare (integrate-operator float-comparator)) +(define (float-comparator comparator) + (case comparator + ((=) 0) + ((<) 1) + ((<=) 2) + ((UNORDERED) 3) + ((/=) 4) + ((>=) 5) + ((>) 6) + ((ORDERED) 7) + (else (error "Bad float comparator:" comparator)))) + +(declare (integrate-operator operand-size)) +(define (operand-size s) + ;; B must be handled separately in general. + (case s + ((W L Q) s) + (else #f))) + +(declare (integrate-operator float-packed/scalar)) +(define (float-packed/scalar s) + (case s + ((S P) s) + (else #f))) + +(declare (integrate-operator float-precision)) +(define (float-precision s) + (case s + ((D S) s) + (else #f))) + (define (cons-prefix operand-size register ea tail) (let ((tail (if (eq? operand-size 'W) @@ -222,28 +274,40 @@ USA. (else (error "Invalid operand size:" operand-size))) (let ((extended-register? (or (eqv? register #t) - (and register (>= register r8))))) + (and register (>= register 8))))) (if ea (fix:or (if extended-register? #x44 0) (ea/rex-prefix ea)) (if extended-register? #x41 0))))))) -(define (cons-ModR/M digit ea tail) - (cons-syntax (ea/register ea) - (cons-syntax digit - (cons-syntax (ea/mode ea) - (append-syntax! (ea/extra ea) tail))))) - -(declare (integrate-operator opcode-register)) -(define (opcode-register opcode register) - (declare (integrate opcode)) - (+ opcode (if (>= register r8) (- register r8) register))) +;;; The SSE instructions don't consistently use this pattern for their +;;; opcodes, but enough of them do that this approximate abstraction +;;; helps to clarify the instruction syntax. + +(define (cons-float-prefix register ea packed/scalar precision tail) + (let* ((tail + (let ((float (list packed/scalar precision))) + (if (equal? float '(P S)) + tail + (cons + (syntax-evaluation + (cond ((equal? float '(P D)) #x66) + ((equal? float '(S D)) #xF2) + ((equal? float '(S S)) #xF3) + (else (error "Bad float type:" float))) + coerce-8-bit-unsigned) + tail)))) + (rex-prefix + (let ((extended-register? + (or (eqv? register #t) + (and register (>= register 8))))) + (if ea + (fix:or (if extended-register? #x44 0) (ea/rex-prefix ea)) + (if extended-register? #x41 0))))) + (if (zero? rex-prefix) + tail + (cons-syntax (syntax-evaluation rex-prefix coerce-8-bit-unsigned) + tail)))) -(define (operand-size s) - ;; B must be handled separately in general. - (case s - ((W L Q) s) - (else #f))) - (define-integrable (register-bits r) (fix:and r #b111)) @@ -297,6 +361,18 @@ USA. ((8) #b11) (else false))) +(declare (integrate-operator unsigned-2bit)) +(define (unsigned-2bit value) + (and (<= 0 value #b11) value)) + +(declare (integrate-operator unsigned-3bit)) +(define (unsigned-3bit value) + (and (<= 0 value #b111) value)) + +(declare (integrate-operator unsigned-5bit)) +(define (unsigned-5bit value) + (and (<= 0 value #b11111) value)) + (define (signed-byte value) (and (fits-in-signed-byte? value) value)) @@ -328,7 +404,7 @@ USA. (define (unsigned-quad value) (and (fits-in-unsigned-quad? value) value)) - + (define (sign-extended-byte value) (and (fits-in-signed-byte? value) value)) diff --git a/src/compiler/machines/x86-64/lapgen.scm b/src/compiler/machines/x86-64/lapgen.scm index a28795447..55f9f85c1 100644 --- a/src/compiler/machines/x86-64/lapgen.scm +++ b/src/compiler/machines/x86-64/lapgen.scm @@ -35,48 +35,20 @@ USA. ;; rbp holds the pointer mask ;; rsi holds the register array pointer ;; rdi holds the free pointer - ;++ float - ;; fr7 is not used so that we can always push on the stack once. (list rax rcx rdx rbx r8 r9 r10 r11 r12 r13 r14 r15 - ;++ float - ;; fr0 fr1 fr2 fr3 fr4 fr5 fr6 - ;; mmx0 mmx1 mmx2 mmx3 mmx4 mmx5 mmx6 mmx7 - ;; xmm0 xmm1 xmm2 xmm3 xmm4 xmm5 xmm6 xmm7 - ;; xmm8 xmm9 xmm10 xmm11 xmm12 xmm13 xmm14 xmm15 - )) + xmm0 xmm1 xmm2 xmm3 xmm4 xmm5 xmm6 xmm7 + xmm8 xmm9 xmm10 xmm11 xmm12 xmm13 xmm14 xmm15)) (define (sort-machine-registers registers) registers) -;++ float - -#; -(define (sort-machine-registers registers) - ;; FR0 is preferable to other FPU regs. We promote it to the front - ;; if we find another FPU reg in front of it. - (let loop ((regs registers)) - (cond ((null? regs) registers) ; no float regs at all - ((general-register? (car regs)); ignore general regs - (loop (cdr regs))) - ((= (car regs) fr0) ; found FR0 first - registers) - ((memq fr0 regs) ; FR0 not first, is it present? - (cons fr0 (delq fr0 registers)) ; move to front - registers) - (else ; FR0 absent - registers)))) - (define (register-type register) (cond ((machine-register? register) (vector-ref '#(GENERAL GENERAL GENERAL GENERAL GENERAL GENERAL GENERAL GENERAL GENERAL GENERAL GENERAL GENERAL GENERAL GENERAL GENERAL GENERAL - ;++ float - ;; FLOAT FLOAT FLOAT FLOAT FLOAT FLOAT FLOAT FLOAT ;x87 fp - ;; FLOAT FLOAT FLOAT FLOAT FLOAT FLOAT FLOAT FLOAT ;MMX 64bit - ;; MEDIA MEDIA MEDIA MEDIA MEDIA MEDIA MEDIA MEDIA ;XMM 128bit - ;; MEDIA MEDIA MEDIA MEDIA MEDIA MEDIA MEDIA MEDIA - ) + FLOAT FLOAT FLOAT FLOAT FLOAT FLOAT FLOAT FLOAT + FLOAT FLOAT FLOAT FLOAT FLOAT FLOAT FLOAT FLOAT) register)) ((register-value-class=word? register) 'GENERAL) @@ -87,19 +59,14 @@ USA. (define register-reference (let ((references (make-vector number-of-machine-registers))) - (do ((i rax (+ i 1))) - ((> i r15)) - (vector-set! references i (INST-EA (R ,i)))) - ;++ float - ;; (do ((i fr0 (+ i 1))) - ;; ((>= i fr7)) - ;; (vector-set! references i (INST-EA (ST ,(floreg->sti i))))) - ;; (do ((i mmx0 (+ i 1))) - ;; ((>= i mmx7)) - ;; (vector-set! references i (INST-EA (MMX ...)))) - ;; (do ((i xmm0 (+ i 1))) - ;; ((>= i xmm15)) - ;; (vector-set! references i (INST-EA (XMM ...)))) + (do ((r rax (+ r 1)) + (i 0 (+ i 1))) + ((> r r15)) + (vector-set! references r (INST-EA (R ,i)))) + (do ((r xmm0 (+ r 1)) + (i 0 (+ i 1))) + ((> r xmm15)) + (vector-set! references r (INST-EA (XMM ,i)))) (lambda (register) (vector-ref references register)))) @@ -109,10 +76,9 @@ USA. (define (reference->register-transfer source target) (cond ((equal? (register-reference target) source) (LAP)) - ;++ float ((float-register-reference? source) ;; Assume target is a float register - (LAP (FLD ,source))) + (LAP (MOVF S D ,(register-reference target) ,source))) (else (memory->machine-register source target)))) @@ -126,14 +92,9 @@ USA. (define (register->home-transfer source target) (machine->pseudo-register source target)) -;++ float - (define-integrable (float-register-reference? ea) - ea - #f - #; (and (pair? ea) - (eq? (car ea) 'ST))) + (eq? (car ea) 'XMM))) ;;;; Linearizer interface @@ -162,34 +123,22 @@ USA. ;;;; Utilities for the register allocator interface -(define-integrable (machine->machine-register source target) +(define (generate-move register source-ref target-ref) + (if (float-register? register) + (LAP (MOVF S D ,target-ref ,source-ref)) + (LAP (MOV Q ,target-ref ,source-ref)))) + +(define (machine->machine-register source target) (guarantee-registers-compatible source target) - ;++ float - (if (not (float-register? source)) - (LAP (MOV Q ,(register-reference target) ,(register-reference source))) - (let ((ssti (floreg->sti source)) - (tsti (floreg->sti target))) - (if (zero? ssti) - (LAP (FST (ST ,tsti))) - (LAP (FLD (ST ,ssti)) - (FSTP (ST ,(1+ tsti)))))))) + (generate-move source + (register-reference source) + (register-reference target))) (define (machine-register->memory source target) - ;++ float - (if (not (float-register? source)) - (LAP (MOV Q ,target ,(register-reference source))) - (let ((ssti (floreg->sti source))) - (if (zero? ssti) - (LAP (FST D ,target)) - (LAP (FLD (ST ,ssti)) - (FSTP D ,target)))))) + (generate-move source (register-reference source) target)) (define (memory->machine-register source target) - ;++ float - (if (not (float-register? target)) - (LAP (MOV Q ,(register-reference target) ,source)) - (LAP (FLD D ,source) - (FSTP (ST ,(1+ (floreg->sti target))))))) + (generate-move target source (register-reference target))) (define-integrable (offset-referenceable? offset) (byte-offset-referenceable? (* address-units-per-object offset))) @@ -213,7 +162,7 @@ USA. (error "Negative unsigned offset:" offset)) ;; We don't have unsigned addressing modes. (byte-offset-reference register offset)) - + ;;; This returns an offset in objects, not bytes. (define-integrable (pseudo-register-offset register) @@ -226,29 +175,11 @@ USA. (define-integrable (machine->pseudo-register source target) (machine-register->memory source (pseudo-register-home target))) -;++ float - -(define (general-register? register) - register - #t) - -(define (float-register? register) - register - #f) - -(define (floreg->sti reg) - (error "x87 floating-point not supported:" `(FLOREG->STI ,reg))) - -#| -(define-integrable (floreg->sti reg) - (- reg fr0)) - (define-integrable (general-register? register) - (< register fr0)) + (< register xmm0)) (define-integrable (float-register? register) - (<= fr0 register fr7)) -|# + (>= register xmm0)) ;;;; Utilities for the rules @@ -271,6 +202,10 @@ USA. (flush-register! machine-reg) (add-pseudo-register-alias! rtl-reg machine-reg)))) +;;; OBJECT->MACHINE-REGISTER! takes only general registers, not float +;;; registers. Otherwise, (INST-EA (R ,mreg)) would need to be +;;; (register-reference mreg). + (define (object->machine-register! object mreg) ;; This ordering allows LOAD-CONSTANT to use MREG as a temporary. (let ((code (load-constant (INST-EA (R ,mreg)) object))) @@ -391,7 +326,7 @@ USA. temp ;ignore (LAP ,@prefix ,@(receiver operand)))) - + ;;; SIGNED-IMMEDIATE-OPERAND and UNSIGNED-IMMEDIATE-OPERAND abstract ;;; the pattern of performing an operation with an instruction that ;;; takes an immediate operand of 32 bits, but using a value that may @@ -410,6 +345,9 @@ USA. (cond ((fits-in-signed-long? value) (values #f (LAP) operand)) ((fits-in-signed-quad? value) + ;; (values #f + ;; (LAP) + ;; (INST-EA (@PCR ,(allocate-signed-quad-label value)))) (let ((temp (temporary-reference))) (values temp (LAP (MOV Q ,temp ,operand)) temp))) (else @@ -420,10 +358,39 @@ USA. (cond ((fits-in-unsigned-long? value) (values #f (LAP) operand)) ((fits-in-unsigned-quad? value) + ;; (values #f + ;; (LAP) + ;; (INST-EA (@PCR ,(allocate-unsigned-quad-label value)))) (let ((temp (temporary-reference))) (values temp (LAP (MOV Q ,temp ,operand)) temp))) (else (error "Unsigned immediate value too large:" value))))) + +(define (allocate-data-label datum block-name offset alignment data) + (let* ((block + (or (find-extra-code-block block-name) + (let ((block + (declare-extra-code-block! block-name 'ANYWHERE '()))) + (add-extra-code! + block + (LAP (PADDING ,offset ,alignment ,padding-string))) + block))) + (pairs (extra-code-block/xtra block)) + (place (assoc datum pairs))) + (if place + (cdr place) + (let ((label (generate-label block-name))) + (set-extra-code-block/xtra! + block + (cons (cons datum label) pairs)) + (add-extra-code! block (LAP (LABEL ,label) ,@data)) + label)))) + +(define (allocate-unsigned-quad-label quad) + (allocate-data-label quad 'QUADS 0 8 (LAP (QUAD U ,quad)))) + +(define (allocate-signed-quad-label quad) + (allocate-data-label quad 'QUADS 0 8 (LAP (QUAD S ,quad)))) (define (target-register target) (delete-dead-registers!) @@ -461,6 +428,48 @@ USA. (define-integrable (allocate-indirection-register! register) (load-alias-register! register 'GENERAL)) + +(define (binary-register-operation operate commutative? type move + target source1 source2) + (let* ((worst-case + (lambda (target source1 source2) + (LAP ,@(move target source1) + ,@(operate target source2)))) + (new-target-alias! + (lambda () + (let ((source1 (standard-register-reference source1 type #f)) + (source2 (standard-register-reference source2 type #f))) + (delete-dead-registers!) + (worst-case + (register-reference + (or (register-alias target type) + (allocate-alias-register! target type))) + source1 + source2))))) + (cond ((not (pseudo-register? target)) + (if (not (eq? (register-type target) type)) + (error "binary-register-operation: Wrong type register" + target + type) + (worst-case (register-reference target) + (standard-register-reference source1 type #f) + (standard-register-reference source2 type #f)))) + ((register-copy-if-available source1 type target) + => (lambda (get-alias-ref) + (if (= source2 source1) + (let ((ref (get-alias-ref))) + (operate ref ref)) + (let ((source2 + (standard-register-reference source2 type #f))) + (operate (get-alias-ref) source2))))) + ((not commutative?) + (new-target-alias!)) + ((register-copy-if-available source2 type target) + => (lambda (get-alias-ref) + (let ((source1 (standard-register-reference source1 type #f))) + (operate (get-alias-ref) source1)))) + (else + (new-target-alias!))))) (define (with-indexed-address base* index* scale b-offset protect recvr) (let* ((base (allocate-indirection-register! base*)) diff --git a/src/compiler/machines/x86-64/machin.scm b/src/compiler/machines/x86-64/machin.scm index c28e8c63f..f6aa1e57d 100644 --- a/src/compiler/machines/x86-64/machin.scm +++ b/src/compiler/machines/x86-64/machin.scm @@ -169,48 +169,26 @@ USA. (define r14 14) (define r15 15) -;;; x87 floating-point stack locations, allocated as if registers. - -(define fr0 16) -(define fr1 17) -(define fr2 18) -(define fr3 19) -(define fr4 20) -(define fr5 21) -(define fr6 22) -(define fr7 23) - -;;; 64-bit media registers (deprecated). - -(define mmx0 24) -(define mmx1 25) -(define mmx2 26) -(define mmx3 27) -(define mmx4 28) -(define mmx5 29) -(define mmx6 30) -(define mmx7 31) - ;;; 128-bit media registers. -(define xmm0 32) -(define xmm1 33) -(define xmm2 34) -(define xmm3 35) -(define xmm4 36) -(define xmm5 37) -(define xmm6 38) -(define xmm7 39) -(define xmm8 40) -(define xmm9 41) -(define xmm10 42) -(define xmm11 43) -(define xmm12 44) -(define xmm13 45) -(define xmm14 46) -(define xmm15 47) +(define xmm0 16) +(define xmm1 17) +(define xmm2 18) +(define xmm3 19) +(define xmm4 20) +(define xmm5 21) +(define xmm6 22) +(define xmm7 23) +(define xmm8 24) +(define xmm9 25) +(define xmm10 26) +(define xmm11 27) +(define xmm12 28) +(define xmm13 29) +(define xmm14 30) +(define xmm15 31) -(define number-of-machine-registers 16) +(define number-of-machine-registers 32) (define number-of-temporary-registers 256) (define-integrable regnum:stack-pointer rsp) @@ -233,12 +211,8 @@ USA. value-class=address) ((<= r8 register r15) value-class=object) - ((<= fr0 register fr7) - value-class=float) - ((<= mmx0 register mmx7) - (error "MMX media registers not allocated:" register)) ((<= xmm0 register xmm15) - (error "XMM media registers not allocated:" register)) + value-class=float) (else (error "Invalid machine register:" register)))) @@ -392,64 +366,54 @@ USA. (error "Unknown register type" locative))) (define (rtl:constant-cost expression) - ;; i486 clock count for instruction to construct/fetch into register. - (let ((if-integer - (lambda (value) - value ; ignored - ;; Can this be done in fewer bytes for suitably small values? - 1)) ; MOV immediate - (get-pc-cost - (+ 3 ; CALL - 4)) ; POP - (based-reference-cost - 1) ; MOV r/m - (address-offset-cost - 1)) ; LEA instruction - - (define (if-synthesized-constant type datum) - (if-integer (make-non-pointer-literal type datum))) - + ;; Counts derived from the AMD64 Software Optimization Guide, Rev + ;; 3.06, from September 2005. Scaled by two because LEA costs 1/2! + ;; This is pretty silly, but probably better than using i486 clock + ;; counts. + (let ((cost:lea 1) + (cost:mov-mem 6) + (cost:mov-imm 2) + (cost:or 2)) (case (rtl:expression-type expression) ((CONSTANT) (let ((value (rtl:constant-value expression))) (if (non-pointer-object? value) - (if-synthesized-constant (object-type value) - (careful-object-datum value)) - (+ get-pc-cost based-reference-cost)))) + cost:mov-imm + cost:mov-mem))) ((MACHINE-CONSTANT) - (if-integer (rtl:machine-constant-value expression))) - ((ENTRY:PROCEDURE - ENTRY:CONTINUATION) - (+ get-pc-cost address-offset-cost)) - ((ASSIGNMENT-CACHE - VARIABLE-CACHE) - (+ get-pc-cost based-reference-cost)) - ((OFFSET-ADDRESS - BYTE-OFFSET-ADDRESS - FLOAT-OFFSET-ADDRESS) - address-offset-cost) + cost:mov-imm) + ((ENTRY:PROCEDURE ENTRY:CONTINUATION) + (+ cost:mov-imm cost:lea cost:or)) + ((OFFSET-ADDRESS BYTE-OFFSET-ADDRESS FLOAT-OFFSET-ADDRESS) + (receive (offset-selector scale) + (case (rtl:expression-type expression) + ((OFFSET-ADDRESS) + (values rtl:offset-address-offset address-units-per-object)) + ((BYTE-OFFSET-ADDRESS) + (values rtl:byte-offset-address-offset 1)) + ((FLOAT-OFFSET-ADDRESS) + (values rtl:float-offset-address-offset + address-units-per-float))) + (let ((offset (offset-selector expression))) + (if (and (rtl:machine-constant? offset) + (not + (fits-in-signed-long? + (* scale (rtl:machine-constant-value offset))))) + (+ cost:mov-imm cost:lea) + cost:lea)))) ((CONS-POINTER) (and (rtl:machine-constant? (rtl:cons-pointer-type expression)) (rtl:machine-constant? (rtl:cons-pointer-datum expression)) - (if-synthesized-constant - (rtl:machine-constant-value (rtl:cons-pointer-type expression)) - (rtl:machine-constant-value - (rtl:cons-pointer-datum expression))))) - (else - false)))) + cost:mov-imm)) + (else #f)))) (define compiler:open-code-floating-point-arithmetic? - false) + #t) (define compiler:primitives-with-no-open-coding '(DIVIDE-FIXNUM &/ - FLOATING-VECTOR-CONS FLOATING-VECTOR-LENGTH - FLOATING-VECTOR-REF FLOATING-VECTOR-SET! FLONUM-ABS - FLONUM-ACOS FLONUM-ADD FLONUM-ASIN FLONUM-ATAN FLONUM-ATAN2 - FLONUM-CEILING FLONUM-COS FLONUM-DIVIDE FLONUM-EQUAL? - FLONUM-EXP FLONUM-FLOOR FLONUM-GREATER? FLONUM-LESS? - FLONUM-LOG FLONUM-MULTIPLY FLONUM-NEGATE FLONUM-NEGATIVE? - FLONUM-POSITIVE? FLONUM-ROUND FLONUM-SIN FLONUM-SQRT - FLONUM-SUBTRACT FLONUM-TAN FLONUM-TRUNCATE FLONUM-ZERO? - FLONUM? GCD-FIXNUM STRING-ALLOCATE VECTOR-CONS)) \ No newline at end of file + FLOATING-VECTOR-CONS FLONUM-ACOS FLONUM-ASIN FLONUM-ATAN + FLONUM-ATAN2 FLONUM-CEILING FLONUM-COS FLONUM-EXP FLONUM-FLOOR + FLONUM-LOG FLONUM-ROUND FLONUM-SIN FLONUM-TAN FLONUM-TRUNCATE + GCD-FIXNUM STRING-ALLOCATE VECTOR-CONS)) \ No newline at end of file diff --git a/src/compiler/machines/x86-64/rules1.scm b/src/compiler/machines/x86-64/rules1.scm index d65fa052e..d3b265155 100644 --- a/src/compiler/machines/x86-64/rules1.scm +++ b/src/compiler/machines/x86-64/rules1.scm @@ -372,6 +372,7 @@ USA. ,(if signed? (INST-EA (& ,n)) (INST-EA (&U ,n)))) + ;++ Check that SCALE is a valid SIB scale. (LEA Q ,target (@RI ,source ,temp ,scale))))))))))) (define-integrable (load-displaced-register target source n scale) @@ -394,28 +395,64 @@ USA. (define (load-pc-relative-address/typed target type label) ;++ This is pretty horrid, especially since it happens for every - ;++ continuation pushed! Neither alternative is much good. - ;; Twenty bytes. + ;++ continuation pushed! None of the alternatives is much good. + ;; Twenty bytes, but only three instructions and no extra memory. (let ((temp (temporary-register-reference))) (LAP (MOV Q ,temp (&U ,(make-non-pointer-literal type 0))) (LEA Q ,target (@PCR ,label)) (OR Q ,target ,temp))) #| - ;; Nineteen bytes. + ;; Nineteen bytes, but rather complicated (and needs syntax for an + ;; addressing mode not presently supported). + (cond ((zero? type) + (LAP (LEA Q ,target (@PCR ,label)))) + ((zero? (remainder type 8)) + (receive (type-divisor scale scale-log) + (cond ((not (zero? (remainder type #x10))) (values 8 8 3)) + ((not (zero? (remainder type #x20))) (values #x10 4 2)) + ((not (zero? (remainder type #x40))) (values #x20 2 1)) + (else (error "Type too large:" type))) + (let ((offset (quotient type type-divisor))) + (LAP (LEA Q ,target (@PCR ,label)) + (LEA Q ,target (@OI ,offset ,target ,scale)) + (ROR Q ,target (&U ,scale-log)))))) + (else ...)) + |# + #| + ;; This would be brilliant, except that it needs (PC * 2^6)-relative + ;; addressing, rather than PC-relative addressing. + (let* ((reference-point (generate-label 'PC)) + (offset + `(+ ,type + (* ,(expt 2 scheme-type-width) (- ,label ,reference-point))))) + (LAP (LEA Q ,target (@PCO ,offset)) + (LABEL ,reference-point) + (ROR Q ,target (&U ,scheme-type-width)))) + |# + #| + ;; Nineteen bytes and no temporaries, but four instructions. (LAP (LEA Q ,target (@PCR ,label)) (SHL Q ,target (&U ,scheme-type-width)) (OR Q ,target (&U ,type)) (ROR Q ,target (&U ,scheme-type-width))) |# - ;++ This doesn't work because CONSTANT->LABEL will give us a label - ;++ for the Scheme number object, not for the machine bit string. #| - ;; Seventeen bytes -- but we need the label to work. + ;; Seventeen bytes, but this requires reading eight bytes of memory. (let ((temp (temporary-register-reference)) (literal (make-non-pointer-literal type 0))) - (LAP (MOV Q ,temp (@PCR ,(constant->label literal))) + (LAP (MOV Q ,temp (@PCR ,(allocate-unsigned-quad-label literal))) (LEA Q ,target (@PCR ,label)) (OR Q ,target ,temp))) + |# + #| + ;; Fourteen bytes and no temporaries, but this requires reading + ;; eight bytes of memory. + (let* ((reference-point (generate-label 'REFERENCE-POINT)) + (expression + `(+ ,(make-non-pointer-literal type 0) (- ,label ,reference-point)))) + (LAP (LABEL ,reference-point) + (LEA Q ,target (@PCR ,reference-point)) + (ADD Q ,target (@PCR ,(allocate-unsigned-quad-label expression))))) |#) (define (load-char-into-register type source target) @@ -464,17 +501,18 @@ USA. (rtl:offset-address-offset base)))) expression)) -(define (with-decoded-detagged-offset expression recvr) +(define (with-decoded-detagged-offset expression receiver) (let ((base (rtl:offset-base expression))) (let ((base* (rtl:offset-address-base base)) (index (rtl:offset-address-offset base))) - (recvr (rtl:register-number (if (rtl:register? base*) - base* - (rtl:object->address-expression base*))) - (rtl:register-number (if (rtl:register? index) - index - (rtl:object->datum-expression index))) - (rtl:machine-constant-value (rtl:offset-offset expression)))))) + (receiver + (rtl:register-number (if (rtl:register? base*) + base* + (rtl:object->address-expression base*))) + (rtl:register-number (if (rtl:register? index) + index + (rtl:object->datum-expression index))) + (rtl:machine-constant-value (rtl:offset-offset expression)))))) ;;;; Improved string references diff --git a/src/compiler/machines/x86-64/rulfix.scm b/src/compiler/machines/x86-64/rulfix.scm index 9ca807e42..863d17d7b 100644 --- a/src/compiler/machines/x86-64/rulfix.scm +++ b/src/compiler/machines/x86-64/rulfix.scm @@ -316,54 +316,14 @@ USA. FIXNUM-AND FIXNUM-OR FIXNUM-XOR))) - + (define ((fixnum-2-args/standard commutative? operate) target source1 source2 overflow?) overflow? ; ignored - (two-arg-register-operation operate - commutative? - target - source1 - source2)) - -(define (two-arg-register-operation operate commutative? - target source1 source2) - (let* ((worst-case - (lambda (target source1 source2) - (LAP (MOV Q ,target ,source1) - ,@(operate target source2)))) - (new-target-alias! - (lambda () - (let ((source1 (any-reference source1)) - (source2 (any-reference source2))) - (delete-dead-registers!) - (worst-case (target-register-reference target) - source1 - source2))))) - (cond ((not (pseudo-register? target)) - (if (not (eq? (register-type target) 'GENERAL)) - (error "two-arg-register-operation: Wrong type register" - target 'GENERAL) - (worst-case (register-reference target) - (any-reference source1) - (any-reference source2)))) - ((register-copy-if-available source1 'GENERAL target) - => - (lambda (get-alias-ref) - (if (= source2 source1) - (let ((ref (get-alias-ref))) - (operate ref ref)) - (let ((source2 (any-reference source2))) - (operate (get-alias-ref) source2))))) - ((not commutative?) - (new-target-alias!)) - ((register-copy-if-available source2 'GENERAL target) - => - (lambda (get-alias-ref) - (let ((source1 (any-reference source1))) - (operate (get-alias-ref) source1)))) - (else - (new-target-alias!))))) + (binary-register-operation operate commutative? 'GENERAL + (lambda (target source) + (LAP (MOV Q ,target ,source))) + target source1 source2)) (define (fixnum-2-args/register*constant operator target source constant overflow?) @@ -508,11 +468,10 @@ USA. (lambda (target source1 source2 overflow?) overflow? ; ignored (require-register! rcx) - (two-arg-register-operation operate - #f - target - source1 - source2)))) + (binary-register-operation operate #f 'GENERAL + (lambda (target source) + (LAP (MOV Q ,target ,source))) + target source1 source2)))) (define (do-division target source1 source2 result-reg) (prefix-instructions! (load-machine-register! source1 rax)) diff --git a/src/compiler/machines/x86-64/rulflo.scm b/src/compiler/machines/x86-64/rulflo.scm index 885624014..e5ca59fb8 100644 --- a/src/compiler/machines/x86-64/rulflo.scm +++ b/src/compiler/machines/x86-64/rulflo.scm @@ -27,805 +27,369 @@ USA. ;;; package: (compiler lap-syntaxer) (declare (usual-integrations)) - -#| -;; **** -;; Missing: 2 argument operations and predicates with non-trivial -;; constant arguments. -;; Also missing with (OBJECT->FLOAT (REGISTER ...)) operands. -;; **** +(define (flonum-source! source) + (or (register-alias source 'FLOAT) + (load-alias-register! source 'FLOAT))) -(define (flonum-source! register) - (floreg->sti (load-alias-register! register 'FLOAT))) +(define-integrable (flonum-source-reference! source) + (register-reference (flonum-source! source))) -(define (flonum-target! pseudo-register) +(define (flonum-target! target) (delete-dead-registers!) - (floreg->sti (allocate-alias-register! pseudo-register 'FLOAT))) + (or (register-alias target 'FLOAT) + (allocate-alias-register! target 'FLOAT))) + +(define-integrable (flonum-target-reference! target) + (register-reference (flonum-target! target))) + +;;; FLONUM-DATA-OFFSET is the number of bytes after the location +;;; addressed by a FLONUM-tagged pointer before the actual flonum data +;;; begin. -(define (flonum-temporary!) - (allocate-temporary-register! 'FLOAT)) +(define-integrable flonum-data-offset address-units-per-object) (define-rule statement - ;; convert a floating-point number to a flonum object - (ASSIGN (REGISTER (? target)) - (FLOAT->OBJECT (REGISTER (? source)))) - (let* ((source (register-alias source 'FLOAT)) - (target (target-register-reference target))) - (LAP (MOV W (@R ,regnum:free-pointer) - (&U ,(make-non-pointer-literal - (ucode-type manifest-nm-vector) - 2))) - ,@(if (not source) - ;; Value is in memory home - (let ((off (pseudo-register-offset source)) - (temp (temporary-register-reference))) - (LAP (MOV W ,target - ,(offset-reference regnum:regs-pointer off)) - (MOV W ,temp - ,(offset-reference regnum:regs-pointer (1+ off))) - (MOV W (@RO B ,regnum:free-pointer 4) ,target) - (MOV W (@RO B ,regnum:free-pointer 8) ,temp))) - (store-float (floreg->sti source) - (INST-EA (@RO B ,regnum:free-pointer 4)))) - (LEA ,target - (@RO UW ,regnum:free-pointer - ,(make-non-pointer-literal (ucode-type flonum) 0))) - (ADD W (R ,regnum:free-pointer) (& 12))))) - -#| + (ASSIGN (REGISTER (? target)) (FLOAT->OBJECT (REGISTER (? source)))) + (let* ((source (flonum-source-reference! source)) + (target (target-register-reference target))) + (LAP ,@(with-unsigned-immediate-operand + (make-non-pointer-literal (ucode-type MANIFEST-NM-VECTOR) 1) + (lambda (operand) + (LAP (MOV Q (@R ,regnum:free-pointer) ,operand)))) + (MOVF S D (@RO ,regnum:free-pointer ,flonum-data-offset) ,source) + (MOV Q ,target (&U ,(make-non-pointer-literal (ucode-type FLONUM) 0))) + (OR Q ,target (R ,regnum:free-pointer)) + (LEA Q (R ,regnum:free-pointer) + (@RO ,regnum:free-pointer + ,(+ flonum-data-offset address-units-per-float)))))) + (define-rule statement - ;; convert a flonum object to a floating-point number (ASSIGN (REGISTER (? target)) (OBJECT->FLOAT (REGISTER (? source)))) (let* ((source (move-to-temporary-register! source 'GENERAL)) - (target (flonum-target! target))) + (target (flonum-target-reference! target))) (LAP ,@(object->address (register-reference source)) - ,@(load-float (INST-EA (@RO B ,source 4)) target)))) -|# - -(define-rule statement - ;; Convert a flonum object to a floating-point number. Unlike the - ;; version above which has an implicits OBJECT->ADDRESS, this one - ;; uses the addressing mode to remove the type-code. Saves a cycle - ;; and maybe a register spill if SOURCE is live after instruction. - (ASSIGN (REGISTER (? target)) (OBJECT->FLOAT (REGISTER (? source)))) - (let* ((source (source-register source)) - (target (flonum-target! target))) - (object->float source target))) - -(define (object->float source-register target) - (let ((untagging+offset - (- 4 (make-non-pointer-literal (ucode-type flonum) 0)))) - (load-float (INST-EA (@RO W ,source-register ,untagging+offset)) target))) + (MOVF S D ,target (@RO ,source ,flonum-data-offset))))) -;;;; Floating-point vector support. - (define-rule statement (ASSIGN (REGISTER (? target)) (? expression rtl:simple-float-offset?)) (let* ((source (float-offset->reference! expression)) - (target (flonum-target! target))) - (load-float source target))) + (target (flonum-target-reference! target))) + (LAP (MOVF S D ,target ,source)))) (define-rule statement (ASSIGN (? expression rtl:simple-float-offset?) (REGISTER (? source))) - (let ((source (flonum-source! source)) - (target (float-offset->reference! expression))) - (store-float source target))) + (let ((source (flonum-source-reference! source)) + (target (float-offset->reference! expression))) + (LAP (MOVF S D ,target ,source)))) (define-rule statement - (ASSIGN (REGISTER (? target)) - (? expression rtl:detagged-float-offset?)) + (ASSIGN (REGISTER (? target)) (? expression rtl:detagged-float-offset?)) (with-detagged-float-location expression - (lambda (temp) - (load-float temp target)))) + (lambda (source) + (LAP (MOVF S D ,(flonum-target-reference! target) ,source))))) (define-rule statement - (ASSIGN (? expression rtl:detagged-float-offset?) - (REGISTER (? source))) + (ASSIGN (? expression rtl:detagged-float-offset?) (REGISTER (? source))) (with-detagged-float-location expression - (lambda (temp) - (store-float (flonum-source! source) temp)))) + (lambda (target) + (LAP (MOVF S D ,target ,(flonum-source-reference! source)))))) -(define (with-detagged-float-location rtl-expression recvr) - ;; Never needs to protect a register because it is a float register! +(define (with-detagged-float-location rtl-expression receiver) (with-decoded-detagged-float-offset rtl-expression - (lambda (base index w-offset) - (with-indexed-address base index 8 (* 4 w-offset) false recvr)))) + (lambda (base float-index object-offset) + (with-indexed-address base float-index address-units-per-float + (* address-units-per-object object-offset) + ;; No general registers to protect -- the target and source + ;; will always be float registers. + #f + receiver)))) + +;;; These are nearly identical copies of RTL:DETAGGED-OFFSET? and +;;; WITH-DECODED-DETAGGED-OFFSET, with FLOAT-OFFSET substituted for +;;; OFFSET. It is unfortunate that the RTL doesn't have a clearer +;;; abstraction of offsets and addresses with arbitrary data. (define (rtl:detagged-float-offset? expression) (and (rtl:float-offset? expression) - (let ((base (rtl:float-offset-base expression)) - (offset (rtl:float-offset-offset expression))) - (and (rtl:offset-address? base) - (rtl:machine-constant? (rtl:offset-address-offset base)) - (rtl:detagged-index? (rtl:offset-address-base base) - offset))) + (rtl:machine-constant? (rtl:float-offset-offset expression)) + (let ((base (rtl:float-offset-base expression))) + (and (rtl:float-offset-address? base) + (rtl:detagged-index? (rtl:float-offset-address-base base) + (rtl:float-offset-address-offset base)))) expression)) -(define (with-decoded-detagged-float-offset expression recvr) - (let ((base (rtl:float-offset-base expression)) - (index (rtl:float-offset-offset expression))) - (let ((base* (rtl:offset-address-base base))) - (recvr (rtl:register-number (if (rtl:register? base*) - base* - (rtl:object->address-expression base*))) - (rtl:register-number (if (rtl:register? index) - index - (rtl:object->datum-expression index))) - (rtl:machine-constant-value (rtl:offset-address-offset base)))))) - -(define (load-float ea sti) - (LAP (FLD D ,ea) - (FSTP (ST ,(1+ sti))))) - -(define (store-float sti ea) - (if (zero? sti) - (LAP (FST D ,ea)) - (LAP (FLD (ST ,sti)) - (FSTP D ,ea)))) +(define (with-decoded-detagged-float-offset expression receiver) + (let ((base (rtl:float-offset-base expression))) + (let ((base* (rtl:float-offset-address-base base)) + (index (rtl:float-offset-address-offset base))) + (receiver + (rtl:register-number (if (rtl:register? base*) + base* + (rtl:object->address-expression base*))) + (rtl:register-number (if (rtl:register? index) + index + (rtl:object->datum-expression index))) + (rtl:machine-constant-value (rtl:float-offset-offset expression)))))) ;;;; Flonum Arithmetic (define-rule statement (ASSIGN (REGISTER (? target)) - (FLONUM-1-ARG (? operation) (REGISTER (? source)) (? overflow?))) - overflow? ;ignore - ((flonum-1-arg/operator operation) target source)) - -(define ((flonum-unary-operation/general operate) target source) - (define (default) - (let* ((source (flonum-source! source)) - (target (flonum-target! target))) - (operate target source))) - ;; Attempt to reuse source for target if it is in ST(0). - ;; Otherwise we will target ST(0) by sorting the machine registers. - (cond ((and (pseudo-register? target) (pseudo-register? source) - (eqv? fr0 (pseudo-register-alias *register-map* 'FLOAT source))) - (reuse-pseudo-register-alias - source 'FLOAT - (lambda (alias) - (let* ((sti (floreg->sti alias))) - (delete-register! alias) - (delete-dead-registers!) - (add-pseudo-register-alias! target alias) - (operate sti sti))) - default)) - (else (default)))) - -'(define ((flonum-unary-operation/general operate) target source) - (define (default) - (let* ((source (flonum-source! source)) - (target (flonum-target! target))) - (operate target source))) - ;; Attempt to reuse source for target. This works well when the - ;; source is ST(0). We try to arrange this by sorting the registers - ;; to give allocation preference to ST(0). - (cond ((pseudo-register? target) - (reuse-pseudo-register-alias - source 'FLOAT - (lambda (alias) - (let* ((sti (floreg->sti alias))) - (delete-register! alias) - (delete-dead-registers!) - (add-pseudo-register-alias! target alias) - (operate sti sti))) - default)) - (else (default)))) - -'(define ((flonum-unary-operation/general operate) target source) - (define (default) - (let* ((source (flonum-source! source)) - (target (flonum-target! target))) - (operate target source))) - ;; Attempt to reuse source for target. This works well when the - ;; source is ST(0). We try to arrange this by sorting the registers - ;; to give allocation preference to ST(0). - (cond ((pseudo-register? target) - (let ((alias - (and (dead-register? source) - (pseudo-register-alias *register-map* 'FLOAT source)))) - (if alias - (default))) - - (reuse-pseudo-register-alias - source 'FLOAT - (lambda (alias) - (let* ((sti (floreg->sti alias))) - (delete-register! alias) - (delete-dead-registers!) - (add-pseudo-register-alias! target alias) - (operate sti sti))) - default)) - (else (default)))) - -(define (flonum-1-arg/operator operation) + (FLONUM-1-ARG (? operator) (REGISTER (? source)) (? overflow?))) + overflow? ;ignore + (let* ((source (flonum-source-reference! source)) + (target (flonum-target-reference! target))) + ((flonum-1-arg/operator operator) target source))) + +(define-integrable (flonum-1-arg/operator operation) (lookup-arithmetic-method operation flonum-methods/1-arg)) (define flonum-methods/1-arg (list 'FLONUM-METHODS/1-ARG)) - -;;; Notice the weird ,', syntax here. -;;; If LAP changes, this may also have to change. -(let-syntax - ((define-flonum-operation - (sc-macro-transformer - (lambda (form environment) - environment - (let ((primitive-name (cadr form)) - (opcode (caddr form))) - `(define-arithmetic-method ',primitive-name flonum-methods/1-arg - (flonum-unary-operation/general - (lambda (target source) - (if (and (zero? target) (zero? source)) - (LAP (,opcode)) - (LAP (FLD (ST ,', source)) - (,opcode) - (FSTP (ST ,',(1+ target))))))))))))) - (define-flonum-operation FLONUM-NEGATE FCHS) - (define-flonum-operation FLONUM-ABS FABS) - ;; Disabled: FSIN and FCOS limited to pi * 2^62. - ;;(define-flonum-operation FLONUM-SIN FSIN) - ;;(define-flonum-operation FLONUM-COS FCOS) - (define-flonum-operation FLONUM-SQRT FSQRT) - (define-flonum-operation FLONUM-ROUND FRNDINT)) - -;; These (and FLONUM-ROUND above) presume that the default rounding mode -;; is round-to-nearest/even - -(define (define-rounding prim-name mode) - (define-arithmetic-method prim-name flonum-methods/1-arg - (flonum-unary-operation/general - (lambda (target source) - (let ((temp (temporary-register-reference))) - (LAP (FSTCW (@R ,regnum:free-pointer)) - ,@(if (and (zero? target) (zero? source)) - (LAP) - (LAP (FLD (ST ,source)))) - (MOV B ,temp (@RO B ,regnum:free-pointer 1)) - (OR B (@RO B ,regnum:free-pointer 1) (&U ,mode)) - (FNLDCW (@R ,regnum:free-pointer)) - (FRNDINT) - (MOV B (@RO B ,regnum:free-pointer 1) ,temp) - ,@(if (and (zero? target) (zero? source)) - (LAP) - (LAP (FSTP (ST ,(1+ target))))) - (FNLDCW (@R ,regnum:free-pointer)))))))) - -(define-rounding 'FLONUM-CEILING #x08) -(define-rounding 'FLONUM-FLOOR #x04) -(define-rounding 'FLONUM-TRUNCATE #x0c) - -;; This is used in order to avoid using two stack locations for -;; the remainder unary operations. - -(define ((flonum-unary-operation/stack-top operate) target source) - (define (finish source->top) - ;; Perhaps this can be improved? - (rtl-target:=machine-register! target fr0) - (LAP ,@source->top - ,@(operate))) - - (if (or (machine-register? source) - (not (is-alias-for-register? fr0 source)) - (not (dead-register? source))) - (finish (load-machine-register! source fr0)) - (begin - (delete-dead-registers!) - (finish (LAP))))) - -(define-arithmetic-method 'FLONUM-LOG flonum-methods/1-arg - (flonum-unary-operation/stack-top - (lambda () - (LAP (FLDLN2) - (FXCH (ST 0) (ST 1)) - (FYL2X))))) - -(define-arithmetic-method 'FLONUM-EXP flonum-methods/1-arg - (flonum-unary-operation/stack-top - (lambda () - ;; Hair to avoid arithmetic for non-finite inputs: exp(-inf) = 0, - ;; but exp(x) = x for any other non-finite x. We use the first - ;; free slot (1) to pick apart the double format to check for - ;; non-finite inputs, and (2) to avoid using two stack slots. - (let ((temp (temporary-register-reference)) - (infinity-or-nan (generate-label 'INFINITY-OR-NAN)) - (join (generate-label 'JOIN)) - (temp-pointer regnum:free-pointer)) - (LAP (FST D (@R ,temp-pointer)) - (MOV W ,temp (@RO W ,temp-pointer 4)) - (AND W ,temp (&U #x7FFFFFFF)) - (CMP W ,temp (&U #x7FF00000)) - (JAE B (@PCR ,infinity-or-nan)) - ;; Compute 2^(x log_2 e) with F2XM1 and FSCALE. - (FLDL2E) ;st0 = lg e, st1 = x - (FMULP (ST 1) (ST 0)) ;st0 = x lg e - (FLD (ST 0)) ;st0 = x lg e, st1 = x lg e - (FRNDINT) ;st0 = I(x lg e), st1 = x lg e - (FSUB (ST 1) (ST 0)) ;st0 = I(x lg e), st1 = F(x lg e) - (FSTP D (@R ,temp-pointer)) ;st0 = F(x lg e), save I(x lg e) - (F2XM1) ;st0 = 2^F(x lg e) - 1 - (FLD1) ;st0 = 1, st1 = 2^F(x lg e) - 1 - (FADD) ;st0 = 2^F(x lg e) - (FLD D (@R ,temp-pointer)) ;st0 = I(x lg e), st1 = 2^F(x lg e) - (FXCH (ST 0) (ST 1)) ;st0 = 2^F(x lg e), st1 = I(x lg e) - (FSCALE) ;st0 = 2^F(x lg e) * 2^I(x lg e), - ;st1 = I(x lg e) - (FSTP (ST 1)) ;Drop st1, leaving in st0 the value - (JMP B (@PCR ,join)) ; 2^(F(x lg e) + I(x lg e)) = e^x. - (LABEL ,infinity-or-nan) - (CMP W (@RO W ,temp-pointer 4) (&U #xFFF00000)) - (JNE B (@PCR ,join)) - (CMP W (@RO W ,temp-pointer 0) (& 0)) - (JNE B (@PCR ,join)) - (FSTP (ST 0)) ;Pop argument. - (FLDZ) ;Return zero. - (LABEL ,join)))))) +(define ((flonum-unary-operation/target-bits bit-string operate) target source) + (LAP (MOVF S D ,target (@PCR ,(allocate-double-float-bits-label bit-string))) + ,@(operate target source))) + +(define double-flobits:negative-zero + (let ((bit-string (make-bit-string 64 #f))) + (bit-string-set! bit-string 63) + bit-string)) + +(define-arithmetic-method 'FLONUM-ABS flonum-methods/1-arg + (flonum-unary-operation/target-bits + (bit-string-not double-flobits:negative-zero) + (lambda (target source) + ;; No scalar version, but doing this packed is harmless. + (LAP (ANDF P D ,target ,source))))) + +(define-arithmetic-method 'FLONUM-NEGATE flonum-methods/1-arg + (flonum-unary-operation/target-bits + double-flobits:negative-zero + (lambda (target source) + ;; No scalar version, but doing this packed is harmless. + (LAP (XORF P D ,target ,source))))) + +(define-arithmetic-method 'FLONUM-SQRT flonum-methods/1-arg + (lambda (target source) + (LAP (SQRTF S D ,target ,source)))) -#| -;; Disabled: FPTAN limited to pi * 2^62. -(define-arithmetic-method 'FLONUM-TAN flonum-methods/1-arg - (flonum-unary-operation/stack-top - (lambda () - (LAP (FPTAN) - (FSTP (ST 0)) ; FPOP - )))) -|# +(define-rule statement + (ASSIGN (REGISTER (? target)) + (FLONUM-2-ARGS (? operator) + (REGISTER (? source1)) + (REGISTER (? source2)) + (? overflow?))) + overflow? ;ignore + ((flonum-2-args/operator operator) target source1 source2)) -(define-arithmetic-method 'FLONUM-ATAN flonum-methods/1-arg - (flonum-unary-operation/stack-top - (lambda () - (LAP (FLD1) - (FPATAN))))) - -;; For now, these preserve values in memory -;; in order to avoid flushing a stack location. - -(define-arithmetic-method 'FLONUM-ACOS flonum-methods/1-arg - (flonum-unary-operation/stack-top - (lambda () - (LAP (FST D (@R ,regnum:free-pointer)) - (FMUL (ST 0) (ST 0)) - (FLD1) - (F%SUBP (ST 1) (ST 0)) - (FSQRT) - (FLD D (@R ,regnum:free-pointer)) - (FPATAN))))) - -(define-arithmetic-method 'FLONUM-ASIN flonum-methods/1-arg - (flonum-unary-operation/stack-top - (lambda () - (LAP (FST D (@R ,regnum:free-pointer)) - (FMUL (ST 0) (ST 0)) - (FLD1) - (F%SUBP (ST 1) (ST 0)) - (FSQRT) - (FLD D (@R ,regnum:free-pointer)) - (FXCH (ST 0) (ST 1)) - (FPATAN))))) - (define-rule statement (ASSIGN (REGISTER (? target)) - (FLONUM-2-ARGS (? operation) - (REGISTER (? source1)) - (REGISTER (? source2)) - (? overflow?))) - overflow? ;ignore - ((flonum-2-args/operator operation) target source1 source2)) - -;; Binary instructions all use ST(0), and are of the forms -;; Fop ST(0),ST(i) -;; Fop ST(i),ST(0) -;; FopP ST(i),ST(0) -;; Fop ST(0),memory -;; -;; If possible, we like to target ST(0) since it is likely to be the -;; source of a subsequent operation. Failing that, it is good to -;; reuse one of the source aliases. - -(define ((flonum-binary-operation operate) target source1 source2) - (define (default) - (let* ((sti1 (flonum-source! source1)) - (sti2 (flonum-source! source2))) - (operate (flonum-target! target) sti1 sti2))) - (define (try-reuse-1 if-cannot) - (reuse-pseudo-register-alias - source1 'FLOAT - (lambda (alias1) - (let* ((sti1 (floreg->sti alias1)) - (sti2 (if (= source1 source2) - sti1 - (flonum-source! source2)))) - (delete-register! alias1) - (delete-dead-registers!) - (add-pseudo-register-alias! target alias1) - (operate sti1 sti1 sti2))) - if-cannot)) - (define (try-reuse-2 if-cannot) - (reuse-pseudo-register-alias - source2 'FLOAT - (lambda (alias2) - (let* ((sti2 (floreg->sti alias2)) - (sti1 (if (= source1 source2) - sti2 - (flonum-source! source1)))) - (delete-register! alias2) - (delete-dead-registers!) - (add-pseudo-register-alias! target alias2) - (operate sti2 sti1 sti2))) - if-cannot)) - (cond ((pseudo-register? target) - (if (is-alias-for-register? fr0 source1) - (try-reuse-1 (lambda () (try-reuse-2 default))) - (try-reuse-2 (lambda () (try-reuse-1 default))))) - ((not (eq? (register-type target) 'FLOAT)) - (error "flonum-2-args: Wrong type register" target 'FLOAT)) - (else (default)))) - -(define (flonum-2-args/operator operation) - (lookup-arithmetic-method operation flonum-methods/2-args)) + (FLONUM-2-ARGS (? operator) + (REGISTER (? source)) + (OBJECT->FLOAT (CONSTANT (? constant))) + (? overflow?))) + overflow? ;ignore + ((flonum-register*constant/operator operator) target source constant)) + +(define-rule statement + (ASSIGN (REGISTER (? target)) + (FLONUM-2-ARGS (? operator) + (OBJECT->FLOAT (CONSTANT (? constant))) + (REGISTER (? source)) + (? overflow?))) + overflow? ;ignore + ((flonum-constant*register/operator operator) target constant source)) (define flonum-methods/2-args (list 'FLONUM-METHODS/2-ARGS)) -(define (flonum-1-arg%1/operator operation) - (lookup-arithmetic-method operation flonum-methods/1-arg%1)) +(define flonum-methods/register*constant + (list 'FLONUM-METHODS/REGISTER*CONSTANT)) -(define flonum-methods/1-arg%1 - (list 'FLONUM-METHODS/1-ARG%1)) +(define flonum-methods/constant*register + (list 'FLONUM-METHODS/CONSTANT*REGISTER)) -(define (flonum-1%1-arg/operator operation) - (lookup-arithmetic-method operation flonum-methods/1%1-arg)) +(define-integrable (flonum-2-args/operator operator) + (lookup-arithmetic-method operator flonum-methods/2-args)) -(define flonum-methods/1%1-arg - (list 'FLONUM-METHODS/1%1-ARG)) +(define-integrable (flonum-register*constant/operator operator) + (lookup-arithmetic-method operator flonum-methods/register*constant)) -(define (binary-flonum-arithmetic? operation) - (memq operation '(FLONUM-ADD FLONUM-SUBTRACT FLONUM-MULTIPLY FLONUM-DIVIDE))) +(define-integrable (flonum-constant*register/operator operator) + (lookup-arithmetic-method operator flonum-methods/constant*register)) -(let-syntax - ((define-flonum-operation - (sc-macro-transformer - (lambda (form environment) - environment - (let ((primitive-name (list-ref form 1)) - (op1%2 (list-ref form 2)) - (op1%2p (list-ref form 3)) - (op2%1 (list-ref form 4)) - (op2%1p (list-ref form 5))) - `(begin - (define-arithmetic-method ',primitive-name flonum-methods/2-args - (flonum-binary-operation - (lambda (target source1 source2) - (cond ((= target source1) - (cond ((zero? target) - (LAP (,op1%2 (ST 0) (ST ,',source2)))) - ((zero? source2) - (LAP (,op2%1 (ST ,',target) (ST 0)))) - (else - (LAP (FLD (ST ,',source2)) - (,op2%1p (ST ,',(1+ target)) (ST 0)))))) - ((= target source2) - (cond ((zero? target) - (LAP (,op2%1 (ST 0) (ST ,',source1)))) - ((zero? source1) - (LAP (,op1%2 (ST ,',target) (ST 0)))) - (else - (LAP (FLD (ST ,',source1)) - (,op1%2p (ST ,',(1+ target)) (ST 0)))))) - (else - (LAP (FLD (ST ,',source1)) - (,op1%2 (ST 0) (ST ,',(1+ source2))) - (FSTP (ST ,',(1+ target))))))))) - - (define-arithmetic-method ',primitive-name - flonum-methods/1%1-arg - (flonum-unary-operation/general - (lambda (target source) - (if (= source target) - (LAP (FLD1) - (,op1%2p (ST ,',(1+ target)) (ST 0))) - (LAP (FLD1) - (,op1%2 (ST 0) (ST ,',(1+ source))) - (FSTP (ST ,',(1+ target)))))))) - - (define-arithmetic-method ',primitive-name - flonum-methods/1-arg%1 - (flonum-unary-operation/general - (lambda (target source) - (if (= source target) - (LAP (FLD1) - (,op2%1p (ST ,',(1+ target)) (ST 0))) - (LAP (FLD1) - (,op2%1 (ST 0) (ST ,',(1+ source))) - (FSTP (ST ,',(1+ target)))))))))))))) - - (define-flonum-operation FLONUM-ADD FADD FADDP FADD FADDP) - (define-flonum-operation FLONUM-SUBTRACT F%SUB F%SUBP F%SUBR F%SUBPR) - (define-flonum-operation FLONUM-MULTIPLY FMUL FMULP FMUL FMULP) - (define-flonum-operation FLONUM-DIVIDE F%DIV F%DIVP F%DIVR F%DIVPR)) - -(define-arithmetic-method 'FLONUM-ATAN2 flonum-methods/2-args - (lambda (target source1 source2) - (if (and (not (machine-register? source1)) - (is-alias-for-register? fr0 source1) - (dead-register? source1)) - (let ((source2 (flonum-source! source2))) - (delete-dead-registers!) - (rtl-target:=machine-register! target fr0) - (LAP (FLD (ST ,source2)) - (FPATAN))) - (begin - (prefix-instructions! (load-machine-register! source1 fr0)) - (need-register! fr0) - (let ((source2 - (if (= source2 source1) fr0 (flonum-source! source2)))) - (delete-dead-registers!) - (rtl-target:=machine-register! target fr0) - (LAP (FLD (ST ,source2)) - (FPATAN))))))) - -(define-arithmetic-method 'FLONUM-REMAINDER flonum-methods/2-args - (flonum-binary-operation - (lambda (target source1 source2) - (if (zero? source2) - (LAP (FLD (ST ,source1)) - (FPREM1) - (FSTP (ST ,(1+ target)))) - #| - ;; This sequence is one cycle shorter than the one below, - ;; but needs two spare stack locations instead of one. - ;; Since FPREM1 is a variable, very slow instruction, - ;; the difference in time will hardly be noticeable - ;; but the availability of an extra "register" may be. - (LAP (FLD (ST ,source2)) - (FLD (ST ,source1)) - (FPREM1) - (FSTP (ST ,(+ target 2))) - (FSTP (ST 0))) ; FPOP - |# - (LAP (FXCH (ST 0) (ST ,source2)) - (FLD (ST ,(if (zero? source1) source2 source1))) - (FPREM1) - (FSTP (ST ,(1+ (if (= target source2) - 0 - target)))) - (FXCH (ST 0) (ST ,source2))))))) - -(define-rule statement - (ASSIGN (REGISTER (? target)) - (FLONUM-2-ARGS FLONUM-SUBTRACT - (OBJECT->FLOAT (CONSTANT 0.)) - (REGISTER (? source)) - (? overflow?))) - overflow? ;ignore - ((flonum-unary-operation/general - (lambda (target source) - (if (and (zero? target) (zero? source)) - (LAP (FCHS)) - (LAP (FLD (ST ,source)) - (FCHS) - (FSTP (ST ,(1+ target))))))) - target source)) +(define ((flonum-2-args/standard commutative? operate) target source1 source2) + (binary-register-operation operate commutative? 'FLOAT + (lambda (target source) + (LAP (MOVF S D ,target ,source))) + target source1 source2)) + +(define ((flonum-register*constant/standard operate) target source constant) + (with-float-operand constant + (lambda (operand) + (operate + (register-reference (move-to-alias-register! source 'FLOAT target)) + operand)))) + +;++ Possible improvement, not currently easy with the generic register +;++ allocator operations provided: if the constant is zero and we have +;++ a temporary register available, we can zero that with XOR and use +;++ it in the place of loading a PC-relative double in memory. + +(define ((flonum-constant*register/commutative operate) target constant source) + (with-float-operand constant + (lambda (operand) + (operate + (register-reference (move-to-alias-register! source 'FLOAT target)) + operand)))) + +(define ((flonum-constant*register/noncommutative operate) + target constant source) + (let* ((source (flonum-source-reference! source)) + (target (flonum-target-reference! target))) + (with-float-operand constant + (lambda (operand) + (LAP (MOVF S D ,target ,operand) + ,@(operate target source)))))) -(define-rule statement - (ASSIGN (REGISTER (? target)) - (FLONUM-2-ARGS (? operation) - (REGISTER (? source)) - (OBJECT->FLOAT (CONSTANT 1.)) - (? overflow?))) - (QUALIFIER (binary-flonum-arithmetic? operation)) - overflow? ;ignore - ((flonum-1-arg%1/operator operation) target source)) - -(define-rule statement - (ASSIGN (REGISTER (? target)) - (FLONUM-2-ARGS (? operation) - (OBJECT->FLOAT (CONSTANT 1.)) - (REGISTER (? source)) - (? overflow?))) - (QUALIFIER (binary-flonum-arithmetic? operation)) - overflow? ;ignore - ((flonum-1%1-arg/operator operation) target source)) +(let-syntax + ((binary-operation + (sc-macro-transformer + (lambda (form environment) + environment ;ignore + (let ((name (cadr form)) + (op (caddr form)) + (commutative? (cadddr form))) + `(let ((operate + (lambda (target source) + (LAP (,op S D ,',target ,',source))))) + (define-arithmetic-method ',name flonum-methods/2-args + (flonum-2-args/standard ,commutative? operate)) + (define-arithmetic-method ',name flonum-methods/register*constant + (flonum-register*constant/standard operate)) + (define-arithmetic-method ',name flonum-methods/constant*register + (,(if commutative? + 'flonum-constant*register/commutative + 'flonum-constant*register/noncommutative) + operate)))))))) + (binary-operation FLONUM-ADD ADDF #t) + (binary-operation FLONUM-DIVIDE DIVF #f) + (binary-operation FLONUM-MULTIPLY MULF #t) + (binary-operation FLONUM-SUBTRACT SUBF #f)) ;;;; Flonum Predicates -(define-rule predicate - (FLONUM-PRED-1-ARG (? predicate) (REGISTER (? source))) - (flonum-compare-zero predicate source)) - -(define-rule predicate - (FLONUM-PRED-2-ARGS (? predicate) - (REGISTER (? source1)) - (REGISTER (? source2))) - (let* ((st1 (flonum-source! source1)) - (st2 (flonum-source! source2))) - (cond ((zero? st1) - (flonum-branch! predicate - (LAP (FCOM (ST 0) (ST ,st2))))) - ((zero? st2) - (flonum-branch! (commute-flonum-predicate predicate) - (LAP (FCOM (ST 0) (ST ,st1))))) - (else - (flonum-branch! predicate - (LAP (FLD (ST ,st1)) - (FCOMP (ST 0) (ST ,(1+ st2))))))))) +(define double-flobits:zero + (make-bit-string 64 #f)) (define-rule predicate - (FLONUM-PRED-2-ARGS (? predicate) - (REGISTER (? source)) - (OBJECT->FLOAT (CONSTANT 0.))) - (flonum-compare-zero predicate source)) + (FLONUM-PRED-1-ARG (? predicate) (REGISTER (? source))) + (flonum-branch! + predicate + (flonum-source-reference! source) + (INST-EA (@PCR ,(allocate-double-float-bits-label double-flobits:zero))))) (define-rule predicate (FLONUM-PRED-2-ARGS (? predicate) - (OBJECT->FLOAT (CONSTANT 0.)) - (REGISTER (? source))) - (flonum-compare-zero (commute-flonum-predicate predicate) source)) + (REGISTER (? source1)) + (REGISTER (? source2))) + (flonum-branch! predicate + (flonum-source-reference! source1) + (flonum-source-reference! source2))) (define-rule predicate (FLONUM-PRED-2-ARGS (? predicate) - (REGISTER (? source)) - (OBJECT->FLOAT (CONSTANT 1.))) - (flonum-compare-one predicate source)) + (REGISTER (? source)) + (OBJECT->FLOAT (CONSTANT (? constant)))) + (with-float-operand constant + (lambda (operand) + (flonum-branch! predicate (flonum-source-reference! source) operand)))) (define-rule predicate (FLONUM-PRED-2-ARGS (? predicate) - (OBJECT->FLOAT (CONSTANT 1.)) - (REGISTER (? source))) - (flonum-compare-one (commute-flonum-predicate predicate) source)) - -(define (flonum-compare-zero predicate source) - (let ((sti (flonum-source! source))) - (if (zero? sti) - (flonum-branch! predicate - (LAP (FTST))) - (flonum-branch! (commute-flonum-predicate predicate) - (LAP (FLDZ) - (FCOMP (ST 0) (ST ,(1+ sti)))))))) - -(define (flonum-compare-one predicate source) - (let ((sti (flonum-source! source))) - (flonum-branch! (commute-flonum-predicate predicate) - (LAP (FLD1) - (FCOMP (ST 0) (ST ,(1+ sti))))))) - -(define (commute-flonum-predicate pred) - (case pred + (OBJECT->FLOAT (CONSTANT (? constant))) + (REGISTER (? source))) + (with-float-operand constant + (lambda (operand) + (flonum-branch! (commute-flonum-predicate predicate) + (flonum-source-reference! source) + operand)))) + +(define (commute-flonum-predicate predicate) + (case predicate ((FLONUM-EQUAL? FLONUM-ZERO?) 'FLONUM-EQUAL?) ((FLONUM-LESS? FLONUM-NEGATIVE?) 'FLONUM-GREATER?) ((FLONUM-GREATER? FLONUM-POSITIVE?) 'FLONUM-LESS?) - (else - (error "commute-flonum-predicate: Unknown predicate" pred)))) + (else (error "commute-flonum-predicate: Unknown predicate" predicate)))) -(define (flonum-branch! predicate prefix) +(define (flonum-branch! predicate source1 source2) (case predicate ((FLONUM-EQUAL? FLONUM-ZERO?) (set-current-branches! (lambda (label) - (let ((unordered (generate-label 'UNORDERED))) - (LAP (JP (@PCR ,unordered)) - (JE (@PCR ,label)) - (LABEL ,unordered)))) - (lambda (label) - (LAP (JNE (@PCR ,label)) - (JP (@PCR ,label)))))) + (let ((unordered (generate-label 'UNORDERED))) + (LAP (JP (@PCR ,unordered)) + (JE (@PCR ,label)) + (LABEL ,unordered)))) + (lambda (label) + (LAP (JNE (@PCR ,label)) + (JP (@PCR ,label)))))) ((FLONUM-LESS? FLONUM-NEGATIVE?) (set-current-branches! (lambda (label) - (let ((unordered (generate-label 'UNORDERED))) - (LAP (JP (@PCR ,unordered)) - (JB (@PCR ,label)) - (LABEL ,unordered)))) - (lambda (label) - (LAP (JAE (@PCR ,label)) - (JP (@PCR ,label)))))) + (let ((unordered (generate-label 'UNORDERED))) + (LAP (JP (@PCR ,unordered)) + (JB (@PCR ,label)) + (LABEL ,unordered)))) + (lambda (label) + (LAP (JAE (@PCR ,label)) + (JP (@PCR ,label)))))) ((FLONUM-GREATER? FLONUM-POSITIVE?) (set-current-branches! (lambda (label) - (LAP (JA (@PCR ,label)))) - (lambda (label) - (LAP (JBE (@PCR ,label)))))) + (LAP (JA (@PCR ,label)))) + (lambda (label) + (LAP (JBE (@PCR ,label)))))) (else (error "flonum-branch!: Unknown predicate" predicate))) - (flush-register! eax) - (LAP ,@prefix - (FSTSW (R ,eax)) - (SAHF))) - -;; This is endianness dependent! - -(define (flonum-value->data-decl value) - (let ((high (make-bit-string 32 false)) - (low (make-bit-string 32 false))) - (read-bits! value 32 high) - (read-bits! value 64 low) - (LAP ,@(lap:comment `(FLOAT ,value)) - (LONG U ,(bit-string->unsigned-integer high)) - (LONG U ,(bit-string->unsigned-integer low))))) - -(define (flo:32-bit-representation-exact? value) - ;; Returns unsigned long representation if 32 bit representation - ;; exists, i.e. if all `1' significant mantissa bits fit in the 32 - ;; bit format and the exponent is within range. - (let ((mant-diff (make-bit-string (- 52 23) false))) - (read-bits! value (+ 32 0) mant-diff) - (and (bit-string-zero? mant-diff) - (let ((expt64 (make-bit-string 11 false))) - (read-bits! value (+ 32 52) expt64) - (let ((expt (- (bit-string->unsigned-integer expt64) 1022))) - (and (<= -127 expt 127) - (let ((sign (make-bit-string 1 false)) - (mant32 (make-bit-string 23 false))) - (read-bits! value (+ 32 52 11) sign) - (read-bits! value (+ 32 52 -23) mant32) - (bit-string->unsigned-integer - (bit-string-append - (bit-string-append - mant32 - (unsigned-integer->bit-string 8 (+ 126 expt))) - sign))))))))) - -(define (flonum->label value block-name alignment offset data) - (let* ((block - (or (find-extra-code-block block-name) - (let ((block (declare-extra-code-block! block-name - 'ANYWHERE - '()))) - (add-extra-code! - block - (LAP (PADDING ,offset ,alignment ,padding-string))) - block))) - (pairs (extra-code-block/xtra block)) - (place (assoc value pairs))) - (if place - (cdr place) - (let ((label (generate-label block-name))) - (set-extra-code-block/xtra! - block - (cons (cons value label) pairs)) - (add-extra-code! block - (LAP (LABEL ,label) - ,@data)) - label)))) - -(define (double-flonum->label fp-value) - (flonum->label fp-value 'DOUBLE-FLOATS 8 0 - (flonum-value->data-decl fp-value))) - -(define (single-flonum->label fp-value) - (flonum->label fp-value 'SINGLE-FLOATS 4 0 - (LAP ,@(lap:comment `(SINGLE-FLOAT ,fp-value)) - (LONG U ,(flo:32-bit-representation-exact? fp-value))))) - + (LAP (UCOMISF D ,source1 ,source2))) + (define-rule statement (ASSIGN (REGISTER (? target)) (OBJECT->FLOAT (CONSTANT (? fp-value)))) (cond ((not (flo:flonum? fp-value)) - (error "OBJECT->FLOAT: Not a floating-point value" fp-value)) - ((flo:= fp-value 0.0) - (let ((target (flonum-target! target))) - (LAP (FLDZ) - (FSTP (ST ,(1+ target)))))) - ((flo:= fp-value 1.0) - (let ((target (flonum-target! target))) - (LAP (FLD1) - (FSTP (ST ,(1+ target)))))) - (compiler:cross-compiling? - (let* ((temp (allocate-temporary-register! 'GENERAL)) - (target (flonum-target! target))) - (LAP ,@(load-constant (register-reference temp) fp-value) - ,@(object->float temp target)))) - (else - (let ((target (flonum-target! target))) - (with-pcr-float fp-value - (lambda (ea size) - (LAP (FLD ,size ,ea) - (FSTP (ST ,(1+ target)))))))))) - -(define (with-pcr-float fp-value receiver) - (define (generate-ea label-expr size) - (with-pc - (lambda (pc-label pc-register) - (receiver (INST-EA (@RO W ,pc-register (- ,label-expr ,pc-label))) - size)))) - (if (flo:32-bit-representation-exact? fp-value) - (generate-ea (single-flonum->label fp-value) 'S) - (generate-ea (double-flonum->label fp-value) 'D))) -|# + (error "OBJECT->FLOAT: Not a floating-point value" fp-value)) + ((flo:= fp-value 0.0) + (let ((target (flonum-target-reference! target))) + (LAP (XORF P D ,target ,target)))) + (else + (with-float-operand fp-value + (lambda (operand) + (LAP (MOVF S D ,(flonum-target-reference! target) ,operand))))))) + +(define (with-float-operand fp-value receiver) + (if (not (flo:flonum? fp-value)) + (error "Invalid constant flonum operand:" fp-value)) + (if compiler:cross-compiling? + (let ((temp (allocate-temporary-register! 'GENERAL))) + (LAP ,@(load-constant (register-reference temp) fp-value) + ,@(object->address (register-reference temp)) + ,@(receiver (INST-EA (@RO ,temp ,flonum-data-offset))))) + (receiver (INST-EA (@PCR ,(allocate-double-float-label fp-value)))))) + +(define (allocate-double-float-bits-label bit-string) + (allocate-data-label bit-string 'DOUBLE-FLOATS 0 8 + (LAP (QUAD U ,(bit-string->unsigned-integer bit-string))))) + +(define (allocate-single-float-bits-label bit-string) + (allocate-data-label bit-string 'SINGLE-FLOATS 0 4 + (LAP (LONG U ,(bit-string->unsigned-integer bit-string))))) + +(define (allocate-double-float-label flonum) + (allocate-double-float-bits-label + (let ((bit-string (make-bit-string 64 #f))) + ;; Skip the manifest preceding the flonum data. Is there a + ;; better way to express this? + (let* ((bytes-per-object (vector-ref (gc-space-status) 0)) + (bits-per-object (* 8 bytes-per-object)) + (flonum-data-offset-in-bits bits-per-object)) + (read-bits! flonum flonum-data-offset-in-bits bit-string)) + bit-string))) diff --git a/src/compiler/machines/x86-64/rulrew.scm b/src/compiler/machines/x86-64/rulrew.scm index 70259dc8e..185d1bd4c 100644 --- a/src/compiler/machines/x86-64/rulrew.scm +++ b/src/compiler/machines/x86-64/rulrew.scm @@ -215,94 +215,24 @@ USA. (define-rule rewriting (OBJECT->FLOAT (REGISTER (? operand register-known-value))) - (QUALIFIER - (rtl:constant-flonum-test operand (lambda (v) v #T))) + ;; This is not quite what we want. We really want to rewrite all + ;; OBJECT->FLOAT expressions with known constant operands, not just + ;; the nonzero ones, and then decide later whether to put it in + ;; memory based on whether there is a temporary register that we can + ;; zero with XOR. By not rewriting this case when the constant is + ;; zero, using a temporary may cause some other register to be + ;; written to memory, which defeats the purpose of using XOR to + ;; avoid memory access. + (QUALIFIER (rtl:constant-flonum-test operand flo:nonzero?)) (rtl:make-object->float operand)) (define-rule rewriting (FLONUM-2-ARGS FLONUM-SUBTRACT - (REGISTER (? operand-1 register-known-value)) - (? operand-2) - (? overflow?)) - (QUALIFIER (rtl:constant-flonum-test operand-1 flo:zero?)) - (rtl:make-flonum-2-args 'FLONUM-SUBTRACT operand-1 operand-2 overflow?)) - -(define-rule rewriting - (FLONUM-2-ARGS (? operation) - (REGISTER (? operand-1 register-known-value)) - (? operand-2) - (? overflow?)) - (QUALIFIER - (and (memq operation - '(FLONUM-ADD FLONUM-SUBTRACT FLONUM-MULTIPLY FLONUM-DIVIDE)) - (rtl:constant-flonum-test operand-1 flo:one?))) - (rtl:make-flonum-2-args operation operand-1 operand-2 overflow?)) - -(define-rule rewriting - (FLONUM-2-ARGS (? operation) - (? operand-1) - (REGISTER (? operand-2 register-known-value)) - (? overflow?)) - (QUALIFIER - (and (memq operation - '(FLONUM-ADD FLONUM-SUBTRACT FLONUM-MULTIPLY FLONUM-DIVIDE)) - (rtl:constant-flonum-test operand-2 flo:one?))) - (rtl:make-flonum-2-args operation operand-1 operand-2 overflow?)) - -(define-rule rewriting - (FLONUM-PRED-2-ARGS (? predicate) - (? operand-1) - (REGISTER (? operand-2 register-known-value))) - (QUALIFIER (rtl:constant-flonum-test operand-2 flo:zero?)) - (list 'FLONUM-PRED-2-ARGS predicate operand-1 operand-2)) - -(define-rule rewriting - (FLONUM-PRED-2-ARGS (? predicate) - (REGISTER (? operand-1 register-known-value)) - (? operand-2)) - (QUALIFIER (rtl:constant-flonum-test operand-1 flo:zero?)) - (list 'FLONUM-PRED-2-ARGS predicate operand-1 operand-2)) - -#| -;; These don't work as written. They are not simplified and are -;; therefore passed whole to the back end, and there is no way to -;; construct the graph at this level. - -;; acos (x) = atan ((sqrt (1 - x^2)) / x) - -(define-rule pre-cse-rewriting - (FLONUM-1-ARG FLONUM-ACOS (? operand) #f) - (rtl:make-flonum-2-args - 'FLONUM-ATAN2 - (rtl:make-flonum-1-arg - 'FLONUM-SQRT - (rtl:make-flonum-2-args - 'FLONUM-SUBTRACT - (rtl:make-object->float (rtl:make-constant 1.)) - (rtl:make-flonum-2-args 'FLONUM-MULTIPLY operand operand false) - false) - false) - operand - false)) - -;; asin (x) = atan (x / (sqrt (1 - x^2))) - -(define-rule pre-cse-rewriting - (FLONUM-1-ARG FLONUM-ASIN (? operand) #f) - (rtl:make-flonum-2-args - 'FLONUM-ATAN2 - operand - (rtl:make-flonum-1-arg - 'FLONUM-SQRT - (rtl:make-flonum-2-args - 'FLONUM-SUBTRACT - (rtl:make-object->float (rtl:make-constant 1.)) - (rtl:make-flonum-2-args 'FLONUM-MULTIPLY operand operand false) - false) - false) - false)) - -|# + (REGISTER (? operand1 register-known-value)) + (? operand2) + (? overflow?)) + (QUALIFIER (rtl:constant-flonum-test operand1 flo:zero?)) + (rtl:make-flonum-1-arg 'FLONUM-NEGATE operand2 overflow?)) (define (rtl:constant-flonum-test expression predicate) (and (rtl:object->float? expression) @@ -312,8 +242,8 @@ USA. (and (flo:flonum? n) (predicate n))))))) -(define (flo:one? value) - (flo:= value 1.)) +(define-integrable (flo:nonzero? value) + (not (flo:= value 0.))) ;;;; Indexed addressing modes @@ -336,19 +266,6 @@ USA. (MACHINE-CONSTANT (? value))) (QUALIFIER (and (rtl:float-offset-address? base) (rtl:simple-subexpressions? base))) - (if (zero? value) - (rtl:make-float-offset - (rtl:float-offset-address-base base) - (rtl:float-offset-address-offset base)) - (rtl:make-float-offset base (rtl:make-machine-constant value)))) - -(define-rule rewriting - (FLOAT-OFFSET (REGISTER (? base register-known-value)) - (MACHINE-CONSTANT (? value))) - (QUALIFIER - (and (rtl:offset-address? base) - (rtl:simple-subexpressions? base) - (rtl:machine-constant? (rtl:offset-address-offset base)))) (rtl:make-float-offset base (rtl:make-machine-constant value))) ;; This is here to avoid generating things like diff --git a/src/microcode/cmpauxmd/x86-64.m4 b/src/microcode/cmpauxmd/x86-64.m4 index d134362f5..52cf07995 100644 --- a/src/microcode/cmpauxmd/x86-64.m4 +++ b/src/microcode/cmpauxmd/x86-64.m4 @@ -170,7 +170,7 @@ ifdef(`SUPPRESS_LEADING_UNDERSCORE', ifdef(`WCC386R', `define(EFR,`$1_')', `define(EFR,`EPFR($1)')') - + define(hook_reference,`EFR(asm_$1)') define(define_data,`export_label(EVR($1))') @@ -211,6 +211,10 @@ ifdef(`DASM', `define(allocate_space,`EVR($1) db $2 dup (0)')', `define(allocate_space,`EVR($1): .space $2')') + +ifdef(`DASM', + `define(define_double,`EVR($1) dq $2')', + `define(define_double,`EVR($1): .double $2')') ifdef(`DASM', `define(HEX, `0$1H')', @@ -257,8 +261,8 @@ ifdef(`DASM', `define(LOF,`$1($2)')') ifdef(`DASM', - `define(DOF,`qword ptr $1[$2]')', - `define(DOF,`$1($2)')') + `define(QOF,`qword ptr $1[$2]')', + `define(QOF,`$1($2)')') ifdef(`DASM', `define(IDX,`dword ptr [$1] [$2]')', @@ -290,11 +294,21 @@ define(TC_COMPILED_ENTRY,40) # TAG doesn't work due to m4 stupidity, so define these magic # constants here. These are computed in terms of the parameters -# above. +# above, and ordered lexicographically. +define(IMM_FALSE, `IMM(HEX(0000000000000000))') +define(IMM_FIXNUM_0, `IMM(HEX(6800000000000000))') +define(IMM_FLONUM_0, `IMM(HEX(1800000000000000))') define(IMM_MANIFEST_NM_VECTOR_1, `IMM(HEX(9c00000000000001))') define(IMM_TRUE, `IMM(HEX(2000000000000000))') -define(IMM_FALSE, `IMM(HEX(0000000000000000))') + +# Flonums are represented by tagged pointers to the first of two +# quadwords (sixteen bytes) in memory, the first of which is a +# non-marked vector manifest of length 1, so that the GC will not +# trace the other one, which is an IEEE 754 double-precision format +# value. +define(FLONUM_DATA_OFFSET,8) +define(FLONUM_STORAGE_SIZE,16) define(REGBLOCK_VAL,16) define(REGBLOCK_COMPILER_TEMP,32) @@ -333,6 +347,10 @@ allocate_quadword(C_Stack_Pointer) define_data(C_Frame_Pointer) allocate_quadword(C_Frame_Pointer) + +declare_alignment(8) +define_double(flonum_zero,0.0) +define_double(flonum_one,1.0) DECLARE_CODE_SEGMENT() declare_alignment(2) @@ -421,12 +439,12 @@ ifdef(`WIN32', # Register block = %rsi ` OP(mov,q) TW(ABS(EVR(RegistersPtr)),regs)', ` OP(lea,q) TW(ABS(EVR(Registers)),regs)') OP(mov,q) TW(ABS(EVR(Free)),rfree) # Free pointer = %rdi - OP(mov,q) TW(DOF(REGBLOCK_VAL(),regs),REG(rax)) # Value/dynamic link + OP(mov,q) TW(QOF(REGBLOCK_VAL(),regs),REG(rax)) # Value/dynamic link OP(mov,q) TW(IMM(ADDRESS_MASK),rmask) # = %rbp OP(mov,q) TW(ABS(EVR(stack_pointer)),REG(rsp)) OP(mov,q) TW(REG(rax),REG(rcx)) # Preserve if used OP(and,q) TW(rmask,REG(rcx)) # Restore potential dynamic link - OP(mov,q) TW(REG(rcx),DOF(REGBLOCK_DLINK(),regs)) + OP(mov,q) TW(REG(rcx),QOF(REGBLOCK_DLINK(),regs)) jmp IJMP(REG(rdx)) IF_WIN32(` @@ -471,7 +489,7 @@ define_jump_indirection(interrupt_closure,18) define_jump_indirection(interrupt_continuation_2,3b) define_hook_label(interrupt_dlink) - OP(mov,q) TW(DOF(REGBLOCK_DLINK(),regs),REG(rdx)) + OP(mov,q) TW(QOF(REGBLOCK_DLINK(),regs),REG(rdx)) OP(mov,b) TW(IMM(HEX(19)),REG(al)) jmp scheme_to_interface_call @@ -551,21 +569,33 @@ asm_generic_fixnum_result: OP(and,q) TW(rmask,IND(REG(rsp))) OP(or,b) TW(IMM(TC_FIXNUM),REG(al)) OP(ror,q) TW(IMM(TC_LENGTH),REG(rax)) - OP(mov,q) TW(REG(rax),LOF(REGBLOCK_VAL(),regs)) + OP(mov,q) TW(REG(rax),QOF(REGBLOCK_VAL(),regs)) + ret + +declare_alignment(2) +asm_generic_flonum_result: + OP(and,q) TW(rmask,IND(REG(rsp))) + OP(mov,q) TW(IMM_MANIFEST_NM_VECTOR_1,REG(rcx)) + OP(mov,q) TW(REG(rcx),IND(rfree)) + movsd TW(REG(xmm0),QOF(FLONUM_DATA_OFFSET,rfree)) + OP(mov,q) TW(IMM_FLONUM_0,REG(rax)) + OP(or,q) TW(rfree,REG(rax)) + OP(lea,q) TW(QOF(FLONUM_STORAGE_SIZE,rfree),rfree) + OP(mov,q) TW(REG(rax),QOF(REGBLOCK_VAL(),regs)) ret declare_alignment(2) asm_generic_return_sharp_t: OP(and,q) TW(rmask,IND(REG(rsp))) OP(mov,q) TW(IMM_TRUE,REG(rax)) - OP(mov,q) TW(REG(rax),LOF(REGBLOCK_VAL(),regs)) + OP(mov,q) TW(REG(rax),QOF(REGBLOCK_VAL(),regs)) ret declare_alignment(2) asm_generic_return_sharp_f: OP(and,q) TW(rmask,IND(REG(rsp))) OP(mov,q) TW(IMM_FALSE,REG(rax)) - OP(mov,q) TW(REG(rax),LOF(REGBLOCK_VAL(),regs)) + OP(mov,q) TW(REG(rax),QOF(REGBLOCK_VAL(),regs)) ret define(define_unary_operation, @@ -575,8 +605,16 @@ define_hook_label(generic_$1) OP(mov,q) TW(REG(rdx),REG(rax)) OP(shr,q) TW(IMM(DATUM_LENGTH),REG(rax)) OP(cmp,b) TW(IMM(TC_FIXNUM),REG(al)) + je asm_generic_$1_fix + OP(cmp,b) TW(IMM(TC_FLONUM),REG(al)) jne asm_generic_$1_fail +asm_generic_$1_flo: + OP(and,q) TW(rmask,REG(rdx)) + movsd TW(QOF(FLONUM_DATA_OFFSET,REG(rdx)),REG(xmm0)) + $4 TW(ABS(flonum_one),REG(xmm0)) + jmp asm_generic_flonum_result + asm_generic_$1_fix: OP(mov,q) TW(REG(rdx),REG(rax)) OP(shl,q) TW(IMM(TC_LENGTH),REG(rax)) @@ -595,8 +633,17 @@ define_hook_label(generic_$1) OP(mov,q) TW(REG(rdx),REG(rax)) OP(shr,q) TW(IMM(DATUM_LENGTH),REG(rax)) OP(cmp,b) TW(IMM(TC_FIXNUM),REG(al)) + je asm_generic_$1_fix + OP(cmp,b) TW(IMM(TC_FLONUM),REG(al)) jne asm_generic_$1_fail +asm_generic_$1_flo: + OP(and,q) TW(rmask,REG(rdx)) + movsd TW(QOF(FLONUM_DATA_OFFSET,REG(rdx)),REG(xmm0)) + ucomisd TW(ABS(flonum_zero),REG(xmm0)) + $3 asm_generic_return_sharp_t + jmp asm_generic_return_sharp_f + asm_generic_$1_fix: OP(mov,q) TW(REG(rdx),REG(rax)) OP(shl,q) TW(IMM(TC_LENGTH),REG(rax)) @@ -610,7 +657,7 @@ asm_generic_$1_fail: jmp scheme_to_interface') define(define_binary_operation, -`define_binary_operation_with_fixup($1,$2,$3, +`define_binary_operation_with_fixup($1,$2,$3,$4, `OP(shl,q) TW(IMM(TC_LENGTH),REG(rax))')') define(define_binary_operation_with_fixup, @@ -625,12 +672,21 @@ define_hook_label(generic_$1) OP(cmp,b) TW(REG(al),REG(cl)) jne asm_generic_$1_fail OP(cmp,b) TW(IMM(TC_FIXNUM),REG(al)) + je asm_generic_$1_fix + OP(cmp,b) TW(IMM(TC_FLONUM),REG(al)) jne asm_generic_$1_fail +asm_generic_$1_flo: + OP(and,q) TW(rmask,REG(rdx)) + OP(and,q) TW(rmask,REG(rbx)) + movsd TW(QOF(FLONUM_DATA_OFFSET,REG(rdx)),REG(xmm0)) + $4 TW(QOF(FLONUM_DATA_OFFSET,REG(rbx)),REG(xmm0)) + jmp asm_generic_flonum_result + asm_generic_$1_fix: OP(mov,q) TW(REG(rdx),REG(rax)) OP(mov,q) TW(REG(rbx),REG(rcx)) - $4 # Set up rax. + $5 # Set up rax. OP(shl,q) TW(IMM(TC_LENGTH),REG(rcx)) OP($3,q) TW(REG(rcx),REG(rax)) # subq jno asm_generic_fixnum_result @@ -653,8 +709,18 @@ define_hook_label(generic_$1) OP(cmp,b) TW(REG(al),REG(cl)) jne asm_generic_$1_fail OP(cmp,b) TW(IMM(TC_FIXNUM),REG(al)) + je asm_generic_$1_fix + OP(cmp,b) TW(IMM(TC_FLONUM),REG(al)) jne asm_generic_$1_fail +asm_generic_$1_flo: + OP(and,q) TW(rmask,REG(rdx)) + OP(and,q) TW(rmask,REG(rbx)) + movsd TW(QOF(FLONUM_DATA_OFFSET,REG(rdx)),REG(xmm0)) + ucomisd TW(QOF(FLONUM_DATA_OFFSET,REG(rbx)),REG(xmm0)) + $3 asm_generic_return_sharp_t + jmp asm_generic_return_sharp_f + asm_generic_$1_fix: OP(shl,q) TW(IMM(TC_LENGTH),REG(rdx)) OP(shl,q) TW(IMM(TC_LENGTH),REG(rbx)) @@ -668,43 +734,122 @@ asm_generic_$1_fail: OP(mov,b) TW(IMM(HEX($2)),REG(al)) jmp scheme_to_interface') -#define_unary_operation(decrement,22,sub) -#define_unary_operation(increment,26,add) +# Division is hairy. I'm not sure whether this will do the right +# thing for infinities and NaNs. + +define_hook_label(generic_divide) + OP(pop,q) REG(rdx) + OP(pop,q) REG(rbx) + # We want to divide rdx by rbx. First put the numerator's tag + # in al and the denominator's tag in cl. + OP(mov,q) TW(REG(rdx),REG(rax)) + OP(mov,q) TW(REG(rbx),REG(rcx)) + OP(shr,q) TW(IMM(DATUM_LENGTH),REG(rax)) + OP(shr,q) TW(IMM(DATUM_LENGTH),REG(rcx)) + OP(cmp,b) TW(IMM(TC_FIXNUM),REG(al)) + je asm_generic_divide_fix + OP(cmp,b) TW(IMM(TC_FLONUM),REG(al)) + jne asm_generic_divide_fail + OP(cmp,b) TW(IMM(TC_FLONUM),REG(cl)) + je asm_generic_divide_flo_by_flo + OP(cmp,b) TW(IMM(TC_FIXNUM),REG(cl)) + jne asm_generic_divide_fail + +asm_generic_divide_flo_by_fix: + # Numerator (rdx) is a flonum, denominator (rbx) is a fixnum. + OP(mov,q) TW(REG(rbx),REG(rcx)) + OP(shl,q) TW(IMM(TC_LENGTH),REG(rcx)) + # Division by zero -- bail. + jz asm_generic_divide_fail + OP(and,q) TW(rmask,REG(rdx)) + OP(sar,q) TW(IMM(TC_LENGTH),REG(rcx)) + movsd TW(QOF(FLONUM_DATA_OFFSET,REG(rdx)),REG(xmm0)) + OP(cvtsi2sd,q) TW(REG(rcx),REG(xmm1)) + divsd TW(REG(xmm1),REG(xmm0)) + jmp asm_generic_flonum_result + +asm_generic_divide_fix: + OP(cmp,b) TW(IMM(TC_FLONUM),REG(cl)) + jne asm_generic_divide_fail + +asm_generic_divide_fix_by_flo: + # Numerator (rdx) is a fixnum, denominator (rbx) is a flonum. + OP(mov,q) TW(REG(rbx),REG(rax)) + OP(and,q) TW(rmask,REG(rax)) + OP(mov,q) TW(REG(rdx),REG(rcx)) + movsd TW(QOF(FLONUM_DATA_OFFSET,REG(rax)),REG(xmm1)) + OP(shl,q) TW(IMM(TC_LENGTH),REG(rcx)) + jz asm_generic_divide_zero_by_flo + OP(sar,q) TW(IMM(TC_LENGTH),REG(rcx)) + OP(cvtsi2sd,q) TW(REG(rcx),REG(xmm0)) + divsd TW(REG(xmm1),REG(xmm0)) + jmp asm_generic_flonum_result + +asm_generic_divide_zero_by_flo: + # rcx contains zero, representing a numerator exactly zero. + # Defer division of 0 by 0.0; otherwise, yield exactly zero. + OP(cvtsi2sd,q) TW(REG(rcx),REG(xmm0)) + ucomisd TW(REG(xmm1),REG(xmm0)) + je asm_generic_divide_fail + OP(and,q) TW(rmask,IND(REG(rsp))) + OP(mov,q) TW(IMM_FIXNUM_0,REG(rax)) + OP(mov,q) TW(REG(rax),QOF(REGBLOCK_VAL(),regs)) + ret + +asm_generic_divide_flo_by_flo: + # Numerator (rdx) and denominator (rbx) are both flonums. + OP(mov,q) TW(REG(rdx),REG(rax)) + OP(mov,q) TW(REG(rbx),REG(rcx)) + OP(and,q) TW(rmask,REG(rax)) + OP(and,q) TW(rmask,REG(rcx)) + movsd TW(QOF(FLONUM_DATA_OFFSET,REG(rax)),REG(xmm0)) + movsd TW(QOF(FLONUM_DATA_OFFSET,REG(rcx)),REG(xmm1)) + ucomisd TW(ABS(flonum_zero),REG(xmm1)) + je asm_generic_divide_fail + divsd TW(REG(xmm1),REG(xmm0)) + jmp asm_generic_flonum_result + +asm_generic_divide_fail: + OP(push,q) REG(rbx) + OP(push,q) REG(rdx) + OP(mov,b) TW(IMM(HEX(23)),REG(al)) + jmp scheme_to_interface + +define_unary_operation(decrement,22,sub,subsd) +define_unary_operation(increment,26,add,addsd) -#define_unary_predicate(negative,2a,jl) -#define_unary_predicate(positive,2c,jg) -#define_unary_predicate(zero,2d,je) +# define_unary_predicate(name,index,jcc) +# define_unary_predicate( $1, $2, $3) +define_unary_predicate(negative,2a,jl) +define_unary_predicate(positive,2c,jg) +define_unary_predicate(zero,2d,je) -# define_binary_operation(name,index,op) -# define_binary_operation( $1, $2,$3) -#define_binary_operation(add,2b,add) -#define_binary_operation(subtract,28,sub) +# define_binary_operation(name,index,fxop,flop) +# define_binary_operation( $1, $2, $3, $4) +define_binary_operation(add,2b,add,addsd) +define_binary_operation(subtract,28,sub,subsd) # No fixup -- leave it unshifted. -#define_binary_operation_with_fixup(multiply,29,imul) +define_binary_operation_with_fixup(multiply,29,imul,mulsd) # define_binary_predicate(name,index,jcc) -#define_binary_predicate(equal,24,je) -#define_binary_predicate(greater,25,jg) -#define_binary_predicate(less,27,jl) - -# At the moment, there is no advantage to using the above code, and in -# fact using it is a waste, since the compiler open-codes the fixnum -# case already. Later, the above code will also handle floating-point -# arguments, which the compiler does not open-code. - -define_jump_indirection(generic_decrement,22) -define_jump_indirection(generic_divide,23) -define_jump_indirection(generic_equal,24) -define_jump_indirection(generic_greater,25) -define_jump_indirection(generic_increment,26) -define_jump_indirection(generic_less,27) -define_jump_indirection(generic_subtract,28) -define_jump_indirection(generic_multiply,29) -define_jump_indirection(generic_negative,2a) -define_jump_indirection(generic_add,2b) -define_jump_indirection(generic_positive,2c) -define_jump_indirection(generic_zero,2d) +# define_binary_predicate( $1, $2, $3) +define_binary_predicate(equal,24,je) +define_binary_predicate(greater,25,jg) +define_binary_predicate(less,27,jl) + +#define_jump_indirection(generic_decrement,22) +#define_jump_indirection(generic_divide,23) +#define_jump_indirection(generic_equal,24) +#define_jump_indirection(generic_greater,25) +#define_jump_indirection(generic_increment,26) +#define_jump_indirection(generic_less,27) +#define_jump_indirection(generic_subtract,28) +#define_jump_indirection(generic_multiply,29) +#define_jump_indirection(generic_negative,2a) +#define_jump_indirection(generic_add,2b) +#define_jump_indirection(generic_positive,2c) +#define_jump_indirection(generic_zero,2d) define_jump_indirection(generic_quotient,37) define_jump_indirection(generic_remainder,38) define_jump_indirection(generic_modulo,39)