From: Taylor R Campbell Date: Sun, 25 Aug 2019 19:48:23 +0000 (+0000) Subject: Open-code floating-point arithmetic on aarch64. X-Git-Tag: mit-scheme-pucked-10.1.20~11^2~66^2 X-Git-Url: https://birchwood-abbey.net/git?a=commitdiff_plain;h=8659d5cb62a614993c064ddf11ba95b138dd0103;p=mit-scheme.git Open-code floating-point arithmetic on aarch64. Disabled by default for now due to limited testing. --- diff --git a/src/compiler/machines/aarch64/instr1.scm b/src/compiler/machines/aarch64/instr1.scm index 791c65c79..5a07dec7f 100644 --- a/src/compiler/machines/aarch64/instr1.scm +++ b/src/compiler/machines/aarch64/instr1.scm @@ -839,281 +839,6 @@ USA. (define-load/store-pair-instruction LDP 1) (define-load/store-pair-instruction STP 0)) -;;; C3.2.9 Load/Store scalar SIMD and floating-point - -(let-syntax - ((define-simd/fp-load/store-instruction - (sc-macro-transformer - (lambda (form environment) - environment - (receive (mnemonic load/store . extra) (apply values (cdr form)) - `(define-instruction ,mnemonic - ;; LDR immediate, SIMD&FP, pre/post-index with signed - ;; byte offset (C7.2.176) - ;; STR immediate, SIMD&FP, pre/post-index with signed - ;; byte offset (C7.2.315) - (((? sz load/store-simd/fp-size) - (? Vt vregister) - ((? pre/post load/store-pre/post-index) - (? Rn register-31=sp) - (& (? offset signed-9)))) - (BITS (2 sz LOAD/STORE-SIMD/FP-SIZE) - (3 #b111) - (1 1) ;SIMD/FP - (2 #b00) - (1 sz LOAD/STORE-SIMD/FP-OPCHI) - (1 ,load/store) ;opc[0] - (1 0) - (9 offset SIGNED) - (2 pre/post) - (5 Rn) - (5 Vt))) - ;; LDR immediate, SIMD&FP, zero offset (C7.2.176) - ;; STR immediate, SIMD&FP, zero offset (C7.2.315) - (((? sz load/store-simd/fp-size) - (? Vt vregister) - (? Rn register-31=sp)) - (BITS (2 sz LOAD/STORE-SIMD/FP-SIZE) - (3 #b111) - (1 1) ;SIMD/FP - (2 #b01) - (1 sz LOAD/STORE-SIMD/FP-OPCHI) - (1 ,load/store) ;opc[0] - (12 0) ;offset=0 - (5 Rn) - (5 Vt))) - ;; LDR immediate, SIMD&FP (B), unsigned byte offset (C7.2.176) - ;; STR immediate, SIMD&FP (B), unsigned byte offset (C7.2.315) - ((B (? Vt vregister) - (+ (? Rn register-31=sp) - (&U (? offset unsigned-12)))) - (BITS (2 #b00) ;size=B, 8-bit - (3 #b111) - (1 1) ;SIMD/FP - (2 #b01) - (1 0) ;opc[1] - (1 ,load/store) ;opc[0] - (12 offset) - (5 Rn) - (5 Vt))) - ;; LDR immediate, SIMD&FP (B), unsigned byte offset (C7.2.176) - ;; STR immediate, SIMD&FP (B), unsigned byte offset (C7.2.315) - ;; [same as above] - ((B (? Vt vregister) - (+ (? Rn register-31=sp) - (&U (* 1 (? offset unsigned-12))))) - (BITS (2 #b00) ;size=B, 8-bit - (3 #b111) - (1 1) ;SIMD/FP - (2 #b01) - (1 0) ;opc[1] - (1 ,load/store) ;opc[0] - (12 offset) - (5 Rn) - (5 Vt))) - - ;; LDR immediate, SIMD&FP (H), unsigned 2-byte offset (C7.2.176) - ;; STR immediate, SIMD&FP (H), unsigned 2-byte offset (C7.2.315) - ((H (? Vt vregister) - (+ (? Rn register-31=sp) - (&U (* 2 (? offset unsigned-12))))) - (BITS (2 #b01) ;size=H, 16-bit - (3 #b111) - (1 1) ;SIMD/FP - (2 #b01) - (1 0) ;opc[1] - (1 ,load/store) ;opc[0] - (12 offset) - (5 Rn) - (5 Vt))) - ;; LDR immediate, SIMD&FP (S), unsigned 4-byte offset (C7.2.176) - ;; STR immediate, SIMD&FP (S), unsigned 4-byte offset (C7.2.315) - ((S (? Vt vregister) - (+ (? Rn register-31=sp) - (&U (* 4 (? offset unsigned-12))))) - (BITS (2 #b10) ;size=S, 32-bit - (3 #b111) - (1 1) ;SIMD/FP - (2 #b01) - (1 0) ;opc[1] - (1 ,load/store) ;opc[0] - (12 offset) - (5 Rn) - (5 Vt))) - ;; LDR immediate, SIMD&FP (D), unsigned 8-byte offset (C7.2.176) - ;; STR immediate, SIMD&FP (D), unsigned 8-byte offset (C7.2.315) - ((D (? Vt vregister) - (+ (? Rn register-31=sp) - (&U (* 8 (? offset unsigned-12))))) - (BITS (2 #b11) ;size=D, 64-bit - (3 #b111) - (1 1) ;SIMD/FP - (2 #b01) - (1 0) ;opc[1] - (1 ,load/store) ;opc[0] - (12 offset) - (5 Rn) - (5 Vt))) - ;; LDR immediate, SIMD&FP (Q), unsigned 16-byte offset (C7.2.176) - ;; STR immediate, SIMD&FP (Q), unsigned 16-byte offset (C7.2.315) - ((Q (? Vt vregister) - (+ (? Rn register-31=sp) - (&U (* 16 (? offset unsigned-12))))) - (BITS (2 #b00) ;`size' - (3 #b111) - (1 1) ;SIMD/FP - (2 #b01) - (1 0) ;opc[1] - (1 ,load/store) ;opc[0] - (12 offset) - (5 Rn) - (5 Vt))) - - ;; LDR register, SIMD&FP, no extend (C7.2.178) - ;; STR register, SIMD&FP, no extend (C7.3.316) - (((? sz load/store-simd/fp-size) - (? Vt vregister) - (+ (? Rn register-31=sp) - (? Rm register-31=z))) - (BITS (2 sz LOAD/STORE-SIMD/FP-SIZE) - (3 #b111) - (1 1) ;SIMD/FP - (2 #b00) - (1 sz LOAD/STORE-SIMD/FP-OPCHI) ;opc[1] - (1 ,load/store) ;opc[0] - (1 1) - (5 Rm) - (3 #b011) ;option=LSL - (1 0) ;shift=0 - (2 #b10) - (5 Rn) - (5 Vt))) - ;; LDR register, SIMD&FP (B), (C7.2.178) - ;; STR register, SIMD&FP (B), (C7.2.316) - ((B (? Vt vregister) - (+ (? Rn register-31=sp) - ((? option load/store-extend-type) - (? Rm register-31=z) - (? S load/store8-extend-amount)))) - (BITS (2 #b00) ;size=B - (3 #b111) - (1 1) ;SIMD/FP - (2 #b00) - (1 0) ;opc[1] - (1 ,load/store) ;opc[0] - (1 1) - (5 Rm) - (3 option) - (1 S) - (2 #b10) - (5 Rn) - (5 Vt))) - ;; LDR register, SIMD&FP (H), (C7.2.178) - ;; STR register, SIMD&FP (H), (C7.2.316) - ((H (? Vt vregister) - (+ (? Rn register-31=sp) - ((? option load/store-extend-type) - (? Rm register-31=z) - (? S load/store16-extend-amount)))) - (BITS (2 #b01) ;size=H - (3 #b111) - (1 1) ;SIMD/FP - (2 #b00) - (1 0) ;opc[1] - (1 ,load/store) ;opc[0] - (1 1) - (5 Rm) - (3 option) - (1 S) - (2 #b10) - (5 Rn) - (5 Vt))) - ;; LDR register, SIMD&FP (S), (C7.2.178) - ;; STR register, SIMD&FP (S), (C7.2.316) - ((S (? Vt vregister) - (+ (? Rn register-31=sp) - ((? option load/store-extend-type) - (? Rm register-31=z) - (? S load/store32-extend-amount)))) - (BITS (2 #b10) ;size=H - (3 #b111) - (1 1) ;SIMD/FP - (2 #b00) - (1 0) ;opc[1] - (1 ,load/store) ;opc[0] - (1 1) - (5 Rm) - (3 option) - (1 S) - (2 #b10) - (5 Rn) - (5 Vt))) - - ;; LDR register, SIMD&FP (D), (C7.2.178) - ;; STR register, SIMD&FP (D), (C7.2.316) - ((D (? Vt vregister) - (+ (? Rn register-31=sp) - ((? option load/store-extend-type) - (? Rm register-31=z) - (? S load/store64-extend-amount)))) - (BITS (2 #b11) ;size=D - (3 #b111) - (1 1) ;SIMD/FP - (2 #b00) - (1 0) ;opc[1] - (1 ,load/store) ;opc[0] - (1 1) - (5 Rm) - (3 option) - (1 S) - (2 #b10) - (5 Rn) - (5 Vt))) - ;; LDR register, SIMD&FP (Q), (C7.2.178) - ;; STR register, SIMD&FP (Q), (C7.2.316) - ((Q (? Vt vregister) - (+ (? Rn register-31=sp) - ((? option load/store-extend-type) - (? Rm register-31=z) - (? S load/store128-extend-amount)))) - (BITS (2 #b00) ;size=Q - (3 #b111) - (1 1) ;SIMD/FP - (2 #b00) - (1 1) ;opc[1] - (1 ,load/store) ;opc[0] - (1 1) - (5 Rm) - (3 option) - (1 S) - (2 #b10) - (5 Rn) - (5 Vt))) - ,@extra)))))) - ;; The ARM assembler has `LDRB w13,...' for byte-sized load into - ;; general register 13, and `LDR b13,...' for byte-sized load into - ;; vector register 13. We use a separate mnemonic for general - ;; registers and vector registers. - (define-simd/fp-load/store-instruction STR.V 0) - (define-simd/fp-load/store-instruction LDR.V 1 - ;; LDR PC-relative literal, SIMD&FP (C7.2.177) - (((? opc ldr-literal-simd/fp-size) - (? Vt vregister) - (@PCO (? offset signed-19))) - (BITS (2 opc) - (3 #b011) - (1 1) ;SIMD/FP - (2 #b00) - (19 offset SIGNED) - (5 Vt))) - (((? size) (? Vt) (@PCR (? label) (? temp register<31))) - (VARIABLE-WIDTH offset `(/ (- ,label *PC*) 4) - ((#x-40000 #x3ffff) - (MACRO 32 (LDR.V ,size ,Vt (@PCO (* 4 ,offset))))) - ((#x-100000000 #xffffffff) - (MACRO 64 (ADRP-ADD X ,temp (@PCO ,(* 4 offset)))) ;(*) - (MACRO 32 (LDR.V X ,Vt ,temp))))))) - ;; Load register signed (i.e., sign-extended). This does not detect ;; the nonsensical (LDRS W W ...) operand combination -- it will ;; assemble into a possibly different instruction. diff --git a/src/compiler/machines/aarch64/instrf.scm b/src/compiler/machines/aarch64/instrf.scm index b00bbdd82..dc15dc093 100644 --- a/src/compiler/machines/aarch64/instrf.scm +++ b/src/compiler/machines/aarch64/instrf.scm @@ -29,6 +29,638 @@ USA. (declare (usual-integrations)) -;;; XXX not yet +;;; C3.2.9 Load/Store scalar SIMD and floating-point -#f +(let-syntax + ((define-simd/fp-load/store-instruction + (sc-macro-transformer + (lambda (form environment) + environment + (receive (mnemonic load/store . extra) (apply values (cdr form)) + `(define-instruction ,mnemonic + ;; LDR immediate, SIMD&FP, pre/post-index with signed + ;; byte offset (C7.2.176) + ;; STR immediate, SIMD&FP, pre/post-index with signed + ;; byte offset (C7.2.315) + (((? sz:opchi load/store-simd/fp-size) + (? Vt vregister) + ((? pre/post load/store-pre/post-index) + (? Rn register-31=sp) + (& (? offset signed-9)))) + (BITS (2 (fix:lsh sz:opchi -1)) + (3 #b111) + (1 1) ;SIMD/FP + (2 #b00) + (1 (fix:and 1 sz:opchi)) ;opc[1] + (1 ,load/store) ;opc[0] + (1 0) + (9 offset SIGNED) + (2 pre/post) + (5 Rn) + (5 Vt))) + ;; LDR immediate, SIMD&FP, zero offset (C7.2.176) + ;; STR immediate, SIMD&FP, zero offset (C7.2.315) + (((? sz:opchi load/store-simd/fp-size) + (? Vt vregister) + (? Rn register-31=sp)) + (BITS (2 (fix:lsh sz:opchi -1)) + (3 #b111) + (1 1) ;SIMD/FP + (2 #b01) + (1 (fix:and 1 sz:opchi)) ;opc[1] + (1 ,load/store) ;opc[0] + (12 0) ;offset=0 + (5 Rn) + (5 Vt))) + ;; LDR immediate, SIMD&FP (B), unsigned byte offset (C7.2.176) + ;; STR immediate, SIMD&FP (B), unsigned byte offset (C7.2.315) + ((B (? Vt vregister) + (+ (? Rn register-31=sp) + (&U (? offset unsigned-12)))) + (BITS (2 #b00) ;size=B, 8-bit + (3 #b111) + (1 1) ;SIMD/FP + (2 #b01) + (1 0) ;opc[1] + (1 ,load/store) ;opc[0] + (12 offset) + (5 Rn) + (5 Vt))) + ;; LDR immediate, SIMD&FP (B), unsigned byte offset (C7.2.176) + ;; STR immediate, SIMD&FP (B), unsigned byte offset (C7.2.315) + ;; [same as above] + ((B (? Vt vregister) + (+ (? Rn register-31=sp) + (&U (* 1 (? offset unsigned-12))))) + (BITS (2 #b00) ;size=B, 8-bit + (3 #b111) + (1 1) ;SIMD/FP + (2 #b01) + (1 0) ;opc[1] + (1 ,load/store) ;opc[0] + (12 offset) + (5 Rn) + (5 Vt))) + + ;; LDR immediate, SIMD&FP (H), unsigned 2-byte offset (C7.2.176) + ;; STR immediate, SIMD&FP (H), unsigned 2-byte offset (C7.2.315) + ((H (? Vt vregister) + (+ (? Rn register-31=sp) + (&U (* 2 (? offset unsigned-12))))) + (BITS (2 #b01) ;size=H, 16-bit + (3 #b111) + (1 1) ;SIMD/FP + (2 #b01) + (1 0) ;opc[1] + (1 ,load/store) ;opc[0] + (12 offset) + (5 Rn) + (5 Vt))) + ;; LDR immediate, SIMD&FP (S), unsigned 4-byte offset (C7.2.176) + ;; STR immediate, SIMD&FP (S), unsigned 4-byte offset (C7.2.315) + ((S (? Vt vregister) + (+ (? Rn register-31=sp) + (&U (* 4 (? offset unsigned-12))))) + (BITS (2 #b10) ;size=S, 32-bit + (3 #b111) + (1 1) ;SIMD/FP + (2 #b01) + (1 0) ;opc[1] + (1 ,load/store) ;opc[0] + (12 offset) + (5 Rn) + (5 Vt))) + ;; LDR immediate, SIMD&FP (D), unsigned 8-byte offset (C7.2.176) + ;; STR immediate, SIMD&FP (D), unsigned 8-byte offset (C7.2.315) + ((D (? Vt vregister) + (+ (? Rn register-31=sp) + (&U (* 8 (? offset unsigned-12))))) + (BITS (2 #b11) ;size=D, 64-bit + (3 #b111) + (1 1) ;SIMD/FP + (2 #b01) + (1 0) ;opc[1] + (1 ,load/store) ;opc[0] + (12 offset) + (5 Rn) + (5 Vt))) + ;; LDR immediate, SIMD&FP (Q), unsigned 16-byte offset (C7.2.176) + ;; STR immediate, SIMD&FP (Q), unsigned 16-byte offset (C7.2.315) + ((Q (? Vt vregister) + (+ (? Rn register-31=sp) + (&U (* 16 (? offset unsigned-12))))) + (BITS (2 #b00) ;`size' + (3 #b111) + (1 1) ;SIMD/FP + (2 #b01) + (1 0) ;opc[1] + (1 ,load/store) ;opc[0] + (12 offset) + (5 Rn) + (5 Vt))) + + ;; LDR register, SIMD&FP, no extend (C7.2.178) + ;; STR register, SIMD&FP, no extend (C7.3.316) + (((? sz:opchi load/store-simd/fp-size) + (? Vt vregister) + (+ (? Rn register-31=sp) + (? Rm register-31=z))) + (BITS (2 (fix:lsh sz:opchi -1)) + (3 #b111) + (1 1) ;SIMD/FP + (2 #b00) + (1 (fix:and 1 sz:opchi)) ;opc[1] + (1 ,load/store) ;opc[0] + (1 1) + (5 Rm) + (3 #b011) ;option=LSL + (1 0) ;shift=0 + (2 #b10) + (5 Rn) + (5 Vt))) + ;; LDR register, SIMD&FP (B), (C7.2.178) + ;; STR register, SIMD&FP (B), (C7.2.316) + ((B (? Vt vregister) + (+ (? Rn register-31=sp) + ((? option load/store-extend-type) + (? Rm register-31=z) + (? S load/store8-extend-amount)))) + (BITS (2 #b00) ;size=B + (3 #b111) + (1 1) ;SIMD/FP + (2 #b00) + (1 0) ;opc[1] + (1 ,load/store) ;opc[0] + (1 1) + (5 Rm) + (3 option) + (1 S) + (2 #b10) + (5 Rn) + (5 Vt))) + ;; LDR register, SIMD&FP (H), (C7.2.178) + ;; STR register, SIMD&FP (H), (C7.2.316) + ((H (? Vt vregister) + (+ (? Rn register-31=sp) + ((? option load/store-extend-type) + (? Rm register-31=z) + (? S load/store16-extend-amount)))) + (BITS (2 #b01) ;size=H + (3 #b111) + (1 1) ;SIMD/FP + (2 #b00) + (1 0) ;opc[1] + (1 ,load/store) ;opc[0] + (1 1) + (5 Rm) + (3 option) + (1 S) + (2 #b10) + (5 Rn) + (5 Vt))) + ;; LDR register, SIMD&FP (S), (C7.2.178) + ;; STR register, SIMD&FP (S), (C7.2.316) + ((S (? Vt vregister) + (+ (? Rn register-31=sp) + ((? option load/store-extend-type) + (? Rm register-31=z) + (? S load/store32-extend-amount)))) + (BITS (2 #b10) ;size=H + (3 #b111) + (1 1) ;SIMD/FP + (2 #b00) + (1 0) ;opc[1] + (1 ,load/store) ;opc[0] + (1 1) + (5 Rm) + (3 option) + (1 S) + (2 #b10) + (5 Rn) + (5 Vt))) + + ;; LDR register, SIMD&FP (D), (C7.2.178) + ;; STR register, SIMD&FP (D), (C7.2.316) + ((D (? Vt vregister) + (+ (? Rn register-31=sp) + ((? option load/store-extend-type) + (? Rm register-31=z) + (? S load/store64-extend-amount)))) + (BITS (2 #b11) ;size=D + (3 #b111) + (1 1) ;SIMD/FP + (2 #b00) + (1 0) ;opc[1] + (1 ,load/store) ;opc[0] + (1 1) + (5 Rm) + (3 option) + (1 S) + (2 #b10) + (5 Rn) + (5 Vt))) + ;; LDR register, SIMD&FP (Q), (C7.2.178) + ;; STR register, SIMD&FP (Q), (C7.2.316) + ((Q (? Vt vregister) + (+ (? Rn register-31=sp) + ((? option load/store-extend-type) + (? Rm register-31=z) + (? S load/store128-extend-amount)))) + (BITS (2 #b00) ;size=Q + (3 #b111) + (1 1) ;SIMD/FP + (2 #b00) + (1 1) ;opc[1] + (1 ,load/store) ;opc[0] + (1 1) + (5 Rm) + (3 option) + (1 S) + (2 #b10) + (5 Rn) + (5 Vt))) + ,@extra)))))) + ;; The ARM assembler has `LDRB w13,...' for byte-sized load into + ;; general register 13, and `LDR b13,...' for byte-sized load into + ;; vector register 13. We use a separate mnemonic for general + ;; registers and vector registers. + (define-simd/fp-load/store-instruction STR.V 0) + (define-simd/fp-load/store-instruction LDR.V 1 + ;; LDR PC-relative literal, SIMD&FP (C7.2.177) + (((? opc ldr-literal-simd/fp-size) + (? Vt vregister) + (@PCO (? offset signed-19))) + (BITS (2 opc) + (3 #b011) + (1 1) ;SIMD/FP + (2 #b00) + (19 offset SIGNED) + (5 Vt))) + (((? size) (? Vt) (@PCR (? label) (? temp register<31))) + (VARIABLE-WIDTH offset `(/ (- ,label *PC*) 4) + ((#x-40000 #x3ffff) + (MACRO 32 (LDR.V ,size ,Vt (@PCO (* 4 ,offset))))) + ((#x-100000000 #xffffffff) + (MACRO 64 (ADRP-ADD X ,temp (@PCO ,(* 4 offset)))) ;(*) + (MACRO 32 (LDR.V X ,Vt ,temp))))))) + +(let-syntax + ((define-aes-instruction + (sc-macro-transformer + (lambda (form environment) + environment + (receive (mnemonic opcode) (apply values (cdr form)) + `(define-instruction ,mnemonic + (((? Rd vregister) (? Rn vregister)) + (BITS (8 #b01001110) + (2 #b00) ;size + (5 #b10100) + (5 ,opcode) + (2 #b10) + (5 Rn) + (5 Rd))))))))) + (define-aes-instruction AESE #b00100) + (define-aes-instruction AESD #b00101) + (define-aes-instruction AESMC #b00110) + (define-aes-instruction AESIMC #b00111)) + +(let-syntax + ((define-sha-3reg-instruction + (sc-macro-transformer + (lambda (form environment) + environment + (receive (mnemonic opcode) (apply values (cdr form)) + `(define-instruction ,mnemonic + (((? Rd vregister) (? Rn vregister) (? Rm vregister)) + (BITS (8 #b01011110) + (2 #b00) ;size + (1 0) + (5 Rm) + (1 0) + (3 ,opcode) + (2 #b00) + (5 Rn) + (5 Rd))))))))) + (define-sha-3reg-instruction SHA1C #b000) + (define-sha-3reg-instruction SHA1P #b001) + (define-sha-3reg-instruction SHA1M #b010) + (define-sha-3reg-instruction SHA1SU0 #b011) + (define-sha-3reg-instruction SHA256H #b100) + (define-sha-3reg-instruction SHA256H2 #b101) + (define-sha-3reg-instruction SHA256SU1 #b110)) + +(let-syntax + ((define-sha-2reg-instruction + (sc-macro-transformer + (lambda (form environment) + environment + (receive (mnemonic opcode) (apply values (cdr form)) + `(define-instruction ,mnemonic + (((? Rd vregister) (? Rn vregister)) + (BITS (8 #b01011110) + (2 #b00) ;size + (5 #b10100) + (5 ,opcode) + (2 #b10) + (5 Rn) + (5 Rd))))))))) + (define-sha-2reg-instruction SHA1H #b00000) + (define-sha-2reg-instruction SHA1SU1 #b00001) + (define-sha-2reg-instruction SHA256SU0 #b00010)) + +(let-syntax + ((define-fp-unary-instruction + (sc-macro-transformer + (lambda (form environment) + environment + (receive (mnemonic opcode-s U a opcode-v) (apply values (cdr form)) + `(define-instruction ,mnemonic + ;; scalar + (((? type fp-scalar-size) (? Rd vregister) (? Rn vregister)) + (BITS (1 0) ;M=0 + (1 0) ;? + (1 0) ;S=0 + (5 #b11110) + (2 type) + (1 1) + (6 ,opcode-s) + (5 #b10000) + (5 Rn) + (5 Rd))) + ;; vector + (((? sz:Q fp-vector-size) + (? Rd vregister) + (? Rn vregister)) + (BITS (1 0) ;M=0 + (1 (fix:and sz:Q 1)) ;Q + (1 ,U) + (5 #b01110) + (1 ,a) + (6 (fix:lsh sz:Q -1)) ;sz + (5 ,opcode-v) + (2 #b10) + (5 Rn) + (5 Rd))))))))) + ;; arithmetic + (define-fp-unary-instruction FABS #b000001 0 1 #b01111) + (define-fp-unary-instruction FNEG #b000010 1 1 #b01111) + (define-fp-unary-instruction FSQRT #b000011 1 1 #b11111) + ;; rounding + (define-fp-unary-instruction FRINTN #b001000 0 0 #b11000) + (define-fp-unary-instruction FRINTP #b001001 0 1 #b11000) + (define-fp-unary-instruction FRINTM #b001010 0 0 #b11001) + (define-fp-unary-instruction FRINTZ #b001011 0 1 #b11001) + (define-fp-unary-instruction FRINTA #b001100 1 0 #b11000) + (define-fp-unary-instruction FRINTX #b001110 1 0 #b11001) + (define-fp-unary-instruction FRINTI #b001111 1 1 #b11001)) + +(let-syntax + ((define-fp-binary-instruction + (sc-macro-transformer + (lambda (form environment) + environment + (receive (mnemonic opcode-s U a S opcode-v) (apply values (cdr form)) + `(define-instruction ,mnemonic + (((? type fp-scalar-size) + (? Rd vregister) + (? Rn vregister) + (? Rm vregister)) + (BITS (1 0) ;M + (1 0) + (1 0) ;S + (5 #b11110) + (2 type) + (1 1) + (5 Rm) + (4 ,opcode-s) + (2 #b10) + (5 Rn) + (5 Rd))) + (((? Q fp16-vector-size) + (? Rd vregister) + (? Rn vregister) + (? Rm vregister)) + (BITS (1 0) + (1 Q) + (1 ,U) + (5 #b01110) + (1 ,a) + (1 0) + (1 1) + (5 Rm) + (2 #b00) + (4 ,opcode-v) + (5 Rn) + (5 Rd))) + (((? sz:Q fp32/64-vector-size) + (? Rd vregister) + (? Rn vregister) + (? Rm vregister)) + (BITS (1 0) + (1 (fix:and sz:Q 1)) ;Q + (1 ,U) + (5 #b01110) + (1 ,S) + (1 (fix:lsh sz:Q -1)) ;sz + (1 1) + (5 Rm) + (2 #b11) + (4 ,opcode-v) + (5 Rn) + (5 Rd))))))))) + (define-fp-binary-instruction FMUL #b0000 1 0 0 #b011) + (define-fp-binary-instruction FDIV #b0001 1 0 0 #b111) + (define-fp-binary-instruction FADD #b0010 0 0 0 #b010) + (define-fp-binary-instruction FSUB #b0011 0 1 1 #b010) + (define-fp-binary-instruction FMAX #b0100 0 0 0 #b110) + (define-fp-binary-instruction FMIN #b0101 0 1 1 #b110) + (define-fp-binary-instruction FMAXNM #b0110 0 0 0 #b000) + (define-fp-binary-instruction FMINNM #b0111 0 1 1 #b000)) + +(let-syntax + ((define-fp-compare-instruction + (sc-macro-transformer + (lambda (form environment) + environment + (receive (mnemonic opc-hi) (apply values (cdr form)) + `(define-instruction ,mnemonic + (((? type fp-scalar-size) (? Rn vregister) Z) + (BITS (1 0) ;M + (1 0) + (1 0) ;S + (5 #b11110) + (2 type) + (1 1) + (5 #b0000) ;Rm = #0.0 + (2 #b00) + (4 #b1000) + (5 Rn) + (1 ,opc-hi) + (1 1) ;zero variant + (3 #b000))) + (((? type fp-scalar-size) (? Rn vregister) (? Rm vregister)) + (BITS (1 0) ;M + (1 0) + (1 0) ;S + (5 #b11110) + (2 type) + (1 1) + (5 Rm) + (2 0) + (4 #b1000) + (5 Rn) + (1 ,opc-hi) + (1 0) ;register variant + (3 #b000))))))))) + ;; quiet compare + (define-fp-compare-instruction FCMP 0) + ;; compare and raise exceptions + (define-fp-compare-instruction FCMPE 1)) + +(let-syntax + ((define-fp-conditional-compare-instruction + (sc-macro-transformer + (lambda (form environment) + environment + (receive (mnemonic op) (apply values (cdr form)) + `(define-instruction ,mnemonic + (((? type fp-scalar-size) + (? Rn vregister) + (? Rm vregister) + (&U (? nzcv nzcv-value)) + (? cc branch-condition)) + (BITS (1 0) ;M + (1 0) + (1 0) ;S + (5 #b11110) + (2 type) + (1 1) + (5 Rm) + (4 cc) + (2 #b01) + (5 Rn) + (1 ,op) + (4 nzcv))))))))) + ;; quiet compare + (define-fp-conditional-compare-instruction FCCMP 0) + ;; compare and raise exceptions + (define-fp-conditional-compare-instruction FCCMPE 1)) + +(let-syntax + ((define-simd-byte-instruction + (sc-macro-transformer + (lambda (form environment) + environment + (receive (mnemonic U opc2 opcode) (apply values (cdr form)) + `(define-instruction ,mnemonic + (((? Q simd-byte-vector-size) + (? Rd vregister) + (? Rn vregister) + (? Rm vregister)) + (BITS (1 0) + (1 Q) + (1 ,U) + (5 #b01110) + (2 ,opc2) + (1 1) + (5 Rm) + (5 ,opcode) + (1 1) + (5 Rn) + (5 Rd))))))))) + (define-simd-byte-instruction BSL 1 #b01 #b00011) + (define-simd-byte-instruction BIT 1 #b10 #b00011)) + +(define-instruction FMOV + ;; vector, immediate (C7.2.122) + (((? U:op:Q fmov-vector-immediate-size) + (? Rd vregister) + (&U (? abc fmov-abc) (? defgh fmov-defgh))) + (BITS (1 0) + (1 (fix:and 1 U:op:Q)) + (1 (fix:and 1 (fix:lsh U:op:Q -2))) + (5 #b01111) + (5 #b00000) + (3 abc) + (4 #b1111) + (1 (fix:and 1 (fix:lsh U:op:Q -1))) + (1 1) + (5 defgh) + (5 Rd))) + (((? U:op:Q fmov-vector-immediate-size) + (? Rd vregister) + (&. (? abcdefgh fmov-abcdefgh))) + (BITS (1 0) + (1 (fix:and 1 U:op:Q)) + (1 (fix:and 1 (fix:lsh U:op:Q -2))) + (5 #b01111) + (5 #b00000) + (3 (fix:lsh abcdefgh -5)) + (4 #b1111) + (1 (fix:and 1 (fix:lsh U:op:Q -1))) + (1 1) + (5 (fix:and #b11111 abcdefgh)) + (5 Rd))) + ;; register (C7.2.123) + (((? type fp-scalar-size) (? Rd vregister) (? Rn vregister)) + (BITS (1 0) + (1 0) + (1 0) + (5 #b11110) + (2 type) + (1 1) + (4 #b0000) + (2 #b00) ;opc + (5 #b10000) + (5 Rn) + (5 Rd))) + ;; general (C7.2.124) + (((? sf sf-size) (? Rd register-31=z) + (? rmode:type fmov-general-size) (? Rn vregister)) + (BITS (1 sf) + (1 0) + (1 0) + (5 #b11110) + (2 (fix:and #b11 rmode:type)) + (1 1) + (2 (fix:lsh rmode:type -2)) + (3 #b110) ;opcode + (6 #b000000) + (5 Rn) + (5 Rd))) + (((? rmode:type fmov-general-size) (? Rd vregister) + (? sf sf-size) (? Rn register-31=z)) + (BITS (1 sf) + (1 0) + (1 0) + (5 #b11110) + (2 (fix:and #b11 rmode:type)) + (1 1) + (2 (fix:lsh rmode:type -2)) + (3 #b111) ;opcode + (6 #b000000) + (5 Rn) + (5 Rd))) + ;; scalar, immediate (C7.2.125) + (((? type fp-scalar-size) (? Rd vregister) (&U (? imm8 unsigned-8))) + (BITS (1 0) + (1 0) + (1 0) + (5 #b11110) + (2 type) + (1 1) + (8 imm8) + (3 #b100) + (5 #b00000) + (5 Rd))) + (((? type fp-scalar-size) (? Rd vregister) (&. (? imm8 fp-binary8))) + (BITS (1 0) + (1 0) + (1 0) + (5 #b11110) + (2 type) + (1 1) + (8 imm8) + (3 #b100) + (5 #b00000) + (5 Rd)))) diff --git a/src/compiler/machines/aarch64/insutl.scm b/src/compiler/machines/aarch64/insutl.scm index ad91370b9..9307fc270 100644 --- a/src/compiler/machines/aarch64/insutl.scm +++ b/src/compiler/machines/aarch64/insutl.scm @@ -98,6 +98,11 @@ USA. (<= 0 x #x7f) x)) +(define (unsigned-8 x) + (and (exact-nonnegative-integer? x) + (<= 0 x #xff) + x)) + (define (unsigned-12 x) (and (exact-nonnegative-integer? x) (<= 0 x #xfff) @@ -154,6 +159,11 @@ USA. ((NV) 'AL) (else #f))) +(define (nzcv-value x) + (and (exact-integer? x) + (<= 0 x #xf) + x)) + (define (sf-size size) (case size ((W) 0) @@ -221,6 +231,13 @@ USA. ((X) #b01) (else #f))) +(define (ldr-literal-simd/fp-size t) + (case t + ((S) #b00) + ((D) #b01) + ((Q) #b10) + (else #f))) + (define (load/store-extend-type t) (case t ((UTXW) #b010) @@ -429,3 +446,103 @@ USA. ((H) #b01) ((W) #b10) (else #f))) + +(define (fp-scalar-size t) + (case t + ((H) #b11) + ((S) #b00) + ((D) #b01) + (else #f))) + +(define (fp-vector-size t) + ;; Low bit is Q, next five bits are opcode, high bit is sz. + (case t + ((H4) #b1111000) + ((H8) #b1111001) + ((S2) #b0100000) + ((S4) #b0100001) + ((D2) #b1100001) + (else #f))) + +(define (fp16-vector-size t) + ;; Q + (case t + ((H4) 0) + ((H8) 1) + (else #f))) + +(define (fp32/64-vector-size t) + (case t + ((S2) #b00) + ((S4) #b01) + ((D2) #b11) + (else #f))) + +(define (simd-byte-vector-size t) + ;; Q + (case t + ((B8) 0) + ((B16) 1) + (else #f))) + +(define (simd-integer-vector-size t) + ;; Low bit is Q, high two bits are sz. + (case t + ((B8) #b000) + ((B16) #b001) + ((H4) #b010) + ((H8) #b011) + ((S2) #b100) + ((S4) #b101) + (else #f))) + +(define (simd-double-integer-vector-size t) + ;; Low bit is Q, high two bits are sz. + (case t + ((B8) #b000) + ((B16) #b001) + ((H4) #b010) + ((H8) #b011) + ((S2) #b100) + ((S4) #b101) + ((D2) #b111) + (else #f))) + +(define (fmov-vector-immediate-size t) + ;; #b{U}{op}{Q} + (case t + ((H4) #b100) + ((H8) #b101) + ((S2) #b000) + ((S4) #b001) + ((D2) #b010) + (else #f))) + +(define (fmov-general-size t) + ;; Low two bits are type; high two bits are rmode. + (case t + ((H) #b0011) + ((S) #b0000) + ((D) #b0001) + ((D1) #b0110) + (else #f))) + +(define (fmov-abc x) + (and (exact-nonnegative-integer? x) + (<= 0 x #b111) + x)) + +(define (fmov-defgh x) + (and (exact-nonnegative-integer? x) + (<= 0 x #b11111) + x)) + +;; XXX fixed-point +(define (fmov-abcdefgh x) + x + #f) + +;; XXX 8-bit floating-point +(define (fp-binary8 x) + x + #f) diff --git a/src/compiler/machines/aarch64/lapgen.scm b/src/compiler/machines/aarch64/lapgen.scm index ae9ad5595..9b5cff4db 100644 --- a/src/compiler/machines/aarch64/lapgen.scm +++ b/src/compiler/machines/aarch64/lapgen.scm @@ -78,6 +78,17 @@ USA. ((register-value-class=word? register) 'GENERAL) ((register-value-class=float? register) 'FLOAT) (else (error "Unknown register type:" register)))) + +;;; Convert between RTL repersentation of machine (not pseudo) +;;; registers, and LAP syntax for register numbers. + +(define (float-register->fpr register) + (assert (<= v0 register)) + (assert (<= register v31)) + (- register 32)) + +(define (fpr->float-register fpr) + (+ fpr 32)) ;;; References, for machine register allocator. Not used by LAP ;;; syntax. @@ -137,7 +148,9 @@ USA. (LAP (ADD X ,target ,source (&U 0))) (LAP (ORR X ,target Z ,source)))) ((FLOAT) - (LAP (FMOV D ,target ,source))) + (let ((source (float-register->fpr source)) + (target (float-register->fpr target))) + (LAP (FMOV D ,target ,source)))) (else (error "Unknown register type:" source target))))) @@ -162,13 +175,13 @@ USA. (define (load-register register ea) (case (register-type register) ((GENERAL) (LAP (LDR X ,register ,ea))) - ((FLOAT) (LAP (LDR D ,register ,ea))) + ((FLOAT) (LAP (LDR D ,(float-register->fpr register) ,ea))) (else (error "Unknown register type:" register)))) (define (store-register register ea) (case (register-type register) ((GENERAL) (LAP (STR X ,register ,ea))) - ((FLOAT) (LAP (STR D ,register ,ea))) + ((FLOAT) (LAP (STR D ,(float-register->fpr register) ,ea))) (else (error "Unknown register type:" register)))) ;;; Utilities @@ -311,6 +324,10 @@ USA. (LAP ,@(load-pc-relative-address target label) (LDR X ,target ,target))) +(define (load-pc-relative-float target temp label) + (LAP ,@(load-pc-relative-address temp label) + (LDR.V D ,target ,temp))) + (define (load-tagged-immediate target type datum) (load-unsigned-immediate target (make-non-pointer-literal type datum))) @@ -435,6 +452,28 @@ USA. (define (object->address target source) (object->datum target source)) +;;;; Data labels + +(define (allocate-data-label datum block-name offset alignment data) + (let* ((block + (or (find-extra-code-block block-name) + (let ((block + (declare-extra-code-block! block-name 'ANYWHERE '()))) + (add-extra-code! + block + (LAP (PADDING ,offset ,alignment ,padding-string))) + block))) + (pairs (extra-code-block/xtra block)) + (place (assoc datum pairs))) + (if place + (cdr place) + (let ((label (generate-label block-name))) + (set-extra-code-block/xtra! + block + (cons (cons datum label) pairs)) + (add-extra-code! block (LAP (LABEL ,label) ,@data)) + label)))) + ;;;; Linearizer interface (define (lap:make-label-statement label) diff --git a/src/compiler/machines/aarch64/machine.scm b/src/compiler/machines/aarch64/machine.scm index a9f52876e..bab1e107c 100644 --- a/src/compiler/machines/aarch64/machine.scm +++ b/src/compiler/machines/aarch64/machine.scm @@ -503,6 +503,7 @@ USA. ;; XXX doesn't belong here (define-integrable type-code:fixnum #x1a) +(define-integrable type-code:flonum #x06) (define-integrable type-code:manifest-closure #x0d) (define-integrable type-code:manifest-vector #x00) @@ -519,7 +520,6 @@ USA. (object-datum object)) (define compiler:open-code-floating-point-arithmetic? - ;; XXX not yet #f) (define compiler:primitives-with-no-open-coding @@ -529,30 +529,17 @@ USA. DIVIDE-FIXNUM ;nobody open-codes this FIXNUM-LSH ;open-coding not useful without constant operands FLOATING-VECTOR-CONS;nobody open-codes this - FLOATING-VECTOR-REF ;no flonum arithmetic yet - FLOATING-VECTOR-SET!;no flonum arithmetic yet - FLONUM-ABS ;no flonum arithmetic yet FLONUM-ACOS ;not useful to open-code hairy math - FLONUM-ADD ;no flonum arithmetic yet FLONUM-ASIN ;not useful to open-code hairy math FLONUM-ATAN ;not useful to open-code hairy math FLONUM-ATAN2 ;not useful to open-code hairy math - FLONUM-CEILING ;no flonum arithmetic yet FLONUM-COS ;not useful to open-code hairy math - FLONUM-DIVIDE ;no flonum arithmetic yet FLONUM-EXP ;not useful to open-code hairy math FLONUM-EXPM1 ;not useful to open-code hairy math - FLONUM-FLOOR ;no flonum arithmetic yet FLONUM-LOG ;not useful to open-code hairy math FLONUM-LOG1P ;not useful to open-code hairy math - FLONUM-MULTIPLY ;no flonum arithmetic yet - FLONUM-NEGATE ;no flonum arithmetic yet - FLONUM-ROUND ;no flonum arithmetic yet FLONUM-SIN ;not useful to open-code hairy math - FLONUM-SQRT ;no flonum arithmetic yet - FLONUM-SUBTRACT ;no flonum arithmetic yet FLONUM-TAN ;not useful to open-code hairy math - FLONUM-TRUNCATE ;no flonum arithmetic yet GCD-FIXNUM ;nobody open-codes this VECTOR-CONS ;nobody open-codes this )) diff --git a/src/compiler/machines/aarch64/rules2.scm b/src/compiler/machines/aarch64/rules2.scm index bf455ead8..6107f2006 100644 --- a/src/compiler/machines/aarch64/rules2.scm +++ b/src/compiler/machines/aarch64/rules2.scm @@ -115,6 +115,20 @@ USA. (lambda (label) (LAP (CBZ X ,source (@PCR ,label ,regnum:scratch-0)))) (lambda (label) (LAP (CBNZ X ,source (@PCR ,label ,regnum:scratch-0)))))) +(define (set-test-bit-set-branches! source bit) + (set-current-branches! + (lambda (label) + (LAP (TBNZ X ,source (&U ,bit) (@PCR ,label ,regnum:scratch-0)))) + (lambda (label) + (LAP (TBZ X ,source (&U ,bit) (@PCR ,label ,regnum:scratch-0)))))) + +(define (set-test-bit-clear-branches! source bit) + (set-current-branches! + (lambda (label) + (LAP (TBZ X ,source (&U ,bit) (@PCR ,label ,regnum:scratch-0)))) + (lambda (label) + (LAP (TBNZ X ,source (&U ,bit) (@PCR ,label ,regnum:scratch-0)))))) + (define (set-condition-branches! cc ~cc) (set-current-branches! (lambda (label) (LAP (B. ,cc (@PCR ,label ,regnum:scratch-0)))) diff --git a/src/compiler/machines/aarch64/rulflo.scm b/src/compiler/machines/aarch64/rulflo.scm index a73d1b7c3..6924bd56d 100644 --- a/src/compiler/machines/aarch64/rulflo.scm +++ b/src/compiler/machines/aarch64/rulflo.scm @@ -29,6 +29,379 @@ USA. (declare (usual-integrations)) -;;; XXX not yet +;; register: RTL representation of machine (not pseudo) register +;; fpr: LAP representation of SIMD/FP register +;; +;; (not needed for general because the numbers coincide) -#f +(define (float-source-register! register) + (assert (eq? 'FLOAT (register-type register))) + (load-alias-register! register 'FLOAT)) + +(define (float-source-fpr! register) + (float-register->fpr (float-source-register! register))) + +(define (float-move-to-temporary-register! register) + (assert (eq? 'FLOAT (register-type register))) + (move-to-temporary-register! register 'FLOAT)) + +(define (float-move-to-temporary-fpr! register) + (float-register->fpr (float-move-to-temporary-register! register))) + +(define (float-target-register! register) + (assert (eq? 'FLOAT (register-type register))) + (delete-dead-registers!) + (allocate-alias-register! register 'FLOAT)) + +(define (float-target-fpr! register) + (float-register->fpr (float-target-register! register))) + +(define (float-temporary-register!) + (allocate-temporary-register! 'FLOAT)) + +(define (float-temporary-fpr!) + (float-register->fpr (float-temporary-register!))) + +(define (allocate-binary64-label bit-string) + (allocate-data-label bit-string 'BINARY64 0 8 + (LAP (DATA 64 U ,(bit-string->unsigned-integer bit-string))))) + +(define binary64-bits:+inf + (unsigned-integer->bit-string 64 #x7ff0000000000000)) + +(define binary64-bits:smallest-normal + (unsigned-integer->bit-string 64 #x0010000000000000)) + +;; Layout: +;; +;; 0 manifest-nm-vector [length = 1 word] +;; 8 double (binary64) float +;; 16 + +;;;; Flonum Loads & Stores + +;; Float load/store. + +(define-rule statement + (ASSIGN (REGISTER (? target)) (OBJECT->FLOAT (REGISTER (? source)))) + (let* ((source (standard-source! source)) + (target (float-target-fpr! target)) + (temp regnum:scratch-0)) + (LAP ,@(object->address temp source) + (LDR.V D ,target (+ ,temp (&U (* 8 1))))))) + +(define-rule statement + (ASSIGN (REGISTER (? target)) (FLOAT->OBJECT (REGISTER (? source)))) + (let* ((source (float-source-fpr! source)) + (target (standard-target! target)) + (temp regnum:scratch-0) + (Free regnum:free-pointer)) + (LAP + ;; target := Free + ,@(register->register-transfer Free target) + ;; temp := manifest-nm-vector, length = 1 word + ,@(load-tagged-immediate temp (ucode-type MANIFEST-NM-VECTOR) 1) + ;; *Free++ := temp + (STR X ,temp (POST+ ,Free (& 8))) + ;; *Free++ := source + (STR.V D ,source (POST+ ,Free (& 8))) + ;; Tag it. + ,@(affix-type target type-code:flonum target (lambda () temp))))) + +;; Constant-offset floating vector load/store. + +(define-rule statement + (ASSIGN (REGISTER (? target)) + (FLOAT-OFFSET (REGISTER (? base)) + (MACHINE-CONSTANT (? offset)))) + (QUALIFIER (fits-in-unsigned-12? offset)) + (let* ((base (standard-source! base)) + (target (float-target-fpr! target))) + (LAP (LDR.V D ,target (+ ,base (&U (* 8 ,offset))))))) + +(define-rule statement + (ASSIGN (FLOAT-OFFSET (REGISTER (? base)) + (MACHINE-CONSTANT (? offset))) + (REGISTER (? source))) + (QUALIFIER (fits-in-unsigned-12? offset)) + (let* ((base (standard-source! base)) + (source (float-source-fpr! source))) + (LAP (STR.V D ,source (+ ,base (&U (* 8 ,offset))))))) + +;; Variable-offset floating vector load/store. + +(define-rule statement + (ASSIGN (REGISTER (? target)) + (FLOAT-OFFSET (REGISTER (? base)) + (REGISTER (? offset)))) + (let* ((base (standard-source! base)) + (offset (standard-source! offset)) + (target (float-target-fpr! target))) + (LAP (LDR.V D ,target (+ ,base (LSL ,offset 3)))))) + +(define-rule statement + (ASSIGN (FLOAT-OFFSET (REGISTER (? base)) + (REGISTER (? offset))) + (REGISTER (? source))) + (let* ((base (standard-source! base)) + (offset (standard-source! offset)) + (source (float-source-fpr! source))) + (LAP (STR.V D ,source (+ ,base (LSL ,offset 3)))))) + +;;;; Flonum Arithmetic -- unary operations + +(define-rule statement + (ASSIGN (REGISTER (? target)) + (FLONUM-1-ARG (? operator) (REGISTER (? source)) (? overflow?))) + overflow? ;ignore + (let* ((source (float-source-fpr! source)) + (target (float-target-fpr! target))) + ((flonum-1-arg/operator operator) target source))) + +(define-integrable (flonum-1-arg/operator operation) + (lookup-arithmetic-method operation flonum-methods/1-arg)) + +(define flonum-methods/1-arg + (list 'FLONUM-METHODS/1-ARG)) + +(define-arithmetic-method 'FLONUM-ABS flonum-methods/1-arg + (lambda (target source) + (LAP (FABS D ,target ,source)))) + +(define-arithmetic-method 'FLONUM-NEGATE flonum-methods/1-arg + (lambda (target source) + (LAP (FNEG D ,target ,source)))) + +(define-arithmetic-method 'FLONUM-SQRT flonum-methods/1-arg + (lambda (target source) + (LAP (FSQRT D ,target ,source)))) + +(define-arithmetic-method 'FLONUM-CEILING flonum-methods/1-arg + (lambda (target source) + ;; round toward Plus infinity + (LAP (FRINTP D ,target ,source)))) + +(define-arithmetic-method 'FLONUM-FLOOR flonum-methods/1-arg + (lambda (target source) + ;; round toward Minus infinity + (LAP (FRINTM D ,target ,source)))) + +(define-arithmetic-method 'FLONUM-ROUND flonum-methods/1-arg + (lambda (target source) + ;; round to Nearest (ties to even) + (LAP (FRINTN D ,target ,source)))) + +(define-arithmetic-method 'FLONUM-TRUNCATE flonum-methods/1-arg + (lambda (target source) + ;; round toward Zero + (LAP (FRINTZ D ,target ,source)))) + +;;;; Flonum Arithmetic -- binary operations + +(define-rule statement + (ASSIGN (REGISTER (? target)) + (FLONUM-2-ARGS (? operator) + (REGISTER (? source1)) + (REGISTER (? source2)) + (? overflow?))) + overflow? ;ignore + ((flonum-2-args/operator operator) target source1 source2)) + +(define flonum-methods/2-args + (list 'FLONUM-METHODS/2-ARGS)) + +(define-integrable (flonum-2-args/operator operator) + (lookup-arithmetic-method operator flonum-methods/2-args)) + +(define ((flonum-2-args/standard operate) target source1 source2) + (let* ((source1 (float-source-fpr! source1)) + (source2 (float-source-fpr! source2)) + (target (float-target-fpr! target))) + (operate target source1 source2))) + +(define ((flonum-2-args/temporary operate) target source1 source2) + ;; Allocate a temporary register that we can write to without + ;; destroying source1 and source2; then assign it as an alias for the + ;; target at the end. + (let* ((source1 (float-source-fpr! source1)) + (source2 (float-source-fpr! source2)) + (temp-register (float-temporary-register!))) + (delete-dead-registers!) + (rtl-target:=machine-register! target temp-register) + (operate (float-register->fpr temp-register) source1 source2))) + +(define-arithmetic-method 'FLONUM-ADD flonum-methods/2-args + (flonum-2-args/standard + (lambda (target source1 source2) + (LAP (FADD D ,target ,source1 ,source2))))) + +(define-arithmetic-method 'FLONUM-SUBTRACT flonum-methods/2-args + (flonum-2-args/standard + (lambda (target source1 source2) + (LAP (FSUB D ,target ,source1 ,source2))))) + +(define-arithmetic-method 'FLONUM-MULTIPLY flonum-methods/2-args + (flonum-2-args/standard + (lambda (target source1 source2) + (LAP (FMUL D ,target ,source1 ,source2))))) + +(define-arithmetic-method 'FLONUM-DIVIDE flonum-methods/2-args + (flonum-2-args/standard + (lambda (target source1 source2) + (LAP (FDIV D ,target ,source1 ,source2))))) + +(define-arithmetic-method 'FLONUM-COPYSIGN flonum-methods/2-args + (flonum-2-args/temporary + (lambda (target source1 source2) + (let ((tempx (general-temporary!))) + ;; XXX do this without a temporary general register -- MOVI? + (LAP (MOVZ X ,tempx (LSL (&U #x8000) 48)) + (FMOV D ,target X ,tempx) + ;; target[i] := source2[i] if signbit[i] else source1[i] + (BSL B8 ,target ,source2 ,source1)))))) + +;;;; Flonum Predicates + +(define-rule predicate + (FLONUM-PRED-1-ARG (? predicate) (REGISTER (? source))) + (let* ((predicate (flonum-pred-1->2-args predicate)) + (source1 (float-source-fpr! source)) + (source2 'Z)) + (float-branch! predicate) + (float-comparison predicate source1 source2))) + +(define (flonum-pred-1->2-args predicate) + (case predicate + ((FLONUM-NEGATIVE?) 'FLONUM-LESS?) + ((FLONUM-IS-ZERO?) 'FLONUM-IS-EQUAL?) + ((FLONUM-ZERO?) 'FLONUM-EQUAL?) + ((FLONUM-POSITIVE?) 'FLONUM-POSITIVE?) + (else (error "Invalid flonum-pred-1-arg:" predicate)))) + +(define-rule predicate + (FLONUM-PRED-1-ARG FLONUM-IS-NEGATIVE? (REGISTER (? source))) + (let* ((source (float-source-fpr! source)) + ;; Avoid scratch-0, which may be used in the branches. + (temp regnum:scratch-1)) + (set-test-bit-set-branches! temp 63) + (LAP (FMOV X ,temp D ,source)))) + +(define-rule predicate + (FLONUM-PRED-1-ARG FLONUM-IS-NAN? (REGISTER (? source))) + (let ((source (float-source-fpr! source))) + (set-condition-branches! 'VS 'VC) + (LAP (FCMP D ,source ,source)))) + +(define-rule predicate + (FLONUM-PRED-1-ARG FLONUM-IS-INFINITE? (REGISTER (? source))) + ;; Discriminate on |x| = +inf, versus |x| =/= +inf or unordered. + (set-condition-branches! 'EQ 'NE) + (let ((source (float-source-fpr! source))) + (delete-dead-registers!) + (let* ((temp-source (float-temporary-fpr!)) + (temp-inf (float-temporary-fpr!)) + (label (allocate-binary64-label binary64-bits:+inf))) + (LAP (FABS D ,temp-source ,source) + ,@(load-pc-relative-float temp-inf regnum:scratch-0 label) + (FCMP D ,temp-source ,temp-inf))))) + +(define-rule predicate + (FLONUM-PRED-1-ARG FLONUM-IS-FINITE? (REGISTER (? source))) + ;; Discriminate on |x| < +inf, versus |x| >= +inf or unordered. + (set-condition-branches! 'LO 'HS) ;LO = lt; HS = gt, eq, or un + (let ((source (float-source-fpr! source))) + (delete-dead-registers!) + (let* ((temp-source (float-temporary-fpr!)) + (temp-inf (float-temporary-fpr!)) + (label (allocate-binary64-label binary64-bits:+inf))) + (LAP (FABS D ,temp-source ,source) + ,@(load-pc-relative-float temp-inf regnum:scratch-0 label) + (FCMP D ,temp-source ,temp-inf))))) + +(define-rule predicate + (FLONUM-PRED-1-ARG FLONUM-IS-NORMAL? (REGISTER (? source))) + ;; Break it into two steps: + ;; + ;; 1. Is |x| < +inf, versus |x| >= +inf or unordered? + ;; 2. Is |x| >= smallest normal, versus |x| < smallest normal? + ;; + ;; The branches are: + ;; + ;; GE: gt or eq, when n = v + ;; LO: lt, when c = 0 + ;; + (set-condition-branches! 'GE 'LO) + (let ((source (float-source-fpr! source))) + (delete-dead-registers!) + (let* ((temp-source (float-temporary-fpr!)) + (temp-inf (float-temporary-fpr!)) + (temp-norm (float-temporary-fpr!)) + (label-inf (allocate-binary64-label binary64-bits:+inf)) + (label-norm + (allocate-binary64-label binary64-bits:smallest-normal))) + ;; XXX Use LDP to load both registers in one go. + (LAP (FABS D ,temp-source ,source) + ,@(load-pc-relative-float temp-inf regnum:scratch-0 label-inf) + ,@(load-pc-relative-float temp-norm regnum:scratch-0 label-norm) + (FCMP D ,temp-source ,temp-inf) + ;; If |x| < +inf, compare source and norm; otherwise, if |x| + ;; >= +inf or |x| and +inf are unordered, set NCVB := 1000 + ;; so that GE will be not-taken and LO will be taken. + (FCCMP D ,temp-source ,temp-norm (&U #b1000) LT))))) + +(define-rule predicate + (FLONUM-PRED-2-ARGS (? predicate) + (REGISTER (? source1)) + (REGISTER (? source2))) + (let* ((source1 (float-source-fpr! source1)) + (source2 (float-source-fpr! source2))) + (float-branch! predicate) + (float-comparison predicate source1 source2))) + +(define (float-branch! predicate) + (case predicate + ((FLONUM-EQUAL? FLONUM-IS-EQUAL?) + (set-equal-branches!)) + ((FLONUM-LESS? FLONUM-IS-LESS?) + (set-condition-branches! 'LO 'HS)) ;LO = lt; HS = gt, eq, or un + ((FLONUM-GREATER? FLONUM-IS-GREATER?) + (set-condition-branches! 'GT 'LE)) ;LE = le, eq, or un + ((FLONUM-IS-LESS-OR-EQUAL?) ;XXX FLONUM-LESS-OR-EQUAL? + (set-condition-branches! 'LS 'HI)) ;LS = le or eq; HI = gt or un + ((FLONUM-IS-GREATER-OR-EQUAL?) ;XXX FLONUM-GREATER-OR-EQUAL? + (set-condition-branches! 'GE 'LT)) ;LT = lt or un + ((FLONUM-IS-LESS-OR-GREATER?) ;XXX FLONUM-LESS-OR-GREATER? + (set-current-branches! + (lambda (label) ;branch to label if less or greater + (let ((unordered (generate-label 'UNORDERED))) + (LAP + ;; branch if unordered (V=1) + (B. VS (@PCR ,unordered ,regnum:scratch-0)) + ;; branch if less or greater + (B. NE (@PCR ,label ,regnum:scratch-0)) + (LABEL ,unordered)))) + (lambda (label) ;branch to label if NaN or equal + (LAP + ;; branch if unordered (V=1) + (B. VS (@PCR ,label ,regnum:scratch-0)) + ;; branch if equal + (B. EQ (@PCR ,label ,regnum:scratch-0)))))) + ((FLONUM-IS-UNORDERED?) + (set-condition-branches! 'VS 'VC)) + (else + (error "Unknown float predicate:" predicate)))) + +(define (float-comparison predicate source1 source2) + (case predicate + ((FLONUM-EQUAL? FLONUM-LESS? FLONUM-GREATER?) + ;; floating-point compare with exception + (LAP (FCMPE D ,source1 ,source2))) + ((FLONUM-IS-EQUAL? + FLONUM-IS-LESS? + FLONUM-IS-GREATER? + FLONUM-IS-LESS-OR-EQUAL? + FLONUM-IS-GREATER-OR-EQUAL? + FLONUM-IS-LESS-OR-GREATER? + FLONUM-IS-UNORDERED?) + ;; floating-point quiet compare + (LAP (FCMP D ,source1 ,source2)))))