Disabled by default for now due to limited testing.
(define-load/store-pair-instruction LDP 1)
(define-load/store-pair-instruction STP 0))
\f
-;;; C3.2.9 Load/Store scalar SIMD and floating-point
-
-(let-syntax
- ((define-simd/fp-load/store-instruction
- (sc-macro-transformer
- (lambda (form environment)
- environment
- (receive (mnemonic load/store . extra) (apply values (cdr form))
- `(define-instruction ,mnemonic
- ;; LDR immediate, SIMD&FP, pre/post-index with signed
- ;; byte offset (C7.2.176)
- ;; STR immediate, SIMD&FP, pre/post-index with signed
- ;; byte offset (C7.2.315)
- (((? sz load/store-simd/fp-size)
- (? Vt vregister)
- ((? pre/post load/store-pre/post-index)
- (? Rn register-31=sp)
- (& (? offset signed-9))))
- (BITS (2 sz LOAD/STORE-SIMD/FP-SIZE)
- (3 #b111)
- (1 1) ;SIMD/FP
- (2 #b00)
- (1 sz LOAD/STORE-SIMD/FP-OPCHI)
- (1 ,load/store) ;opc[0]
- (1 0)
- (9 offset SIGNED)
- (2 pre/post)
- (5 Rn)
- (5 Vt)))
- ;; LDR immediate, SIMD&FP, zero offset (C7.2.176)
- ;; STR immediate, SIMD&FP, zero offset (C7.2.315)
- (((? sz load/store-simd/fp-size)
- (? Vt vregister)
- (? Rn register-31=sp))
- (BITS (2 sz LOAD/STORE-SIMD/FP-SIZE)
- (3 #b111)
- (1 1) ;SIMD/FP
- (2 #b01)
- (1 sz LOAD/STORE-SIMD/FP-OPCHI)
- (1 ,load/store) ;opc[0]
- (12 0) ;offset=0
- (5 Rn)
- (5 Vt)))
- ;; LDR immediate, SIMD&FP (B), unsigned byte offset (C7.2.176)
- ;; STR immediate, SIMD&FP (B), unsigned byte offset (C7.2.315)
- ((B (? Vt vregister)
- (+ (? Rn register-31=sp)
- (&U (? offset unsigned-12))))
- (BITS (2 #b00) ;size=B, 8-bit
- (3 #b111)
- (1 1) ;SIMD/FP
- (2 #b01)
- (1 0) ;opc[1]
- (1 ,load/store) ;opc[0]
- (12 offset)
- (5 Rn)
- (5 Vt)))
- ;; LDR immediate, SIMD&FP (B), unsigned byte offset (C7.2.176)
- ;; STR immediate, SIMD&FP (B), unsigned byte offset (C7.2.315)
- ;; [same as above]
- ((B (? Vt vregister)
- (+ (? Rn register-31=sp)
- (&U (* 1 (? offset unsigned-12)))))
- (BITS (2 #b00) ;size=B, 8-bit
- (3 #b111)
- (1 1) ;SIMD/FP
- (2 #b01)
- (1 0) ;opc[1]
- (1 ,load/store) ;opc[0]
- (12 offset)
- (5 Rn)
- (5 Vt)))
-\f
- ;; LDR immediate, SIMD&FP (H), unsigned 2-byte offset (C7.2.176)
- ;; STR immediate, SIMD&FP (H), unsigned 2-byte offset (C7.2.315)
- ((H (? Vt vregister)
- (+ (? Rn register-31=sp)
- (&U (* 2 (? offset unsigned-12)))))
- (BITS (2 #b01) ;size=H, 16-bit
- (3 #b111)
- (1 1) ;SIMD/FP
- (2 #b01)
- (1 0) ;opc[1]
- (1 ,load/store) ;opc[0]
- (12 offset)
- (5 Rn)
- (5 Vt)))
- ;; LDR immediate, SIMD&FP (S), unsigned 4-byte offset (C7.2.176)
- ;; STR immediate, SIMD&FP (S), unsigned 4-byte offset (C7.2.315)
- ((S (? Vt vregister)
- (+ (? Rn register-31=sp)
- (&U (* 4 (? offset unsigned-12)))))
- (BITS (2 #b10) ;size=S, 32-bit
- (3 #b111)
- (1 1) ;SIMD/FP
- (2 #b01)
- (1 0) ;opc[1]
- (1 ,load/store) ;opc[0]
- (12 offset)
- (5 Rn)
- (5 Vt)))
- ;; LDR immediate, SIMD&FP (D), unsigned 8-byte offset (C7.2.176)
- ;; STR immediate, SIMD&FP (D), unsigned 8-byte offset (C7.2.315)
- ((D (? Vt vregister)
- (+ (? Rn register-31=sp)
- (&U (* 8 (? offset unsigned-12)))))
- (BITS (2 #b11) ;size=D, 64-bit
- (3 #b111)
- (1 1) ;SIMD/FP
- (2 #b01)
- (1 0) ;opc[1]
- (1 ,load/store) ;opc[0]
- (12 offset)
- (5 Rn)
- (5 Vt)))
- ;; LDR immediate, SIMD&FP (Q), unsigned 16-byte offset (C7.2.176)
- ;; STR immediate, SIMD&FP (Q), unsigned 16-byte offset (C7.2.315)
- ((Q (? Vt vregister)
- (+ (? Rn register-31=sp)
- (&U (* 16 (? offset unsigned-12)))))
- (BITS (2 #b00) ;`size'
- (3 #b111)
- (1 1) ;SIMD/FP
- (2 #b01)
- (1 0) ;opc[1]
- (1 ,load/store) ;opc[0]
- (12 offset)
- (5 Rn)
- (5 Vt)))
-\f
- ;; LDR register, SIMD&FP, no extend (C7.2.178)
- ;; STR register, SIMD&FP, no extend (C7.3.316)
- (((? sz load/store-simd/fp-size)
- (? Vt vregister)
- (+ (? Rn register-31=sp)
- (? Rm register-31=z)))
- (BITS (2 sz LOAD/STORE-SIMD/FP-SIZE)
- (3 #b111)
- (1 1) ;SIMD/FP
- (2 #b00)
- (1 sz LOAD/STORE-SIMD/FP-OPCHI) ;opc[1]
- (1 ,load/store) ;opc[0]
- (1 1)
- (5 Rm)
- (3 #b011) ;option=LSL
- (1 0) ;shift=0
- (2 #b10)
- (5 Rn)
- (5 Vt)))
- ;; LDR register, SIMD&FP (B), (C7.2.178)
- ;; STR register, SIMD&FP (B), (C7.2.316)
- ((B (? Vt vregister)
- (+ (? Rn register-31=sp)
- ((? option load/store-extend-type)
- (? Rm register-31=z)
- (? S load/store8-extend-amount))))
- (BITS (2 #b00) ;size=B
- (3 #b111)
- (1 1) ;SIMD/FP
- (2 #b00)
- (1 0) ;opc[1]
- (1 ,load/store) ;opc[0]
- (1 1)
- (5 Rm)
- (3 option)
- (1 S)
- (2 #b10)
- (5 Rn)
- (5 Vt)))
- ;; LDR register, SIMD&FP (H), (C7.2.178)
- ;; STR register, SIMD&FP (H), (C7.2.316)
- ((H (? Vt vregister)
- (+ (? Rn register-31=sp)
- ((? option load/store-extend-type)
- (? Rm register-31=z)
- (? S load/store16-extend-amount))))
- (BITS (2 #b01) ;size=H
- (3 #b111)
- (1 1) ;SIMD/FP
- (2 #b00)
- (1 0) ;opc[1]
- (1 ,load/store) ;opc[0]
- (1 1)
- (5 Rm)
- (3 option)
- (1 S)
- (2 #b10)
- (5 Rn)
- (5 Vt)))
- ;; LDR register, SIMD&FP (S), (C7.2.178)
- ;; STR register, SIMD&FP (S), (C7.2.316)
- ((S (? Vt vregister)
- (+ (? Rn register-31=sp)
- ((? option load/store-extend-type)
- (? Rm register-31=z)
- (? S load/store32-extend-amount))))
- (BITS (2 #b10) ;size=H
- (3 #b111)
- (1 1) ;SIMD/FP
- (2 #b00)
- (1 0) ;opc[1]
- (1 ,load/store) ;opc[0]
- (1 1)
- (5 Rm)
- (3 option)
- (1 S)
- (2 #b10)
- (5 Rn)
- (5 Vt)))
-\f
- ;; LDR register, SIMD&FP (D), (C7.2.178)
- ;; STR register, SIMD&FP (D), (C7.2.316)
- ((D (? Vt vregister)
- (+ (? Rn register-31=sp)
- ((? option load/store-extend-type)
- (? Rm register-31=z)
- (? S load/store64-extend-amount))))
- (BITS (2 #b11) ;size=D
- (3 #b111)
- (1 1) ;SIMD/FP
- (2 #b00)
- (1 0) ;opc[1]
- (1 ,load/store) ;opc[0]
- (1 1)
- (5 Rm)
- (3 option)
- (1 S)
- (2 #b10)
- (5 Rn)
- (5 Vt)))
- ;; LDR register, SIMD&FP (Q), (C7.2.178)
- ;; STR register, SIMD&FP (Q), (C7.2.316)
- ((Q (? Vt vregister)
- (+ (? Rn register-31=sp)
- ((? option load/store-extend-type)
- (? Rm register-31=z)
- (? S load/store128-extend-amount))))
- (BITS (2 #b00) ;size=Q
- (3 #b111)
- (1 1) ;SIMD/FP
- (2 #b00)
- (1 1) ;opc[1]
- (1 ,load/store) ;opc[0]
- (1 1)
- (5 Rm)
- (3 option)
- (1 S)
- (2 #b10)
- (5 Rn)
- (5 Vt)))
- ,@extra))))))
- ;; The ARM assembler has `LDRB w13,...' for byte-sized load into
- ;; general register 13, and `LDR b13,...' for byte-sized load into
- ;; vector register 13. We use a separate mnemonic for general
- ;; registers and vector registers.
- (define-simd/fp-load/store-instruction STR.V 0)
- (define-simd/fp-load/store-instruction LDR.V 1
- ;; LDR PC-relative literal, SIMD&FP (C7.2.177)
- (((? opc ldr-literal-simd/fp-size)
- (? Vt vregister)
- (@PCO (? offset signed-19)))
- (BITS (2 opc)
- (3 #b011)
- (1 1) ;SIMD/FP
- (2 #b00)
- (19 offset SIGNED)
- (5 Vt)))
- (((? size) (? Vt) (@PCR (? label) (? temp register<31)))
- (VARIABLE-WIDTH offset `(/ (- ,label *PC*) 4)
- ((#x-40000 #x3ffff)
- (MACRO 32 (LDR.V ,size ,Vt (@PCO (* 4 ,offset)))))
- ((#x-100000000 #xffffffff)
- (MACRO 64 (ADRP-ADD X ,temp (@PCO ,(* 4 offset)))) ;(*)
- (MACRO 32 (LDR.V X ,Vt ,temp)))))))
-\f
;; Load register signed (i.e., sign-extended). This does not detect
;; the nonsensical (LDRS W W ...) operand combination -- it will
;; assemble into a possibly different instruction.
(declare (usual-integrations))
\f
-;;; XXX not yet
+;;; C3.2.9 Load/Store scalar SIMD and floating-point
-#f
+(let-syntax
+ ((define-simd/fp-load/store-instruction
+ (sc-macro-transformer
+ (lambda (form environment)
+ environment
+ (receive (mnemonic load/store . extra) (apply values (cdr form))
+ `(define-instruction ,mnemonic
+ ;; LDR immediate, SIMD&FP, pre/post-index with signed
+ ;; byte offset (C7.2.176)
+ ;; STR immediate, SIMD&FP, pre/post-index with signed
+ ;; byte offset (C7.2.315)
+ (((? sz:opchi load/store-simd/fp-size)
+ (? Vt vregister)
+ ((? pre/post load/store-pre/post-index)
+ (? Rn register-31=sp)
+ (& (? offset signed-9))))
+ (BITS (2 (fix:lsh sz:opchi -1))
+ (3 #b111)
+ (1 1) ;SIMD/FP
+ (2 #b00)
+ (1 (fix:and 1 sz:opchi)) ;opc[1]
+ (1 ,load/store) ;opc[0]
+ (1 0)
+ (9 offset SIGNED)
+ (2 pre/post)
+ (5 Rn)
+ (5 Vt)))
+ ;; LDR immediate, SIMD&FP, zero offset (C7.2.176)
+ ;; STR immediate, SIMD&FP, zero offset (C7.2.315)
+ (((? sz:opchi load/store-simd/fp-size)
+ (? Vt vregister)
+ (? Rn register-31=sp))
+ (BITS (2 (fix:lsh sz:opchi -1))
+ (3 #b111)
+ (1 1) ;SIMD/FP
+ (2 #b01)
+ (1 (fix:and 1 sz:opchi)) ;opc[1]
+ (1 ,load/store) ;opc[0]
+ (12 0) ;offset=0
+ (5 Rn)
+ (5 Vt)))
+ ;; LDR immediate, SIMD&FP (B), unsigned byte offset (C7.2.176)
+ ;; STR immediate, SIMD&FP (B), unsigned byte offset (C7.2.315)
+ ((B (? Vt vregister)
+ (+ (? Rn register-31=sp)
+ (&U (? offset unsigned-12))))
+ (BITS (2 #b00) ;size=B, 8-bit
+ (3 #b111)
+ (1 1) ;SIMD/FP
+ (2 #b01)
+ (1 0) ;opc[1]
+ (1 ,load/store) ;opc[0]
+ (12 offset)
+ (5 Rn)
+ (5 Vt)))
+ ;; LDR immediate, SIMD&FP (B), unsigned byte offset (C7.2.176)
+ ;; STR immediate, SIMD&FP (B), unsigned byte offset (C7.2.315)
+ ;; [same as above]
+ ((B (? Vt vregister)
+ (+ (? Rn register-31=sp)
+ (&U (* 1 (? offset unsigned-12)))))
+ (BITS (2 #b00) ;size=B, 8-bit
+ (3 #b111)
+ (1 1) ;SIMD/FP
+ (2 #b01)
+ (1 0) ;opc[1]
+ (1 ,load/store) ;opc[0]
+ (12 offset)
+ (5 Rn)
+ (5 Vt)))
+\f
+ ;; LDR immediate, SIMD&FP (H), unsigned 2-byte offset (C7.2.176)
+ ;; STR immediate, SIMD&FP (H), unsigned 2-byte offset (C7.2.315)
+ ((H (? Vt vregister)
+ (+ (? Rn register-31=sp)
+ (&U (* 2 (? offset unsigned-12)))))
+ (BITS (2 #b01) ;size=H, 16-bit
+ (3 #b111)
+ (1 1) ;SIMD/FP
+ (2 #b01)
+ (1 0) ;opc[1]
+ (1 ,load/store) ;opc[0]
+ (12 offset)
+ (5 Rn)
+ (5 Vt)))
+ ;; LDR immediate, SIMD&FP (S), unsigned 4-byte offset (C7.2.176)
+ ;; STR immediate, SIMD&FP (S), unsigned 4-byte offset (C7.2.315)
+ ((S (? Vt vregister)
+ (+ (? Rn register-31=sp)
+ (&U (* 4 (? offset unsigned-12)))))
+ (BITS (2 #b10) ;size=S, 32-bit
+ (3 #b111)
+ (1 1) ;SIMD/FP
+ (2 #b01)
+ (1 0) ;opc[1]
+ (1 ,load/store) ;opc[0]
+ (12 offset)
+ (5 Rn)
+ (5 Vt)))
+ ;; LDR immediate, SIMD&FP (D), unsigned 8-byte offset (C7.2.176)
+ ;; STR immediate, SIMD&FP (D), unsigned 8-byte offset (C7.2.315)
+ ((D (? Vt vregister)
+ (+ (? Rn register-31=sp)
+ (&U (* 8 (? offset unsigned-12)))))
+ (BITS (2 #b11) ;size=D, 64-bit
+ (3 #b111)
+ (1 1) ;SIMD/FP
+ (2 #b01)
+ (1 0) ;opc[1]
+ (1 ,load/store) ;opc[0]
+ (12 offset)
+ (5 Rn)
+ (5 Vt)))
+ ;; LDR immediate, SIMD&FP (Q), unsigned 16-byte offset (C7.2.176)
+ ;; STR immediate, SIMD&FP (Q), unsigned 16-byte offset (C7.2.315)
+ ((Q (? Vt vregister)
+ (+ (? Rn register-31=sp)
+ (&U (* 16 (? offset unsigned-12)))))
+ (BITS (2 #b00) ;`size'
+ (3 #b111)
+ (1 1) ;SIMD/FP
+ (2 #b01)
+ (1 0) ;opc[1]
+ (1 ,load/store) ;opc[0]
+ (12 offset)
+ (5 Rn)
+ (5 Vt)))
+\f
+ ;; LDR register, SIMD&FP, no extend (C7.2.178)
+ ;; STR register, SIMD&FP, no extend (C7.3.316)
+ (((? sz:opchi load/store-simd/fp-size)
+ (? Vt vregister)
+ (+ (? Rn register-31=sp)
+ (? Rm register-31=z)))
+ (BITS (2 (fix:lsh sz:opchi -1))
+ (3 #b111)
+ (1 1) ;SIMD/FP
+ (2 #b00)
+ (1 (fix:and 1 sz:opchi)) ;opc[1]
+ (1 ,load/store) ;opc[0]
+ (1 1)
+ (5 Rm)
+ (3 #b011) ;option=LSL
+ (1 0) ;shift=0
+ (2 #b10)
+ (5 Rn)
+ (5 Vt)))
+ ;; LDR register, SIMD&FP (B), (C7.2.178)
+ ;; STR register, SIMD&FP (B), (C7.2.316)
+ ((B (? Vt vregister)
+ (+ (? Rn register-31=sp)
+ ((? option load/store-extend-type)
+ (? Rm register-31=z)
+ (? S load/store8-extend-amount))))
+ (BITS (2 #b00) ;size=B
+ (3 #b111)
+ (1 1) ;SIMD/FP
+ (2 #b00)
+ (1 0) ;opc[1]
+ (1 ,load/store) ;opc[0]
+ (1 1)
+ (5 Rm)
+ (3 option)
+ (1 S)
+ (2 #b10)
+ (5 Rn)
+ (5 Vt)))
+ ;; LDR register, SIMD&FP (H), (C7.2.178)
+ ;; STR register, SIMD&FP (H), (C7.2.316)
+ ((H (? Vt vregister)
+ (+ (? Rn register-31=sp)
+ ((? option load/store-extend-type)
+ (? Rm register-31=z)
+ (? S load/store16-extend-amount))))
+ (BITS (2 #b01) ;size=H
+ (3 #b111)
+ (1 1) ;SIMD/FP
+ (2 #b00)
+ (1 0) ;opc[1]
+ (1 ,load/store) ;opc[0]
+ (1 1)
+ (5 Rm)
+ (3 option)
+ (1 S)
+ (2 #b10)
+ (5 Rn)
+ (5 Vt)))
+ ;; LDR register, SIMD&FP (S), (C7.2.178)
+ ;; STR register, SIMD&FP (S), (C7.2.316)
+ ((S (? Vt vregister)
+ (+ (? Rn register-31=sp)
+ ((? option load/store-extend-type)
+ (? Rm register-31=z)
+ (? S load/store32-extend-amount))))
+ (BITS (2 #b10) ;size=H
+ (3 #b111)
+ (1 1) ;SIMD/FP
+ (2 #b00)
+ (1 0) ;opc[1]
+ (1 ,load/store) ;opc[0]
+ (1 1)
+ (5 Rm)
+ (3 option)
+ (1 S)
+ (2 #b10)
+ (5 Rn)
+ (5 Vt)))
+\f
+ ;; LDR register, SIMD&FP (D), (C7.2.178)
+ ;; STR register, SIMD&FP (D), (C7.2.316)
+ ((D (? Vt vregister)
+ (+ (? Rn register-31=sp)
+ ((? option load/store-extend-type)
+ (? Rm register-31=z)
+ (? S load/store64-extend-amount))))
+ (BITS (2 #b11) ;size=D
+ (3 #b111)
+ (1 1) ;SIMD/FP
+ (2 #b00)
+ (1 0) ;opc[1]
+ (1 ,load/store) ;opc[0]
+ (1 1)
+ (5 Rm)
+ (3 option)
+ (1 S)
+ (2 #b10)
+ (5 Rn)
+ (5 Vt)))
+ ;; LDR register, SIMD&FP (Q), (C7.2.178)
+ ;; STR register, SIMD&FP (Q), (C7.2.316)
+ ((Q (? Vt vregister)
+ (+ (? Rn register-31=sp)
+ ((? option load/store-extend-type)
+ (? Rm register-31=z)
+ (? S load/store128-extend-amount))))
+ (BITS (2 #b00) ;size=Q
+ (3 #b111)
+ (1 1) ;SIMD/FP
+ (2 #b00)
+ (1 1) ;opc[1]
+ (1 ,load/store) ;opc[0]
+ (1 1)
+ (5 Rm)
+ (3 option)
+ (1 S)
+ (2 #b10)
+ (5 Rn)
+ (5 Vt)))
+ ,@extra))))))
+ ;; The ARM assembler has `LDRB w13,...' for byte-sized load into
+ ;; general register 13, and `LDR b13,...' for byte-sized load into
+ ;; vector register 13. We use a separate mnemonic for general
+ ;; registers and vector registers.
+ (define-simd/fp-load/store-instruction STR.V 0)
+ (define-simd/fp-load/store-instruction LDR.V 1
+ ;; LDR PC-relative literal, SIMD&FP (C7.2.177)
+ (((? opc ldr-literal-simd/fp-size)
+ (? Vt vregister)
+ (@PCO (? offset signed-19)))
+ (BITS (2 opc)
+ (3 #b011)
+ (1 1) ;SIMD/FP
+ (2 #b00)
+ (19 offset SIGNED)
+ (5 Vt)))
+ (((? size) (? Vt) (@PCR (? label) (? temp register<31)))
+ (VARIABLE-WIDTH offset `(/ (- ,label *PC*) 4)
+ ((#x-40000 #x3ffff)
+ (MACRO 32 (LDR.V ,size ,Vt (@PCO (* 4 ,offset)))))
+ ((#x-100000000 #xffffffff)
+ (MACRO 64 (ADRP-ADD X ,temp (@PCO ,(* 4 offset)))) ;(*)
+ (MACRO 32 (LDR.V X ,Vt ,temp)))))))
+\f
+(let-syntax
+ ((define-aes-instruction
+ (sc-macro-transformer
+ (lambda (form environment)
+ environment
+ (receive (mnemonic opcode) (apply values (cdr form))
+ `(define-instruction ,mnemonic
+ (((? Rd vregister) (? Rn vregister))
+ (BITS (8 #b01001110)
+ (2 #b00) ;size
+ (5 #b10100)
+ (5 ,opcode)
+ (2 #b10)
+ (5 Rn)
+ (5 Rd)))))))))
+ (define-aes-instruction AESE #b00100)
+ (define-aes-instruction AESD #b00101)
+ (define-aes-instruction AESMC #b00110)
+ (define-aes-instruction AESIMC #b00111))
+
+(let-syntax
+ ((define-sha-3reg-instruction
+ (sc-macro-transformer
+ (lambda (form environment)
+ environment
+ (receive (mnemonic opcode) (apply values (cdr form))
+ `(define-instruction ,mnemonic
+ (((? Rd vregister) (? Rn vregister) (? Rm vregister))
+ (BITS (8 #b01011110)
+ (2 #b00) ;size
+ (1 0)
+ (5 Rm)
+ (1 0)
+ (3 ,opcode)
+ (2 #b00)
+ (5 Rn)
+ (5 Rd)))))))))
+ (define-sha-3reg-instruction SHA1C #b000)
+ (define-sha-3reg-instruction SHA1P #b001)
+ (define-sha-3reg-instruction SHA1M #b010)
+ (define-sha-3reg-instruction SHA1SU0 #b011)
+ (define-sha-3reg-instruction SHA256H #b100)
+ (define-sha-3reg-instruction SHA256H2 #b101)
+ (define-sha-3reg-instruction SHA256SU1 #b110))
+
+(let-syntax
+ ((define-sha-2reg-instruction
+ (sc-macro-transformer
+ (lambda (form environment)
+ environment
+ (receive (mnemonic opcode) (apply values (cdr form))
+ `(define-instruction ,mnemonic
+ (((? Rd vregister) (? Rn vregister))
+ (BITS (8 #b01011110)
+ (2 #b00) ;size
+ (5 #b10100)
+ (5 ,opcode)
+ (2 #b10)
+ (5 Rn)
+ (5 Rd)))))))))
+ (define-sha-2reg-instruction SHA1H #b00000)
+ (define-sha-2reg-instruction SHA1SU1 #b00001)
+ (define-sha-2reg-instruction SHA256SU0 #b00010))
+
+(let-syntax
+ ((define-fp-unary-instruction
+ (sc-macro-transformer
+ (lambda (form environment)
+ environment
+ (receive (mnemonic opcode-s U a opcode-v) (apply values (cdr form))
+ `(define-instruction ,mnemonic
+ ;; scalar
+ (((? type fp-scalar-size) (? Rd vregister) (? Rn vregister))
+ (BITS (1 0) ;M=0
+ (1 0) ;?
+ (1 0) ;S=0
+ (5 #b11110)
+ (2 type)
+ (1 1)
+ (6 ,opcode-s)
+ (5 #b10000)
+ (5 Rn)
+ (5 Rd)))
+ ;; vector
+ (((? sz:Q fp-vector-size)
+ (? Rd vregister)
+ (? Rn vregister))
+ (BITS (1 0) ;M=0
+ (1 (fix:and sz:Q 1)) ;Q
+ (1 ,U)
+ (5 #b01110)
+ (1 ,a)
+ (6 (fix:lsh sz:Q -1)) ;sz
+ (5 ,opcode-v)
+ (2 #b10)
+ (5 Rn)
+ (5 Rd)))))))))
+ ;; arithmetic
+ (define-fp-unary-instruction FABS #b000001 0 1 #b01111)
+ (define-fp-unary-instruction FNEG #b000010 1 1 #b01111)
+ (define-fp-unary-instruction FSQRT #b000011 1 1 #b11111)
+ ;; rounding
+ (define-fp-unary-instruction FRINTN #b001000 0 0 #b11000)
+ (define-fp-unary-instruction FRINTP #b001001 0 1 #b11000)
+ (define-fp-unary-instruction FRINTM #b001010 0 0 #b11001)
+ (define-fp-unary-instruction FRINTZ #b001011 0 1 #b11001)
+ (define-fp-unary-instruction FRINTA #b001100 1 0 #b11000)
+ (define-fp-unary-instruction FRINTX #b001110 1 0 #b11001)
+ (define-fp-unary-instruction FRINTI #b001111 1 1 #b11001))
+
+(let-syntax
+ ((define-fp-binary-instruction
+ (sc-macro-transformer
+ (lambda (form environment)
+ environment
+ (receive (mnemonic opcode-s U a S opcode-v) (apply values (cdr form))
+ `(define-instruction ,mnemonic
+ (((? type fp-scalar-size)
+ (? Rd vregister)
+ (? Rn vregister)
+ (? Rm vregister))
+ (BITS (1 0) ;M
+ (1 0)
+ (1 0) ;S
+ (5 #b11110)
+ (2 type)
+ (1 1)
+ (5 Rm)
+ (4 ,opcode-s)
+ (2 #b10)
+ (5 Rn)
+ (5 Rd)))
+ (((? Q fp16-vector-size)
+ (? Rd vregister)
+ (? Rn vregister)
+ (? Rm vregister))
+ (BITS (1 0)
+ (1 Q)
+ (1 ,U)
+ (5 #b01110)
+ (1 ,a)
+ (1 0)
+ (1 1)
+ (5 Rm)
+ (2 #b00)
+ (4 ,opcode-v)
+ (5 Rn)
+ (5 Rd)))
+ (((? sz:Q fp32/64-vector-size)
+ (? Rd vregister)
+ (? Rn vregister)
+ (? Rm vregister))
+ (BITS (1 0)
+ (1 (fix:and sz:Q 1)) ;Q
+ (1 ,U)
+ (5 #b01110)
+ (1 ,S)
+ (1 (fix:lsh sz:Q -1)) ;sz
+ (1 1)
+ (5 Rm)
+ (2 #b11)
+ (4 ,opcode-v)
+ (5 Rn)
+ (5 Rd)))))))))
+ (define-fp-binary-instruction FMUL #b0000 1 0 0 #b011)
+ (define-fp-binary-instruction FDIV #b0001 1 0 0 #b111)
+ (define-fp-binary-instruction FADD #b0010 0 0 0 #b010)
+ (define-fp-binary-instruction FSUB #b0011 0 1 1 #b010)
+ (define-fp-binary-instruction FMAX #b0100 0 0 0 #b110)
+ (define-fp-binary-instruction FMIN #b0101 0 1 1 #b110)
+ (define-fp-binary-instruction FMAXNM #b0110 0 0 0 #b000)
+ (define-fp-binary-instruction FMINNM #b0111 0 1 1 #b000))
+
+(let-syntax
+ ((define-fp-compare-instruction
+ (sc-macro-transformer
+ (lambda (form environment)
+ environment
+ (receive (mnemonic opc-hi) (apply values (cdr form))
+ `(define-instruction ,mnemonic
+ (((? type fp-scalar-size) (? Rn vregister) Z)
+ (BITS (1 0) ;M
+ (1 0)
+ (1 0) ;S
+ (5 #b11110)
+ (2 type)
+ (1 1)
+ (5 #b0000) ;Rm = #0.0
+ (2 #b00)
+ (4 #b1000)
+ (5 Rn)
+ (1 ,opc-hi)
+ (1 1) ;zero variant
+ (3 #b000)))
+ (((? type fp-scalar-size) (? Rn vregister) (? Rm vregister))
+ (BITS (1 0) ;M
+ (1 0)
+ (1 0) ;S
+ (5 #b11110)
+ (2 type)
+ (1 1)
+ (5 Rm)
+ (2 0)
+ (4 #b1000)
+ (5 Rn)
+ (1 ,opc-hi)
+ (1 0) ;register variant
+ (3 #b000)))))))))
+ ;; quiet compare
+ (define-fp-compare-instruction FCMP 0)
+ ;; compare and raise exceptions
+ (define-fp-compare-instruction FCMPE 1))
+
+(let-syntax
+ ((define-fp-conditional-compare-instruction
+ (sc-macro-transformer
+ (lambda (form environment)
+ environment
+ (receive (mnemonic op) (apply values (cdr form))
+ `(define-instruction ,mnemonic
+ (((? type fp-scalar-size)
+ (? Rn vregister)
+ (? Rm vregister)
+ (&U (? nzcv nzcv-value))
+ (? cc branch-condition))
+ (BITS (1 0) ;M
+ (1 0)
+ (1 0) ;S
+ (5 #b11110)
+ (2 type)
+ (1 1)
+ (5 Rm)
+ (4 cc)
+ (2 #b01)
+ (5 Rn)
+ (1 ,op)
+ (4 nzcv)))))))))
+ ;; quiet compare
+ (define-fp-conditional-compare-instruction FCCMP 0)
+ ;; compare and raise exceptions
+ (define-fp-conditional-compare-instruction FCCMPE 1))
+
+(let-syntax
+ ((define-simd-byte-instruction
+ (sc-macro-transformer
+ (lambda (form environment)
+ environment
+ (receive (mnemonic U opc2 opcode) (apply values (cdr form))
+ `(define-instruction ,mnemonic
+ (((? Q simd-byte-vector-size)
+ (? Rd vregister)
+ (? Rn vregister)
+ (? Rm vregister))
+ (BITS (1 0)
+ (1 Q)
+ (1 ,U)
+ (5 #b01110)
+ (2 ,opc2)
+ (1 1)
+ (5 Rm)
+ (5 ,opcode)
+ (1 1)
+ (5 Rn)
+ (5 Rd)))))))))
+ (define-simd-byte-instruction BSL 1 #b01 #b00011)
+ (define-simd-byte-instruction BIT 1 #b10 #b00011))
+
+(define-instruction FMOV
+ ;; vector, immediate (C7.2.122)
+ (((? U:op:Q fmov-vector-immediate-size)
+ (? Rd vregister)
+ (&U (? abc fmov-abc) (? defgh fmov-defgh)))
+ (BITS (1 0)
+ (1 (fix:and 1 U:op:Q))
+ (1 (fix:and 1 (fix:lsh U:op:Q -2)))
+ (5 #b01111)
+ (5 #b00000)
+ (3 abc)
+ (4 #b1111)
+ (1 (fix:and 1 (fix:lsh U:op:Q -1)))
+ (1 1)
+ (5 defgh)
+ (5 Rd)))
+ (((? U:op:Q fmov-vector-immediate-size)
+ (? Rd vregister)
+ (&. (? abcdefgh fmov-abcdefgh)))
+ (BITS (1 0)
+ (1 (fix:and 1 U:op:Q))
+ (1 (fix:and 1 (fix:lsh U:op:Q -2)))
+ (5 #b01111)
+ (5 #b00000)
+ (3 (fix:lsh abcdefgh -5))
+ (4 #b1111)
+ (1 (fix:and 1 (fix:lsh U:op:Q -1)))
+ (1 1)
+ (5 (fix:and #b11111 abcdefgh))
+ (5 Rd)))
+ ;; register (C7.2.123)
+ (((? type fp-scalar-size) (? Rd vregister) (? Rn vregister))
+ (BITS (1 0)
+ (1 0)
+ (1 0)
+ (5 #b11110)
+ (2 type)
+ (1 1)
+ (4 #b0000)
+ (2 #b00) ;opc
+ (5 #b10000)
+ (5 Rn)
+ (5 Rd)))
+ ;; general (C7.2.124)
+ (((? sf sf-size) (? Rd register-31=z)
+ (? rmode:type fmov-general-size) (? Rn vregister))
+ (BITS (1 sf)
+ (1 0)
+ (1 0)
+ (5 #b11110)
+ (2 (fix:and #b11 rmode:type))
+ (1 1)
+ (2 (fix:lsh rmode:type -2))
+ (3 #b110) ;opcode
+ (6 #b000000)
+ (5 Rn)
+ (5 Rd)))
+ (((? rmode:type fmov-general-size) (? Rd vregister)
+ (? sf sf-size) (? Rn register-31=z))
+ (BITS (1 sf)
+ (1 0)
+ (1 0)
+ (5 #b11110)
+ (2 (fix:and #b11 rmode:type))
+ (1 1)
+ (2 (fix:lsh rmode:type -2))
+ (3 #b111) ;opcode
+ (6 #b000000)
+ (5 Rn)
+ (5 Rd)))
+ ;; scalar, immediate (C7.2.125)
+ (((? type fp-scalar-size) (? Rd vregister) (&U (? imm8 unsigned-8)))
+ (BITS (1 0)
+ (1 0)
+ (1 0)
+ (5 #b11110)
+ (2 type)
+ (1 1)
+ (8 imm8)
+ (3 #b100)
+ (5 #b00000)
+ (5 Rd)))
+ (((? type fp-scalar-size) (? Rd vregister) (&. (? imm8 fp-binary8)))
+ (BITS (1 0)
+ (1 0)
+ (1 0)
+ (5 #b11110)
+ (2 type)
+ (1 1)
+ (8 imm8)
+ (3 #b100)
+ (5 #b00000)
+ (5 Rd))))
(<= 0 x #x7f)
x))
+(define (unsigned-8 x)
+ (and (exact-nonnegative-integer? x)
+ (<= 0 x #xff)
+ x))
+
(define (unsigned-12 x)
(and (exact-nonnegative-integer? x)
(<= 0 x #xfff)
((NV) 'AL)
(else #f)))
+(define (nzcv-value x)
+ (and (exact-integer? x)
+ (<= 0 x #xf)
+ x))
+
(define (sf-size size)
(case size
((W) 0)
((X) #b01)
(else #f)))
+(define (ldr-literal-simd/fp-size t)
+ (case t
+ ((S) #b00)
+ ((D) #b01)
+ ((Q) #b10)
+ (else #f)))
+
(define (load/store-extend-type t)
(case t
((UTXW) #b010)
((H) #b01)
((W) #b10)
(else #f)))
+
+(define (fp-scalar-size t)
+ (case t
+ ((H) #b11)
+ ((S) #b00)
+ ((D) #b01)
+ (else #f)))
+
+(define (fp-vector-size t)
+ ;; Low bit is Q, next five bits are opcode, high bit is sz.
+ (case t
+ ((H4) #b1111000)
+ ((H8) #b1111001)
+ ((S2) #b0100000)
+ ((S4) #b0100001)
+ ((D2) #b1100001)
+ (else #f)))
+
+(define (fp16-vector-size t)
+ ;; Q
+ (case t
+ ((H4) 0)
+ ((H8) 1)
+ (else #f)))
+
+(define (fp32/64-vector-size t)
+ (case t
+ ((S2) #b00)
+ ((S4) #b01)
+ ((D2) #b11)
+ (else #f)))
+
+(define (simd-byte-vector-size t)
+ ;; Q
+ (case t
+ ((B8) 0)
+ ((B16) 1)
+ (else #f)))
+
+(define (simd-integer-vector-size t)
+ ;; Low bit is Q, high two bits are sz.
+ (case t
+ ((B8) #b000)
+ ((B16) #b001)
+ ((H4) #b010)
+ ((H8) #b011)
+ ((S2) #b100)
+ ((S4) #b101)
+ (else #f)))
+
+(define (simd-double-integer-vector-size t)
+ ;; Low bit is Q, high two bits are sz.
+ (case t
+ ((B8) #b000)
+ ((B16) #b001)
+ ((H4) #b010)
+ ((H8) #b011)
+ ((S2) #b100)
+ ((S4) #b101)
+ ((D2) #b111)
+ (else #f)))
+
+(define (fmov-vector-immediate-size t)
+ ;; #b{U}{op}{Q}
+ (case t
+ ((H4) #b100)
+ ((H8) #b101)
+ ((S2) #b000)
+ ((S4) #b001)
+ ((D2) #b010)
+ (else #f)))
+
+(define (fmov-general-size t)
+ ;; Low two bits are type; high two bits are rmode.
+ (case t
+ ((H) #b0011)
+ ((S) #b0000)
+ ((D) #b0001)
+ ((D1) #b0110)
+ (else #f)))
+
+(define (fmov-abc x)
+ (and (exact-nonnegative-integer? x)
+ (<= 0 x #b111)
+ x))
+
+(define (fmov-defgh x)
+ (and (exact-nonnegative-integer? x)
+ (<= 0 x #b11111)
+ x))
+
+;; XXX fixed-point
+(define (fmov-abcdefgh x)
+ x
+ #f)
+
+;; XXX 8-bit floating-point
+(define (fp-binary8 x)
+ x
+ #f)
((register-value-class=word? register) 'GENERAL)
((register-value-class=float? register) 'FLOAT)
(else (error "Unknown register type:" register))))
+
+;;; Convert between RTL repersentation of machine (not pseudo)
+;;; registers, and LAP syntax for register numbers.
+
+(define (float-register->fpr register)
+ (assert (<= v0 register))
+ (assert (<= register v31))
+ (- register 32))
+
+(define (fpr->float-register fpr)
+ (+ fpr 32))
\f
;;; References, for machine register allocator. Not used by LAP
;;; syntax.
(LAP (ADD X ,target ,source (&U 0)))
(LAP (ORR X ,target Z ,source))))
((FLOAT)
- (LAP (FMOV D ,target ,source)))
+ (let ((source (float-register->fpr source))
+ (target (float-register->fpr target)))
+ (LAP (FMOV D ,target ,source))))
(else
(error "Unknown register type:" source target)))))
(define (load-register register ea)
(case (register-type register)
((GENERAL) (LAP (LDR X ,register ,ea)))
- ((FLOAT) (LAP (LDR D ,register ,ea)))
+ ((FLOAT) (LAP (LDR D ,(float-register->fpr register) ,ea)))
(else (error "Unknown register type:" register))))
(define (store-register register ea)
(case (register-type register)
((GENERAL) (LAP (STR X ,register ,ea)))
- ((FLOAT) (LAP (STR D ,register ,ea)))
+ ((FLOAT) (LAP (STR D ,(float-register->fpr register) ,ea)))
(else (error "Unknown register type:" register))))
\f
;;; Utilities
(LAP ,@(load-pc-relative-address target label)
(LDR X ,target ,target)))
+(define (load-pc-relative-float target temp label)
+ (LAP ,@(load-pc-relative-address temp label)
+ (LDR.V D ,target ,temp)))
+
(define (load-tagged-immediate target type datum)
(load-unsigned-immediate target (make-non-pointer-literal type datum)))
(define (object->address target source)
(object->datum target source))
\f
+;;;; Data labels
+
+(define (allocate-data-label datum block-name offset alignment data)
+ (let* ((block
+ (or (find-extra-code-block block-name)
+ (let ((block
+ (declare-extra-code-block! block-name 'ANYWHERE '())))
+ (add-extra-code!
+ block
+ (LAP (PADDING ,offset ,alignment ,padding-string)))
+ block)))
+ (pairs (extra-code-block/xtra block))
+ (place (assoc datum pairs)))
+ (if place
+ (cdr place)
+ (let ((label (generate-label block-name)))
+ (set-extra-code-block/xtra!
+ block
+ (cons (cons datum label) pairs))
+ (add-extra-code! block (LAP (LABEL ,label) ,@data))
+ label))))
+\f
;;;; Linearizer interface
(define (lap:make-label-statement label)
;; XXX doesn't belong here
(define-integrable type-code:fixnum #x1a)
+(define-integrable type-code:flonum #x06)
(define-integrable type-code:manifest-closure #x0d)
(define-integrable type-code:manifest-vector #x00)
(object-datum object))
\f
(define compiler:open-code-floating-point-arithmetic?
- ;; XXX not yet
#f)
(define compiler:primitives-with-no-open-coding
DIVIDE-FIXNUM ;nobody open-codes this
FIXNUM-LSH ;open-coding not useful without constant operands
FLOATING-VECTOR-CONS;nobody open-codes this
- FLOATING-VECTOR-REF ;no flonum arithmetic yet
- FLOATING-VECTOR-SET!;no flonum arithmetic yet
- FLONUM-ABS ;no flonum arithmetic yet
FLONUM-ACOS ;not useful to open-code hairy math
- FLONUM-ADD ;no flonum arithmetic yet
FLONUM-ASIN ;not useful to open-code hairy math
FLONUM-ATAN ;not useful to open-code hairy math
FLONUM-ATAN2 ;not useful to open-code hairy math
- FLONUM-CEILING ;no flonum arithmetic yet
FLONUM-COS ;not useful to open-code hairy math
- FLONUM-DIVIDE ;no flonum arithmetic yet
FLONUM-EXP ;not useful to open-code hairy math
FLONUM-EXPM1 ;not useful to open-code hairy math
- FLONUM-FLOOR ;no flonum arithmetic yet
FLONUM-LOG ;not useful to open-code hairy math
FLONUM-LOG1P ;not useful to open-code hairy math
- FLONUM-MULTIPLY ;no flonum arithmetic yet
- FLONUM-NEGATE ;no flonum arithmetic yet
- FLONUM-ROUND ;no flonum arithmetic yet
FLONUM-SIN ;not useful to open-code hairy math
- FLONUM-SQRT ;no flonum arithmetic yet
- FLONUM-SUBTRACT ;no flonum arithmetic yet
FLONUM-TAN ;not useful to open-code hairy math
- FLONUM-TRUNCATE ;no flonum arithmetic yet
GCD-FIXNUM ;nobody open-codes this
VECTOR-CONS ;nobody open-codes this
))
(lambda (label) (LAP (CBZ X ,source (@PCR ,label ,regnum:scratch-0))))
(lambda (label) (LAP (CBNZ X ,source (@PCR ,label ,regnum:scratch-0))))))
+(define (set-test-bit-set-branches! source bit)
+ (set-current-branches!
+ (lambda (label)
+ (LAP (TBNZ X ,source (&U ,bit) (@PCR ,label ,regnum:scratch-0))))
+ (lambda (label)
+ (LAP (TBZ X ,source (&U ,bit) (@PCR ,label ,regnum:scratch-0))))))
+
+(define (set-test-bit-clear-branches! source bit)
+ (set-current-branches!
+ (lambda (label)
+ (LAP (TBZ X ,source (&U ,bit) (@PCR ,label ,regnum:scratch-0))))
+ (lambda (label)
+ (LAP (TBNZ X ,source (&U ,bit) (@PCR ,label ,regnum:scratch-0))))))
+
(define (set-condition-branches! cc ~cc)
(set-current-branches!
(lambda (label) (LAP (B. ,cc (@PCR ,label ,regnum:scratch-0))))
(declare (usual-integrations))
\f
-;;; XXX not yet
+;; register: RTL representation of machine (not pseudo) register
+;; fpr: LAP representation of SIMD/FP register
+;;
+;; (not needed for general because the numbers coincide)
-#f
+(define (float-source-register! register)
+ (assert (eq? 'FLOAT (register-type register)))
+ (load-alias-register! register 'FLOAT))
+
+(define (float-source-fpr! register)
+ (float-register->fpr (float-source-register! register)))
+
+(define (float-move-to-temporary-register! register)
+ (assert (eq? 'FLOAT (register-type register)))
+ (move-to-temporary-register! register 'FLOAT))
+
+(define (float-move-to-temporary-fpr! register)
+ (float-register->fpr (float-move-to-temporary-register! register)))
+
+(define (float-target-register! register)
+ (assert (eq? 'FLOAT (register-type register)))
+ (delete-dead-registers!)
+ (allocate-alias-register! register 'FLOAT))
+
+(define (float-target-fpr! register)
+ (float-register->fpr (float-target-register! register)))
+
+(define (float-temporary-register!)
+ (allocate-temporary-register! 'FLOAT))
+
+(define (float-temporary-fpr!)
+ (float-register->fpr (float-temporary-register!)))
+
+(define (allocate-binary64-label bit-string)
+ (allocate-data-label bit-string 'BINARY64 0 8
+ (LAP (DATA 64 U ,(bit-string->unsigned-integer bit-string)))))
+
+(define binary64-bits:+inf
+ (unsigned-integer->bit-string 64 #x7ff0000000000000))
+
+(define binary64-bits:smallest-normal
+ (unsigned-integer->bit-string 64 #x0010000000000000))
+
+;; Layout:
+;;
+;; 0 manifest-nm-vector [length = 1 word]
+;; 8 double (binary64) float
+;; 16
+\f
+;;;; Flonum Loads & Stores
+
+;; Float load/store.
+
+(define-rule statement
+ (ASSIGN (REGISTER (? target)) (OBJECT->FLOAT (REGISTER (? source))))
+ (let* ((source (standard-source! source))
+ (target (float-target-fpr! target))
+ (temp regnum:scratch-0))
+ (LAP ,@(object->address temp source)
+ (LDR.V D ,target (+ ,temp (&U (* 8 1)))))))
+
+(define-rule statement
+ (ASSIGN (REGISTER (? target)) (FLOAT->OBJECT (REGISTER (? source))))
+ (let* ((source (float-source-fpr! source))
+ (target (standard-target! target))
+ (temp regnum:scratch-0)
+ (Free regnum:free-pointer))
+ (LAP
+ ;; target := Free
+ ,@(register->register-transfer Free target)
+ ;; temp := manifest-nm-vector, length = 1 word
+ ,@(load-tagged-immediate temp (ucode-type MANIFEST-NM-VECTOR) 1)
+ ;; *Free++ := temp
+ (STR X ,temp (POST+ ,Free (& 8)))
+ ;; *Free++ := source
+ (STR.V D ,source (POST+ ,Free (& 8)))
+ ;; Tag it.
+ ,@(affix-type target type-code:flonum target (lambda () temp)))))
+
+;; Constant-offset floating vector load/store.
+
+(define-rule statement
+ (ASSIGN (REGISTER (? target))
+ (FLOAT-OFFSET (REGISTER (? base))
+ (MACHINE-CONSTANT (? offset))))
+ (QUALIFIER (fits-in-unsigned-12? offset))
+ (let* ((base (standard-source! base))
+ (target (float-target-fpr! target)))
+ (LAP (LDR.V D ,target (+ ,base (&U (* 8 ,offset)))))))
+
+(define-rule statement
+ (ASSIGN (FLOAT-OFFSET (REGISTER (? base))
+ (MACHINE-CONSTANT (? offset)))
+ (REGISTER (? source)))
+ (QUALIFIER (fits-in-unsigned-12? offset))
+ (let* ((base (standard-source! base))
+ (source (float-source-fpr! source)))
+ (LAP (STR.V D ,source (+ ,base (&U (* 8 ,offset)))))))
+
+;; Variable-offset floating vector load/store.
+
+(define-rule statement
+ (ASSIGN (REGISTER (? target))
+ (FLOAT-OFFSET (REGISTER (? base))
+ (REGISTER (? offset))))
+ (let* ((base (standard-source! base))
+ (offset (standard-source! offset))
+ (target (float-target-fpr! target)))
+ (LAP (LDR.V D ,target (+ ,base (LSL ,offset 3))))))
+
+(define-rule statement
+ (ASSIGN (FLOAT-OFFSET (REGISTER (? base))
+ (REGISTER (? offset)))
+ (REGISTER (? source)))
+ (let* ((base (standard-source! base))
+ (offset (standard-source! offset))
+ (source (float-source-fpr! source)))
+ (LAP (STR.V D ,source (+ ,base (LSL ,offset 3))))))
+\f
+;;;; Flonum Arithmetic -- unary operations
+
+(define-rule statement
+ (ASSIGN (REGISTER (? target))
+ (FLONUM-1-ARG (? operator) (REGISTER (? source)) (? overflow?)))
+ overflow? ;ignore
+ (let* ((source (float-source-fpr! source))
+ (target (float-target-fpr! target)))
+ ((flonum-1-arg/operator operator) target source)))
+
+(define-integrable (flonum-1-arg/operator operation)
+ (lookup-arithmetic-method operation flonum-methods/1-arg))
+
+(define flonum-methods/1-arg
+ (list 'FLONUM-METHODS/1-ARG))
+
+(define-arithmetic-method 'FLONUM-ABS flonum-methods/1-arg
+ (lambda (target source)
+ (LAP (FABS D ,target ,source))))
+
+(define-arithmetic-method 'FLONUM-NEGATE flonum-methods/1-arg
+ (lambda (target source)
+ (LAP (FNEG D ,target ,source))))
+
+(define-arithmetic-method 'FLONUM-SQRT flonum-methods/1-arg
+ (lambda (target source)
+ (LAP (FSQRT D ,target ,source))))
+
+(define-arithmetic-method 'FLONUM-CEILING flonum-methods/1-arg
+ (lambda (target source)
+ ;; round toward Plus infinity
+ (LAP (FRINTP D ,target ,source))))
+
+(define-arithmetic-method 'FLONUM-FLOOR flonum-methods/1-arg
+ (lambda (target source)
+ ;; round toward Minus infinity
+ (LAP (FRINTM D ,target ,source))))
+
+(define-arithmetic-method 'FLONUM-ROUND flonum-methods/1-arg
+ (lambda (target source)
+ ;; round to Nearest (ties to even)
+ (LAP (FRINTN D ,target ,source))))
+
+(define-arithmetic-method 'FLONUM-TRUNCATE flonum-methods/1-arg
+ (lambda (target source)
+ ;; round toward Zero
+ (LAP (FRINTZ D ,target ,source))))
+\f
+;;;; Flonum Arithmetic -- binary operations
+
+(define-rule statement
+ (ASSIGN (REGISTER (? target))
+ (FLONUM-2-ARGS (? operator)
+ (REGISTER (? source1))
+ (REGISTER (? source2))
+ (? overflow?)))
+ overflow? ;ignore
+ ((flonum-2-args/operator operator) target source1 source2))
+
+(define flonum-methods/2-args
+ (list 'FLONUM-METHODS/2-ARGS))
+
+(define-integrable (flonum-2-args/operator operator)
+ (lookup-arithmetic-method operator flonum-methods/2-args))
+
+(define ((flonum-2-args/standard operate) target source1 source2)
+ (let* ((source1 (float-source-fpr! source1))
+ (source2 (float-source-fpr! source2))
+ (target (float-target-fpr! target)))
+ (operate target source1 source2)))
+
+(define ((flonum-2-args/temporary operate) target source1 source2)
+ ;; Allocate a temporary register that we can write to without
+ ;; destroying source1 and source2; then assign it as an alias for the
+ ;; target at the end.
+ (let* ((source1 (float-source-fpr! source1))
+ (source2 (float-source-fpr! source2))
+ (temp-register (float-temporary-register!)))
+ (delete-dead-registers!)
+ (rtl-target:=machine-register! target temp-register)
+ (operate (float-register->fpr temp-register) source1 source2)))
+
+(define-arithmetic-method 'FLONUM-ADD flonum-methods/2-args
+ (flonum-2-args/standard
+ (lambda (target source1 source2)
+ (LAP (FADD D ,target ,source1 ,source2)))))
+
+(define-arithmetic-method 'FLONUM-SUBTRACT flonum-methods/2-args
+ (flonum-2-args/standard
+ (lambda (target source1 source2)
+ (LAP (FSUB D ,target ,source1 ,source2)))))
+
+(define-arithmetic-method 'FLONUM-MULTIPLY flonum-methods/2-args
+ (flonum-2-args/standard
+ (lambda (target source1 source2)
+ (LAP (FMUL D ,target ,source1 ,source2)))))
+
+(define-arithmetic-method 'FLONUM-DIVIDE flonum-methods/2-args
+ (flonum-2-args/standard
+ (lambda (target source1 source2)
+ (LAP (FDIV D ,target ,source1 ,source2)))))
+
+(define-arithmetic-method 'FLONUM-COPYSIGN flonum-methods/2-args
+ (flonum-2-args/temporary
+ (lambda (target source1 source2)
+ (let ((tempx (general-temporary!)))
+ ;; XXX do this without a temporary general register -- MOVI?
+ (LAP (MOVZ X ,tempx (LSL (&U #x8000) 48))
+ (FMOV D ,target X ,tempx)
+ ;; target[i] := source2[i] if signbit[i] else source1[i]
+ (BSL B8 ,target ,source2 ,source1))))))
+\f
+;;;; Flonum Predicates
+
+(define-rule predicate
+ (FLONUM-PRED-1-ARG (? predicate) (REGISTER (? source)))
+ (let* ((predicate (flonum-pred-1->2-args predicate))
+ (source1 (float-source-fpr! source))
+ (source2 'Z))
+ (float-branch! predicate)
+ (float-comparison predicate source1 source2)))
+
+(define (flonum-pred-1->2-args predicate)
+ (case predicate
+ ((FLONUM-NEGATIVE?) 'FLONUM-LESS?)
+ ((FLONUM-IS-ZERO?) 'FLONUM-IS-EQUAL?)
+ ((FLONUM-ZERO?) 'FLONUM-EQUAL?)
+ ((FLONUM-POSITIVE?) 'FLONUM-POSITIVE?)
+ (else (error "Invalid flonum-pred-1-arg:" predicate))))
+
+(define-rule predicate
+ (FLONUM-PRED-1-ARG FLONUM-IS-NEGATIVE? (REGISTER (? source)))
+ (let* ((source (float-source-fpr! source))
+ ;; Avoid scratch-0, which may be used in the branches.
+ (temp regnum:scratch-1))
+ (set-test-bit-set-branches! temp 63)
+ (LAP (FMOV X ,temp D ,source))))
+
+(define-rule predicate
+ (FLONUM-PRED-1-ARG FLONUM-IS-NAN? (REGISTER (? source)))
+ (let ((source (float-source-fpr! source)))
+ (set-condition-branches! 'VS 'VC)
+ (LAP (FCMP D ,source ,source))))
+
+(define-rule predicate
+ (FLONUM-PRED-1-ARG FLONUM-IS-INFINITE? (REGISTER (? source)))
+ ;; Discriminate on |x| = +inf, versus |x| =/= +inf or unordered.
+ (set-condition-branches! 'EQ 'NE)
+ (let ((source (float-source-fpr! source)))
+ (delete-dead-registers!)
+ (let* ((temp-source (float-temporary-fpr!))
+ (temp-inf (float-temporary-fpr!))
+ (label (allocate-binary64-label binary64-bits:+inf)))
+ (LAP (FABS D ,temp-source ,source)
+ ,@(load-pc-relative-float temp-inf regnum:scratch-0 label)
+ (FCMP D ,temp-source ,temp-inf)))))
+\f
+(define-rule predicate
+ (FLONUM-PRED-1-ARG FLONUM-IS-FINITE? (REGISTER (? source)))
+ ;; Discriminate on |x| < +inf, versus |x| >= +inf or unordered.
+ (set-condition-branches! 'LO 'HS) ;LO = lt; HS = gt, eq, or un
+ (let ((source (float-source-fpr! source)))
+ (delete-dead-registers!)
+ (let* ((temp-source (float-temporary-fpr!))
+ (temp-inf (float-temporary-fpr!))
+ (label (allocate-binary64-label binary64-bits:+inf)))
+ (LAP (FABS D ,temp-source ,source)
+ ,@(load-pc-relative-float temp-inf regnum:scratch-0 label)
+ (FCMP D ,temp-source ,temp-inf)))))
+
+(define-rule predicate
+ (FLONUM-PRED-1-ARG FLONUM-IS-NORMAL? (REGISTER (? source)))
+ ;; Break it into two steps:
+ ;;
+ ;; 1. Is |x| < +inf, versus |x| >= +inf or unordered?
+ ;; 2. Is |x| >= smallest normal, versus |x| < smallest normal?
+ ;;
+ ;; The branches are:
+ ;;
+ ;; GE: gt or eq, when n = v
+ ;; LO: lt, when c = 0
+ ;;
+ (set-condition-branches! 'GE 'LO)
+ (let ((source (float-source-fpr! source)))
+ (delete-dead-registers!)
+ (let* ((temp-source (float-temporary-fpr!))
+ (temp-inf (float-temporary-fpr!))
+ (temp-norm (float-temporary-fpr!))
+ (label-inf (allocate-binary64-label binary64-bits:+inf))
+ (label-norm
+ (allocate-binary64-label binary64-bits:smallest-normal)))
+ ;; XXX Use LDP to load both registers in one go.
+ (LAP (FABS D ,temp-source ,source)
+ ,@(load-pc-relative-float temp-inf regnum:scratch-0 label-inf)
+ ,@(load-pc-relative-float temp-norm regnum:scratch-0 label-norm)
+ (FCMP D ,temp-source ,temp-inf)
+ ;; If |x| < +inf, compare source and norm; otherwise, if |x|
+ ;; >= +inf or |x| and +inf are unordered, set NCVB := 1000
+ ;; so that GE will be not-taken and LO will be taken.
+ (FCCMP D ,temp-source ,temp-norm (&U #b1000) LT)))))
+\f
+(define-rule predicate
+ (FLONUM-PRED-2-ARGS (? predicate)
+ (REGISTER (? source1))
+ (REGISTER (? source2)))
+ (let* ((source1 (float-source-fpr! source1))
+ (source2 (float-source-fpr! source2)))
+ (float-branch! predicate)
+ (float-comparison predicate source1 source2)))
+
+(define (float-branch! predicate)
+ (case predicate
+ ((FLONUM-EQUAL? FLONUM-IS-EQUAL?)
+ (set-equal-branches!))
+ ((FLONUM-LESS? FLONUM-IS-LESS?)
+ (set-condition-branches! 'LO 'HS)) ;LO = lt; HS = gt, eq, or un
+ ((FLONUM-GREATER? FLONUM-IS-GREATER?)
+ (set-condition-branches! 'GT 'LE)) ;LE = le, eq, or un
+ ((FLONUM-IS-LESS-OR-EQUAL?) ;XXX FLONUM-LESS-OR-EQUAL?
+ (set-condition-branches! 'LS 'HI)) ;LS = le or eq; HI = gt or un
+ ((FLONUM-IS-GREATER-OR-EQUAL?) ;XXX FLONUM-GREATER-OR-EQUAL?
+ (set-condition-branches! 'GE 'LT)) ;LT = lt or un
+ ((FLONUM-IS-LESS-OR-GREATER?) ;XXX FLONUM-LESS-OR-GREATER?
+ (set-current-branches!
+ (lambda (label) ;branch to label if less or greater
+ (let ((unordered (generate-label 'UNORDERED)))
+ (LAP
+ ;; branch if unordered (V=1)
+ (B. VS (@PCR ,unordered ,regnum:scratch-0))
+ ;; branch if less or greater
+ (B. NE (@PCR ,label ,regnum:scratch-0))
+ (LABEL ,unordered))))
+ (lambda (label) ;branch to label if NaN or equal
+ (LAP
+ ;; branch if unordered (V=1)
+ (B. VS (@PCR ,label ,regnum:scratch-0))
+ ;; branch if equal
+ (B. EQ (@PCR ,label ,regnum:scratch-0))))))
+ ((FLONUM-IS-UNORDERED?)
+ (set-condition-branches! 'VS 'VC))
+ (else
+ (error "Unknown float predicate:" predicate))))
+
+(define (float-comparison predicate source1 source2)
+ (case predicate
+ ((FLONUM-EQUAL? FLONUM-LESS? FLONUM-GREATER?)
+ ;; floating-point compare with exception
+ (LAP (FCMPE D ,source1 ,source2)))
+ ((FLONUM-IS-EQUAL?
+ FLONUM-IS-LESS?
+ FLONUM-IS-GREATER?
+ FLONUM-IS-LESS-OR-EQUAL?
+ FLONUM-IS-GREATER-OR-EQUAL?
+ FLONUM-IS-LESS-OR-GREATER?
+ FLONUM-IS-UNORDERED?)
+ ;; floating-point quiet compare
+ (LAP (FCMP D ,source1 ,source2)))))