(define compiler:primitives-with-no-open-coding
'(DIVIDE-FIXNUM GCD-FIXNUM &/
VECTOR-CONS STRING-ALLOCATE FLOATING-VECTOR-CONS
+ FLONUM-FMA
FLONUM-IS-LESS? FLONUM-IS-LESS-OR-EQUAL?
FLONUM-IS-GREATER? FLONUM-IS-GREATER-OR-EQUAL?
FLONUM-IS-LESS-OR-GREATER? FLONUM-IS-UNORDERED?
(define-fp-binary-instruction FMAXNM #b0110 0 0 0 #b000)
(define-fp-binary-instruction FMINNM #b0111 0 1 1 #b000))
+(let-syntax
+ ((define-fp-ternary-instruction
+ (sc-macro-transformer
+ (lambda (form environment)
+ environment
+ (receive (mnemonic o1 o0) (apply values (cdr form))
+ `(define-instruction ,mnemonic
+ (((? type fp-scalar-size)
+ (? Rd vregister)
+ (? Rn vregister)
+ (? Rm vregister)
+ (? Ra vregister))
+ (BITS (1 0)
+ (1 0)
+ (1 0)
+ (5 #b11111)
+ (2 type)
+ (1 ,o1)
+ (5 Rm)
+ (1 ,o0)
+ (5 Ra)
+ (5 Rn)
+ (5 Rd)))))))))
+ (define-fp-ternary-instruction FMADD 0 0)
+ (define-fp-ternary-instruction FMSUB 0 1)
+ (define-fp-ternary-instruction FNMADD 1 0)
+ (define-fp-ternary-instruction FNMSUB 1 1))
+
(let-syntax
((define-fp-compare-instruction
(sc-macro-transformer
;; target[i] := source2[i] if signbit[i] else source1[i]
(BSL B8 ,target ,source2 ,source1))))))
\f
+;;;; Fused multiply/add/negate
+
+(define-rule statement
+ (ASSIGN (REGISTER (? target))
+ (FLONUM-3-ARGS FLONUM-FMA
+ (REGISTER (? factor1))
+ (REGISTER (? factor2))
+ (REGISTER (? addend))
+ (? overflow?)))
+ overflow? ;ignore
+ ((flonum-3-args/standard
+ (lambda (target factor1 factor2 addend)
+ (LAP (FMADD D ,target ,factor1 ,factor2 ,addend))))
+ target factor1 factor2 addend))
+
+;;; XXX The following rules are busted because RTL instruction folding
+;;; (rcompr.scm) doesn't know how to search through flonum operations
+;;; or something -- and FIND-REFERENCE-INSTRUCTION is too mysterious
+;;; for me to understand at this hour!
+
+;;; XXX What about (fma x y (- 0. z)) or (- 0. (fma x y z))? Need to
+;;; check sign of zero for results.
+
+(define-rule statement
+ (ASSIGN (REGISTER (? target))
+ (FLONUM-3-ARGS FLONUM-FMA
+ (REGISTER (? factor1))
+ (REGISTER (? factor2))
+ (FLONUM-1-ARG FLONUM-NEGATE
+ (REGISTER (? subtrahend))
+ (? overflow0?))
+ (? overflow1?)))
+ overflow0? overflow1? ;ignore
+ ((flonum-3-args/standard
+ (lambda (target factor1 factor2 subtrahend)
+ (LAP (FMSUB D ,target ,factor1 ,factor2 ,subtrahend))))
+ target factor1 factor2 subtrahend))
+
+(define-rule statement
+ (ASSIGN (REGISTER (? target))
+ (FLONUM-1-ARG FLONUM-NEGATE
+ (FLONUM-3-ARGS FLONUM-FMA
+ (REGISTER (? factor1))
+ (REGISTER (? factor2))
+ (REGISTER (? addend))
+ (? overflow0?))
+ (? overflow1?)))
+ overflow0? overflow1? ;ignore
+ ((flonum-3-args/standard
+ (lambda (target factor1 factor2 addend)
+ (LAP (FNMADD D ,target ,factor1 ,factor2 ,addend))))
+ target factor1 factor2 addend))
+
+(define-rule statement
+ (ASSIGN (REGISTER (? target))
+ (FLONUM-1-ARG FLONUM-NEGATE
+ (FLONUM-3-ARGS FLONUM-FMA
+ (REGISTER (? factor1))
+ (REGISTER (? factor2))
+ (FLONUM-1-ARG FLONUM-NEGATE
+ (REGISTER (? subtrahend))
+ (? overflow0?))
+ (? overflow1?))
+ (? overflow2?)))
+ overflow0? overflow1? overflow2? ;ignore
+ ((flonum-3-args/standard
+ (lambda (target factor1 factor2 subtrahend)
+ (LAP (FNMSUB D ,target ,factor1 ,factor2 ,subtrahend))))
+ target factor1 factor2 subtrahend))
+
+(define ((flonum-3-args/standard operate) target source1 source2 source3)
+ (let* ((source1 (float-source-fpr! source1))
+ (source2 (float-source-fpr! source2))
+ (source3 (float-source-fpr! source3))
+ (target (float-target-fpr! target)))
+ (operate target source1 source2 source3)))
+\f
;;;; Flonum Predicates
(define-rule predicate
;; <= pi/4. Correct argument reduction requires a
;; better approximation of pi than the i387 has.
FLONUM-SIN FLONUM-COS FLONUM-TAN
+ ;; Disabled: need to futz with cpuid flags to
+ ;; determine whether we can use it.
+ FLONUM-FMA
;; Disabled: exp is too much trouble to get right in
;; i387; need 64-bit precision. Let libm do it.
FLONUM-EXP
(define compiler:primitives-with-no-open-coding
'(DIVIDE-FIXNUM GCD-FIXNUM &/ FLONUM-EXPM1 FLONUM-LOG1P
VECTOR-CONS STRING-ALLOCATE FLOATING-VECTOR-CONS
+ FLONUM-FMA
FLONUM-IS-LESS? FLONUM-IS-LESS-OR-EQUAL?
FLONUM-IS-GREATER? FLONUM-IS-GREATER-OR-EQUAL?
FLONUM-IS-LESS-OR-GREATER? FLONUM-IS-UNORDERED?
&/
FLOATING-VECTOR-CONS FLONUM-ACOS FLONUM-ASIN FLONUM-ATAN
FLONUM-ATAN2 FLONUM-CEILING FLONUM-COS FLONUM-EXP FLONUM-EXPM1
- FLONUM-FLOOR FLONUM-LOG FLONUM-LOG1P FLONUM-ROUND FLONUM-SIN
+ FLONUM-FLOOR FLONUM-FMA FLONUM-LOG FLONUM-LOG1P FLONUM-ROUND FLONUM-SIN
FLONUM-TAN FLONUM-TRUNCATE GCD-FIXNUM STRING-ALLOCATE VECTOR-CONS))
\ No newline at end of file
operand1
operand2
overflow?))))))))
-
+\f
(define-expression-method 'FIXNUM-1-ARG
(lambda (receiver scfg-append! operator operand overflow?)
(expression-simplify operand scfg-append!
s-operand2
overflow?))))))))
+(define-expression-method 'FLONUM-3-ARGS
+ (lambda (receiver scfg-append! operator operand1 operand2 operand3 overflow?)
+ (expression-simplify operand1 scfg-append!
+ (lambda (s-operand1)
+ (expression-simplify operand2 scfg-append!
+ (lambda (s-operand2)
+ (expression-simplify operand3 scfg-append!
+ (lambda (s-operand3)
+ (receiver (rtl:make-flonum-3-args
+ operator
+ s-operand1
+ s-operand2
+ s-operand3
+ overflow?))))))))))
+
;;; end EXPRESSION-SIMPLIFY package
)
\ No newline at end of file
value-class=fixnum)
((OBJECT->TYPE)
value-class=type)
- ((OBJECT->FLOAT FLONUM-1-ARG FLONUM-2-ARGS FLOAT-OFFSET)
+ ((OBJECT->FLOAT FLONUM-1-ARG FLONUM-2-ARGS FLONUM-3-ARGS FLOAT-OFFSET)
value-class=float)
(else
(error "unknown RTL expression type" expression))))
FLOAT-OFFSET-ADDRESS
FLONUM-1-ARG
FLONUM-2-ARGS
+ FLONUM-3-ARGS
GENERIC-BINARY
GENERIC-UNARY
OBJECT->ADDRESS
operator operand overflow?)
(define-rtl-expression flonum-2-args rtl:
operator operand-1 operand-2 overflow?)
+(define-rtl-expression flonum-3-args rtl:
+ operator operand-1 operand-2 operand-3 overflow?)
(define-rtl-predicate fixnum-pred-1-arg %
predicate operand)
'(FLONUM-ADD FLONUM-SUBTRACT FLONUM-MULTIPLY FLONUM-DIVIDE FLONUM-ATAN2
FLONUM-COPYSIGN))
\f
+(for-each
+ (lambda (flonum-operator)
+ (define-open-coder/value flonum-operator
+ (floating-point-open-coder
+ (lambda (combination expressions finish)
+ (let ((arg1 (car expressions))
+ (arg2 (cadr expressions))
+ (arg3 (caddr expressions)))
+ (open-code:with-checks
+ combination
+ (let ((name flonum-operator)
+ (block (combination/block combination)))
+ (list (open-code:type-check arg1 (ucode-type flonum) name block)
+ (open-code:type-check arg2 (ucode-type flonum) name block)
+ (open-code:type-check arg3 (ucode-type flonum) name block)))
+ (finish
+ (rtl:make-float->object
+ (rtl:make-flonum-3-args
+ flonum-operator
+ (rtl:make-object->float arg1)
+ (rtl:make-object->float arg2)
+ (rtl:make-object->float arg3)
+ false)))
+ finish
+ flonum-operator
+ expressions)))
+ '(0 1 2)
+ internal-close-coding-for-type-checks)))
+ '(FLONUM-FMA))
+\f
(for-each
(lambda (flonum-pred)
(define-open-coder/predicate flonum-pred
(fix:zero? zero-fixnum?)
(fixnum? fixnum?)
(flo:* flonum-multiply)
+ (flo:*+ flonum-fma 3)
(flo:+ flonum-add)
(flo:- flonum-subtract)
(flo:/ flonum-divide)
(flo:exp flonum-exp)
(flo:expm1 flonum-expm1)
(flo:expt flonum-expt)
+ (flo:fast-fma? flonum-fast-fma? 0)
(flo:finite? flonum-is-finite? 1)
(flo:flonum? flonum?)
(flo:floor flonum-floor)
(flo:floor->exact flonum-floor->exact)
+ (flo:fma flonum-fma 3)
(flo:infinite? flonum-is-infinite? 1)
(flo:log flonum-log)
(flo:log1p flonum-log1p)