From 0b24188b754826e6a8202efcdf2d877f1b833a10 Mon Sep 17 00:00:00 2001 From: "Taylor R. Campbell" Date: Tue, 24 Feb 2009 23:09:56 +0000 Subject: [PATCH] Re-enable open-coding of FLONUM-ASIN and FLONUM-ACOS on i386, and implement FLONUM-EXP using F2XM1 for the fractional part and FSCALE for the integral part of the argument. This about trebles the speed of these primitives in compiled code, without giving answers substantially different from what the microcode computes. When open-coding of FLONUM-EXP was disabled in 1992, a comment was added to the effect that some i387 implementations had bugs in their F2XM1 instructions. I imagine that these bugs have been fixed in more recent hardware, and that few users care about seventeen-year-old i387 units. If this is a problem we can always disable it again. --- v7/src/compiler/machines/i386/machin.scm | 14 +++--- v7/src/compiler/machines/i386/rulflo.scm | 56 +++++++++++++++++------- 2 files changed, 45 insertions(+), 25 deletions(-) diff --git a/v7/src/compiler/machines/i386/machin.scm b/v7/src/compiler/machines/i386/machin.scm index 195cfbfff..5e617bb8d 100644 --- a/v7/src/compiler/machines/i386/machin.scm +++ b/v7/src/compiler/machines/i386/machin.scm @@ -1,6 +1,6 @@ #| -*-Scheme-*- -$Id: machin.scm,v 1.25 2008/06/09 01:39:28 cph Exp $ +$Id: machin.scm,v 1.26 2009/02/24 23:09:56 riastradh Exp $ Copyright (C) 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, @@ -351,13 +351,9 @@ USA. (define compiler:primitives-with-no-open-coding '(DIVIDE-FIXNUM GCD-FIXNUM &/ ;; Disabled: trig instructions are limited to an - ;; input range of 0 <= X <= pi*2^62. + ;; input range of 0 <= |X| <= pi*2^62, and yield + ;; inaccurate answers for an input range of 0 <= |X| + ;; <= pi/4. Correct argument reduction requires a + ;; better approximation of pi than the i387 has. FLONUM-SIN FLONUM-COS FLONUM-TAN - ;; The rewriting rules in rulrew.scm don't work. - ;; Treat as not available. - FLONUM-ASIN FLONUM-ACOS - ;; Disabled for now. The F2XM1 instruction is - ;; broken on the 387 (or at least some of them), and - ;; in general has a very limited input range. - FLONUM-EXP VECTOR-CONS STRING-ALLOCATE FLOATING-VECTOR-CONS)) \ No newline at end of file diff --git a/v7/src/compiler/machines/i386/rulflo.scm b/v7/src/compiler/machines/i386/rulflo.scm index fb2791474..46c7e3259 100644 --- a/v7/src/compiler/machines/i386/rulflo.scm +++ b/v7/src/compiler/machines/i386/rulflo.scm @@ -1,6 +1,6 @@ #| -*-Scheme-*- -$Id: rulflo.scm,v 1.31 2008/06/09 01:39:29 cph Exp $ +$Id: rulflo.scm,v 1.32 2009/02/24 23:09:56 riastradh Exp $ Copyright (C) 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, @@ -321,18 +321,47 @@ USA. (FXCH (ST 0) (ST 1)) (FYL2X))))) -#| -;; Disabled: F2XM1 is limited to -1 <= X <= +1. (define-arithmetic-method 'FLONUM-EXP flonum-methods/1-arg (flonum-unary-operation/stack-top (lambda () - (LAP (FLDL2E) - (FMULP (ST 1) (ST 0)) - (F2XM1) - (FLD1) - (FADDP (ST 1) (ST 0)))))) -|# - + ;; Hair to avoid arithmetic for non-finite inputs: exp(-inf) = 0, + ;; but exp(x) = x for any other non-finite x. We use the first + ;; free slot (1) to pick apart the double format to check for + ;; non-finite inputs, and (2) to avoid using two stack slots. + (let ((temp (temporary-register-reference)) + (infinity-or-nan (generate-label 'INFINITY-OR-NAN)) + (join (generate-label 'JOIN)) + (temp-pointer regnum:free-pointer)) + (LAP (FST D (@R ,temp-pointer)) + (MOV W ,temp (@RO W ,temp-pointer 4)) + (AND W ,temp (&U #x7FFFFFFF)) + (CMP W ,temp (&U #x7FF00000)) + (JAE B (@PCR ,infinity-or-nan)) + ;; Compute 2^(x log_2 e) with F2XM1 and FSCALE. + (FLDL2E) ;st0 = lg e, st1 = x + (FMULP (ST 1) (ST 0)) ;st0 = x lg e + (FLD (ST 0)) ;st0 = x lg e, st1 = x lg e + (FRNDINT) ;st0 = I(x lg e), st1 = x lg e + (FSUB (ST 1) (ST 0)) ;st0 = I(x lg e), st1 = F(x lg e) + (FSTP D (@R ,temp-pointer)) ;st0 = F(x lg e), save I(x lg e) + (F2XM1) ;st0 = 2^F(x lg e) - 1 + (FLD1) ;st0 = 1, st1 = 2^F(x lg e) - 1 + (FADD) ;st0 = 2^F(x lg e) + (FLD D (@R ,temp-pointer)) ;st0 = I(x lg e), st1 = 2^F(x lg e) + (FXCH (ST 0) (ST 1)) ;st0 = 2^F(x lg e), st1 = I(x lg e) + (FSCALE) ;st0 = 2^F(x lg e) * 2^I(x lg e), + ;st1 = I(x lg e) + (FSTP (ST 1)) ;Drop st1, leaving in st0 the value + (JMP B (@PCR ,join)) ; 2^(F(x lg e) + I(x lg e)) = e^x. + (LABEL ,infinity-or-nan) + (CMP W (@RO W ,temp-pointer 4) (&U #xFFF00000)) + (JNE B (@PCR ,join)) + (CMP W (@RO W ,temp-pointer 0) (& 0)) + (JNE B (@PCR ,join)) + (FSTP (ST 0)) ;Pop argument. + (FLDZ) ;Return zero. + (LABEL ,join)))))) + #| ;; Disabled: FPTAN limited to pi * 2^62. (define-arithmetic-method 'FLONUM-TAN flonum-methods/1-arg @@ -342,17 +371,13 @@ USA. (FSTP (ST 0)) ; FPOP )))) |# - + (define-arithmetic-method 'FLONUM-ATAN flonum-methods/1-arg (flonum-unary-operation/stack-top (lambda () (LAP (FLD1) (FPATAN))))) -#| -;; Disabled: these aren't used due to broken rewriting rules. See -;; rulrew.scm for details. - ;; For now, these preserve values in memory ;; in order to avoid flushing a stack location. @@ -378,7 +403,6 @@ USA. (FLD D (@R ,regnum:free-pointer)) (FXCH (ST 0) (ST 1)) (FPATAN))))) -|# (define-rule statement (ASSIGN (REGISTER (? target)) -- 2.25.1