From 2b9da75439e37ef7375c7d2b8ccccfefc081f12f Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Thu, 25 Oct 2018 00:05:37 +0000 Subject: [PATCH] Use better formulas for logistic(negative) and logit(near 1/2). Prove some error bounds in comments. --- src/runtime/arith.scm | 228 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 222 insertions(+), 6 deletions(-) diff --git a/src/runtime/arith.scm b/src/runtime/arith.scm index 004637470..6955b2bdf 100644 --- a/src/runtime/arith.scm +++ b/src/runtime/arith.scm @@ -1989,20 +1989,236 @@ USA. ((<= x 33.3) (+ x (exp (- x)))) (else x))) -;;; 1/(1 + e^{-x}) +;;; Lemma 1. If |d'| < 1/2, then |(d' - d)/(1 + d')| <= 2|d' - d|. +;;; +;;; Proof. Case I: If d' > 0, then 1 + d' > 1 so 0 < 1/(1 + d') <= 1. +;;; Case II: If -1/2 < d' < 0, then 1/2 < 1 + d' < 1 so that 1 < 1/(1 + +;;; d') <= 2. QED. +;;; +;;; Lemma 2. If b = a*(1 + d)/(1 + d') for |d'| < 1/2 and nonzero a, b, +;;; then b = a*(1 + e) for |e| <= 2|d' - d|. +;;; +;;; Proof. |a - b|/|a| +;;; = |a - a*(1 + d)/(1 + d')|/|a| +;;; = |1 - (1 + d)/(1 + d')| +;;; = |(1 + d' - 1 - d)/(1 + d')| +;;; = |(d' - d)/(1 + d')| +;;; <= 2|d' - d|, by Lemma 1, +;;; +;;; QED. +;;; +;;; Lemma 3. For |d|, |d'| < 1/4, +;;; +;;; |log((1 + d)/(1 + d'))| <= 4|d - d'|. +;;; +;;; Proof. Write +;;; +;;; log((1 + d)/(1 + d')) +;;; = log(1 + (1 + d)/(1 + d') - 1) +;;; = log(1 + (1 + d - 1 - d')/(1 + d') +;;; = log(1 + (d - d')/(1 + d')). +;;; +;;; By Lemma 1, |(d - d')/(1 + d')| < 2|d' - d| < 1, so the Taylor +;;; series of log1p converges absolutely for (d - d')/(1 + d'), and +;;; thus we have +;;; +;;; |log(1 + (d - d')/(1 + d'))| +;;; = |\sum_{n=1}^\infty ((d - d')/(1 + d'))^n/n| +;;; <= \sum_{n=1}^\infty |(d - d')/(1 + d')|^n/n +;;; <= \sum_{n=1}^\infty |2(d' - d)|^n/n +;;; <= \sum_{n=1}^\infty |2(d' - d)|^n +;;; = 1/(1 - |2(d' - d)|) +;;; <= 4|d' - d|, +;;; +;;; QED. +;;; +;;; Lemma 4. If 1/e <= 1 + x <= e, then +;;; +;;; log1p((1 + d) x) = (1 + d') log1p(x) +;;; +;;; for |d'| < 8|d|. +;;; +;;; Proof. Write +;;; +;;; log1p((1 + d) x) +;;; = log(1 + x + x*d) +;;; = log((1 + x) (1 + x + x*d)/(1 + x)) +;;; = log(1 + x) + log((1 + x + x*d)/(1 + x)) +;;; = log1p(x) (1 + log((1 + x + x*d)/(1 + x))/log1p(x)). +;;; +;;; The relative error is bounded by +;;; +;;; |log((1 + x + x*d)/(1 + x))/log1p(x)| +;;; <= 4|x + x*d - x|/|log1p(x)|, by Lemma 3, +;;; = 4|x*d|/|log1p(x)| +;;; < 8|d|, +;;; +;;; since in this range 0 < 1 - 1/e < x/log(1 + x) <= e - 1 < 2. QED. + +;;; 1/(1 + e^{-x}) = e^x/(1 + e^x) (define (logistic x) (guarantee-real x 'logistic) - (cond ((<= x -745) 0.) - ((<= x -37) (exp x)) - ((<= x 37) (/ 1 (+ 1 (exp (- x))))) - (else 1.))) + (cond ((<= x -745) + ;; e^x/(1 + e^x) < e^x < smallest positive float. (XXX Should + ;; raise inexact and underflow here.) + 0.) + ((<= x -37) + ;; e^x < eps/2, so + ;; + ;; |e^x - e^x/(1 + e^x)|/|e^x/(1 + e^x)| + ;; <= |1 - 1/(1 + e^x)|*|1 + e^x| + ;; = |e^x/(1 + e^x)|*|1 + e^x| + ;; = |e^x/(1 + e^x) + e^x| + ;; <= |e^x + e^x| + ;; < eps. + ;; + (exp x)) + ((< x 0) + ;; e^x < 1 < 1 + e^x, so e^x/(1 + e^x) < 1, so, if exp has + ;; relative error d0, + has relative error d1, and / has + ;; relative error d2, then we get + ;; + ;; (1 + d2) (1 + d0) e^x/[(1 + (1 + d0) e^x)(1 + d1)] + ;; = (1 + d0 + d2 + d0 d2) e^x/[(1 + (1 + d0) e^x)(1 + d1)] + ;; = (1 + d') e^x/[(1 + (1 + d0) e^x)(1 + d1)] + ;; = (1 + d') e^x/[(1 + e^x + d0 e^x)(1 + d1)] + ;; = (1 + d') e^x/[1 + e^x + d0 e^x + d1 + d1 e^x + d1 d0 e^x] + ;; = (1 + d') e^x/[1 + e^x + d0 e^x + d1 (1 + e^x) + d1 d0 e^x] + ;; = (1 + d') e^x/[(1 + e^x)(1 + d0 e^x/(1 + e^x) + ;; + d1 + ;; + d1 d0 e^x/(1 + e^x))] + ;; = (1 + d') e^x/[(1 + e^x)(1 + d'')] + ;; = [e^x/(1 + e^x)] (1 + d')/(1 + d'') + ;; + ;; where + ;; + ;; d' = d0 + d2 + d0 d2, + ;; d'' = d0 e^x/(1 + e^x) + d1 + d1 d0 e^x/(1 + e^x). + ;; + ;; Note that since e^x/(1 + e^x), + ;; + ;; |d''| <= |d0| + |d1| + |d1 d0| <= 2 eps + eps^2, + ;; + ;; so by Lemma 2 the relative error is bounded by + ;; + ;; 2|d' - d''| + ;; <= 2|d0| + 2|d2| + 2|d0 d2| + 2|d0| + 2|d1| + 2|d1 d0| + ;; <= 8 eps + 4 eps^2. + ;; + (let ((e^x (exp x))) + (/ e^x (+ 1 e^x)))) + ((<= x 37) + ;; e^{-x} < 1 < 1 + e^{-x}, so 1/(1 + e^{-x}) < 1, so, if exp + ;; has relative error d0, + has relative error d1, and / has + ;; relative error d2, then we get + ;; + ;; (1 + d2)/[(1 + (1 + d0) e^{-x})(1 + d1)] + ;; = (1 + d0)/[1 + e^{-x} + d0 e^{-x} + ;; + d1 + d1 e^{-x} + d0 d1 e^{-x}] + ;; = (1 + d0)/[(1 + e^{-x})(1 + d0 e^{-x}/(1 + e^{-x}) + ;; + d1/(1 + e^{-x}) + ;; + d0 d1 e^{-x}/(1 + e^{-x}))]. + ;; = (1 + d0)/[(1 + e^{-x})(1 + d')] + ;; = [1/(1 + e^{-x})] (1 + d0)/(1 + d') + ;; + ;; where + ;; + ;; d' = d0 e^{-x}/(1 + e^{-x}) + ;; + d1/(1 + e^{-x}) + ;; + d0 d1 e^{-x}/(1 + e^{-x}). + ;; + ;; By Lemma 2 this relative error is bounded by + ;; + ;; 2|d0 - d'| + ;; = 2|d0 - d0 e^{-x}/(1 + e^{-x}) + ;; - d1/(1 + e^{-x}) + ;; - d0 d1 e^{-x}/(1 + e^{-x})| + ;; <= 2|d0| + 2|d0 e^{-x}/(1 + e^{-x})| + ;; + 2|d1/(1 + e^{-x})| + ;; + 2|d0 d1 e^{-x}/(1 + e^{-x})| + ;; <= 2|d0| + 2|d0| + 2|d1| + 2|d0 d1| + ;; <= 4|d0| + 2|d1| + 2|d0 d1| + ;; <= 6 eps + 2 eps^2. + ;; + (/ 1 (+ 1 (exp (- x))))) + (else + ;; If x > 37, then e^{-x} < eps, so the relative error of 1 + ;; from 1/(1 + e^{-x}) is + ;; + ;; |1/(1 + e^{-x}) - 1|/|1/(1 + e^{-x})| + ;; = |e^{-x}/(1 + e^{-x})|/|1/(1 + e^{-x})| + ;; = |e^{-x}| + ;; <= eps. + ;; + 1.))) + +(define-integrable logit-boundary-lo ;logistic(-1) + (flo:/ (flo:exp -1.) (flo:+ 1. (flo:exp -1.)))) +(define-integrable logit-boundary-hi ;logistic(+1) + (flo:/ 1. (flo:+ 1. (flo:exp -1.)))) ;;; log p/(1 - p) (define (logit p) (guarantee-real p 'logit) (if (not (<= 0 p 1)) (error:bad-range-argument p 'logit)) - (log (/ p (- 1 p)))) + ;; For small p, 1 - p is so close to 1 that log(p) is essentially + ;; log(p/(1 - p)). For p near 1/2, the quotient is close to 1 so we + ;; want to use log1p with the identity + ;; + ;; log(p/(1 - p)) = -log((1 - p)/p) + ;; = -log(1 + (1 - p)/p - 1) + ;; = -log(1 + (1 - p - p)/p) + ;; = -log(1 + (1 - 2p)/p) + ;; = -log1p((1 - 2p)/p). + ;; + ;; to get an intermediate quotient near zero. + (cond ((or (< p logit-boundary-lo) + (< logit-boundary-hi p)) + ;; If - has relative error d0, / has relative error d1, and + ;; log has relative error d2, then + ;; + ;; (1 + d2) log((1 + d0) p/[(1 - p)(1 + d1)]) + ;; = (1 + d2) [log(p/(1 - p)) + log((1 + d0)/(1 + d1))] + ;; = log(p/(1 - p)) + d2 log(p/(1 - p)) + ;; + (1 + d2) log((1 + d0)/(1 + d1)) + ;; = log(p/(1 - p))*[1 + d2 + + ;; + (1 + d2) log((1 + d0)/(1 + d1))/log(p/(1 - p))] + ;; + ;; Since 0 <= p < logistic(-1) or logistic(+1) < p <= 1, we + ;; have |log(p/(1 - p))| > 1. Hence this error is bounded by + ;; + ;; |d2 + (1 + d2) log((1 + d0)/(1 + d1))/log(p/(1 - p))| + ;; <= |d2| + |(1 + d2) log((1 + d0)/(1 + d1))/log(p/(1 - p))|. + ;; <= |d2| + |(1 + d2) log((1 + d0)/(1 + d1))| + ;; <= |d2| + 4|(1 + d2) (d0 - d1)|, by Lemma 3, + ;; <= |d2| + 4|d0 - d1 + d2 d0 - d1 d0| + ;; <= |d2| + 4|d0| + 4|d1| + 4|d2 d0| + 4|d1 d0| + ;; <= 9 eps + 8 eps^2. + ;; + (log (/ p (- 1 p)))) + (else + ;; + ;; Since p = 2p/2 <= 1 <= 2*2p = 4p, the floating-point + ;; evaluation of 1 - 2p is exact; the only error arises from + ;; division and log1p. First, note that if logistic(-1) < p < + ;; logistic(+1), (1 - 2p)/p lies in the bounds of Lemma 4. + ;; + ;; If division has relative error d0 and log1p has relative + ;; error d1, the outcome is + ;; + ;; -(1 + d1) log1p((1 - 2p)*(1 + d0)/p) + ;; = -(1 + d1) (1 + d') log1p((1 - 2p)/p) + ;; = -(1 + d1 + d' + d1 d') log1p((1 - 2p)/p). + ;; + ;; where |d'| < 8|d0| by Lemma 4. The relative error is then + ;; bounded by + ;; + ;; |d1 + d' + d1 d'| + ;; <= |d1| + 8|d0| + 8|d1 d0| + ;; <= 9 eps + 8 eps^2. + ;; + (- (log1p (/ (- 1 (* 2 p)) p)))))) ;;; log logistic(x) = -log (1 + e^{-x}) (define (log-logistic x) -- 2.25.1