p1l1 {DPQ}R Documentation

Numerically Stable p1l1(t) = (t+1)*log(1+t) - t

Description

The binomial deviance function bd0(x,M) can mathematically be re-written as bd0(x,M) = M * p1l1((x-M)/M) where we look into providing numerically stable formula for p1l1(t) as its mathematical formula p1l1(t) = (t+1)\log(1+t) - t suffers from cancellation for small |t|, even when log1p(t) is used instead of log(1+t).

Using a hybrid implementation, p1l1() uses a direct formula, now the stable one in p1l1p(), for \left| t \right| > c and a series approximation for \left|t\right| \le c for some c.

NB: The re-expression log1pmx() is almost perfect; it fixes the cancellation problem entirely (and exposes the fact that log1pmx()'s internal cutoff seems sub optimal.

Usage


 p1l1p  (t, ...)
 p1l1.  (t)
 p1l1   (t,    F = t^2/2)
 p1l1ser(t, k, F = t^2/2)
.p1l1ser(t, k, F = t^2/2)

Arguments

t

numeric a-like vector ("mpfr" included), larger (or equal) to -1.

...

optional (tuning) arguments, passed to log1pmx().

k

small positive integer, the number of terms to use in the Taylor series approximation p1l1ser(t,k) of p1l1(t).

F

numeric vector of multiplication factor; must be t^2/2 for the p1l1() function, but can be modified, e.g. in more direct bd0() computations.

Details

for now see in bd0().

Value

numeric vector “as” t.

Author(s)

Martin Maechler

See Also

bd0; our package vignette log1pmx, bd0, stirlerr - Probability Computations in R. dbinom the latter for the C.Loader(2000) reference.

Examples

t <- seq(-1, 4, by=1/64)
plot(t, p1l1ser(t, 1), type="l")
lines(t, p1l1.(t), lwd=5, col=adjustcolor(1, 1/2)) # direct formula
for(k in 2:6) lines(t, p1l1ser(t, k), col=k)

## zoom in
t <- 2^seq(-59,-1, by=1/4)
t <- c(-rev(t), 0, t)
stopifnot(!is.unsorted(t))
k.s <- 1:12; names(k.s) <- paste0("k=", 1:12)

## True function values: use Rmpfr with 256 bits precision: ---
### eventually move this to ../tests/ & ../vignettes/log1pmx-etc.Rnw
#### FIXME: eventually replace with  if(requireNamespace("Rmpfr")){ ......}
#### =====
if((needRmpfr <- is.na(match("Rmpfr", (srch0 <- search())))))
    require("Rmpfr")
p1l1.T <- p1l1.(mpfr(t, 256)) # "true" values
p1l1.n <- asNumeric(p1l1.T)
all.equal(sapply(k.s, function(k)  p1l1ser(t,k)) -> m.p1l1,
          sapply(k.s, function(k) .p1l1ser(t,k)) -> m.p1l., tolerance = 0)
p1tab <-
    cbind(b1 = bd0(t+1, 1),
          b.10 = bd0(10*t+10,10)/10,
          dirct = p1l1.(t),
          p1l1p = p1l1p(t),
          p1l1  = p1l1 (t),
          sapply(k.s, function(k) p1l1ser(t,k)))
matplot(t, p1tab, type="l", ylab = "p1l1*(t)")
## (absolute) error:
##' legend for matplot()
mpLeg <- function(leg = colnames(p1tab), xy = "top", col=1:6, lty=1:5, lwd=1,
                  pch = c(1L:9L, 0L, letters, LETTERS)[seq_along(leg)], ...)
    legend(xy, legend=leg, col=col, lty=lty, lwd=lwd, pch=pch, ncol=3, ...)

titAbs <- "Absolute errors of p1l1(t) approximations"
matplot(t, asNumeric(p1tab - p1l1.T), type="o", main=titAbs); mpLeg()
i <- abs(t) <= 1/10 ## zoom in a bit
matplot(t[i], abs(asNumeric((p1tab - p1l1.T)[i,])), type="o", log="y",
        main=titAbs, ylim = c(1e-18, 0.003)); mpLeg()
## Relative Error
titR <- "|Relative error| of p1l1(t) approximations"
matplot(t[i], abs(asNumeric((p1tab/p1l1.T - 1)[i,])), type="o", log="y",
        ylim = c(1e-18, 2^-10), main=titR)
mpLeg(xy="topright", bg= adjustcolor("gray80", 4/5))
i <- abs(t) <= 2^-10 # zoom in more
matplot(t[i], abs(asNumeric((p1tab/p1l1.T - 1)[i,])), type="o", log="y",
        ylim = c(1e-18, 1e-9))
mpLeg(xy="topright", bg= adjustcolor("gray80", 4/5))


## Correct number of digits
corDig <- asNumeric(-log10(abs(p1tab/p1l1.T - 1)))
cbind(t, round(corDig, 1))# correct number of digits

matplot(t, corDig, type="o", ylim = c(1,17))
(cN <- colnames(corDig))
legend(-.5, 14, cN, col=1:6, lty=1:5, pch = c(1L:9L, 0L, letters), ncol=2)

## plot() function >>>> using global (t, corDig) <<<<<<<<<
p.relEr <- function(i, ylim = c(11,17), type = "o",
                    leg.pos = "left", inset=1/128,
                    main = sprintf(
                        "Correct #{Digits} in p1l1() approx., notably Taylor(k=1 .. %d)",
                                   max(k.s)))
{
    if((neg <- all(t[i] < 0)))
        t  <- -t
    stopifnot(all(t[i] > 0), length(ylim) == 2) # as we use log="x"
    matplot(t[i], corDig[i,], type=type, ylim=ylim, log="x", xlab = quote(t), xaxt="n",
            main=main)
    legend(leg.pos, cN, col=1:6, lty=1:5, pch = c(1L:9L, 0L, letters), ncol=2,
           bg=adjustcolor("gray90", 7/8), inset=inset)
    t.epsC <- -log10(c(1,2,4)* .Machine$double.eps)
    axis(2, at=t.epsC, labels = expression(epsilon[C], 2*epsilon[C], 4*epsilon[C]),
         las=2, col=2, line=1)
    tenRs <- function(t) floor(log10(min(t))) : ceiling(log10(max(t)))
    tenE <- tenRs(t[i])
    tE <- 10^tenE
    abline (h = t.epsC,
            v = tE, lty=3, col=adjustcolor("gray",.8), lwd=2)
    AX <- if(requireNamespace("sfsmisc")) sfsmisc::eaxis else axis
    AX(1, at= tE, labels = as.expression(
                      lapply(tenE,
                             if(neg)
                                 function(e) substitute(-10^{E}, list(E = e+0))
                             else
                                 function(e) substitute( 10^{E}, list(E = e+0)))))
}

p.relEr(t > 0, ylim = c(1,17))
p.relEr(t > 0) # full positive range
p.relEr(t < 0) # full negative range
if(FALSE) {## (actually less informative):
 p.relEr(i = 0 < t & t < .01)  ## positive small t
 p.relEr(i = -.1 < t & t < 0) ## negative small t
}

## Find approximate formulas for accuracy of k=k*  approximation
d.corrD <- cbind(t=t, as.data.frame(corDig))
names(d.corrD) <- sub("k=", "nC_",  names(d.corrD))

fmod <- function(k, data, cut.y.at = -log10(2 * .Machine$double.eps),
                 good.y = -log10(.Machine$double.eps), # ~ 15.654
                 verbose=FALSE) {
    varNm <- paste0("nC_",k)
    stopifnot(is.numeric(y <- get(varNm, data, inherits=FALSE)),
              is.numeric(t <- data$t))# '$' works for data.frame, list, environment
    i <- 3 <= y & y <= cut.y.at
    i.pos <- i & t > 0
    i.neg <- i & t < 0
    if(verbose) cat(sprintf("k=%d >> y <= %g ==> #{pos. t} = %d ;  #{neg. t} = %d\n",
                            k, cut.y.at, sum(i.pos), sum(i.neg)))
    nCoefLm <- function(x,y) `names<-`(.lm.fit(x=x, y=y)$coeff, c("int", "slp"))
    nC.t <- function(x,y) { cf <- nCoefLm(x,y); c(cf, t.0 = exp((good.y - cf[[1]])/cf[[2]])) }
    cbind(pos = nC.t(cbind(1, log( t[i.pos])), y[i.pos]),
          neg = nC.t(cbind(1, log(-t[i.neg])), y[i.neg]))
}
rr <- sapply(k.s, fmod, data=d.corrD, verbose=TRUE, simplify="array")
stopifnot(rr["slp",,] < 0) # all slopes are negative (important!)
matplot(k.s, t(rr["slp",,]), type="o", xlab = quote(k), ylab = quote(slope[k]))
## fantastcally close to linear in k
## The numbers, nicely arranged
ftable(aperm(rr, c(3,2,1)))
signif(t(rr["t.0",,]),3) # ==> Should be boundaries for the hybrid p1l1()
##           pos      neg
## k=1  6.60e-16 6.69e-16
## k=2  3.65e-08 3.65e-08
## k=3  1.30e-05 1.32e-05
## k=4  2.39e-04 2.42e-04
## k=5  1.35e-03 1.38e-03
## k=6  4.27e-03 4.34e-03
## k=7  9.60e-03 9.78e-03
## k=8  1.78e-02 1.80e-02
## k=9  2.85e-02 2.85e-02
## k=10 4.13e-02 4.14e-02
## k=11 5.62e-02 5.64e-02
## k=12 7.24e-02 7.18e-02

###------------- Well,  p1l1p()  is really basically good enough ... with a small exception:
rErr1k <- curve(asNumeric(p1l1p(x) / p1l1.(mpfr(x, 4096)) - 1), -.999, .999,
                n = 4000, col=2, lwd=2)
abline(h = c(-8,-4,-2:2,4,8)* 2^-52, lty=2, col=adjustcolor("gray20", 1/4))
## well, have a "spike" at around -0.8 -- why?

plot(abs(y) ~ x, data = rErr1k, ylim = c(4e-17, max(abs(y))),
     ylab = expression(abs(hat(p)/p - 1)),
     main = "p1l1p(x) -- Relative Error wrt mpfr(*. 4096) [log]",
     col=2, lwd=1.5, type = "b", cex=1/2, log="y", yaxt="n")
sfsmisc::eaxis(2)
eps124 <-  c(1, 2,4,8)* 2^-52
abline(h = eps124, lwd=c(3,1,1,1), lty=c(1,2,2,2), col=adjustcolor("gray20", 1/4))
axLab <- expression(epsilon[c], 2*epsilon[c], 4*epsilon[c], 8*epsilon[c])
axis(4, at = eps124, labels = axLab, col="gray20", las=1)
abline(v= -.791, lty=3, lwd=2, col="blue4") # -.789  from visual ..
##--> The "error" is in log1pmx() which has cutoff minLog1Value = -0.79149064
##--> which is clearly not optimal, at least not for computing p1l1p()

d <- 1/2048; x <- seq(-1+d, 1, by=d)
p1l1Xct <- p1l1.(mpfr(x, 4096))
rEx.5 <- asNumeric(p1l1p(x, minL1 = -0.5) / p1l1Xct - 1)
lines(x, abs(rEx.5), lwd=2.5, col=adjustcolor(4, 1/2)); abline(v=-.5, lty=2,col=4)
rEx.25 <- asNumeric(p1l1p(x, minL1 = -0.25) / p1l1Xct - 1)
lines(x, abs(rEx.25), lwd=3.5, col=adjustcolor(6, 1/2)); abline(v=-.25, lty=2,col=6)
lines(lowess(x, abs(rEx.5),  f=1/20), col=adjustcolor(4,offset=rep(1,4)/3), lwd=3)

lines(lowess(x, abs(rEx.25), f=1/20), col=adjustcolor(6,offset=rep(1,4)/3), lwd=
3)

rEx.4 <- asNumeric(p1l1p(x, tol_logcf=1e-15, minL1 = -0.4) / p1l1Xct - 1)
lines(x, abs(rEx.4), lwd=5.5, col=adjustcolor("brown", 1/2)); abline(v=-.25, lty=2,col="brown")

if(needRmpfr && isNamespaceLoaded("Rmpfr"))
    detach("package:Rmpfr")

[Package DPQ version 0.5-8 Index]