R: (Anti)Symmetric Log High-Transform

u.log {sfsmisc}

R Documentation

(Anti)Symmetric Log High-Transform

Description

Compute log() only for high values and keep low ones – antisymmetrically such that u.log(x) is (once) continuously differentiable, it computes f(x) = x for |x| \le c and sign(x) c\cdot(1 + log(|x|/c)) for |x| \ge c.

Usage

u.log(x, c = 1)

Arguments

`x`	numeric vector to be transformed.
`c`	scalar, > 0

Details

Alternatively, the ‘IHS’ (inverse hyperbolic sine) transform has been proposed, first in more generality as S_U() curves by Johnson(1949); in its simplest form, f(x) = arsinh(x) = \log(x + \sqrt{x^2 + 1}), which is also antisymmetric, continous and once differentiable as our u.log(.).

Value

numeric vector of same length as x.

Author(s)

Martin Maechler, 24 Jan 1995

References

N. L. Johnson (1949). Systems of Frequency Curves Generated by Methods of Translation, Biometrika 36, pp 149–176. doi:10.2307/2332539

Examples

curve(u.log, -3, 10); abline(h=0, v=0, col = "gray20", lty = 3)
curve(1 + log(x), .01,  add = TRUE, col= "brown") # simple log
curve(u.log(x,    2),   add = TRUE, col=2)
curve(u.log(x, c= 0.4), add = TRUE, col=4)

## Compare with  IHS = inverse hyperbolic sine == asinh 
ihs <- function(x) log(x+sqrt(x^2+1)) # == asinh(x) {aka "arsinh(x)" or "sinh^{-1} (x)"}
xI <- c(-Inf, Inf, NA, NaN)
stopifnot(all.equal(xI, asinh(xI))) # but not for ihs():
cbind(xI, asinh=asinh(xI), ihs=ihs(xI)) # differs for -Inf
x <- runif(500, 0, 4); x[100+0:3] <- xI
all.equal(ihs(x), asinh(x)) #==>  is.NA value mismatch:  asinh() is correct {i.e. better!}

curve(u.log, -2, 20, n=1000); abline(h=0, v=0, col = "gray20", lty = 3)
curve(ihs(x)+1-log(2), add=TRUE, col=adjustcolor(2, 1/2), lwd=2)
curve(ihs(x),          add=TRUE, col=adjustcolor(4, 1/2), lwd=2)
## for x >= 0, u.log(x) is nicely between IHS(x) and shifted IHS

## a  log10-scale version of asinh()  {aka "IHS" }: ihs10(x) :=  asinh(x/2) / ln(10)
ihs10 <- function(x) asinh(x/2)/log(10)
xyaxis <- function() abline(h=0, v=0, col = "gray20", lty = 3)
leg3 <- function(x = "right")
    legend(x, legend = c(quote(ihs10(x) == asinh(x/2)/log(10)),
                         quote(log[10](1+x)), quote(log[10](x))),
           col=c(1,2,5), bty="n", lwd=2)
curve(asinh(x/2)/log(10), -5, 20, n=1000, lwd=2); xyaxis()
curve(log10(1+x), col=2, lwd=2, add=TRUE)
curve(log10( x ), col=5, lwd=2, add=TRUE); leg3()

## zoom out  and  x-log-scale
curve(asinh(x/2)/log(10), .1, 100, log="x", n=1000); xyaxis()
curve(log10(1+x), col=2, add=TRUE)
curve(log10( x ), col=5, add=TRUE) ; leg3("center")

curve(log10(1+x) - ihs10(x), .1, 1000, col=2, n=1000, log="x", ylim = c(-1,1)*0.10,
      main = "absolute difference", xaxt="n"); xyaxis(); eaxis(1, sub10=1)
curve(log10( x ) - ihs10(x), col=4, n=1000, add = TRUE)

curve(abs(1 - ihs10(x) / log10(1+x)), .1, 5000, col=2, log = "xy", ylim = c(6e-9, 2),
      main="|relative error| of approx. ihs10(x) :=  asinh(x/2)/log(10)", n=1000, axes=FALSE)
eaxis(1, sub10=1); eaxis(2, sub10=TRUE)
curve(abs(1 - ihs10(x) / log10(x)), col=4, n=1000, add = TRUE)
legend("left", legend = c(quote(log[10](1+x)), quote(log[10](x))),
       col=c(2,4), bty="n", lwd=1)

## Compare with Stahel's version of "started log"
## (here, for *vectors* only, and 'base', as Desctools::LogSt();

## by MM: "modularized" by providing a threshold-computer function separately:
logst_thrWS <- function(x, mult = 1) {
    lq <- quantile(x[x > 0], probs = c(0.25, 0.75), na.rm = TRUE, names = FALSE)
    if (lq[1] == lq[2])
        lq[1] <- lq[2]/2
    lq[1]^(1 + mult)/lq[2]^mult
}
logst0L <- function(x, calib = x, threshold = thrFUN(calib, mult=mult),
                   thrFUN = logst_thrWS, mult = 1, base = 10)
{
    ## logical index sub-assignment instead of ifelse(): { already in DescTools::LogSt }
    res <- x # incl NA's
    notNA <- !is.na(sml <- (x < (th <- threshold)))
    i1 <-  sml & notNA; res[i1] <- log(th, base) + ((x[i1] - th)/(th * log(base)))
    i2 <- !sml & notNA; res[i2] <- log(x[i2], base)
    attr(res, "threshold") <- th
    attr(res, "base") <- base
    res
}
logst0 <- function(x, calib = x, threshold = thrFUN(calib, mult=mult),
                   thrFUN = logst_thrWS, mult = 1, base = 10)
{
    ## Using pmax.int() instead of logical indexing -- NA's work automatically - even faster
    xm <- pmax.int(threshold, x)
    res <- log(xm, base) + (x - xm)/(threshold * log(base))
    attr(res, "threshold") <- threshold
    attr(res, "base") <- base
    res
}

## u.log() is really using natural log() -- whereas logst() defaults to base=10
curve(u.log, -4, 10, n=1000); abline(h=0, v=0, col = "gray20", lty = 3); points(-1:1, -1:1, pch=3)
curve(log10(x) + 1,  add=TRUE, col=adjustcolor("midnightblue", 1/2), lwd=4, lty=6)
curve(log10(x),      add=TRUE, col=adjustcolor("skyblue3",     1/2), lwd=4, lty=7)
curve(logst0(x, threshold= 2  ), add=TRUE, col=adjustcolor("orange",1/2), lwd=2)
curve(logst0(x, threshold= 1  ), add=TRUE, col=adjustcolor(2, 1/2), lwd=2)
curve(logst0(x, threshold= 1/4), add=TRUE, col=adjustcolor(3, 1/2), lwd=2, lty=2)
curve(logst0(x, threshold= 1/8), add=TRUE, col=adjustcolor(4, 1/2), lwd=2, lty=2)

[Package sfsmisc version 1.1-18 Index]