| u.log {sfsmisc} | R Documentation |
(Anti)Symmetric Log High-Transform
Description
Compute log() only for high values and keep low ones –
antisymmetrically such that u.log(x) is (once) continuously
differentiable, it computes
f(x) = x for |x| \le c and
sign(x) c\cdot(1 + log(|x|/c))
for |x| \ge c.
Usage
u.log(x, c = 1)
Arguments
x |
numeric vector to be transformed. |
c |
scalar, > 0 |
Details
Alternatively, the ‘IHS’ (inverse hyperbolic sine)
transform has been proposed, first in more generality as S_U()
curves by Johnson(1949); in its simplest form,
f(x) = arsinh(x) = \log(x + \sqrt{x^2 + 1}),
which is also antisymmetric, continous and once differentiable as our u.log(.).
Value
numeric vector of same length as x.
Author(s)
Martin Maechler, 24 Jan 1995
References
N. L. Johnson (1949). Systems of Frequency Curves Generated by Methods of Translation, Biometrika 36, pp 149–176. doi:10.2307/2332539
See Also
Werner Stahel's sophisticated version of John Tukey's “started log” (which was \log(x
+ c)), with concave extension to negative values and adaptive default choice of c;
logst in his CRAN package relevance, or
LogSt in package DescTools.
Examples
curve(u.log, -3, 10); abline(h=0, v=0, col = "gray20", lty = 3)
curve(1 + log(x), .01, add = TRUE, col= "brown") # simple log
curve(u.log(x, 2), add = TRUE, col=2)
curve(u.log(x, c= 0.4), add = TRUE, col=4)
## Compare with IHS = inverse hyperbolic sine == asinh
ihs <- function(x) log(x+sqrt(x^2+1)) # == asinh(x) {aka "arsinh(x)" or "sinh^{-1} (x)"}
xI <- c(-Inf, Inf, NA, NaN)
stopifnot(all.equal(xI, asinh(xI))) # but not for ihs():
cbind(xI, asinh=asinh(xI), ihs=ihs(xI)) # differs for -Inf
x <- runif(500, 0, 4); x[100+0:3] <- xI
all.equal(ihs(x), asinh(x)) #==> is.NA value mismatch: asinh() is correct {i.e. better!}
curve(u.log, -2, 20, n=1000); abline(h=0, v=0, col = "gray20", lty = 3)
curve(ihs(x)+1-log(2), add=TRUE, col=adjustcolor(2, 1/2), lwd=2)
curve(ihs(x), add=TRUE, col=adjustcolor(4, 1/2), lwd=2)
## for x >= 0, u.log(x) is nicely between IHS(x) and shifted IHS
## a log10-scale version of asinh() {aka "IHS" }: ihs10(x) := asinh(x/2) / ln(10)
ihs10 <- function(x) asinh(x/2)/log(10)
xyaxis <- function() abline(h=0, v=0, col = "gray20", lty = 3)
leg3 <- function(x = "right")
legend(x, legend = c(quote(ihs10(x) == asinh(x/2)/log(10)),
quote(log[10](1+x)), quote(log[10](x))),
col=c(1,2,5), bty="n", lwd=2)
curve(asinh(x/2)/log(10), -5, 20, n=1000, lwd=2); xyaxis()
curve(log10(1+x), col=2, lwd=2, add=TRUE)
curve(log10( x ), col=5, lwd=2, add=TRUE); leg3()
## zoom out and x-log-scale
curve(asinh(x/2)/log(10), .1, 100, log="x", n=1000); xyaxis()
curve(log10(1+x), col=2, add=TRUE)
curve(log10( x ), col=5, add=TRUE) ; leg3("center")
curve(log10(1+x) - ihs10(x), .1, 1000, col=2, n=1000, log="x", ylim = c(-1,1)*0.10,
main = "absolute difference", xaxt="n"); xyaxis(); eaxis(1, sub10=1)
curve(log10( x ) - ihs10(x), col=4, n=1000, add = TRUE)
curve(abs(1 - ihs10(x) / log10(1+x)), .1, 5000, col=2, log = "xy", ylim = c(6e-9, 2),
main="|relative error| of approx. ihs10(x) := asinh(x/2)/log(10)", n=1000, axes=FALSE)
eaxis(1, sub10=1); eaxis(2, sub10=TRUE)
curve(abs(1 - ihs10(x) / log10(x)), col=4, n=1000, add = TRUE)
legend("left", legend = c(quote(log[10](1+x)), quote(log[10](x))),
col=c(2,4), bty="n", lwd=1)
## Compare with Stahel's version of "started log"
## (here, for *vectors* only, and 'base', as Desctools::LogSt();
## by MM: "modularized" by providing a threshold-computer function separately:
logst_thrWS <- function(x, mult = 1) {
lq <- quantile(x[x > 0], probs = c(0.25, 0.75), na.rm = TRUE, names = FALSE)
if (lq[1] == lq[2])
lq[1] <- lq[2]/2
lq[1]^(1 + mult)/lq[2]^mult
}
logst0L <- function(x, calib = x, threshold = thrFUN(calib, mult=mult),
thrFUN = logst_thrWS, mult = 1, base = 10)
{
## logical index sub-assignment instead of ifelse(): { already in DescTools::LogSt }
res <- x # incl NA's
notNA <- !is.na(sml <- (x < (th <- threshold)))
i1 <- sml & notNA; res[i1] <- log(th, base) + ((x[i1] - th)/(th * log(base)))
i2 <- !sml & notNA; res[i2] <- log(x[i2], base)
attr(res, "threshold") <- th
attr(res, "base") <- base
res
}
logst0 <- function(x, calib = x, threshold = thrFUN(calib, mult=mult),
thrFUN = logst_thrWS, mult = 1, base = 10)
{
## Using pmax.int() instead of logical indexing -- NA's work automatically - even faster
xm <- pmax.int(threshold, x)
res <- log(xm, base) + (x - xm)/(threshold * log(base))
attr(res, "threshold") <- threshold
attr(res, "base") <- base
res
}
## u.log() is really using natural log() -- whereas logst() defaults to base=10
curve(u.log, -4, 10, n=1000); abline(h=0, v=0, col = "gray20", lty = 3); points(-1:1, -1:1, pch=3)
curve(log10(x) + 1, add=TRUE, col=adjustcolor("midnightblue", 1/2), lwd=4, lty=6)
curve(log10(x), add=TRUE, col=adjustcolor("skyblue3", 1/2), lwd=4, lty=7)
curve(logst0(x, threshold= 2 ), add=TRUE, col=adjustcolor("orange",1/2), lwd=2)
curve(logst0(x, threshold= 1 ), add=TRUE, col=adjustcolor(2, 1/2), lwd=2)
curve(logst0(x, threshold= 1/4), add=TRUE, col=adjustcolor(3, 1/2), lwd=2, lty=2)
curve(logst0(x, threshold= 1/8), add=TRUE, col=adjustcolor(4, 1/2), lwd=2, lty=2)