df.util {mt} | R Documentation |
Summary Utilities
Description
Functions to summarise data.
Usage
df.summ(dat, method=vec.summ,...)
vec.summ(x)
vec.summ.1(x)
Arguments
dat |
A data frame or matrix containing the data set. |
x |
A numeric vector of values. |
method |
Summary method such as vec.summ (the default) or vec.summ.1; a user-defined function can also be supplied. |
... |
Additional parameters to method. |
Value
df.summ
returns a summarised data frame.
vec.summ
returns a vector containing the number of values (excluding NAs),
minimum, mean, median, maximum and standard deviation.
vec.summ.1
returns a vector containing the number of values (excluding NAs),
mean, median, 95% confidence interval of median, IQR and standard deviation.
Author(s)
Wanchang Lin
Examples
## Load the example data set 'abr1' and keep columns 110 to 150 of its
## 'pos' component; the 'class' entry of 'fact' is used as grouping factor.
data(abr1)
dat <- (abr1$pos)[,110:150]
cls <- factor(abr1$fact$class)
## Sort out missing values
dat <- mv.zene(dat)
## Summary of an individual column (here the second one)
vec.summ(dat[,2])
vec.summ.1(dat[,2])
## Summary of the whole data frame, once per summary method
summ <- df.summ(dat) ## default: vec.summ
summ.1 <- df.summ(dat, method=vec.summ.1)
## Summary by groups: df.summ is applied to the rows of each class in turn
by(dat, list(cls=cls), df.summ)
## User-defined summary function:
## Lower bound, centre and upper bound of a vector, e.g. for drawing
## error bars.
##
## x          Numeric vector. Missing values are ignored when computing the
##            mean, the standard deviation and the standard error.
## bar        Kind of bar around the mean:
##              "SD" - mean -/+ one standard deviation (the default),
##              "SE" - mean -/+ one standard error of the mean,
##              "CI" - confidence interval of the mean as given by t.test().
## conf.level Confidence level handed to t.test(); only used when
##            bar = "CI". Defaults to 0.95.
##
## Returns a named numeric vector with elements 'lower', 'centre', 'upper'.
## An unknown 'bar' is rejected by match.arg(), so no further validity
## check is needed.
vec.segment <- function(x, bar = c("SD", "SE", "CI"), conf.level = 0.95) {
  bar <- match.arg(bar)
  centre <- mean(x, na.rm = TRUE)

  if (bar == "CI") {
    ## Confidence interval (CI) of the mean
    conf <- t.test(x, conf.level = conf.level)$conf.int
    lower <- conf[1]
    upper <- conf[2]
  } else {
    ## Standard deviation (SD) ...
    spread <- sd(x, na.rm = TRUE)
    if (bar == "SE") {
      ## ... scaled down to the standard error (SE) of the mean, using
      ## only the number of non-missing values
      spread <- spread / sqrt(sum(!is.na(x)))
    }
    lower <- centre - spread
    upper <- centre + spread
  }

  c(lower = lower, centre = centre, upper = upper)
}
## Test it: with no 'bar' argument the first choice, "SD", is used
vec.segment(dat[,2])
## Use it as the summary method; bar="SE" is given as an additional argument
summ.2 <- df.summ(dat, method=vec.segment, bar="SE")
## ----------------------------------------------------------
#' iris data
df.summ(iris)
#' Group summary (left commented out; it uses the plyr package)
## library(plyr)
## ddply(iris, .(Species), df.summ)
## (tmp <- dlply(iris, .(Species), df.summ, method=vec.segment))
## do.call("rbind", tmp)
#' or you can use summarise to get the group summary for a single variable:
## ddply(iris, .(Species), summarise,
## mean=mean(Sepal.Length), std=sd(Sepal.Length))
[Package mt version 2.0-1.20 Index]