df.util {mt}    R Documentation

Summary Utilities

Description

Functions to summarise data.

Usage

df.summ(dat, method=vec.summ,...)
  
vec.summ(x)

vec.summ.1(x)

Arguments

dat

A data frame or matrix containing the data set.

x

A numeric vector.

method

Summary method, such as vec.summ or vec.summ.1. For user-defined methods, see the examples below.

...

Additional parameters passed to the method function.

Value

df.summ returns a summarised data frame.

vec.summ returns a vector containing the number of values (excluding NAs), minimum, mean, median, maximum and standard deviation.

vec.summ.1 returns a vector containing the number of values (excluding NAs), mean, median, 95% confidence interval of the median, IQR and standard deviation.

Author(s)

Wanchang Lin

Examples

## Load the abr1 data set and keep columns 110 to 150 of its 'pos' component;
## the 'class' entry of abr1$fact is converted to a factor for grouping.
data(abr1)
dat <- (abr1$pos)[,110:150]
cls <- factor(abr1$fact$class)

## Sort out missing values with mv.zene (see its help page for what it does)
dat <- mv.zene(dat)

## Summary of an individual column, once with each built-in method
vec.summ(dat[,2])
vec.summ.1(dat[,2])

## Summary of data frame
summ   <- df.summ(dat)                       ## default: vec.summ
summ.1 <- df.summ(dat, method=vec.summ.1)

## Summary by groups: df.summ is applied separately within each level of cls
by(dat, list(cls=cls), df.summ)

## User-defined summary function: 
## Summarise a vector as a centre (its mean) plus lower and upper limits.
##
## Arguments:
##   x    Numeric vector; NAs are ignored when computing the mean and the
##        spread.
##   bar  Type of limits to compute:
##          "SD" - mean -/+ one standard deviation (the default),
##          "SE" - mean -/+ one standard error of the mean,
##          "CI" - 95% confidence interval of the mean, taken from t.test().
##
## Value:
##   A named numeric vector with elements 'lower', 'centre' and 'upper'.
##
## An unsupported value of 'bar' is rejected by match.arg() with an error.
vec.segment <- function(x, bar=c("SD", "SE", "CI"))
{
  ## match.arg() picks "SD" when 'bar' is left at its default and stops on
  ## anything that is not one of the listed choices, so no further
  ## validation branch is needed below.
  bar <- match.arg(bar)

  centre <- mean(x, na.rm = TRUE)

  limits <- switch(bar,
    ## Standard deviation (SD)
    SD = centre + c(-1, 1) * sd(x, na.rm = TRUE),
    ## Standard error (SE) of the mean: SD over sqrt(number of non-NA values)
    SE = centre + c(-1, 1) * (sd(x, na.rm = TRUE) / sqrt(sum(!is.na(x)))),
    ## Confidence interval (CI), here 95% (the t.test default); as.vector()
    ## drops the 'conf.level' attribute carried by conf.int.
    CI = as.vector(t.test(x)$conf.int)
  )

  c(lower = limits[1], centre = centre, upper = limits[2])
}

## Test it: called directly, 'bar' takes its default (the first choice, "SD")
vec.segment(dat[,2])
## Use it as the summary method; bar="SE" is an additional parameter for the
## method function
summ.2 <- df.summ(dat, method=vec.segment, bar="SE")

## ----------------------------------------------------------
#' iris data
df.summ(iris)

#' Group summary
## (The plyr-based examples below are left commented out; they need the
## plyr package to be loaded.)
## library(plyr)
## ddply(iris, .(Species), df.summ)
## (tmp <- dlply(iris, .(Species), df.summ, method=vec.segment))
## do.call("rbind", tmp)

#' or you can use summarise to get the group summary for single variable:
## ddply(iris, .(Species), summarise, 
##      mean=mean(Sepal.Length), std=sd(Sepal.Length))


[Package mt version 2.0-1.20 Index]