bodyfat {ipsRdbs}R Documentation

Body fat percentage data for 102 elite male athletes training at the Australian Institute of Sport.

Description

Body fat percentage data for 102 elite male athletes training at the Australian Institute of Sport.

Usage

bodyfat

Format

A data frame with 102 rows and two columns:

Skinfold

Skin-fold thicknesses measured using calipers

Bodyfat

Percentage of fat content in the body

Source

Data collected by Dr R. Telford who was working for the Australian Institute of Sport (AIS)

Examples

 summary(bodyfat)
plot(bodyfat$Skinfold,  bodyfat$Bodyfat, xlab="Skin", ylab="Fat")
plot(bodyfat$Skinfold,  log(bodyfat$Bodyfat), xlab="Skin", ylab="log Fat")
plot(log(bodyfat$Skinfold),  log(bodyfat$Bodyfat), xlab="log Skin", ylab="log Fat")
old.par <- par(no.readonly = TRUE)
# par(mfrow=c(2,2)) # draws four plots in a graph
plot(bodyfat$Skinfold,  bodyfat$Bodyfat, xlab="Skin", ylab="Fat")
plot(bodyfat$Skinfold,  log(bodyfat$Bodyfat), xlab="Skin", ylab="log Fat")
plot(log(bodyfat$Skinfold),  log(bodyfat$Bodyfat), xlab="log Skin", ylab="log Fat")
plot(1:5, 1:5, axes=FALSE, xlab="", ylab="",  type="n")
text(2, 2, "Log both X and Y")
text(2, 1, "To have the best plot")
# Keep the transformed variables in the data set 
bodyfat$logskin <- log(bodyfat$Skinfold)
bodyfat$logbfat <- log(bodyfat$Bodyfat)
bodyfat$logskin <- log(bodyfat$Skinfold)
par(old.par)
 # Create a grouped variable 
bodyfat$cutskin <- cut(log(bodyfat$Skinfold), breaks=6) 
boxplot(data=bodyfat, Bodyfat~cutskin, col=2:7)
head(bodyfat)
require(ggplot2)
p2 <- ggplot(data=bodyfat, aes(x=cutskin, y=logbfat)) + 
geom_boxplot(col=2:7) + 
stat_summary(fun=mean, geom="line", aes(group=1), col="blue", linewidth=1) +
labs(x="Skinfold", y="Percentage of log bodyfat", 
title="Boxplot of log-bodyfat percentage vs grouped log-skinfold")  
plot(p2)

n <- nrow(bodyfat)
x <- bodyfat$logskin
y <- bodyfat$logbfat
xbar <- mean(x)
ybar <- mean(y)
sx2 <- var(x)
sy2 <- var(y)
sxy <- cov(x, y)
r <- cor(x, y)
print(list(n=n, xbar=xbar, ybar=ybar, sx2=sx2, sy2=sy2, sxy=sxy, r=r))
hatbeta1 <- r * sqrt(sy2/sx2) # calculates estimate of the slope
hatbeta0 <- ybar - hatbeta1 * xbar # calculates estimate of the intercept
rs <-  y - hatbeta0 - hatbeta1 * x  # calculates residuals
s2 <- sum(rs^2)/(n-2)  # calculates estimate of sigma2
s2
bfat.lm <- lm(logbfat ~ logskin, data=bodyfat)
### Check the diagnostics 
plot(bfat.lm$fit, bfat.lm$res, xlab="Fitted values", ylab = "Residuals")
abline(h=0)
### Should be a random scatter
qqnorm(bfat.lm$res, col=2)
qqline(bfat.lm$res, col="blue")

# All Points should be on the straight line 
summary(bfat.lm)
anova(bfat.lm)
plot(bodyfat$logskin,  bodyfat$logbfat, xlab="log Skin", ylab="log Fat")
abline(bfat.lm, col=7)
title("Scatter plot with the fitted Linear Regression line")
# 95% CI for beta(1)
# 0.88225 + c(-1, 1) * qt(0.975, df=100) *  0.02479 
# round(0.88225 + c(-1, 1) * qt(0.975, df=100) *  0.02479, 2)
# To test H0: beta1 = 1. 
tstat <- (0.88225 -1)/0.02479 
pval <- 2 * (1- pt(abs(tstat), df=100))
x <- seq(from=-5, to=5, length=500)
y <- dt(x, df=100)
plot(x, y,  xlab="", ylab="", type="l")
title("T-density with df=100")
abline(v=abs(tstat))
abline(h=0)
x1 <- seq(from=abs(tstat), to=10, length=100)
y1 <- rep(0, length=100)
x2 <- x1
y2 <- dt(x1, df=100)
segments(x1, y1, x2, y2)
abline(h=0)
# Predict at a new value of Skinfold=70
# Create a new data set called new
newx <- data.frame(logskin=log(70))
a <- predict(bfat.lm, newdata=newx, se.fit=TRUE) 
# Confidence interval for the mean of log bodyfat  at skinfold=70
a <- predict(bfat.lm, newdata=newx, interval="confidence") 
# a
#          fit      lwr     upr
# [1,] 2.498339 2.474198 2.52248
# Prediction interval for a future log bodyfat  at skinfold=70
a <- predict(bfat.lm, newdata=newx, interval="prediction") 
a
#          fit      lwr      upr
# [1,] 2.498339 2.333783 2.662895
#prediction intervals for the mean 
pred.bfat.clim <- predict(bfat.lm, data=bodyfat, interval="confidence")
#prediction intervals for future observation
pred.bfat.plim <- suppressWarnings(predict(bfat.lm, data=bodyfat, interval="prediction"))
plot(bodyfat$logskin,  bodyfat$logbfat, xlab="log Skin", ylab="log Fat")
abline(bfat.lm, col=5)
lines(log(bodyfat$Skinfold), pred.bfat.clim[,2], lty=2, col=2) 
lines(log(bodyfat$Skinfold), pred.bfat.clim[,3], lty=2, col=2) 
lines(log(bodyfat$Skinfold), pred.bfat.plim[,2], lty=4, col=3) 
lines(log(bodyfat$Skinfold), pred.bfat.plim[,3], lty=4, col=3) 
title("Scatter plot with the fitted line and prediction intervals")
symb <- c("Fitted line", "95% CI for mean", "95% CI for observation")
## legend(locator(1), legend = symb, lty = c(1, 2, 4), col = c(5, 2, 3))
# Shows where we predicted earlier 
abline(v=log(70))
summary(bfat.lm)
anova(bfat.lm)

[Package ipsRdbs version 1.0.0 Index]