R: Bootstrap method

BootPst {Pstat}

R Documentation

Bootstrap method

Description

'BootPst' performs a bootstrap resampling procedure with all individuals of the selected populations and calculates Pst values of quantitative measures considered. This function provides a confidence interval or the distribution of Pst.

Usage

BootPst(data,va,opt=0,csh=1,boot=1000,Ri=0,Rp=0,Pw=0,pe=0.95,bars=20)

Arguments

`data`	a dataframe with as many rows as individuals. The first column contains the name of the population to which the individual belongs, the others contain quantitative variables.
`va`	the name (or number) of the quantitative measure considered.
`opt`	if opt=0 all the boot values of Pst are returned, if opt="ci" these ordered values and the confidence interval are returned, and if opt="hist" these ordered values and the distribution histogram of Pst are returned.
`csh`	the value of c/h^2, where c is the assumed additive genetic proportion of differences between populations and where h^2 is (narrow-sense heritability) the assumed additive genetic proportion of differences between individuals within populations.
`boot`	the number of data frames generated to determine the confidence interval or to construct the distribution (with the bootstrap method).
`Ri`	a vector containing each number of individual to be deleted. The vector Ri must contain existent individuals, each of them once.
`Rp`	a vector containing the names of the populations to be deleted.
`Pw`	a vector containing the names of the two populations considered to obtain pairwise Pst.
`pe`	the confidence level of the calculated interval.
`bars`	the maximum number of bars the histogram may have: indeed, on the x-axis, interval $[0,1]$ is divided into $bars$ parts (there may exist unfilled bars).

Value

In any case, the sizes of each population considered. If opt="ci" an ordered vector containing values of Pst and the confidence interval (also a vector). If opt="hist" an ordered vector containing values of Pst and the Pst distribution histogram. Else a single vector containing the boot values of Pst.

Author(s)

Blondeau Da Silva Stephane - Da Silva Anne.

Examples

data(test)
# BootPst(test,va=1)
# BootPst(test,va="QM7",opt="ci",csh=0.8,boot=500,Ri=18)
# BootPst(test,va=11,opt="ci",Ri=c(22,27,195),Rp=c("A","B","E"),pe=0.9)
BootPst(test,va="Body_length",boot=80,opt="hist",bars=50)
# BootPst(test,va=4,opt="hist",Ri=c(3,7:17),Pw=c("C","D"))

## The function is currently defined as
function (data, va, opt = 0, csh = 1, boot = 1000, Ri = 0, Rp = 0, 
    Pw = 0, pe = 0.95, bars = 20) 
{
    nonNa.clm <- function(data, clm) {
        nb.ind = dim(data)[1]
        nb.na = 0
        for (i in 1:nb.ind) if (is.na(data[i, clm])) 
            nb.na = nb.na + 1
        return(nb.ind - nb.na)
    }
    dat.fra.prep <- function(data) {
        nb.var = dim(data)[2] - 1
        data = as.data.frame(data)
        data[, 1] = as.character(data[, 1])
        for (i in 1:nb.var) {
            if (is.numeric(data[, i + 1]) == FALSE) 
                data[, i + 1] = as.numeric(as.character(data[, 
                  i + 1]))
        }
        dat.sta <- function(dat) {
            nb.vari = dim(dat)[2] - 1
            st.dev = rep(0, nb.vari)
            mea = rep(0, nb.vari)
            for (i in 1:nb.vari) {
                nna.clm = nonNa.clm(dat, i + 1)
                st.dev[i] = sqrt((nna.clm - 1)/nna.clm) * sd(dat[, 
                  i + 1], na.rm = TRUE)
                mea[i] = mean(dat[, i + 1], na.rm = TRUE)
            }
            for (j in 1:nb.vari) dat[, j + 1] = (dat[, j + 1] - 
                mea[j])/st.dev[j]
            return(dat)
        }
        data = dat.sta(data)
        return(data)
    }
    dat.rem.ind.pop <- function(data, ind = 0, pop = 0) {
        data = as.data.frame(data)
        dat.rem.ind <- function(dat, ind) {
            nb.rem.ind = length(ind)
            nb.ind = dim(dat)[1]
            for (i in 1:nb.rem.ind) dat = dat[row.names(dat)[1:(nb.ind - 
                i + 1)] != ind[i], ]
            return(dat)
        }
        dat.rem.pop <- function(dat, pop) {
            nb.rem.pop = length(pop)
            for (i in 1:nb.rem.pop) dat = dat[dat[, 1] != pop[i], 
                ]
            return(dat)
        }
        if (ind[1] != 0) 
            data = dat.rem.ind(data, ind)
        if (pop[1] != 0) 
            data = dat.rem.pop(data, pop)
        return(data)
    }
    dat.pw <- function(data, pw = 0) {
        if (pw[1] == 0) 
            return(data)
        else {
            data = data[data[, 1] == pw[1] | data[, 1] == pw[2], 
                ]
            return(data)
        }
    }
    nb.pop <- function(data) {
        data = data[order(data[, 1]), ]
        nb.ind = dim(data)[1]
        nb.pop = 1
        for (i in 1:(nb.ind - 1)) if (data[i, 1] != data[i + 
            1, 1]) 
            nb.pop = nb.pop + 1
        return(nb.pop)
    }
    pop.freq <- function(data) {
        data = data[order(data[, 1]), ]
        nb.ind = dim(data)[1]
        dat.fra = as.data.frame(data)
        nb.pop = 1
        for (i in 1:(nb.ind - 1)) if (data[i, 1] != data[i + 
            1, 1]) 
            nb.pop = nb.pop + 1
        pop.freq.vec = rep(1, nb.pop)
        name = rep(0, nb.pop)
        k = 1
        name[1] = as.character(dat.fra[1, 1])
        for (i in 2:nb.ind) if (dat.fra[i - 1, 1] == dat.fra[i, 
            1]) 
            pop.freq.vec[k] = pop.freq.vec[k] + 1
        else {
            k = k + 1
            name[k] = as.character(dat.fra[i, 1])
        }
        names(pop.freq.vec) = name
        return(pop.freq.vec)
    }
    Pst.val <- function(data, csh = 1) {
        nbpop = nb.pop(data)
        nb.var = dim(data)[2] - 1
        data = data[order(data[, 1]), ]
        if (nbpop == 1) 
            return(rep(0, nb.var))
        else {
            pop.freq = pop.freq(data)
            Pst.clm <- function(dat, clm) {
                mea = mean(dat[, clm], na.rm = TRUE)
                nna.clm = nonNa.clm(dat, clm)
                SSTotal = (nna.clm - 1) * var(dat[, clm], na.rm = TRUE)
                mea.pop = rep(0, nbpop)
                nna.pop.freq = rep(0, nbpop)
                nna.pop.freq[1] = nonNa.clm(dat[1:(pop.freq[1]), 
                  ], clm)
                nb.allna.pop = 0
                if (nna.pop.freq[1] == 0) 
                  nb.allna.pop = 1
                else mea.pop[1] = mean(dat[1:(pop.freq[1]), clm], 
                  na.rm = TRUE)
                for (i in 2:nbpop) {
                  nna.pop.freq[i] = nonNa.clm(dat[(sum(pop.freq[1:(i - 
                    1)]) + 1):(sum(pop.freq[1:i])), ], clm)
                  if (nna.pop.freq[i] != 0) 
                    mea.pop[i] = mean(dat[(sum(pop.freq[1:(i - 
                      1)]) + 1):(sum(pop.freq[1:i])), clm], na.rm = TRUE)
                  else nb.allna.pop = nb.allna.pop + 1
                }
                SSBetween = sum(nna.pop.freq * (mea.pop - mea)^2)
                SSWithin = SSTotal - SSBetween
                if ((nna.clm - nbpop + nb.allna.pop) * (nbpop - 
                  nb.allna.pop - 1) != 0) {
                  MSWithin = SSWithin/(nna.clm - nbpop + nb.allna.pop)
                  MSBetween = SSBetween/(nbpop - nb.allna.pop - 
                    1)
                  return(csh * MSBetween/(csh * MSBetween + 2 * 
                    MSWithin))
                }
                else {
                  if ((nna.clm - nbpop + nb.allna.pop) == 0) 
                    return(1)
                  else return(0)
                }
            }
            pst.val = rep(0, nb.var)
            for (j in 1:nb.var) pst.val[j] = Pst.clm(data, j + 
                1)
            return(pst.val)
        }
    }
    boot.pst.va <- function(data, csh, boot, clm) {
        nb.ind = dim(data)[1]
        dat = data[, c(1, clm)]
        boot.val = rep(0, boot)
        for (i in 1:boot) {
            da = dat[sample(1:nb.ind, nb.ind, T), ]
            boot.val[i] = Pst.val(da, csh)
        }
        return(boot.val)
    }
    ConInt.pst.va <- function(data, csh, boot, clm, per) {
        boot.pst.val = boot.pst.va(data = data, csh = csh, boot = boot, 
            clm = clm)
        boot.pst.val = sort(boot.pst.val)
        print(c(boot.pst.val[floor(boot * (1 - per)/2 + 1)], 
            boot.pst.val[ceiling(boot * (per + 1)/2)]))
        return(boot.pst.val)
    }
    dis.pst.va <- function(data, csh, boot, clm, bars) {
        psts.val = boot.pst.va(data = data, csh = csh, boot = boot, 
            clm = clm)
        hist(psts.val, breaks = c(0:bars)/bars, xlab = "Pst", 
            ylab = "Frequency", main = c("Pst distribution:", 
                names(data)[clm]), col = "gray88")
        return(sort(psts.val))
    }
    for (i in 2:dim(data)[2]) {
        if (names(data)[i] == va) 
            va = i - 1
    }
    if (is.numeric(va) == FALSE) 
        return("va value does not exist!")
    data = dat.fra.prep(data)
    data = dat.rem.ind.pop(data, ind = Ri, pop = Rp)
    data = dat.pw(data, pw = Pw)
    print("The studied quantitative variable is:")
    print(names(data)[va + 1])
    print("Populations sizes are:")
    print(pop.freq(data))
    if (opt != "ci" & opt != "hist") {
        print(paste(boot, "bootstrap values:"))
        return(boot.pst.va(data, csh = csh, boot = boot, clm = va + 
            1))
    }
    if (opt == "ci") {
        print(paste(100 * pe, "% confidence interval determined by", 
            boot, "bootstrap values:"))
        return(ConInt.pst.va(data, csh = csh, boot = boot, clm = va + 
            1, per = pe))
    }
    if (opt == "hist") {
        print(paste(boot, "bootstrap values and", "Pst distribution:"))
        dev.new()
        dis.pst.va(data = data, csh = csh, boot = boot, clm = va + 
            1, bars = bars)
    }
  }

[Package Pstat version 1.2 Index]