| simphony {simphony} | R Documentation | 
Simulate feature abundance data
Description
Simulate experiments in which abundances of rhythmic and non-rhythmic features are measured at multiple timepoints in one or more conditions.
Usage
simphony(
  featureGroupsList,
  fracFeatures = NULL,
  nFeatures = 10,
  timepointsType = c("auto", "specified", "random"),
  timeRange = c(0, 48),
  interval = 2,
  nReps = 1,
  timepoints = NULL,
  nSamplesPerCond = NULL,
  rhyFunc = sin,
  dispFunc = NULL,
  logOdds = FALSE,
  family = c("gaussian", "negbinom", "bernoulli", "poisson")
)
Arguments
| featureGroupsList | 
 
 | 
| fracFeatures | Fraction of simulated features to allocate to each group.
Defaults to 1/(number of groups). Only used if the first
 | 
| nFeatures | Integer for the total number of features to simulate. | 
| timepointsType | Character string for how to set the timepoints for the simulation. Must be 'auto' (default), 'specified', or 'random'. | 
| timeRange | Numeric vector for the range of timepoints to use for the
simulation. Defaults to c(0, 48). Only used if  | 
| interval | Number for the amount of time between consecutive
timepoints, in the same units as  | 
| nReps | Integer for the number of replicates per timepoint. Only used
if  | 
| timepoints | Numeric vector of exact timepoints to simulate, including
any replicates. Only used if  | 
| nSamplesPerCond | Integer for the number of samples per condition,
which will be randomly uniformly spaced between 0 and  | 
| rhyFunc | Function to generate rhythmic abundance. Must have a period
of  | 
| dispFunc | Function to calculate dispersion of sampled abundance
values, given expected abundance in counts. Defaults to  | 
| logOdds | Logical for whether the rhythmic function corresponds to
log-odds. Only used if  | 
| family | Character string for the family of distributions from which to
sample the abundance values.  | 
Value
List with the following elements:
- abundData
- Matrix of abundance values (counts, if - familyis 'negbinom'), with features as rownames and samples as colnames.
- sampleMetadata
- data.tablewith one row per sample.
- featureMetadata
- data.tablewith one row per feature per condition. Columns- ampand- baseare functions of time. Columns- amp0and- base0are numeric and correspond to the amplitude and baseline abundance at time 0, respectively.
- experMetadata
- List of arguments that were passed to - simphony.
See Also
defaultDispFunc(), getExpectedAbund(), getSampledAbund(),
mergeSimData()
Examples
library('data.table')
# Simulate data for features having one of three sets of rhythmic parameters.
featureGroups = data.table(amp = c(0, 1, 1), phase = c(0, 0, 6),
                           rhyFunc = c(cos, cos, sin))
simData = simphony(featureGroups)
# Simulate data for an experiment with specified timepoints and replicates.
featureGroups = data.table(amp = c(0, 1))
simData = simphony(featureGroups, timepointsType = 'specified',
                   timepoints = c(0, 2, 2, 4, 12, 16, 21))
# Simulate data for an experiment with random timepoints between 0 and 24.
featureGroups = data.table(amp = c(0, 2))
simData = simphony(featureGroups, timepointsType = 'random',
                   timeRange = c(0, 24), nSamplesPerCond = 20)
# Simulate data with time-dependent rhythm amplitude or baseline abundance
featureGroups = data.table(amp = c(function(x) 1, function(x) 2^(-x / 24)),
                           base = c(function(x) x / 12, function(x) 0))
simData = simphony(featureGroups)
# Simulate data for features whose rhythmicity varies between two conditions.
featureGroupsList = list(
  data.table(amp = c(1, 2, 2), phase = c(0, -3, 0), period = c(24, 24, 22)),
  data.table(amp = c(3, 2, 2), phase = c(0, 3, 0), period = c(24, 24, 26)))
simData = simphony(featureGroupsList)
# Simulate data from a negative binomial distribution with a higher variance.
featureGroups = data.table(amp = 1, base = 6:8)
dispFunc = function(x) 3 * defaultDispFunc(x)
simData = simphony(featureGroups, family = 'negbinom', dispFunc = dispFunc)
# Simulate data at high temporal resolution from a Poisson distribution that
# alternates between two states.
featureGroups = data.table(amp = 1, base = 0,
                           rhyFunc = function(x) ifelse(x %% (2 * pi) < pi, 0.5, 4))
simData = simphony(featureGroups, timeRange = c(0, 24 * 4), interval = 0.1,
                   nReps = 1, family = 'poisson')
# Simulate data for 100 features, half non-rhythmic and half rhythmic, with
# amplitudes for rhythmic features sampled from a log-normal distribution.
nFeatures = 100
rhyFrac = 0.5
nRhyFeatures = round(rhyFrac * nFeatures)
rhyAmps = exp(rnorm(nRhyFeatures, mean = 0, sd = 0.25))
fracFeatures = c(1 - rhyFrac, rep(rhyFrac / nRhyFeatures, nRhyFeatures))
featureGroups = data.table(amp = c(0, rhyAmps), fracFeatures = fracFeatures)
simData = simphony(featureGroups, nFeatures = nFeatures)
# Simulate data for 100 rhythmic features, with baseline log2 expected counts
# and residual log dispersion sampled from distributions whose parameters
# were estimated, using DESeq2 and fitdistrplus, from circadian RNA-seq data
# from mouse liver (PRJNA297287).
nFeatures = 100
baseLog2Counts = rnorm(nFeatures, mean = 8.63, sd = 2.73)
dispFactors = exp(rnorm(nFeatures, sd = 0.819))
dispFuncs = sapply(dispFactors, function(z) {function(x) defaultDispFunc(x) * z})
featureGroups = data.table(base = baseLog2Counts, dispFunc = dispFuncs, amp = 1)
simData = simphony(featureGroups, nFeatures = nFeatures, family = 'negbinom')