PeakSegJointFasterOne {PeakSegJoint} | R Documentation |
PeakSegJointFasterOne
Description
Run the PeakSegJointFaster
heuristic optimization algorithm, which
gives an approximate solution to a multi-sample Poisson maximum
likelihood segmentation problem. Given S samples, this function
computes a sequence of S+1 PeakSegJoint models, with 0, ..., S
samples with an overlapping peak (maximum of one peak per
sample).
Usage
PeakSegJointFasterOne(profiles,
bin.factor = 2L)
Arguments
profiles |
List of data.frames with columns chromStart, chromEnd, count, or single data.frame with additional column sample.id. |
bin.factor |
Size of bin pyramid. Bigger values result in slower computation. |
Value
List of model fit results, see examples to see how to use it.
Author(s)
Toby Dylan Hocking
Examples
library(PeakSegJoint)
data(H3K36me3.TDH.other.chunk1, envir=environment())
some.counts <- subset(
H3K36me3.TDH.other.chunk1$counts,
43000000 < chromEnd &
chromStart < 43200000 &
sample.id %in% c("McGill0023", "McGill0022", "McGill0016", "McGill0013"))
id.df <- unique(some.counts[, c("cell.type", "sample.id")])
group.list <- split(paste(id.df$sample.id), id.df$cell.type, drop=TRUE)
loss.df.list <- list()
fit.list <- list()
for(bin.factor in 2:7){
fit.fast <- PeakSegJointFasterOne(some.counts, bin.factor)
fit.fast$min.loss <- sum(fit.fast$peak_loss_vec)
fit.fast$sample.loss.diff.vec <- sort(with(fit.fast, structure(
peak_loss_vec-flat_loss_vec, names=sample.id)))
fit.fast$group.loss.diff.vec <- sort(sapply(group.list, function(sid.vec){
sum(fit.fast$sample.loss.diff.vec[sid.vec])
}))
fit.fast$sample.loss.vec <- with(fit.fast, structure(
sum(flat_loss_vec)+cumsum(c(0, sample.loss.diff.vec)),
names=paste0(0:length(sample.loss.diff.vec), "samples")))
fit.fast$group.loss.vec <- with(fit.fast, structure(
sum(flat_loss_vec)+cumsum(c(0, group.loss.diff.vec)),
names=paste0(0:length(group.loss.diff.vec), "groups")))
loss.df.list[[paste(bin.factor)]] <- with(fit.fast, data.frame(
bin.factor,
loss=sample.loss.vec,
peaks=0:length(sample.loss.diff.vec)))
fit.list[[paste(bin.factor)]] <- fit.fast
}
loss.df <- do.call(rbind, loss.df.list)
fit.best <- fit.list[[which.min(sapply(fit.list, "[[", "min.loss"))]]
norm.list <- list()
profile.list <- split(some.counts, some.counts$sample.id, drop=TRUE)
for(sample.id in names(profile.list)){
one <- profile.list[[sample.id]]
max.count <- max(one$count)
one$count.norm <- one$count/max.count
norm.list[[sample.id]] <- one
}
norm.df <- do.call(rbind, norm.list)
if(interactive() && require(ggplot2)){
peaks.df.list <- list()
for(n.samples in 1:length(fit.best$sample.loss.diff.vec)){
peaks.df.list[[paste(n.samples)]] <- with(fit.best, data.frame(
samples=n.samples,
sample.id=names(sample.loss.diff.vec)[1:n.samples],
chromStart=peak_start_end[1],
chromEnd=peak_start_end[2]))
}
peaks <- do.call(rbind, peaks.df.list)
best.peaks <- transform(peaks, y=samples*-0.1, what="peaks")
ggplot()+
ggtitle("model for each sample")+
scale_color_manual(values=c(data="grey50",
peaks="deepskyblue",
bins="black", segments="green"))+
geom_step(aes(chromStart/1e3, count.norm, color=what),
data=data.frame(norm.df, what="data"))+
geom_segment(aes(chromStart/1e3, y,
xend=chromEnd/1e3, yend=y,
color=what),
size=1,
data=best.peaks)+
geom_text(aes(chromStart/1e3, y,
label=paste0(samples, " sample",
ifelse(samples==1, "", "s"), " "),
color=what),
hjust=1,
size=3,
vjust=0.5,
data=best.peaks)+
theme_bw()+
theme(panel.margin=grid::unit(0, "cm"))+
facet_grid(sample.id ~ ., scales="free")
## same thing but for each group.
peaks.df.list <- list()
for(n.groups in 1:length(fit.best$group.loss.diff.vec)){
group.vec <- names(fit.best$group.loss.diff.vec[1:n.groups])
meta.df <- do.call(rbind, lapply(group.vec, function(cell.type){
data.frame(cell.type, sample.id=group.list[[cell.type]])
}))
peaks.df.list[[paste(n.groups)]] <- with(fit.best, data.frame(
groups=n.groups,
meta.df,
chromStart=peak_start_end[1],
chromEnd=peak_start_end[2]))
}
peaks <- do.call(rbind, peaks.df.list)
best.peaks <- transform(peaks, y=groups*-0.1, what="peaks")
ggplot()+
ggtitle("model for each group")+
scale_color_manual(values=c(data="grey50",
peaks="deepskyblue",
bins="black", segments="green"))+
geom_step(aes(chromStart/1e3, count.norm, color=what),
data=data.frame(norm.df, what="data"))+
geom_segment(aes(chromStart/1e3, y,
xend=chromEnd/1e3, yend=y,
color=what),
size=1,
data=best.peaks)+
geom_text(aes(chromStart/1e3, y,
label=paste0(groups, " group",
ifelse(groups==1, "", "s"), " "),
color=what),
hjust=1,
size=3,
vjust=0.5,
data=best.peaks)+
theme_bw()+
theme(panel.margin=grid::unit(0, "cm"))+
facet_grid(sample.id + cell.type ~ ., scales="free")
min.df <- subset(loss.df, peaks==max(peaks))
ggplot()+
geom_line(aes(peaks, loss, group=bin.factor), data=loss.df)+
geom_text(aes(peaks, loss, label=bin.factor), data=min.df, hjust=0)
if(require(microbenchmark)){
N.samples.vec <- 10^seq(1, 3, by=0.5)
max.N <- max(N.samples.vec)
N.bases <- 10
rmat <- function(Nr, Nc, mu){
matrix(rpois(Nr*Nc, mu), Nr, Nc)
}
set.seed(1)
big.mat <- cbind(
rmat(max.N, N.bases, 5),
rmat(max.N, N.bases, 10),
rmat(max.N, N.bases, 5))
big.df <- data.frame(
sample.id=as.integer(row(big.mat)),
chromStart=as.integer(col(big.mat)-1),
chromEnd=as.integer(col(big.mat)),
count=as.integer(big.mat))
full.list <- ProfileList(big.df)
time.df.list <- list()
for(N.samples in N.samples.vec){
partial.list <- full.list[1:N.samples]
result <- microbenchmark(
Heuristic=PeakSegJointHeuristicStep2(partial.list, 2L),
Faster=PeakSegJointFasterOne(partial.list, 2L),
times=2L)
time.df.list[[paste(N.samples)]] <- data.frame(
N.samples,
result)
}
time.df <- do.call(rbind, time.df.list)
ggplot()+
geom_point(aes(
N.samples, time/1e9, color=expr),
data=time.df)+
scale_x_log10()+
scale_y_log10("seconds")
}
}
[Package PeakSegJoint version 2024.6.27 Index]