quantileattr <- function(dat = rates, quantiles = quantilenum){
absquantiles <- quantile(x = dat$absrate, probs = seq(0, 1, 1/quantiles))
relquantiles <- quantile(x = dat$relrate, probs = seq(0, 1, 1/quantiles))
absquantiles <- as.vector(absquantiles)
relquantiles <- as.vector(relquantiles)
absdat <- dat[c('gene_id', 'absrate')]
reldat <- dat[c('gene_id', 'relrate')]
absdat$absq <- 'quantile1'
reldat$relq <- 'quantile1'
i <- 2
for(i in 2:length(absquantiles)){
quantilename <- paste0('quantile', (i - 1))
absdat$absq[absdat$absrate > absquantiles[i - 1] & absdat$absrate <= absquantiles[i]] <- quantilename
reldat$relq[reldat$relrate > relquantiles[i - 1] & reldat$relrate <= relquantiles[i]] <- quantilename
}
absdat <- absdat[order(-absdat$absrate),]
reldat <- reldat[order(-reldat$relrate),]
row.names(absdat) <- 1:nrow(absdat)
row.names(reldat) <- 1:nrow(reldat)
resdat <- list(absres = absdat, relres = reldat)
}
fishermean <- function(line = line){
a11 <- line[1]
a12 <- line[2]
a21 <- a22 <- mean(line)
mat <- matrix(c(a11, a12, a21, a22), nrow = 2, byrow = TRUE)
mat <- round(mat)
fisherres <- fisher.test(mat)
fisherp <- fisherres$p.value
return(fisherp)
}
wilcoxcomp <- function(idx, mat1 = dat1, mat2 = dat2){
wilcoxres <- wilcox.test(unlist(mat1[idx,]), unlist(mat2[idx,]))
wilcoxp <- wilcoxres$p.value
return(wilcoxp)
}
limmacomp <- function(mat1 = dat1, mat2 = dat2){
mat <- cbind(mat1, mat2)
#mat <- log2(mat)
pd <- data.frame(group = colnames(mat), stringsAsFactors = FALSE)
row.names(pd) <- colnames(mat)
pd$group <- c(rep('group1', ncol(mat1)), rep('group2', ncol(mat2)))
#library(limma)
designstr <- '~ group'
designstr <- as.formula(designstr)
design <- model.matrix(designstr, data = pd)
fit1 <- limma::lmFit(mat, design)
fit2 <- limma::eBayes(fit1)
allg.limma <- limma::topTable(fit2, coef = 2, n = dim(fit1)[1])
return(allg.limma)
}
#'Divide genes into different quantile groups according to their rates
#'
#'Divide genes into different quantile groups according to their rates
#'inferred from the \code{calrate} function, or \code{mcalrate} function.
#'
#'@param inferres The result list generated by the \code{calrate} function or
#' \code{mcalrate} function.
#'@param targetgenes Which genes need to be divided into quantile groups. If
#' it is NULL (The default value), all genes provided by the parameter
#' \code{inferres} will be analyzed. If its value is 'significant', only
#' genes judged as significant genes by \code{calrate} or \code{mcalrate}
#' will be analyzed. While the the value of this parameter can also be a
#' vector with gene symbols as elements and only genes covered by this
#' vector will be analyzed.
#'@param quantilenum How many quantile groups need to be divided. Default
#' value is 4.
#'@return A list with 2 data.frames as elements. One of them records the
#' absolute elongation rates of the genes (bp/min) and divide the genes into
#' several quantile groups with genes in quantile1 group having the lowest
#' rates. The other data.frame records the relative elongation rates of the
#' genes (percent of gene body/min) and divide them into quantile groups. If
#' the value of parameter \code{inferres} is from \code{mcalrate}, the
#' absolute and relative rates of the genes are the mean of all the Pro-seq
#' or Gro-seq pairs.
#'@export
intracompare <- function(inferres, targetgenes = NULL, quantilenum = 4){
if(!('report' %in% names(inferres))){
i <- 1
for(i in 1:length(inferres)){
subres <- inferres[[i]]$report
if(!is.null(targetgenes)){
if(targetgenes == 'significant'){
subres <- subset(subres, significance == 'significant')
}else{
subres <- subset(subres, gene_id %in% targetgenes)
}
}
subgenes <- subres$gene_id
if(i == 1){
finalgenes <- subgenes
}else{
finalgenes <- intersect(finalgenes, subgenes)
}
}
j <- 1
for(j in 1:length(inferres)){
subres <- inferres[[j]]$report
subres <- subres[match(finalgenes, subres$gene_id),]
subres$relrate <- subres$rate/(abs(subres$end - subres$start) + 1)
subres <- subres[c('gene_id', 'rate', 'relrate')]
names(subres) <- c('gene_id', paste0('absrate_', j), paste0('relrate_', j))
subabs <- subres[,c(1, 2)]
subrel <- subres[,c(1, 3)]
if(j == 1){
absrates <- subabs
relrates <- subrel
}else{
absrates <- cbind(absrates, subabs[2])
relrates <- cbind(relrates, subrel[2])
}
}
absrates <- data.frame(gene_id = absrates$gene_id, absrate = rowMeans(absrates[-1]),
stringsAsFactors = FALSE)
relrates <- data.frame(gene_id = relrates$gene_id, relrate = rowMeans(relrates[-1]),
stringsAsFactors = FALSE)
rates <- cbind(absrates, relrates[-1])
row.names(rates) <- 1:nrow(rates)
}else{
subres <- inferres$report
if(!is.null(targetgenes)){
if(targetgenes == 'significant'){
subres <- subset(subres, significance == 'significant')
}else{
subres <- subset(subres, gene_id %in% targetgenes)
}
}
subres$relrate <- subres$rate/(abs(subres$end - subres$start) + 1)
subres <- subres[c('gene_id', 'rate', 'relrate')]
names(subres) <- c('gene_id', 'absrate', 'relrate')
rates <- subres
row.names(rates) <- 1:nrow(rates)
}
res <- quantileattr(dat = rates, quantiles = quantilenum)
names(res) <- c('absolute_rate', 'relative_rate')
return(res)
}
#'Organize the results from the function \code{mcalrate} to a data.frame
#'
#'Organize the results calculated by the function \code{mcalrate} to a
#'data.frame with the 1st column as gene symbols and the other columns as gene
#'elongation rates inferred for each Pro-seq or Gro-seq pair. This data.frame
#'can be used as input for the function \code{intercompare}.
#'
#'@param reslist The result list generated by the function \code{mcalrate}.
#'@param targetgenes Which genes need to be included in the output of this
#' function. If it is NULL (The default value), all genes provided by the
#' parameter \code{reslist} will be analyzed. If its value is 'significant',
#' only genes judged as significant genes by \code{mcalrate} will be
#' included. While the the value of this parameter can also be a vector with
#' gene symbols as elements and only genes covered by this vector will be
#' included.
#'@return A data.frame with the 1st column as gene symbols and the other
#' columns as gene elongation rates inferred for each Pro-seq or Gro-seq
#' pair. This data.frame can be used as input for the function
#' \code{intercompare}.
#'@export
orgintercompareinput <- function(reslist, targetgenes = NULL){
i <- 1
for(i in 1:length(reslist)){
res <- reslist[[i]]
res <- res$report
if(!is.null(targetgenes)){
if(targetgenes == 'significant'){
res <- subset(res, significance == 'significant')
}else{
res <- subset(res, gene_id %in% targetgenes)
}
}
res <- res[c('gene_id', 'rate')]
gene_id <- res$gene_id
if(i == 1){
gene_ids <- gene_id
res <- res[match(gene_ids, res$gene_id),]
reses <- res
}else{
gene_ids <- intersect(gene_ids, gene_id)
res <- res[match(gene_ids, res$gene_id),]
reses <- reses[match(gene_ids, reses$gene_id),]
reses <- cbind(reses, res[-1])
}
}
names(reses) <- c('gene_id', paste0('rate_', seq(1, (ncol(reses) - 1), 1)))
return(reses)
}
#'Compare gene elongation rates between 2 different conditions
#'
#'Compare gene elongation rate difference for each gene between 2 different
#'conditions. For each condition, if the number of replicates is < 3, Fisher's
#'test will be used to calculate the p-values and adjusted p-values of the
#'rate differences, while if the number of replicates is >= 3, Wilcox test and
#'limma will be used to calculate 2 sets of p-values and adjusted p-values
#'(Wilcox test based and limma based).
#'
#'@param inferresmat A data.frame recording the elongation rates for the genes
#' in different conditions. Can be generated using the function
#'\code{orgintercompareinput}.
#'@param groupnames A vector with the elements corresponding to the condition
#' names of the rate columns in the data.frame indicated by the parameter
#' \code{inferresmat}.
#'@return A data.frame with one row for one gene and the columns are the
#' average transcription elongation rates for each condition (log2
#' transformed), the log2FC, the p-value and adjusted p-value (adjusted by
#' Benjamini & Hochberg method) of the difference.
#'@export
intercompare <- function(inferresmat, groupnames){
groups <- unique(groupnames)
if(length(groups) != 2){
print('Currently only comparison between 2 groups is supported, so the data should come from 2 groups')
return(NULL)
}
groupidces <- paste0(groupnames, '.', seq(1, length(groupnames), 1))
names(inferresmat) <- c('gene_id', groupidces)
dat1 <- groupidces[groupnames == groups[1]]
dat2 <- groupidces[groupnames == groups[2]]
dat1 <- inferresmat[c('gene_id', dat1)]
dat2 <- inferresmat[c('gene_id', dat2)]
colnames(dat1) <- c('gene_id', paste0(groups[1], '.', 1:(ncol(dat1) - 1)))
colnames(dat2) <- c('gene_id', paste0(groups[2], '.', 1:(ncol(dat2) - 1)))
row.names(dat1) <- dat1$gene_id
row.names(dat2) <- dat2$gene_id
dat1 <- dat1[2:ncol(dat1)]
dat2 <- dat2[2:ncol(dat2)]
dat1 <- log2(dat1)
dat2 <- log2(dat2)
if(ncol(dat1) < 3 & ncol(dat2) < 3){
dat1 <- rowMeans(dat1)
dat2 <- rowMeans(dat2)
dat <- cbind(dat1, dat2)
pvals <- apply(X = dat, MARGIN = 1, FUN = fishermean)
padjs <- p.adjust(pvals, method = 'BH')
log2fcs <- dat2 - dat1
group1rates <- dat1
group2rates <- dat2
gene_ids <- names(dat1)
}else{
limmares <- limmacomp(mat1 = dat1, mat2 = dat2)
limmapvals <- limmares[c('P.Value', 'adj.P.Val')]
names(limmapvals) <- c('pval(limma)', 'padj(limma)')
limmapvals$gene_id <- row.names(limmares)
testlist <- as.list(seq(1, nrow(dat1), 1))
pvals <- lapply(X = testlist, FUN = wilcoxcomp, mat1 = dat1, mat2 = dat2)
names(pvals) <- row.names(dat1)
pvals <- unlist(pvals)
padjs <- p.adjust(pvals, method = 'BH')
log2fcs <- rowMeans(dat2) - rowMeans(dat1)
group1rates <- rowMeans(dat1)
group2rates <- rowMeans(dat2)
gene_ids <- row.names(dat1)
}
res <- data.frame(gene_id = gene_ids, group1rate = group1rates, group2rate = group2rates,
pval = pvals, padj = padjs, log2FC = log2fcs, stringsAsFactors = FALSE)
names(res) <- c('gene_id', paste0(groups[1], '.rate'), paste0(groups[2], '.rate'),
'pval', 'padj', paste0('log2FC(', groups[2], '/', groups[1], ')'))
row.names(res) <- 1:nrow(res)
if(exists('limmapvals')){
res <- merge(res, limmapvals, by = c('gene_id'))
}
res <- res[order(res$padj, res$pval, -abs(res[,6])),]
row.names(res) <- 1:nrow(res)
return(res)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.