#' These functions are internal function and will not be exported
#'
#' @rdname InternalFunc
#' @name ReadSTARSJ
#'
#' @importFrom data.table fread
#' @importFrom data.table :=
#' @importFrom data.table setnames
#' @importFrom GenomicRanges GRanges
#' @importFrom IRanges IRanges
ReadSTARSJ <- function(file, uniqMapOnly = TRUE, minSJRead = 1, annotationOnly = TRUE, minOverhang = 3) {
Tab <- data.table::fread(file, stringsAsFactors = FALSE)
if(ncol(Tab) != 9) {
stop("your file may be not STAR output SJ.out.tab file")
}
colnames(Tab) <- c("chr", "start", "end", "strand", "intron_motif", "annotation", "um_reads", "mm_reads", "overhang")
if(length(minOverhang) != 1) {
stop("minOverhang must be a positive number")
}
if(!is.numeric(minOverhang)) {
stop("minOverhang must be a positive number")
}
if(length(minSJRead) != 1) {
stop("minSJRead must be a positive number")
}
if(!is.numeric(minSJRead)) {
stop("minSJRead must be a positive number")
}
if(!is.logical(uniqMapOnly)) {
stop("uniqMapOnly must be a logical value")
}
if(uniqMapOnly) {
data.table::setnames(Tab, "um_reads", "reads")
Tab[, mm_reads := NULL]
} else {
Tab[, um_reads := um_reads + mm_reads]
data.table::setnames(Tab, "um_reads", "reads")
Tab[, mm_reads := NULL]
}
Tab <- subset(Tab, reads >= minSJRead)
Tab <- subset(Tab, overhang >= minOverhang)
Tab <- subset(Tab, strand != 0)
if(annotationOnly) {
Tab <- subset(Tab, annotation == 1)
}
Tab[, strand := ifelse(strand == 0, "*", ifelse(strand == 1, "+", "-"))]
Tab[, intron_motif := ifelse(intron_motif == 0, "*", ifelse(intron_motif == 1 | intron_motif == 2, "GT-AG", ifelse(intron_motif == 3 | intron_motif == 4, "GC-AG", "AT-AC")))]
gr <- GenomicRanges::GRanges(seqnames = Tab$chr,
ranges = IRanges::IRanges(start = Tab$start, end = Tab$end),
strand = Tab$strand,
intron_motif = Tab$intron_motif,
annotation = Tab$annotation,
reads = Tab$reads,
overhang = Tab$overhang)
return(gr)
}
#' @rdname InternalFunc
#' @name multiMerge
#' @importFrom data.table fread
#' @importFrom data.table :=
#' @importFrom data.table setkey
#' @importClassesFrom GenomicRanges GRanges
multiMerge <- function(SJFiles, uniqMapOnly = TRUE, minSJRead = 1, minSJRowSum = 10, minSJSample = 2, annotationOnly = TRUE, minOverhang = 3, postfix = "SJ.out.tab") {
stopifnot(all(file.exists(SJFiles)))
if(length(minOverhang) != 1) {
stop("minOverhang must be a positive number")
}
if(!is.numeric(minOverhang)) {
stop("minOverhang must be a positive number")
}
if(minOverhang < 0) {
stop("minOverhang must be a positive number")
}
if(length(minSJRead) != 1) {
stop("minSJRead must be a positive number")
}
if(!is.numeric(minSJRead)) {
stop("minSJRead must be a positive number")
}
if(minSJRead < 0) {
stop("minSJRead must be a positive number")
}
if(length(minSJRowSum) != 1) {
stop("minSJRowSum must be a positive number")
}
if(!is.numeric(minSJRowSum)) {
stop("minSJRowSum must be a positive number")
}
if(minSJRowSum < 0) {
stop("minSJRowSum must be a positive number")
}
if(length(minSJSample) != 1) {
stop("minSJSample must be a positive number")
}
if(!is.numeric(minSJSample)) {
stop("minSJSample must be a positive number")
}
if(minSJSample < 0) {
stop("minSJSample must be a positive number")
}
if(minSJSample > length(SJFiles)) {
stop("minSJSample should not greater than number of SJFiles")
}
if(!is.logical(uniqMapOnly)) {
stop("uniqMapOnly must be a logical value")
}
SJs <- lapply(SJFiles, FUN = function(x) {
sj <- ReadSTARSJ(file = x, uniqMapOnly = uniqMapOnly, minSJRead = minSJRead, annotationOnly = annotationOnly, minOverhang = minOverhang)
sj <- data.table::data.table(SJ = as.character(sj), read = sj$reads)
data.table::setkey(sj, SJ)
return(sj)
})
sjs <- unique(do.call(c, lapply(SJs, FUN = function(x) x$SJ)))
sjs <- as.character(sort(as(sjs, "GRanges")))
Tab <- do.call(cbind, lapply(SJs, function(x) x[sjs, read]))
row.names(Tab) <- sjs
colnames(Tab) <- gsub(postfix, "", basename(SJFiles))
Tab[is.na(Tab)] <- 0
Tab <- Tab[rowSums(Tab) >= minSJRowSum & rowSums(Tab > 0) >= minSJSample, ]
return(Tab)
}
#' @rdname InternalFunc
#' @name ASSJFind
#'
#' @importFrom GenomicRanges GRanges
#' @importClassesFrom GenomicRanges GRanges
#' @importFrom GenomeInfoDb seqnames
#' @importFrom BiocGenerics start
#' @importFrom BiocGenerics end
#' @importFrom BiocGenerics strand
ASSJFind <- function(x) {
if(!is(x, "GRanges")) {
stop("x must be a GRanges object.")
}
if(anyDuplicated(as.character(x))) {
stop("Some splice junction are duplicated.")
}
DonorSites <- paste0(seqnames(x), ":", start(x), ":", strand(x))
AcceptorSites <- paste0(seqnames(x), ":", end(x), ":", strand(x))
whichAS <- DonorSites %in% names(which(table(DonorSites) > 1)) | AcceptorSites %in% names(which(table(AcceptorSites) > 1))
x$ASSJ <- as.numeric(whichAS)
return(x)
}
#' @rdname InternalFunc
#' @name ASSJFilter
#'
#' @importFrom GenomicRanges GRanges
#' @importClassesFrom GenomicRanges GRanges
#' @importFrom GenomeInfoDb seqnames
#' @importFrom BiocGenerics start
#' @importFrom BiocGenerics end
#' @importFrom BiocGenerics strand
#'
#' @param MMF minimum minor-junction fraction of multiple-alternative-splice site (same start/end junction > 2)
ASSJFilter <- function(matrix, MMJF) {
x <- as(row.names(matrix), "GRanges")
if(anyDuplicated(row.names(matrix))) {
stop("Some splice junction are duplicated.")
}
DonorSites <- paste0(seqnames(x), ":", start(x), ":", strand(x))
AcceptorSites <- paste0(seqnames(x), ":", end(x), ":", strand(x))
whichAS <- DonorSites %in% names(which(table(DonorSites) > 1)) | AcceptorSites %in% names(which(table(AcceptorSites) > 1))
assj <- as.character(x)[which(whichAS)]
x <- x[which(whichAS)]
y <- data.table::data.table(SJ = as.character(x),
start = paste0(as.character(seqnames(x)), ":", start(x), as.character(strand(x))),
end = paste0(as.character(seqnames(x)), ":", end(x), as.character(strand(x))),
reads = rowSums(matrix[as.character(x), ]))
mass <- y[, .N, by = "start"][N > 2, start]
mase <- y[, .N, by = "end"][N > 2, end]
mass <- y[start %in% mass, ]
mass$Frac <- mass[, prop.table(reads), by = start]$V1
MultiSameStartMinorJunction <- mass[Frac < MMJF, SJ]
mase <- y[end %in% mase, ]
mase$Frac <- mase[, prop.table(reads), by = end]$V1
MultiSameEndMinorJunction <- mase[Frac < MMJF, SJ]
assj <- setdiff(assj, union(MultiSameStartMinorJunction, MultiSameEndMinorJunction))
matrix[assj, ]
}
#' @rdname InternalFunc
#' @name PSICalculate
#'
#' @importFrom GenomicRanges GRanges
#' @importClassesFrom GenomicRanges GRanges
#' @importFrom GenomeInfoDb seqnames
#' @importFrom BiocGenerics start
#' @importFrom BiocGenerics end
#' @importFrom BiocGenerics strand
#'
#' @param MinSumSpliceSite The minimum number of reads of each SameStart or SameEnd for PSI Calculating
fun1 <- function(x, MinSum) {
x/ifelse(sum(x, na.rm = TRUE) >= MinSum, sum(x, na.rm = TRUE), NA)
}
PSICalculate <- function(ASSJMat, MinSumSpliceSite){
sjInfo <- data.table::as.data.table(as(row.names(ASSJMat), "GRanges"), stringsAsFactors = FALSE)[, c(-4)]
ASSJMat <- cbind(sjInfo, ASSJMat)
data.table::setkey(ASSJMat, seqnames, start, strand)
start.psi <- ASSJMat[, lapply(.SD, fun1, MinSum = MinSumSpliceSite), .SDcols = -c(1:4), by = list(seqnames, start, strand)]
start.psi$end <- ASSJMat$end
data.table::setkey(ASSJMat, seqnames, end, strand)
end.psi <- ASSJMat[, lapply(.SD, fun1, MinSum = MinSumSpliceSite), .SDcols = -c(1:4), by = list(seqnames, end, strand)]
end.psi$start <- ASSJMat$start
data.table::setcolorder(end.psi, colnames(start.psi))
psi <- rbind(start.psi, end.psi)
data.table::setkey(psi, seqnames, start, end, strand)
data.table::setcolorder(psi, c(c("seqnames", "start", "end", "strand"), setdiff(colnames(psi), c("seqnames", "start", "end", "strand"))))
psi <- psi[rowSums(psi[, -c(1:4)] != 1, na.rm = TRUE) != 0, ]
psi <- psi[, lapply(.SD, function(x) mean(x, na.rm = TRUE)), by = list(seqnames, start, end, strand)]
psi <- as.matrix(data.table::setDF(psi[, -c(1:4)], rownames = psi[, paste0(seqnames, ":", start, "-", end, ":", strand)]))
return(psi)
}
#' @rdname InternalFunc
#' @name SetIfNull
#' Set a default value if an object is null
#
#' @param x An object to set if it's null
#' @param default The value to provide if x is null
#
#' @return default if x is null, else x
#
SetIfNull <- function(x, default) {
if(is.null(x = x)){
return(default)
} else {
return(x)
}
}
#' @rdname InternalFunc
#' @name WhichCells
#
#' @param object An object to set if it's null
#' @param ident one level of design
#
WhichCells <- function(object, ident) {
row.names(colData(object))[colData(object)[[object@design]] == ident]
}
# Generate a random name
#
# Make a name from randomly sampled lowercase letters,
# pasted together with no spaces or other characters
#
# @param length How long should the name be
# @param ... Extra parameters passed to sample
#
# @return A character with nchar == length of randomly sampled letters
#
# @seealso \code{\link{sample}}
#
RandomName <- function(length = 5L, ...) {
return(paste(sample(x = letters, size = length, ...), collapse = ''))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.