#' @name perform.tpm
#' @aliases perform.tpm
#'
#' @title Performs TPM normalisation
#'
#' @description Performs TPM normalisation, scran hvg selection, scaling and variance stabilisation and regression.
#'
#' @param object IBRAP S4 class object
#' @param assay Character. String containing indicating which assay to use
#' @param slot Character. String indicating which slot within the assay should be sourced
#' @param n.genes Numerical. Top number of genes to retain when finding HVGs. Default = 1500
#' @param do.scale Boolean. Whether to scale the features variance. Default = TRUE
#' @param do.centre Boolean. Whether to centre features to zero. Default = TRUE
#' @param vars.to.regress Character. Which column in the metadata should be regressed. Default = NULL
#' @param new.assay.suffix Character. What should the new assay be called. Default = 'SCRAN'
#' @param verbose Logical Should function messages be printed?
#' @param ... Arguments to pass to Seurat::ScaleData
#'
#' @return Produces a new 'methods' assay containing normalised, scaled and HVGs.
#'
#' @examples
#'
#' object <- perform.scanpy(object = object,
#' vars.to.regress = 'RAW_total.counts', do.scale = T)
#'
#' @export
perform.tpm <- function(object,
assay = 'RAW',
slot = 'counts',
n.genes = 1500,
do.scale = FALSE,
do.center = TRUE,
vars.to.regress = NULL,
new.assay.suffix = '',
verbose = FALSE,
...) {
if(!is(object = object, class2 = 'IBRAP')) {
stop('object must be of class IBRAP\n')
}
if(!is.logical(verbose)) {
stop('verbose must be logical, TRUE/FALSE\n')
}
if(!is.character(assay)) {
stop('assay must be a character string\n')
}
if(!assay %in% names(object@methods)) {
stop('assay does not exist\n')
}
if(!is.character(slot)) {
stop('slot must be a character string\n')
}
if(!slot %in% c('counts', 'normalised', 'norm.scaled')) {
stop('slot does not exist\n')
}
if(!is.numeric(n.genes)) {
stop('n.genes must be numerical\n')
}
r <- read.table(text = as.character(IBRAP::mart_export$Gene.stable.ID.Gene.name.Gene.start..bp..Gene.end..bp.), sep = ',')
colnames(r) <- c('geneID', 'geneName', 'start', 'end')
if(is.null(r)) {
stop('cannot find gene lengths\n')
}
if(!is.logical(do.scale)) {
stop('do.scale must be logical: TRUE/FALSE\n')
}
if(!is.logical(do.center)) {
stop('do.center must be logical: TRUE/FALSE\n')
}
if(!is.null(vars.to.regress)) {
if(!is.character(vars.to.regress)) {
stop('vars.to.regress must be character string\n')
}
}
if(!is.character(new.assay.suffix)) {
stop('new.assay.suffix must be a character string\n')
}
if('_' %in% unlist(strsplit(x = new.assay.suffix, split = ''))) {
if(isTRUE(verbose)) {
cat(crayon::cyan(paste0(Sys.time(), ': _ cannot be used in new.assay.suffix, replacing with - \n')))
}
new.assay.suffix <- sub(pattern = '_', replacement = '-', x = new.assay.suffix)
}
start_time <- Sys.time()
r$Gene.length <- r$end - r$start
subset <- r[r$geneName %in% rownames(object),]
if(isTRUE(verbose)) {
cat(crayon::cyan(paste0(Sys.time(), ': matrix subsetted\n')))
}
rownames(subset) <- make.unique(names = as.character(subset$geneName), '.')
if(isTRUE(verbose)) {
cat(crayon::cyan(paste0(Sys.time(), ': rownames added\n')))
}
meta <- object@methods[[assay]]@feature_metadata[intersect(rownames((object@methods[[assay]]@feature_metadata)), rownames(subset)),]
if(isTRUE(verbose)) {
cat(crayon::cyan(paste0(Sys.time(), ': gene names interesected\n')))
}
mat <- as.matrix(object@methods[[assay]][[slot]])
mat <- mat[intersect(rownames(mat), rownames(subset)),]
ordered <- subset[match(rownames(mat), rownames(subset)),]
if(isTRUE(verbose)) {
cat(crayon::cyan(paste0(Sys.time(), ': matrices ordered\n')))
cat(crayon::cyan(paste0(Sys.time(), ': calculated counts/feature length\n')))
}
calc <- sweep(mat, 1, as.numeric(ordered$Gene.length), `/`)
scale.factor <- colSums(calc)/1000000
.counts <- sweep(calc, 2, as.numeric(scale.factor), `/`)
if(isTRUE(verbose)) {
cat(crayon::cyan(paste0(Sys.time(), ': calculations completed\n')))
}
if(isTRUE(verbose)) {
cat(crayon::cyan(paste0(Sys.time(), ': log2(x+1) transforming\n')))
}
.normalised <- log2(.counts+1)
if(isTRUE(verbose)) {
cat(crayon::cyan(paste0(Sys.time(), ': transformation completed\n')))
}
dec <- scran::modelGeneVar(x = .normalised)
.highly.variable.genes <- scran::getTopHVGs(stats = dec, n=n.genes)
seuobj <- suppressWarnings(Seurat::CreateSeuratObject(counts = mat))
seuobj@assays$RNA@data <- .normalised[.highly.variable.genes,]
if(!is.null(vars.to.regress)) {
vars.to.regress.df <- as.data.frame(object@sample_metadata[,vars.to.regress])
colnames(vars.to.regress.df) <- vars.to.regress
rownames(vars.to.regress.df) <- colnames(object)
vars.to.regress.df <- vars.to.regress.df[match(rownames(seuobj@meta.data),
rownames(vars.to.regress.df)),]
seuobj@meta.data <- cbind(seuobj@meta.data,vars.to.regress.df)
colnames(seuobj@meta.data) <- c(names(seuobj@meta.data)[1:sum(length(names(seuobj@meta.data))-length(vars.to.regress))], vars.to.regress)
seuobj <- Seurat::ScaleData(object = seuobj, do.scale=do.scale, do.center=do.center,vars.to.regress=vars.to.regress, verbose = verbose, ...)
} else {
seuobj <- Seurat::ScaleData(object = seuobj, do.scale=do.scale, do.center=do.center, verbose = verbose, ...)
}
.norm.scaled <- seuobj@assays$RNA@scale.data
feat.meta <- feature_metadata(assay = .counts, col.prefix = paste0('SCRAN', new.assay.suffix))
object@sample_metadata <- cbind(object@sample_metadata, cell_metadata(assay = as.matrix(.normalised), col.prefix = paste0('TPM', new.assay.suffix)))
object@methods[[paste0('TPM', new.assay.suffix)]] <- new(Class = 'methods',
counts = Matrix::Matrix(.counts, sparse = T),
normalised = Matrix::Matrix(.normalised, sparse = T),
norm.scaled = as.matrix(.norm.scaled),
highly.variable.genes = .highly.variable.genes,
feature_metadata = feat.meta)
if(isTRUE(verbose)) {
cat(crayon::cyan(paste0(Sys.time(), ': TPM normalisation completed\n')))
}
end_time <- Sys.time()
function_time <- end_time - start_time
if(!'normalisation_method' %in% colnames(object@pipelines)) {
object@pipelines <- data.frame(normalisation_method=paste0('TPM', new.assay.suffix), normalisation_time=function_time)
} else if (paste0('TPM', new.assay.suffix) %in% object@pipelines[,'normalisation_method']) {
object@pipelines[which(object@pipelines[,'normalisation_method']==paste0('TPM', new.assay.suffix)),] <- data.frame(normalisation_method=paste0('TPM', new.assay.suffix), normalisation_time=function_time)
} else {
object@pipelines <- rbind(object@pipelines, data.frame(normalisation_method=paste0('TPM', new.assay.suffix), normalisation_time=function_time))
}
return(object)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.