Nothing
#' Create a container to store all data and results for the project. You must
#' provide a params list as generated by initialize_params(). You also need to
#' provide either a Seurat object or both a count_data matrix and a meta_data matrix.
#'
#' @param params list A list of the experiment params to use as generated by
#' initialize_params()
#' @param count_data dgCMatrix Matrix of raw counts with genes as rows
#' and cells as columns (default=NULL)
#' @param meta_data data.frame Metadata with cells as rows and variables
#' as columns. Number of rows in metadata should equal number of columns
#' in count matrix (default=NULL)
#' @param seurat_obj Seurat object that has been cleaned and includes the normalized,
#' log-transformed counts. The meta.data should include a column with the header
#' 'sex' and values of 'M' or 'F' if available. The metadata should
#' also have a column with the header 'ctypes' with the corresponding names of
#' the cell types as well as a column with header 'donors' that contains
#' identifiers for each donor. (default=NULL)
#' @param scMinimal environment A sub-container for the project typically
#' consisting of gene expression data in its raw and processed forms as well
#' as metadata (default=NULL)
#' @param gn_convert data.frame Gene identifier -> gene name conversions table.
#' Gene identifiers used in counts matrices should appear in the first column and
#' the corresponding gene symbols should appear in the second column. Can remain
#' NULL if the identifiers are already gene symbols. (default=NULL)
#' @param metadata_cols character The names of the metadata columns to use
#' (default=NULL)
#' @param metadata_col_nm character New names for the selected metadata columns
#' if wish to change their names. If NULL, then the preexisting column names are
#' used. (default=NULL)
#' @param label_donor_sex logical Set to TRUE to label donor sex in the meta data
#' by using expressing of sex-associated genes (default=FALSE)
#'
#' @return A project container of class environment that stores sub-containers
#' for each cell type as well as results and plots from all analyses.
#' @export
make_new_container <- function(params, count_data=NULL, meta_data=NULL,
seurat_obj=NULL, scMinimal=NULL, gn_convert=NULL,
metadata_cols=NULL, metadata_col_nm=NULL,
label_donor_sex=FALSE) {
if (!is.null(seurat_obj)) {
# check cell type names don't have special characters
all_ct_names <- levels(as.factor(seurat_obj@meta.data$ctypes))
names(all_ct_names) <- all_ct_names
new_ct_names <- sapply(params$ctypes_use,function(x) {
split_names <- strsplit(x, "[\\._:]", fixed = FALSE)[[1]]
if (length(split_names)>1) {
return(paste(split_names, collapse = ""))
} else {
return(x)
}
})
if (any(new_ct_names != names(new_ct_names))) {
warning("Cell type names cannot have special characters '.', '_', or ':'. Removing these characters and concatenating name components.")
all_ct_names[names(new_ct_names)] <- new_ct_names
seurat_obj@meta.data$ctypes <- as.factor(seurat_obj@meta.data$ctypes)
levels(seurat_obj@meta.data$ctypes) <- all_ct_names
params$ctypes_use <- new_ct_names
names(params$ctypes_use) <- NULL
}
scMinimal <- seurat_to_scMinimal(seurat_obj, metadata_cols=metadata_cols,
metadata_col_nm=metadata_col_nm)
} else if (!is.null(count_data) & !is.null(meta_data)) {
# throw error if metadata is not a dataframe
if (class(meta_data)[1]!='data.frame') {
stop('meta_data must be a data.frame')
}
# check cell type names don't have special characters
all_ct_names <- levels(as.factor(meta_data$ctypes))
names(all_ct_names) <- all_ct_names
new_ct_names <- sapply(params$ctypes_use,function(x) {
split_names <- strsplit(x, "[\\._:]", fixed = FALSE)[[1]]
if (length(split_names)>1) {
return(paste(split_names, collapse = ""))
} else {
return(x)
}
})
if (any(new_ct_names != names(new_ct_names))) {
warning("Cell type names cannot have special characters '.', '_', or ':'. Removing these characters and concatenating name components.")
all_ct_names[names(new_ct_names)] <- new_ct_names
meta_data$ctypes <- as.factor(meta_data$ctypes)
levels(meta_data$ctypes) <- all_ct_names
params$ctypes_use <- new_ct_names
names(params$ctypes_use) <- NULL
}
# make counts to be dgCMatrix if not already
if (class(count_data)[1]!='dgCMatrix') {
count_data <- methods::as(as.matrix(count_data),'sparseMatrix')
}
# throw error if data is pre-normalized
if (any(count_data%%1!=0)) {
stop('It seems your count_data might already be normalized. The count_data matrix should only contain integer UMI counts.')
}
# throw error if dimensions or cell names don't match
if (ncol(count_data)!=nrow(meta_data)) {
stop('ncol(count_data) must match nrow(meta_data)')
} else if (!identical(colnames(count_data),rownames(meta_data))) {
stop('Cell names must be identical in both count_data and meta_data')
}
scMinimal <- instantiate_scMinimal(count_data, meta_data,
metadata_cols=metadata_cols,
metadata_col_nm=metadata_col_nm)
} else if (is.null(scMinimal)) {
stop("Need to provide either a seurat object or a scMinimal object or both a count matrix and meta data matrix")
}
# throw warning if cell-level metadata is included
metadata_test <- scMinimal$metadata[scMinimal$metadata$ctypes==params$ctypes_use[1],]
metadata_test <- unique(metadata_test)
if (nrow(metadata_test) != length(unique(metadata_test$donors))) {
warning('You may have included metadata that varies across cells within each donor/sample.
We recommend only including metadata that varies across donors/samples.')
}
# create new empty environment for all experiment data and results
container <- new.env()
container$scMinimal_full <- scMinimal
container$experiment_params <- params
if (!is.null(gn_convert)) {
rownames(gn_convert) <- gn_convert[,1]
}
container$gn_convert <- gn_convert
if (label_donor_sex) {
container <- identify_sex_metadata(container)
}
return(container)
}
#' Initialize parameters to be used throughout scITD in various functions
#'
#' @param ctypes_use character Names of the cell types to use for the analysis
#' (default=NULL)
#' @param ncores numeric Number of cores to use (default=4)
#' @param rand_seed numeric Random seed to use (default=10)
#'
#' @return A list of the experiment parameters to use.
#' @export
#'
#' @examples
#' param_list <- initialize_params(ctypes_use = c("CD4+ T", "CD8+ T"),
#' ncores = 1, rand_seed = 10)
initialize_params <- function(ctypes_use, ncores=4, rand_seed=10) {
if (is.null(ctypes_use)) {
stop("You must provide the ctypes_use parameter")
}
params_list <- list(ctypes_use = ctypes_use,
ncores = ncores,
rand_seed = rand_seed)
return(params_list)
}
#' Update any of the experiment-wide parameters
#'
#' @param container environment Project container that stores sub-containers
#' for each cell type as well as results and plots from all analyses
#' @param ctypes_use character Names of the cell types to use for the analysis
#' (default=NULL)
#' @param ncores numeric Number of cores to use (default=NULL)
#' @param rand_seed numeric Random seed to use (default=NULL)
#'
#' @return The project container with updated experiment parameters in
#' container$experiment_params.
#' @export
#'
#' @examples
#' test_container <- update_params(test_container, ncores=1)
update_params <- function(container, ctypes_use=NULL,
ncores=NULL, rand_seed=NULL) {
# if user/code enters a value for a param then reset its value
if (!is.null(ctypes_use)) {
container$experiment_params$ctypes_use <- ctypes_use
}
if (!is.null(ncores)) {
container$experiment_params$ncores <- ncores
}
if (!is.null(rand_seed)) {
container$experiment_params$rand_seed <- rand_seed
}
return(container)
}
#' Extract metadata for sex information if not provided already
#' @useDynLib scITD
#' @importFrom Rcpp sourceCpp
#'
#' @param container environment Project container that stores sub-containers
#' for each cell type as well as results and plots from all analyses
#' @param y_gene character Gene name to use for identifying male donors (default='RPS4Y1')
#' @param x_gene character Gene name to use for identifying female donors (default='XIST')
#'
#' @return The project container with sex metadata added to the metadata.
#' @export
identify_sex_metadata <- function(container,y_gene='RPS4Y1',x_gene='XIST') {
scMinimal <- container$scMinimal_full
dge_sparse <- t(scMinimal$count_data)
# get donor sums for each gene in the dataset
donor_meta <- as.factor(scMinimal$metadata$donors)
d_sums <- get_sums(dge_sparse,donor_meta)
d_sums <- d_sums[2:nrow(d_sums),]
# normalize counts
d_sums <- t(normalize_counts(t(d_sums),scale_factor=10000))
# convert rownames to gene symbols using provided mapping
gn_names <- convert_gn(container, colnames(d_sums))
y_ndx <- which(gn_names == y_gene)
x_ndx <- which(gn_names == x_gene)
y_mean <- mean(d_sums[,y_ndx])
x_mean <- mean(d_sums[,x_ndx])
make_note <- FALSE
scMinimal$metadata$sex <- NA
for (i in 1:nrow(scMinimal$metadata)) {
d <- scMinimal$metadata$donors[i]
if (d_sums[d,y_ndx] > y_mean && d_sums[d,x_ndx] < x_mean) {
scMinimal$metadata$sex[i] <- 'M'
} else if (d_sums[d,y_ndx] < y_mean && d_sums[d,x_ndx] > x_mean) {
scMinimal$metadata$sex[i] <- 'F'
} else {
scMinimal$metadata$sex[i] <- 'A'
make_note <- TRUE
}
}
if (make_note) {
message('Some assignments are ambiguous and are labeled A in the metadata.
We recommend correcting these manually or providing the sex metadata when
instantiating scMinimal.')
}
container$scMinimal_full <- scMinimal
return(container)
}
#' Get metadata matrix of dimensions donors by variables (not per cell)
#'
#' @param container environment Project container that stores sub-containers
#' for each cell type as well as results and plots from all analyses
#' @param additional_meta character A vector of other variables to include (default=NULL)
#' @param only_analyzed logical Set to TRUE to only include donors that were included
#' in the formed tensor, otherwise set to FALSE (default=TRUE)
#'
#' @return The project container with metadata per donor (not per cell) in container$donor_metadata.
#' @export
#'
#' @examples
#' test_container <- get_donor_meta(test_container, additional_meta='lanes')
get_donor_meta <- function(container,additional_meta=NULL,only_analyzed=TRUE) {
if (only_analyzed) {
if (is.null(container$tensor_data)) {
stop('Need to run form_tensor() first or set only_analyzed to FALSE')
}
donors <- container$tensor_data[[1]]
} else {
donors <- unique(container$scMinimal_full$metadata$donors)
}
meta_sub <- container$scMinimal_full$metadata[container$scMinimal_full$metadata$donors %in% donors,]
donor_metadata <- unique(meta_sub[,c('donors',additional_meta),drop=FALSE])
rownames(donor_metadata) <- donor_metadata[,'donors']
container$donor_metadata <- donor_metadata
return(container)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.