# This script contains a list of important marker genes for cells and/or
# transcriptional factors
#' Load the corrected label of cells
#'
#' @param metadata metadata to be added to seurat
#' @param root_dir directory to the correct cell labels
#' @param file_name file_name of the correct cell labes
#' @param sheetName the excel sheet name
#' @param barcode_col the column storing the barcode, default is the rownames
#' @param process_ID_func function to preprocess the cell barcode
#' @export
load_correct_label <- function (metadata, root_dir, file_name, sheetName,
barcode_col =NULL, process_ID_func=NULL){
correct_label <- xlsx::read.xlsx (paste (root_dir, file_name, sep='/'),
sheetName=sheetName)
colnames (correct_label) [1] <- 'ID'
# correct the cell barcodes
cell_IDs <- correct_label$ID
if (!is.null (process_ID_func)){ cell_IDs <- sapply (cell_IDs, process_ID_func)}
correct_label$ID <- cell_IDs
if (is.null (barcode_col)){
order_label <- rownames (metadata)
}else{order_label <- metadata [, barcode_col]}
correct_reorder <- correct_label [ match (order_label, correct_label$ID), ]
correct_reorder %>%
select ('ID', 'Stage', 'Revised.lineage..this.study.', 'Ident', 'Ident_2') %>%
magrittr::set_colnames (c( 'ID', 'Stage', 'revised', 'Ident1', 'Ident2' )) -> final_meta
final_meta <- cbind (final_meta, metadata)
rownames (final_meta) <- rownames (metadata)
return (final_meta)
}
#' Standardise cell labels
#'
#' @param cellname a vector of cell names
#' @param conversion the dataframe to convert cell names
#' @return a character vector based on `cellname`
#' @export
standardise_name <- function (cellname, conversion=NULL){
# in case a factor vector is used
x <- as.character(cellname)
if (is.null(conversion) ) {
data (convert_name, package='TBdev')
conversion <- convert_name
}
for (i in 1:dim(conversion)[1]){
x [x %in% as.character (conversion$original [i]) ] <-
as.character (conversion$to_replace [i])
}
return (x)
}
fill_na_labels <- function (x, reference, fill_label){
na_field <- is.na (x@meta.data [, fill_label])
x@meta.data [, fill_label] <- as.character (x@meta.data [, fill_label])
x@meta.data [na_field, fill_label] <- as.character (x@meta.data [na_field, reference])
x@meta.data [, fill_label] <- as.factor (x@meta.data[, fill_label])
return (x)
}
#' Append string before a numeric vector
#'
#' @description Append a string A to the beginning of another string B if
#' string B can be coerced into numeric
#'
#' @param x string B
#' @param append_str string A
append_if_numeric <- function (x, append_str='D'){
if (is.na (as.numeric (x))){ return (x)
}else{return (paste (append_str, x, sep='') ) }
}
#' Clean metadata of Seurat objects
#'
#' @description The cell type labels are converted into levelled factors. 'D'
#' is appended before data labels.
#' @param AP aesthetic parameters. The key one is cell_order, which contains
#' sorting cell order
#' @param cell_type_col on which columns should cell type sorting occur
#' @param date_col which column contains date information
#' @return a Seurat object
#' @export
clean_metadata <- function (x, AP=NULL, cell_type_col= c('revised', 'Type',
'assigned_cluster', 'broad_type'), date_col='date'){
AP <- return_aes_param (AP)
x$Type <- standardise_name (x$Type)
if (! ('revised' %in% colnames (x@meta.data))){x$revised <- x$Type}
na_field <- x$revised %in% c('NA', NA)
x$revised [na_field] <- x$Type [na_field]
for (one_col in cell_type_col){
if (one_col %in% colnames (x@meta.data) ){
print (paste ('releveling', one_col))
x@meta.data[is.na(x@meta.data [, one_col]), one_col] <- 'unknown'
x@meta.data[x@meta.data [, one_col] == 'NA', one_col] <- 'unknown'
x@meta.data [, one_col]<- partial_relevel (x@meta.data[, one_col], AP$cell_order)
}
}
# merge date and cell type label
if (date_col %in% colnames (x@meta.data)){
# convert days into numeric figures
x$date <- trimws(gsub ('^D', '', as.character (x$date)))
x$date <- sapply (x$date, function (x) {append_if_numeric (x, 'D')})
x@meta.data %>%
tidyr::unite ('Type_date', c('Type', 'date'), sep='_', remove=F) %>%
tidyr::unite ('revised_date', c('revised', 'date'), sep='_', remove=F)-> x@meta.data
#x$date <- paste ('D', x$date, sep='')
x$date <- factor ( x$date, levels= gtools::mixedsort (unique (x$date)) )
x$Type_data <- partial_relevel (x$Type_date, AP$cell_order)
x$revised_data <- partial_relevel (x$revised_date, AP$cell_order)
}
return (x)
}
#' Incorporate Trophoblast stem cells into Seurat object
#'
#' @description a specific function unlikely to be helpful
#' @export
incorporate_TSC <- function (x, analyse_by='new_cluster2', add_by='revised'){
TSC_cells <- x$revised %in% c('hTSC_OKAE', 'hTSC_TURCO')
TSC_data <- x [, TSC_cells]
TSC_data$select <- 'select'
TSC_data@meta.data [, analyse_by] <- TSC_data@meta.data [, add_by]
x$select <- 'not select'
merge_x <- merge_seurat (list (x, TSC_data), assays='RNA')
return (merge_x [, merge_x@meta.data [, analyse_by] != 'hTSC'])
}
# ----------Cells----------
select_lineage <- function (x, feature, celltypes, discard=F){
if (discard){return (x[, ! (x@meta.data [, feature] %in% celltypes) ])
}else{return (x[, x@meta.data [, feature] %in% celltypes])}
}
# ----------dates----------
#' Convert dates into Carnegie stages
#'
#' @export
date_to_CS <- function (x, date_col = 'date'){
data (date2CS, package='TBdev')
new_CS <- as.character (x@meta.data [, date_col])
for (i in 1:nrow (date2CS)){
new_CS [ new_CS == date2CS$date [i] ] <- as.character (date2CS$CS[i])
}
x@meta.data [, date_col] <- new_CS
return (x)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.