Nothing
#' import_seurat_scANANSE
#'
#' import the influences from a anansnake directory into a seurat object
#' @param seurat_object seurat object
#' @param cluster_id ID used for finding clusters of cells
#' @param anansnake_inf_dir influence directory generated by anansnake
#' @param return_df return both the seurat object and a dataframe with influence scores as a list
#' @return seurat object with the influence scores addes as an assay
#' @examples
#' sce_small <- readRDS(system.file("extdata","sce_small.Rds",package = 'AnanseSeurat'))
#' infdir <- system.file("extdata","influence",package = 'AnanseSeurat')
#' sce_small <- import_seurat_scANANSE(sce_small, anansnake_inf_dir = infdir)
#' @export
import_seurat_scANANSE <- function(seurat_object,
cluster_id = 'seurat_clusters',
anansnake_inf_dir = 'None',
return_df = FALSE) {
files <-
list.files(
path = anansnake_inf_dir,
pattern = "*.tsv",
full.names = TRUE,
recursive = FALSE
)
GRN_files <-
list.files(
path = anansnake_inf_dir,
pattern = "_diffnetwork.tsv",
full.names = TRUE,
recursive = FALSE
)
if (length(GRN_files) == 0) {
warning(paste0(
paste0(
'Error: no _diffnetwork.tsv files found in influence dir ',
anansnake_inf_dir
)
), ' \n')
warning('double check the location of the influence dir provided in anansnake_inf_dir \n')
stop()
}
files <- files[!files %in% GRN_files]
influence_scores_avg <- list()
influence_targets <- list()
i <- 1
for (file in files) {
cell_target <- stringr::str_split(basename(file), "_")[[1]][2]
cell_source <- stringr::str_split(basename(file), "_")[[1]][3]
cell_source <- stringr::str_replace(cell_source, '.tsv', '')
in_df <- utils::read.table(file, header = TRUE)
in_df <- in_df[c('factor', 'influence_score')]
colnames(in_df) <- c('factor', cell_target)
if (cell_source == 'average') {
influence_scores_avg[[i]] <- as.data.frame(in_df)
influence_targets[[i]] <- cell_target
i <- i + 1
}
}
avg_df <-
influence_scores_avg %>% purrr::reduce(dplyr::full_join, by = "factor", copy = T)
avg_df[is.na(avg_df)] <- 0
rownames(avg_df) <- avg_df$factor
avg_df$factor <- NULL
#add the TF intensities to the seurat object
TF_df <- t(avg_df)
cell_signal_list <- list()
i <- 1
#get cell IDs from the pbmcs
for (cell_type in rownames(TF_df)) {
cells_in_seurat <- seurat_object[[cluster_id]] == cell_type
if (TRUE %in% cells_in_seurat) {
relevant_IDS <-
seurat_object[, seurat_object[[cluster_id]] == cell_type][[cluster_id]]
TF_signal <- TF_df[rownames(TF_df) == cell_type, ]
TF_signal <- as.data.frame(TF_signal)
TF_signal <- t(TF_signal)
TF_signal <-
TF_signal[rep(seq_len(nrow(TF_signal)), each = nrow(relevant_IDS)),]
rownames(TF_signal) <- rownames(as.data.frame(relevant_IDS))
cell_signal_list[[i]] <-
TF_signal
} else{
warning(paste0(
paste0('no cells of id ', cell_type),
' found in seurat object'
))
}
i <- i + 1
}
TF_array <- do.call("rbind", cell_signal_list)
#add cell IDs that do not have ANANSE signal and give them a NA vallue
cell_barcodes_all <-
rownames(as.data.frame(Seurat::Idents(object = seurat_object)))
cell_barcodes_missing <-
cell_barcodes_all[!cell_barcodes_all %in% rownames(TF_array)]
cell_barcodes_missing <- as.data.frame(cell_barcodes_missing)
if (length(cell_barcodes_missing > 0)) {
message('adding cells with no influence scores with NA')
for (TF in colnames(TF_signal)) {
cell_barcodes_missing[[TF]] <- NA
}
rownames(cell_barcodes_missing) <-
cell_barcodes_missing$cell_barcodes_missing
cell_barcodes_missing$cell_barcodes_missing <- NULL
TF_output <- t(rbind(TF_array, cell_barcodes_missing))
}
else{
TF_output <- t(TF_array)
}
seurat_object[['influence']] <-
Seurat::CreateAssayObject(TF_output)
if (return_df) {
return(list(seurat_object, avg_df))
}
else{
return(seurat_object)
}
}
#' import_seurat_Maelstrom
#'
#' load Maelstrom enriched motifs
#' @param seurat_object object
#' @param cluster_id ID used for finding clusters of cells
#' @param maelstrom_file maelstrom final.out.txt file
#' @param return_df return both the seurat object and a dataframe with maelstrom scores as a list
#' @return seurat object with the maelstrom motif scores addes as an assay
#' @examples
#' sce_small <- readRDS(system.file("extdata","sce_small.Rds",package = 'AnanseSeurat'))
#' maelstromfile_path <- system.file("extdata","maelstrom","final.out.txt",package = 'AnanseSeurat')
#' sce_small <- import_seurat_maelstrom(sce_small, maelstrom_file = maelstromfile_path)
#' @export
import_seurat_maelstrom <- function(seurat_object,
cluster_id = 'seurat_clusters',
maelstrom_file = '~/final.out.txt',
return_df = FALSE) {
maelstrom_df <-
utils::read.table(
maelstrom_file,
header = TRUE,
row.names = 1,
sep = '\t',
check.names = FALSE
)
rownames(maelstrom_df) <- gsub('_', '-', rownames(maelstrom_df))
maelstrom_Zscore <-
maelstrom_df[grep("z-score ", colnames(maelstrom_df))]
maelstrom_corr <-
maelstrom_df[grep("corr ", colnames(maelstrom_df))]
#add the motif intensities to the seurat object
motif_df <- t(maelstrom_Zscore)
cell_signal_list <- list()
rownames(motif_df) <-
mapply(
gsub,
pattern = "z-score ",
x = rownames(motif_df),
replacement = ''
)
i <- 1
#get cell IDs from the single cell object
for (cell_type in rownames(motif_df)) {
#lets check if any cells with this annotation are present in the seurat object
cells_in_seurat <- seurat_object[[cluster_id]] == cell_type
if (TRUE %in% cells_in_seurat) {
relevant_IDS <-
seurat_object[, seurat_object[[cluster_id]] == cell_type][[cluster_id]]
TF_signal <- motif_df[rownames(motif_df) == cell_type, ]
TF_signal <- as.data.frame(TF_signal)
TF_signal <- t(TF_signal)
TF_signal <-
TF_signal[rep(seq_len(nrow(TF_signal)), each = nrow(relevant_IDS)),]
rownames(TF_signal) <- rownames(as.data.frame(relevant_IDS))
cell_signal_list[[i]] <-
TF_signal
} else{
warning(paste0(
paste0('no cells of id ', cell_type),
' found in seurat object'
))
}
i <- i + 1
}
TF_array <- do.call("rbind", cell_signal_list)
#add cell IDs that do not have Maelstrom signal and give them a NA vallue
cell_barcodes_all <-
rownames(as.data.frame(Seurat::Idents(object = seurat_object)))
cell_barcodes_missing <-
cell_barcodes_all[!cell_barcodes_all %in% rownames(TF_array)]
if (length(cell_barcodes_missing) > 0) {
cell_barcodes_missing <- as.data.frame(cell_barcodes_missing)
for (TF in colnames(TF_signal)) {
cell_barcodes_missing[[TF]] <- NA
}
rownames(cell_barcodes_missing) <-
cell_barcodes_missing$cell_barcodes_missing
cell_barcodes_missing$cell_barcodes_missing <- NULL
TF_array <- rbind(TF_array, cell_barcodes_missing)
}
seurat_object[['maelstrom']] <-
Seurat::CreateAssayObject(t(TF_array))
motif_df <- t(motif_df)
if (return_df) {
return(list(seurat_object, motif_df))
}
else{
return(seurat_object)
}
}
#' per_cluster_df
#'
#' generate a table of the assay score averages per cluster identifier cell
#' @param seurat_object seurat object
#' @param assay assay containing influence or motif scores generated from cluster pseudobulk
#' @param cluster_id ID used for finding clusters of cells
#' @return dataframe with assay scores, concatinating cells from each per cluster
#' @examples
#' sce_small <- readRDS(system.file("extdata","sce_small.Rds",package = 'AnanseSeurat'))
#' df <- per_cluster_df(sce_small)
#' @export
per_cluster_df <- function(seurat_object,
assay = 'influence',
cluster_id = 'seurat_clusters') {
Seurat::Idents(seurat_object) <- cluster_id
#make a dataframe with the values per cluster:
clusters <- unique(seurat_object[[cluster_id]])
#check if assay exists
if (is.null(seurat_object@assays[[assay]])) {
stop(paste0('assay ', assay, ' not found in the seurat object '))
}
cluster_data <-
as.data.frame(rownames(seurat_object@assays[[assay]]@data))
rownames(cluster_data) <- cluster_data[[1]]
for (cluster in unique(seurat_object[[cluster_id]])[[cluster_id]]) {
seurat_object_subset <- subset(x = seurat_object, idents = cluster)
#get cluster data
cluster_matrix <-
as.data.frame(seurat_object_subset@assays[[assay]]@data)
if (length(unique(as.list(cluster_matrix))) != 1) {
warning(
paste0(
'not all cells of the cluster ',
cluster,
' have the same value in the assay ',
assay
)
)
}
cluster_data[cluster] <- rowMeans(cluster_matrix)
}
cluster_data[[1]] <- NULL
cluster_data <-
cluster_data[, colSums(is.na(cluster_data)) < nrow(cluster_data)]#remove columns with NAs
return(cluster_data)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.