library(tidyverse) library(qsub)
sce_path = "/group/irc/personal/robinb/BreastCancer/output/sce_cohort1_updated.rds" output_path = "/group/irc/personal/robinb/BreastCancer/" sample_id = "sample_id" group_id = "expansion_timepoint" celltype_id = "subType" covariates = NA batches = NA min_cells = 10
contrasts_oi = c("'PreE-PreNE','PreNE-PreE'") contrast_tbl = tibble(contrast = c("PreE-PreNE", "PreNE-PreE"), group = c("PreE", "PreNE"))
empirical_pval = FALSE p_val_adj = FALSE logFC_threshold = 0.50 p_val_threshold = 0.05 fraction_cutoff = 0.05 top_n_target = 250
multinichenet_wrapper = function(i, sce_path, output_path, celltype_id, sample_id, group_id, contrasts_oi, contrast_tbl, batches, covariates, min_cells, logFC_threshold, p_val_threshold, fraction_cutoff, p_val_adj, empirical_pval, top_n_target){ library(SingleCellExperiment) library(tidyverse) library(nichenetr) library(multinichenetr) ### read in NicheNet model organism = "human" lr_network_all = readRDS("/group/irc/personal/robinb/lr_network_human_allInfo_30112033.rds") %>% mutate(ligand = convert_alias_to_symbols(ligand, organism = organism), receptor = convert_alias_to_symbols(receptor, organism = organism)) lr_network = lr_network_all %>% mutate(ligand = make.names(ligand), receptor = make.names(receptor)) %>% distinct(ligand, receptor) ligand_target_matrix = readRDS("/group/irc/personal/robinb/ligand_target_matrix_nsga2r_final.rds") colnames(ligand_target_matrix) = colnames(ligand_target_matrix) %>% convert_alias_to_symbols(organism = organism) %>% make.names() rownames(ligand_target_matrix) = rownames(ligand_target_matrix) %>% convert_alias_to_symbols(organism = organism) %>% make.names() lr_network = lr_network %>% filter(ligand %in% colnames(ligand_target_matrix)) ligand_target_matrix = ligand_target_matrix[, lr_network$ligand %>% unique()] ### read in scRNAseq data sce = readRDS(sce_path) sce = alias_to_symbol_SCE(sce, "human") %>% makenames_SCE() sce = sce[, SummarizedExperiment::colData(sce)[,group_id] %in% contrast_tbl$group] ## check abundance info senders_oi = SummarizedExperiment::colData(sce)[,celltype_id] %>% unique() receivers_oi = SummarizedExperiment::colData(sce)[,celltype_id] %>% unique() print(SummarizedExperiment::colData(sce)[,celltype_id] %>% unique()) abundance_info = get_abundance_info(sce = sce, sample_id = sample_id, group_id = group_id, celltype_id = celltype_id, min_cells = min_cells, senders_oi = senders_oi, receivers_oi = receivers_oi, batches = batches) ## check for condition-specific cell types sample_group_celltype_df = abundance_info$abundance_data %>% filter(n > min_cells) %>% ungroup() %>% distinct(sample_id, group_id) %>% cross_join(abundance_info$abundance_data %>% ungroup() %>% distinct(celltype_id)) %>% arrange(sample_id) abundance_df = sample_group_celltype_df %>% left_join(abundance_info$abundance_data %>% ungroup()) abundance_df$n[is.na(abundance_df$n)] = 0 abundance_df$keep[is.na(abundance_df$keep)] = FALSE abundance_df_summarized = abundance_df %>% mutate(keep = as.logical(keep)) %>% group_by(group_id, celltype_id) %>% summarise(samples_present = sum((keep))) celltypes_absent_one_condition = abundance_df_summarized %>% filter(samples_present == 0) %>% pull(celltype_id) %>% unique() celltypes_present_one_condition = abundance_df_summarized %>% filter(samples_present >= 2) %>% pull(celltype_id) %>% unique() condition_specific_celltypes = intersect(celltypes_absent_one_condition, celltypes_present_one_condition) total_nr_conditions = SummarizedExperiment::colData(sce)[,group_id] %>% unique() %>% length() absent_celltypes = abundance_df_summarized %>% dplyr::filter(samples_present < 2) %>% dplyr::group_by(celltype_id) %>% dplyr::count() %>% dplyr::filter(n == total_nr_conditions) %>% dplyr::pull(celltype_id) print("condition-specific celltypes:") print(condition_specific_celltypes) print("absent celltypes:") print(absent_celltypes) senders_oi = senders_oi %>% setdiff(absent_celltypes) receivers_oi = receivers_oi %>% setdiff(absent_celltypes) retained_celltypes = union(senders_oi, receivers_oi) sce = sce[, SummarizedExperiment::colData(sce)[,celltype_id] %in% retained_celltypes] nr_celltypes = SummarizedExperiment::colData(sce)[,celltype_id] %>% unique() %>% length() nr_cells = sce@assays@data$counts %>% colnames() %>% length() nr_genes = sce@assays@data$counts %>% rownames() %>% length() nr_samples = SummarizedExperiment::colData(sce)[,sample_id] %>% unique() %>% length() print("nr_celltypes") print(nr_celltypes) print("nr_cells") print(nr_cells) print("nr_genes") print(nr_genes) print("nr_samples") print(nr_samples) ### DE analysis ## define expressed genes frq_list = get_frac_exprs(sce = sce, sample_id = sample_id, celltype_id = celltype_id, group_id = group_id, batches = batches, min_cells = min_cells, fraction_cutoff = fraction_cutoff, min_sample_prop = 0.5) DE_info = get_DE_info(sce = sce, sample_id = sample_id, group_id = group_id, celltype_id = celltype_id, batches = batches, covariates = covariates, contrasts_oi = contrasts_oi, min_cells = min_cells, expressed_df = frq_list$expressed_df) markobj = c('---', 'title: "P-value Histograms"', 'output: html_document', '---', '', '## hist_pvals', '', '```r', 'DE_info$hist_pvals', '```', '' ) markdown::markdownToHTML(text = knitr::knit(text = markobj), output = paste0(output_path, "plots/hist_pvals_preEvspreNE_01122023.html")) DE_info$hist_pvals = NULL if(empirical_pval == TRUE){ DE_info_emp = get_empirical_pvals(DE_info$celltype_de$de_output_tidy) comparison_plots = compare_normal_emp_pvals(DE_info, DE_info_emp, adj_pval = FALSE) celltype_de = DE_info_emp$de_output_tidy_emp %>% dplyr::select(-p_val, -p_adj) %>% dplyr::rename(p_val = p_emp, p_adj = p_adj_emp) markobj = c('---', 'title: "Empericial p-value Plots"', 'output: html_document', '---', '', '## comparison_plots', '', '```r', 'comparison_plots', '```', '', '## hist_pvals_emp', '', '```r', 'DE_info_emp$hist_pvals_emp', '```', '', '## z_distr_plots_emp_pval', '', '```r', 'DE_info_emp$z_distr_plots_emp_pval', '```', '' ) markdown::markdownToHTML(text = knitr::knit(text = markobj), output = paste0(output_path, "plots/emp_DE_plots_preEvspreNE_01122023.html")) } else { celltype_de = DE_info$celltype_de$de_output_tidy } print(celltype_de %>% dplyr::group_by(cluster_id, contrast) %>% dplyr::filter(p_adj <= p_val_threshold & abs(logFC) >= logFC_threshold) %>% dplyr::count() %>% dplyr::arrange(-n)) senders_oi = celltype_de$cluster_id %>% unique() receivers_oi = celltype_de$cluster_id %>% unique() genes_oi = celltype_de$gene %>% unique() retained_celltypes = c(senders_oi, receivers_oi) %>% unique() retained_celltypes = c(retained_celltypes, condition_specific_celltypes) print("retained cell types") print(retained_celltypes) sce = sce[genes_oi, SummarizedExperiment::colData(sce)[,celltype_id] %in% retained_celltypes] print(SummarizedExperiment::colData(sce)[,celltype_id] %>% unique()) sender_receiver_de = combine_sender_receiver_de( sender_de = celltype_de, receiver_de = celltype_de, senders_oi = senders_oi, receivers_oi = receivers_oi, lr_network = lr_network ) sender_receiver_tbl = sender_receiver_de %>% dplyr::distinct(sender, receiver) metadata_combined = SummarizedExperiment::colData(sce) %>% tibble::as_tibble() if(!is.na(batches)){ grouping_tbl = metadata_combined[,c(sample_id, group_id, batches)] %>% tibble::as_tibble() %>% dplyr::distinct() colnames(grouping_tbl) = c("sample","group",batches) } else { grouping_tbl = metadata_combined[,c(sample_id, group_id)] %>% tibble::as_tibble() %>% dplyr::distinct() colnames(grouping_tbl) = c("sample","group") } ### abundance_expression_info abundance_expression_info = process_abundance_expression_info(sce = sce, sample_id = sample_id, group_id = group_id, celltype_id = celltype_id, min_cells = min_cells, senders_oi = union(senders_oi, condition_specific_celltypes), receivers_oi = union(receivers_oi, condition_specific_celltypes), lr_network = lr_network, batches = batches, frq_list = frq_list, abundance_info = abundance_info) markobj = c('---', 'title: "Abundance Plots"', 'output: html_document', '---', '', '## abund_plot_sample', '', '```r', 'abundance_info$abund_plot_sample', '```', '', '## abund_plot_group', '', '```r', 'abundance_info$abund_plot_group', '```', '', '## abund_barplot', '', '```r', 'abundance_info$abund_barplot', '```', '' ) markdown::markdownToHTML(text = knitr::knit(text = markobj), output = paste0(output_path, "plots/abundance_plots_preEvspreNE_01122023.html")) rm(sce) ### ligand activities n.cores = min(4, length(receivers_oi)) ligand_activities_targets_DEgenes = suppressMessages(suppressWarnings(get_ligand_activities_targets_DEgenes( receiver_de = celltype_de, receivers_oi = receivers_oi, ligand_target_matrix = ligand_target_matrix, logFC_threshold = logFC_threshold, p_val_threshold = p_val_threshold, p_val_adj = p_val_adj, top_n_target = top_n_target, verbose = TRUE, n.cores = n.cores ))) ### save intermediary output # list(abundance_expression_info = abundance_expression_info, grouping_tbl = grouping_tbl, sender_receiver_de = sender_receiver_de, ligand_activities_targets_DEgenes = ligand_activities_targets_DEgenes, contrast_tbl = contrast_tbl, sender_receiver_tbl = sender_receiver_tbl, celltype_de = celltype_de) %>% saveRDS(paste0(output_path,"output/MNN_BCA_EvsNE_intermediary_output.rds")) ### Prioritization tables prioritization_tables = suppressMessages(generate_prioritization_tables( sender_receiver_info = abundance_expression_info$sender_receiver_info, sender_receiver_de = sender_receiver_de, ligand_activities_targets_DEgenes = ligand_activities_targets_DEgenes, contrast_tbl = contrast_tbl, sender_receiver_tbl = sender_receiver_tbl, grouping_tbl = grouping_tbl, scenario = "regular", # all prioritization criteria will be weighted equally fraction_cutoff = fraction_cutoff, abundance_data_receiver = abundance_expression_info$abundance_data_receiver, abundance_data_sender = abundance_expression_info$abundance_data_sender, ligand_activity_down = TRUE )) ## correlation lr_target_prior_cor = lr_target_prior_cor_inference(prioritization_tables$group_prioritization_tbl$receiver %>% unique(), abundance_expression_info, celltype_de, grouping_tbl, prioritization_tables, ligand_target_matrix, logFC_threshold = logFC_threshold, p_val_threshold = p_val_threshold, p_val_adj = p_val_adj) ## save output if(length(condition_specific_celltypes) > 0) { print("There are condition specific cell types in the data. Continuing with the regular MultiNicheNet analysis will not include those. If preferred, the user can apply a specific worfklow tailored to analyze CCC events involving condition-specific cell types") print(condition_specific_celltypes) prioritization_tables_with_condition_specific_celltype_sender = prioritize_condition_specific_sender( abundance_info = abundance_info, abundance_expression_info = abundance_expression_info, condition_specific_celltypes = condition_specific_celltypes, grouping_tbl = grouping_tbl, fraction_cutoff = fraction_cutoff, contrast_tbl = contrast_tbl, sender_receiver_de = sender_receiver_de, lr_network = lr_network, ligand_activities_targets_DEgenes = ligand_activities_targets_DEgenes, scenario = "regular", ligand_activity_down = TRUE ) prioritization_tables_with_condition_specific_celltype_receiver = prioritize_condition_specific_receiver( abundance_info = abundance_info, abundance_expression_info = abundance_expression_info, condition_specific_celltypes = condition_specific_celltypes, grouping_tbl = grouping_tbl, fraction_cutoff = fraction_cutoff, contrast_tbl = contrast_tbl, sender_receiver_de = sender_receiver_de, lr_network = lr_network, ligand_activities_targets_DEgenes = ligand_activities_targets_DEgenes, scenario = "regular", ligand_activity_down = TRUE ) combined_prioritization_tables = list( group_prioritization_tbl = bind_rows( prioritization_tables_with_condition_specific_celltype_receiver$group_prioritization_tbl %>% filter(receiver %in% condition_specific_celltypes), prioritization_tables_with_condition_specific_celltype_sender$group_prioritization_tbl %>% filter(sender %in% condition_specific_celltypes) ) %>% bind_rows(prioritization_tables$group_prioritization_tbl) %>% arrange(-prioritization_score) %>% distinct() ) multinichenet_output = list( celltype_info = abundance_expression_info$celltype_info, celltype_de = celltype_de, ligand_activities_targets_DEgenes = ligand_activities_targets_DEgenes, prioritization_tables = prioritization_tables, prioritization_tables_with_condition_specific_celltype_sender = prioritization_tables_with_condition_specific_celltype_sender, prioritization_tables_with_condition_specific_celltype_receiver = prioritization_tables_with_condition_specific_celltype_receiver, combined_prioritization_tables = combined_prioritization_tables, grouping_tbl = grouping_tbl, lr_target_prior_cor = lr_target_prior_cor ) multinichenet_output = make_lite_output_condition_specific(multinichenet_output) } else { print("There are no condition specific cell types in the data. MultiNicheNet analysis is performed in the regular way for all cell types.") multinichenet_output = list( celltype_info = abundance_expression_info$celltype_info, celltype_de = celltype_de, ligand_activities_targets_DEgenes = ligand_activities_targets_DEgenes, prioritization_tables = prioritization_tables, grouping_tbl = grouping_tbl, lr_target_prior_cor = lr_target_prior_cor ) multinichenet_output = make_lite_output(multinichenet_output) } saveRDS(multinichenet_output, paste0(output_path,"output/MNN_BCA_preEvspreNE_final_output.rds")) return(multinichenet_output) }
qsub_config = create_qsub_config( remote = "robinb@prism.psb.ugent.be:7777", local_tmp_path = "/Users/robinb/r2gridengine", remote_tmp_path = "/scratch/irc/personal/robinb/r2gridengine", modules = "R/x86_64/4.1.3", # 4.0.3?? memory = "240G", wait = FALSE, remove_tmp_folder = FALSE, name = "MNN-BCA", max_wall_time = "500:00:00", stop_on_error = TRUE, num_cores = 1 ) job_MNN_EvsNE = qsub_lapply(X = 1, FUN = multinichenet_wrapper, object_envir = environment(multinichenet_wrapper), qsub_config = qsub_config, qsub_environment = NULL, qsub_packages = NULL, sce_path, output_path, celltype_id, sample_id, group_id, contrasts_oi, contrast_tbl, batches, covariates, min_cells, logFC_threshold, p_val_threshold, fraction_cutoff, p_val_adj, empirical_pval, top_n_target)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.