library(tidyverse)
library(qsub)
sce_path = "/group/irc/personal/robinb/BreastCancer/output/sce_cohort1_updated.rds"
output_path = "/group/irc/personal/robinb/BreastCancer/"
sample_id = "sample_id"
group_id = "expansion_timepoint"
celltype_id = "subType"
covariates = NA 
batches = NA 
min_cells = 10
contrasts_oi = c("'PreE-PreNE','PreNE-PreE'")
contrast_tbl = tibble(contrast =
                        c("PreE-PreNE", "PreNE-PreE"),
                      group = c("PreE", "PreNE"))
empirical_pval = FALSE
p_val_adj = FALSE 
logFC_threshold = 0.50
p_val_threshold = 0.05
fraction_cutoff = 0.05
top_n_target = 250
multinichenet_wrapper = function(i, sce_path, output_path, celltype_id, sample_id, group_id, contrasts_oi, contrast_tbl, batches, covariates, min_cells, logFC_threshold, p_val_threshold, fraction_cutoff, p_val_adj, empirical_pval, top_n_target){

  library(SingleCellExperiment)
  library(tidyverse)
  library(nichenetr)
  library(multinichenetr)

  ### read in NicheNet model
  organism = "human"
  lr_network_all = readRDS("/group/irc/personal/robinb/lr_network_human_allInfo_30112033.rds") %>% mutate(ligand = convert_alias_to_symbols(ligand, organism = organism), receptor = convert_alias_to_symbols(receptor, organism = organism))
  lr_network = lr_network_all  %>% mutate(ligand = make.names(ligand), receptor = make.names(receptor)) %>% distinct(ligand, receptor)

  ligand_target_matrix = readRDS("/group/irc/personal/robinb/ligand_target_matrix_nsga2r_final.rds")
 colnames(ligand_target_matrix) = colnames(ligand_target_matrix) %>% convert_alias_to_symbols(organism = organism) %>% make.names()
  rownames(ligand_target_matrix) = rownames(ligand_target_matrix) %>% convert_alias_to_symbols(organism = organism) %>% make.names()
  lr_network = lr_network %>% filter(ligand %in% colnames(ligand_target_matrix))
  ligand_target_matrix = ligand_target_matrix[, lr_network$ligand %>% unique()]

  ### read in scRNAseq data

  sce = readRDS(sce_path)
  sce = alias_to_symbol_SCE(sce, "human") %>% makenames_SCE()
  sce = sce[, SummarizedExperiment::colData(sce)[,group_id] %in% contrast_tbl$group]

   ## check abundance info
  senders_oi = SummarizedExperiment::colData(sce)[,celltype_id] %>% unique()
  receivers_oi = SummarizedExperiment::colData(sce)[,celltype_id] %>% unique()

  print(SummarizedExperiment::colData(sce)[,celltype_id] %>% unique())

  abundance_info = get_abundance_info(sce = sce, sample_id = sample_id, group_id = group_id, celltype_id = celltype_id, min_cells = min_cells, senders_oi = senders_oi, receivers_oi = receivers_oi, batches = batches)

  ## check for condition-specific cell types
  sample_group_celltype_df = abundance_info$abundance_data %>% filter(n > min_cells) %>% ungroup() %>% distinct(sample_id, group_id) %>% cross_join(abundance_info$abundance_data %>% ungroup() %>% distinct(celltype_id)) %>% arrange(sample_id)
  abundance_df = sample_group_celltype_df %>% left_join(abundance_info$abundance_data %>% ungroup())
  abundance_df$n[is.na(abundance_df$n)] = 0
  abundance_df$keep[is.na(abundance_df$keep)] = FALSE
  abundance_df_summarized = abundance_df %>% mutate(keep = as.logical(keep)) %>% group_by(group_id, celltype_id) %>% summarise(samples_present = sum((keep)))
  celltypes_absent_one_condition = abundance_df_summarized %>% filter(samples_present == 0) %>% pull(celltype_id) %>% unique()
  celltypes_present_one_condition = abundance_df_summarized %>% filter(samples_present >= 2) %>% pull(celltype_id) %>% unique()
  condition_specific_celltypes = intersect(celltypes_absent_one_condition, celltypes_present_one_condition)

  total_nr_conditions = SummarizedExperiment::colData(sce)[,group_id] %>% unique() %>% length() 
  absent_celltypes = abundance_df_summarized %>% dplyr::filter(samples_present < 2) %>% dplyr::group_by(celltype_id) %>% dplyr::count() %>% dplyr::filter(n == total_nr_conditions) %>% dplyr::pull(celltype_id)

  print("condition-specific celltypes:")
  print(condition_specific_celltypes)

  print("absent celltypes:")
  print(absent_celltypes)

  senders_oi = senders_oi %>% setdiff(absent_celltypes)
  receivers_oi = receivers_oi %>% setdiff(absent_celltypes)

  retained_celltypes = union(senders_oi, receivers_oi) 

  sce = sce[, SummarizedExperiment::colData(sce)[,celltype_id] %in% retained_celltypes]

  nr_celltypes = SummarizedExperiment::colData(sce)[,celltype_id] %>% unique() %>% length()
  nr_cells = sce@assays@data$counts %>% colnames() %>% length()
  nr_genes = sce@assays@data$counts %>% rownames() %>% length()
  nr_samples = SummarizedExperiment::colData(sce)[,sample_id] %>% unique() %>% length()

  print("nr_celltypes")
  print(nr_celltypes)
  print("nr_cells")
  print(nr_cells)
  print("nr_genes")
  print(nr_genes)
  print("nr_samples")
  print(nr_samples)

  ### DE analysis
  ## define expressed genes
  frq_list = get_frac_exprs(sce = sce, sample_id = sample_id, celltype_id =  celltype_id, group_id = group_id, batches = batches, min_cells = min_cells, fraction_cutoff = fraction_cutoff, min_sample_prop = 0.5)

  DE_info = get_DE_info(sce = sce, sample_id = sample_id, group_id = group_id, celltype_id = celltype_id, batches = batches, covariates = covariates, contrasts_oi = contrasts_oi, min_cells = min_cells, expressed_df = frq_list$expressed_df)

  markobj = c('---',
             'title: "P-value Histograms"',
             'output: html_document',
             '---',
             '',
             '## hist_pvals',
             '',
             '```r',
             'DE_info$hist_pvals',
             '```',
             ''
             )

  markdown::markdownToHTML(text = knitr::knit(text = markobj), output = paste0(output_path, "plots/hist_pvals_preEvspreNE_01122023.html"))

  DE_info$hist_pvals = NULL

  if(empirical_pval == TRUE){
    DE_info_emp = get_empirical_pvals(DE_info$celltype_de$de_output_tidy)
    comparison_plots = compare_normal_emp_pvals(DE_info, DE_info_emp, adj_pval = FALSE)
    celltype_de = DE_info_emp$de_output_tidy_emp %>% dplyr::select(-p_val, -p_adj) %>% dplyr::rename(p_val = p_emp, p_adj = p_adj_emp)
    markobj = c('---',
             'title: "Empericial p-value Plots"',
             'output: html_document',
             '---',
             '',
             '## comparison_plots',
             '',
             '```r',
             'comparison_plots',
             '```',
              '',
             '## hist_pvals_emp',
             '',
             '```r',
             'DE_info_emp$hist_pvals_emp',
             '```',
             '',
             '## z_distr_plots_emp_pval',
             '',
             '```r',
             'DE_info_emp$z_distr_plots_emp_pval',
             '```',
             ''
             )

    markdown::markdownToHTML(text = knitr::knit(text = markobj), output = paste0(output_path, "plots/emp_DE_plots_preEvspreNE_01122023.html"))
  } else {
    celltype_de = DE_info$celltype_de$de_output_tidy
  }

  print(celltype_de %>% dplyr::group_by(cluster_id, contrast) %>% dplyr::filter(p_adj <= p_val_threshold & abs(logFC) >= logFC_threshold) %>% dplyr::count() %>% dplyr::arrange(-n))

  senders_oi = celltype_de$cluster_id %>% unique()
  receivers_oi = celltype_de$cluster_id %>% unique()
  genes_oi = celltype_de$gene %>% unique()

  retained_celltypes = c(senders_oi, receivers_oi) %>% unique()
  retained_celltypes = c(retained_celltypes, condition_specific_celltypes) 

  print("retained cell types")
  print(retained_celltypes)

  sce = sce[genes_oi, SummarizedExperiment::colData(sce)[,celltype_id] %in% retained_celltypes]

  print(SummarizedExperiment::colData(sce)[,celltype_id] %>% unique())

  sender_receiver_de = combine_sender_receiver_de(
    sender_de = celltype_de,
    receiver_de = celltype_de,
    senders_oi = senders_oi,
    receivers_oi = receivers_oi,
    lr_network = lr_network
  )
  sender_receiver_tbl = sender_receiver_de %>% dplyr::distinct(sender, receiver)

  metadata_combined = SummarizedExperiment::colData(sce) %>% tibble::as_tibble()

  if(!is.na(batches)){
    grouping_tbl = metadata_combined[,c(sample_id, group_id, batches)] %>% tibble::as_tibble() %>% dplyr::distinct()
    colnames(grouping_tbl) = c("sample","group",batches)
  } else {
    grouping_tbl = metadata_combined[,c(sample_id, group_id)] %>% tibble::as_tibble() %>% dplyr::distinct()
    colnames(grouping_tbl) = c("sample","group")
  }
   ### abundance_expression_info

  abundance_expression_info = process_abundance_expression_info(sce = sce, sample_id = sample_id, group_id = group_id, celltype_id = celltype_id, min_cells = min_cells, senders_oi = union(senders_oi, condition_specific_celltypes), receivers_oi = union(receivers_oi, condition_specific_celltypes), lr_network = lr_network, batches = batches, frq_list = frq_list, abundance_info = abundance_info)

  markobj = c('---',
             'title: "Abundance Plots"',
             'output: html_document',
             '---',
             '',
             '## abund_plot_sample',
             '',
             '```r',
             'abundance_info$abund_plot_sample',
             '```',
              '',
             '## abund_plot_group',
             '',
             '```r',
             'abundance_info$abund_plot_group',
             '```',
             '',
             '## abund_barplot',
             '',
             '```r',
             'abundance_info$abund_barplot',
             '```',
             ''
             )

  markdown::markdownToHTML(text = knitr::knit(text = markobj), output = paste0(output_path, "plots/abundance_plots_preEvspreNE_01122023.html"))

  rm(sce)

  ### ligand activities

  n.cores = min(4, length(receivers_oi))

  ligand_activities_targets_DEgenes = suppressMessages(suppressWarnings(get_ligand_activities_targets_DEgenes(
    receiver_de = celltype_de,
    receivers_oi = receivers_oi,
    ligand_target_matrix = ligand_target_matrix,
    logFC_threshold = logFC_threshold,
    p_val_threshold = p_val_threshold,
    p_val_adj = p_val_adj,
    top_n_target = top_n_target,
    verbose = TRUE, 
    n.cores = n.cores
  )))

  ### save intermediary output
  # list(abundance_expression_info = abundance_expression_info, grouping_tbl = grouping_tbl, sender_receiver_de = sender_receiver_de, ligand_activities_targets_DEgenes = ligand_activities_targets_DEgenes, contrast_tbl = contrast_tbl, sender_receiver_tbl = sender_receiver_tbl, celltype_de = celltype_de) %>% saveRDS(paste0(output_path,"output/MNN_BCA_EvsNE_intermediary_output.rds"))

  ### Prioritization tables
  prioritization_tables = suppressMessages(generate_prioritization_tables(
  sender_receiver_info = abundance_expression_info$sender_receiver_info,
  sender_receiver_de = sender_receiver_de,
  ligand_activities_targets_DEgenes = ligand_activities_targets_DEgenes,
  contrast_tbl = contrast_tbl,
  sender_receiver_tbl = sender_receiver_tbl,
  grouping_tbl = grouping_tbl,
  scenario = "regular", # all prioritization criteria will be weighted equally
  fraction_cutoff = fraction_cutoff, 
  abundance_data_receiver = abundance_expression_info$abundance_data_receiver,
  abundance_data_sender = abundance_expression_info$abundance_data_sender,
  ligand_activity_down = TRUE 
))

  ## correlation
  lr_target_prior_cor = lr_target_prior_cor_inference(prioritization_tables$group_prioritization_tbl$receiver %>% unique(), abundance_expression_info, celltype_de, grouping_tbl, prioritization_tables, ligand_target_matrix, logFC_threshold = logFC_threshold, p_val_threshold = p_val_threshold, p_val_adj = p_val_adj)

  ## save output

  if(length(condition_specific_celltypes) > 0) {
    print("There are condition specific cell types in the data. Continuing with the regular MultiNicheNet analysis will not include those. If preferred, the user can apply a specific worfklow tailored to analyze CCC events involving condition-specific cell types")
    print(condition_specific_celltypes)
    prioritization_tables_with_condition_specific_celltype_sender = prioritize_condition_specific_sender(
  abundance_info = abundance_info,
  abundance_expression_info = abundance_expression_info, 
  condition_specific_celltypes = condition_specific_celltypes, 
  grouping_tbl = grouping_tbl, 
  fraction_cutoff = fraction_cutoff, 
  contrast_tbl = contrast_tbl, 
  sender_receiver_de = sender_receiver_de, 
  lr_network = lr_network, 
  ligand_activities_targets_DEgenes = ligand_activities_targets_DEgenes,
  scenario = "regular",
  ligand_activity_down = TRUE
)
    prioritization_tables_with_condition_specific_celltype_receiver = prioritize_condition_specific_receiver(
  abundance_info = abundance_info,
  abundance_expression_info = abundance_expression_info, 
  condition_specific_celltypes = condition_specific_celltypes, 
  grouping_tbl = grouping_tbl, 
  fraction_cutoff = fraction_cutoff, 
  contrast_tbl = contrast_tbl, 
  sender_receiver_de = sender_receiver_de, 
  lr_network = lr_network, 
  ligand_activities_targets_DEgenes = ligand_activities_targets_DEgenes,
  scenario = "regular",
  ligand_activity_down = TRUE
)
  combined_prioritization_tables = list(
  group_prioritization_tbl = bind_rows(
    prioritization_tables_with_condition_specific_celltype_receiver$group_prioritization_tbl %>% filter(receiver %in% condition_specific_celltypes),
    prioritization_tables_with_condition_specific_celltype_sender$group_prioritization_tbl %>% filter(sender %in% condition_specific_celltypes)
  ) %>% bind_rows(prioritization_tables$group_prioritization_tbl) %>% arrange(-prioritization_score) %>% distinct()
  )

  multinichenet_output = list(
    celltype_info = abundance_expression_info$celltype_info,
    celltype_de = celltype_de,
    ligand_activities_targets_DEgenes = ligand_activities_targets_DEgenes,
    prioritization_tables = prioritization_tables,
    prioritization_tables_with_condition_specific_celltype_sender = prioritization_tables_with_condition_specific_celltype_sender, 
    prioritization_tables_with_condition_specific_celltype_receiver = prioritization_tables_with_condition_specific_celltype_receiver, 
    combined_prioritization_tables = combined_prioritization_tables,
    grouping_tbl = grouping_tbl,
    lr_target_prior_cor = lr_target_prior_cor
  ) 

  multinichenet_output = make_lite_output_condition_specific(multinichenet_output)


} else {
    print("There are no condition specific cell types in the data. MultiNicheNet analysis is performed in the regular way for all cell types.")
    multinichenet_output = list(
      celltype_info = abundance_expression_info$celltype_info,
      celltype_de = celltype_de,
      ligand_activities_targets_DEgenes = ligand_activities_targets_DEgenes,
      prioritization_tables = prioritization_tables,
      grouping_tbl = grouping_tbl,
      lr_target_prior_cor = lr_target_prior_cor
  ) 
    multinichenet_output = make_lite_output(multinichenet_output)
}

  saveRDS(multinichenet_output, paste0(output_path,"output/MNN_BCA_preEvspreNE_final_output.rds"))

  return(multinichenet_output)

}
qsub_config = create_qsub_config(
  remote = "robinb@prism.psb.ugent.be:7777",
  local_tmp_path = "/Users/robinb/r2gridengine", 
  remote_tmp_path = "/scratch/irc/personal/robinb/r2gridengine",
  modules = "R/x86_64/4.1.3", # 4.0.3??
  memory = "240G",
  wait = FALSE,
  remove_tmp_folder = FALSE,
  name = "MNN-BCA",
  max_wall_time = "500:00:00",
  stop_on_error = TRUE,
  num_cores = 1
)
job_MNN_EvsNE = qsub_lapply(X = 1, FUN = multinichenet_wrapper,
                           object_envir = environment(multinichenet_wrapper),
                           qsub_config = qsub_config,
                           qsub_environment = NULL,
                           qsub_packages = NULL, 
                            sce_path, output_path, celltype_id, sample_id, group_id, contrasts_oi, contrast_tbl, batches, covariates, min_cells, logFC_threshold, p_val_threshold, fraction_cutoff, p_val_adj, empirical_pval, top_n_target)


saeyslab/multinichenetr documentation built on Jan. 15, 2025, 7:55 p.m.