Giotto: Spatial Single-Cell Transcriptomics Toolbox

Documented in make_simulated_network

# cell type proximity enrichment ####

#' @title make_simulated_network
#' @name make_simulated_network
#' @description Simulate random network.
#' @keywords internal
make_simulated_network = function(gobject,
                                  spatial_network_name = 'Delaunay_network',
                                  cluster_column,
                                  number_of_simulations = 100,
                                  set_seed = TRUE,
                                  seed_number = 1234) {


  # data.table variables
  unified_cells = NULL

  spatial_network_annot = annotateSpatialNetwork(gobject = gobject,
                                                 spatial_network_name = spatial_network_name,
                                                 cluster_column = cluster_column)

  # remove double edges between same cells #
  spatial_network_annot = sort_combine_two_DT_columns(spatial_network_annot,
                                                      column1 = 'from', column2 = 'to',
                                                      myname = 'unified_cells')
  spatial_network_annot = spatial_network_annot[!duplicated(unified_cells)]

  # create a simulated network
  length_ints = nrow(spatial_network_annot)
  s1_list = list()
  s2_list = list()

  all_cell_type = c(spatial_network_annot$from_cell_type, spatial_network_annot$to_cell_type)
  middle_point = length(all_cell_type)/2

  for(sim in 1:number_of_simulations) {

    if(set_seed == TRUE) {
      seed_number = seed_number+sim
      set.seed(seed = seed_number)
    }

    reshuffled_all_cell_type = sample(x = all_cell_type, size = length(all_cell_type), replace = F)

    new_from_cell_type = reshuffled_all_cell_type[1:middle_point]
    s1_list[[sim]] = new_from_cell_type

    new_to_cell_type = reshuffled_all_cell_type[(middle_point+1):length(all_cell_type)]
    s2_list[[sim]] = new_to_cell_type

  }

  s1_vector = do.call('c', s1_list)
  s2_vector = do.call('c', s2_list)
  round_vector = rep(x = 1:number_of_simulations, each = length_ints)
  round_vector = paste0('sim',round_vector)

  # data.table variables
  s1 = s2 = unified_int = type_int = NULL

  sample_dt = data.table::data.table(s1 = s1_vector, s2 = s2_vector, round = round_vector)
  uniq_sim_comb = unique(sample_dt[,.(s1,s2)])
  uniq_sim_comb[, unified_int := paste(sort(c(s1,s2)), collapse = '--'), by  = 1:nrow(uniq_sim_comb)]
  sample_dt[uniq_sim_comb, unified_int := unified_int, on = c(s1 = 's1', s2 = 's2')]
  sample_dt[, type_int := ifelse(s1 == s2, 'homo', 'hetero')]

  return(sample_dt)

}


#' @title cellProximityEnrichment
#' @name cellProximityEnrichment
#' @description Compute cell-cell interaction enrichment (observed vs expected)
#' @param gobject giotto object
#' @param spatial_network_name name of spatial network to use
#' @param cluster_column name of column to use for clusters
#' @param number_of_simulations number of simulations to create expected observations
#' @param adjust_method method to adjust p.values
#' @param set_seed use of seed
#' @param seed_number seed number to use
#' @return List of cell Proximity scores (CPscores) in data.table format. The first
#' data.table (raw_sim_table) shows the raw observations of both the original and
#' simulated networks. The second data.table (enrichm_res) shows the enrichment results.
#' @details Spatial proximity enrichment or depletion between pairs of cell types
#' is calculated by calculating the observed over the expected frequency
#' of cell-cell proximity interactions. The expected frequency is the average frequency
#' calculated from a number of spatial network simulations. Each individual simulation is
#' obtained by reshuffling the cell type labels of each node (cell)
#' in the spatial network.
#' @export
cellProximityEnrichment <- function(gobject,
                                    spatial_network_name = 'Delaunay_network',
                                    cluster_column,
                                    number_of_simulations = 1000,
                                    adjust_method = c("none", "fdr", "bonferroni","BH",
                                                      "holm", "hochberg", "hommel",
                                                      "BY"),
                                    set_seed = TRUE,
                                    seed_number = 1234) {


  # p.adj test
  sel_adjust_method = match.arg(adjust_method, choices = c("none", "fdr", "bonferroni","BH",
                                                           "holm", "hochberg", "hommel",
                                                           "BY"))

  spatial_network_annot = annotateSpatialNetwork(gobject = gobject,
                                                 spatial_network_name = spatial_network_name,
                                                 cluster_column = cluster_column)

  # remove double edges between same cells #
  # a simplified network does not have double edges between cells #
  # spatial_network_annot[, unified_cells := paste(sort(c(to,from)), collapse = '--'), by = 1:nrow(spatial_network_annot)]

  # data.table variables
  unified_cells = type_int = N = NULL

  spatial_network_annot = sort_combine_two_DT_columns(spatial_network_annot, 'to', 'from', 'unified_cells')
  spatial_network_annot = spatial_network_annot[!duplicated(unified_cells)]

  sample_dt = make_simulated_network(gobject = gobject,
                                     spatial_network_name = spatial_network_name,
                                     cluster_column = cluster_column,
                                     number_of_simulations = number_of_simulations,
                                     set_seed = set_seed,
                                     seed_number = seed_number)

  # combine original and simulated network
  table_sim_results = sample_dt[, .N, by = c('unified_int', 'type_int', 'round')]

  ## create complete simulations
  ## add 0 if no single interaction was found
  unique_ints = unique(table_sim_results[,.(unified_int, type_int)])

  # data.table with 0's for all interactions
  minimum_simulations = unique_ints[rep(seq_len(nrow(unique_ints)), number_of_simulations), ]
  minimum_simulations[, round := rep(paste0('sim',1:number_of_simulations), each = nrow(unique_ints))]
  minimum_simulations[, N := 0]

  table_sim_minimum_results = rbind(table_sim_results, minimum_simulations)
  table_sim_minimum_results[, V1 := sum(N), by  = c('unified_int', 'type_int', 'round')]
  table_sim_minimum_results = unique(table_sim_minimum_results[,.(unified_int, type_int, round, V1)])
  table_sim_results = table_sim_minimum_results


  # data.table variables
  orig = unified_int = V1 = original = enrichm = simulations = NULL

  table_sim_results[, orig := 'simulations']
  spatial_network_annot[, round := 'original']

  table_orig_results = spatial_network_annot[, .N, by  = c('unified_int', 'type_int', 'round')]
  table_orig_results[, orig := 'original']
  data.table::setnames(table_orig_results, old = 'N', new = 'V1')

  table_results = rbind(table_orig_results, table_sim_results)



  # add missing combinations from original or simulations
  # probably not needed anymore
  all_simulation_ints = as.character(unique(table_results[orig == 'simulations']$unified_int))
  all_original_ints = as.character(unique(table_results[orig == 'original']$unified_int))
  missing_in_original = all_simulation_ints[!all_simulation_ints %in% all_original_ints]
  missing_in_simulations = all_original_ints[!all_original_ints %in% all_simulation_ints]
  create_missing_for_original = table_results[unified_int %in% missing_in_original]
  create_missing_for_original = unique(create_missing_for_original[, c('orig', 'V1') := list('original', 0)])
  create_missing_for_simulations = table_results[unified_int %in% missing_in_simulations]
  create_missing_for_simulations = unique(create_missing_for_simulations[, c('orig', 'V1') := list('simulations', 0)])

  table_results <- do.call('rbind', list(table_results, create_missing_for_original, create_missing_for_simulations))


  ## p-values
  combo_list = rep(NA, length = length(unique(table_results$unified_int)))
  p_high = rep(NA, length = length(unique(table_results$unified_int)))
  p_low = rep(NA, length = length(unique(table_results$unified_int)))

  for(int_combo in 1:length(unique(table_results$unified_int))) {

    this_combo = as.character(unique(table_results$unified_int)[int_combo])

    sub = table_results[unified_int == this_combo]

    orig_value = sub[orig == 'original']$V1
    sim_values = sub[orig == 'simulations']$V1

    length_simulations = length(sim_values)
    if(length_simulations != number_of_simulations) {
      additional_length_needed = number_of_simulations-length_simulations
      sim_values = c(sim_values, rep(0, additional_length_needed))
      #length_simulations = c(length_simulations, rep(0, additional_length_needed))
    }

    p_orig_higher = 1 - (sum((orig_value+1) > (sim_values+1))/number_of_simulations)
    p_orig_lower = 1 - (sum((orig_value+1) < (sim_values+1))/number_of_simulations)

    combo_list[[int_combo]] = this_combo
    p_high[[int_combo]] = p_orig_higher
    p_low[[int_combo]] = p_orig_lower

  }
  res_pvalue_DT = data.table::data.table(unified_int = as.vector(combo_list), p_higher_orig = p_high, p_lower_orig = p_low)


  # depletion or enrichment in barplot format
  table_mean_results <- table_results[, .(mean(V1)), by = c('orig', 'unified_int', 'type_int')]
  table_mean_results_dc <- data.table::dcast.data.table(data = table_mean_results, formula = type_int+unified_int~orig, value.var = 'V1')
  table_mean_results_dc[, original := ifelse(is.na(original), 0, original)]
  table_mean_results_dc[, enrichm := log2((original+1)/(simulations+1))]


  table_mean_results_dc <- merge(table_mean_results_dc, res_pvalue_DT, by = 'unified_int')
  data.table::setorder(table_mean_results_dc, enrichm)
  table_mean_results_dc[, unified_int := factor(unified_int, unified_int)]

  # adjust p-values for mht

  # data.table variables
  p.adj_higher = p.adj_lower = p_lower_orig = p_higher_orig = PI_value = int_ranking = NULL

  table_mean_results_dc[, p.adj_higher := stats::p.adjust(p_higher_orig, method = sel_adjust_method)]
  table_mean_results_dc[, p.adj_lower := stats::p.adjust(p_lower_orig, method = sel_adjust_method)]


  table_mean_results_dc[, PI_value := ifelse(p.adj_higher <= p.adj_lower,
                                             -log10(p.adj_higher+(1/number_of_simulations))*enrichm,
                                             -log10(p.adj_lower+(1/number_of_simulations))*enrichm)]
  data.table::setorder(table_mean_results_dc, PI_value)

  # order
  table_mean_results_dc <- table_mean_results_dc[order(-PI_value)]
  table_mean_results_dc[, int_ranking := 1:.N]

  return(list(raw_sim_table = table_results, enrichm_res = table_mean_results_dc))

}


#' @title addCellIntMetadata
#' @name addCellIntMetadata
#' @description Creates an additional metadata column with information about interacting and non-interacting cell types of the
#' selected cell-cell interaction.
#' @param gobject giotto object
#' @param spatial_network name of spatial network to use
#' @param cluster_column column of cell types
#' @param cell_interaction cell-cell interaction to use
#' @param name name for the new metadata column
#' @param return_gobject return an updated giotto object
#' @return Giotto object
#' @details This function will create an additional metadata column which selects interacting cell types for a specific cell-cell
#' interaction. For example, if you want to color interacting astrocytes and oligodendrocytes it will create a new metadata column with
#' the values "select_astrocytes", "select_oligodendrocytes", "other_astrocytes", "other_oligodendroyctes" and "other". Where "other" is all
#' other cell types found within the selected cell type column.
#' @export
addCellIntMetadata = function(gobject,
                              spatial_network = 'spatial_network',
                              cluster_column,
                              cell_interaction,
                              name = 'select_int',
                              return_gobject = TRUE) {

  if(is.null(spatial_network)) {
    stop('spatial_network must be provided, this must be an existing spatial network \n')
  }

  if(is.null(cluster_column)) {
    stop('cluster_column must be provided, this must be an existing cell metadata column, see pData(your_giotto_object) \n')
  }

  if(is.null(cell_interaction)) {
    stop('cell_interaction must be provided, this must be cell--cell interaction between cell types in cluster_column \n')
  }

  # create spatial network
  spatial_network_annot = annotateSpatialNetwork(gobject = gobject,
                                                 spatial_network_name = spatial_network,
                                                 cluster_column = cluster_column)

  # selected vs other cells

  # data.table variables
  unified_int = cell_ID = NULL

  selected_cells = unique(c(spatial_network_annot[unified_int == cell_interaction]$to,
                            spatial_network_annot[unified_int == cell_interaction]$from))

  cell_metadata = data.table::copy(pDataDT(gobject))
  cell_type_1 = strsplit(cell_interaction, split = '--')[[1]][1]
  cell_type_2 = strsplit(cell_interaction, split = '--')[[1]][2]

  cell_metadata[, c(name) := ifelse(!get(cluster_column) %in% c(cell_type_1, cell_type_2), 'other',
                                    ifelse(get(cluster_column) == cell_type_1 & cell_ID %in% selected_cells, paste0("select_", cell_type_1),
                                           ifelse(get(cluster_column) == cell_type_2 & cell_ID %in% selected_cells, paste0("select_", cell_type_2),
                                                  ifelse(get(cluster_column) == cell_type_1, paste0("other_", cell_type_1), paste0("other_", cell_type_2)))))]

  if(return_gobject == TRUE) {
    gobject@cell_metadata = cell_metadata
    return(gobject)
  } else {
    return(cell_metadata)
  }

}




# * ####
# CPG cell proximity gene changes ####

#' @title do_ttest
#' @name do_ttest
#' @description Performs t.test on subsets of a matrix
#' @keywords internal
do_ttest = function(expr_values,
                    select_ind,
                    other_ind,
                    adjust_method,
                    mean_method,
                    offset = 0.1) {

  # data.table variables
  p.value = p.adj = NULL

  mean_sel = my_rowMeans(expr_values[,select_ind], method = mean_method, offset = offset)
  mean_all = my_rowMeans(expr_values[,other_ind], method = mean_method, offset = offset)

  #if(length(select_ind) == 1){mean_sel = expr_values[,select_ind]} else{mean_sel = rowMeans(expr_values[,select_ind])}
  #if(length(other_ind) == 1){mean_all = expr_values[,other_ind]} else{mean_all = rowMeans(expr_values[,other_ind])}

  if(length(select_ind) == 1 | length(other_ind) == 1) {
    results = NaN
  } else {
    results = apply(expr_values, MARGIN = 1, function(x) {
      p.value = stats::t.test(x[select_ind], x[other_ind])$p.value
    })
  }

  # other info
  log2fc = log2((mean_sel+offset)/(mean_all+offset))
  diff = mean_sel - mean_all

  resultsDT = data.table('genes' = rownames(expr_values), 'sel' = mean_sel, 'other' = mean_all, 'log2fc' = log2fc, 'diff' = diff, 'p.value' = unlist(results))
  resultsDT[, p.value := ifelse(is.nan(p.value), 1, p.value)]
  resultsDT[, p.adj := stats::p.adjust(p.value, method = adjust_method)]
  setorder(resultsDT, p.adj)

  return(resultsDT)
}

#' @title do_limmatest
#' @name do_limmatest
#' @description Performs limma t.test on subsets of a matrix
#' @keywords internal
do_limmatest = function(expr_values,
                        select_ind,
                        other_ind,
                        mean_method,
                        offset = 0.1) {

  # data.table variables
  sel = other = genes = P.Value = adj.P.Val = p.adj = NULL

  expr_values_subset = cbind(expr_values[,select_ind], expr_values[,other_ind])
  mygroups = c(rep('sel', length(select_ind)), rep('other', length(other_ind)))
  mygroups = factor(mygroups, levels = unique(mygroups))

  design = stats::model.matrix(~0+mygroups)
  colnames(design) = levels(mygroups)
  fit = limma::lmFit(expr_values_subset, design = design)


  cont.matrix = limma::makeContrasts(
    sel_vs_other =  sel-other,
    levels = design
  )

  fitcontrast = limma::contrasts.fit(fit,  cont.matrix)
  fitc_ebayes = limma::eBayes(fitcontrast)

  # limma to DT
  limma_result = limma::topTable(fitc_ebayes, coef = 1,number = 100000, confint = T)
  limmaDT = data.table::as.data.table(limma_result); limmaDT[, genes := rownames(limma_result)]

  # other info
  mean_sel = my_rowMeans(expr_values[,select_ind], method = mean_method, offset = offset)
  mean_all = my_rowMeans(expr_values[,other_ind], method = mean_method, offset = offset)

  #if(length(select_ind) == 1){mean_sel = expr_values[,select_ind]} else{mean_sel = rowMeans(expr_values[,select_ind])}
  #if(length(other_ind) == 1){mean_all = expr_values[,other_ind]} else{mean_all = rowMeans(expr_values[,other_ind])}

  log2fc = log2((mean_sel+offset)/(mean_all+offset))
  diff = mean_sel - mean_all

  tempDT = data.table::data.table('genes' = rownames(expr_values),
                                  'sel'= mean_sel,
                                  'other' = mean_all,
                                  'log2fc' = log2fc,
                                  'diff' =  diff)
  limmaDT = data.table::merge.data.table(limmaDT, tempDT, by = 'genes')
  limmaDT = limmaDT[,.(genes, sel, other, log2fc, diff, P.Value, adj.P.Val)]
  colnames(limmaDT) = c('genes', 'sel', 'other', 'log2fc', 'diff', 'p.value', 'p.adj')

  setorder(limmaDT, p.adj)

  return(limmaDT)

}

#' @title do_wilctest
#' @name do_wilctest
#' @description Performs wilcoxon on subsets of a matrix
#' @keywords internal
do_wilctest = function(expr_values, select_ind, other_ind, adjust_method, mean_method, offset = 0.1) {

  # data.table variables
  p.value = p.adj = NULL

  mean_sel = my_rowMeans(expr_values[,select_ind], method = mean_method, offset = offset)
  mean_all = my_rowMeans(expr_values[,other_ind], method = mean_method, offset = offset)

  #if(length(select_ind) == 1){mean_sel = expr_values[,select_ind]} else{mean_sel = rowMeans(expr_values[,select_ind])}
  #if(length(other_ind) == 1){mean_all = expr_values[,other_ind]} else{mean_all = rowMeans(expr_values[,other_ind])}

  if(length(select_ind) == 1 | length(other_ind) == 1) {
    results = NaN
  } else {
    results = apply(expr_values, MARGIN = 1, function(x) {
      p.value = stats::wilcox.test(x[select_ind], x[other_ind])$p.value
    })
  }

  # other info
  log2fc = log2((mean_sel+offset)/(mean_all+offset))
  diff = mean_sel - mean_all

  resultsDT = data.table('genes' = rownames(expr_values), 'sel' = mean_sel, 'other' = mean_all, 'log2fc' = log2fc, 'diff' = diff, 'p.value' = unlist(results))
  resultsDT[, p.value := ifelse(is.nan(p.value), 1, p.value)]
  resultsDT[, p.adj := stats::p.adjust(p.value, method = adjust_method)]
  setorder(resultsDT, p.adj)

  return(resultsDT)

}


#' @title do_permuttest_original
#' @name do_permuttest_original
#' @description calculate original values
#' @keywords internal
do_permuttest_original = function(expr_values,
                                  select_ind,
                                  other_ind,
                                  name = 'orig',
                                  mean_method,
                                  offset = 0.1) {

  # data.table variables
  genes = NULL

  mean_sel = my_rowMeans(expr_values[,select_ind], method = mean_method, offset = offset)
  mean_all = my_rowMeans(expr_values[,other_ind], method = mean_method, offset = offset)

  #if(length(select_ind) == 1){mean_sel = expr_values[,select_ind]} else{mean_sel = rowMeans(expr_values[,select_ind])}
  #if(length(other_ind) == 1){mean_all = expr_values[,other_ind]} else{mean_all = rowMeans(expr_values[,other_ind])}

  log2fc = log2((mean_sel+offset)/(mean_all+offset))
  diff = mean_sel - mean_all

  resultsDT = data.table('sel' = mean_sel, 'other' = mean_all, 'log2fc' = log2fc, 'diff' =  diff)
  resultsDT[, genes := rownames(expr_values)]
  resultsDT[, name := name]

  return(resultsDT)

}

#' @title do_permuttest_random
#' @name do_permuttest_random
#' @description calculate random values
#' @keywords internal
do_permuttest_random = function(expr_values,
                                select_ind,
                                other_ind,
                                name = 'perm_1',
                                mean_method,
                                offset = 0.1,
                                set_seed = TRUE,
                                seed_number = 1234) {

  # data.table variables
  genes = NULL

  l_select_ind = length(select_ind)
  l_other_ind = length(other_ind)

  all_ind = c(select_ind, other_ind)

  if(set_seed == TRUE) {
    set.seed(seed = seed_number)
  }
  random_select = sample(all_ind, size = l_select_ind, replace = F)
  random_other = all_ind[!all_ind %in% random_select]

  # alternative
  mean_sel = my_rowMeans(expr_values[,random_select], method = mean_method, offset = offset)
  mean_all = my_rowMeans(expr_values[,random_other], method = mean_method, offset = offset)
  #if(length(select_ind) == 1){mean_sel = expr_values[,random_select]} else{mean_sel = rowMeans(expr_values[,random_select])}
  #if(length(other_ind) == 1){mean_all = expr_values[,random_other]} else{mean_all = rowMeans(expr_values[,random_other])}

  log2fc = log2((mean_sel+offset)/(mean_all+offset))
  diff = mean_sel - mean_all

  resultsDT = data.table('sel' = mean_sel, 'other' = mean_all, 'log2fc' = log2fc, 'diff' = diff)
  resultsDT[, genes := rownames(expr_values)]
  resultsDT[, name := name]

  return(resultsDT)

}

#' @title do_multi_permuttest_random
#' @name do_multi_permuttest_random
#' @description calculate multiple random values
#' @keywords internal
do_multi_permuttest_random = function(expr_values,
                                      select_ind,
                                      other_ind,
                                      mean_method,
                                      offset = 0.1,
                                      n = 100,
                                      cores = 2,
                                      set_seed = TRUE,
                                      seed_number = 1234) {

  if(set_seed == TRUE) {
    seed_number_list = seed_number:(seed_number + (n-1))
  }

  result = giotto_lapply(X = 1:n, cores = cores, fun = function(x) {

    seed_number = seed_number_list[x]

    perm_rand = do_permuttest_random(expr_values = expr_values,
                                     select_ind = select_ind,
                                     other_ind = other_ind,
                                     name = paste0('perm_', x),
                                     mean_method = mean_method,
                                     offset = offset,
                                     set_seed = set_seed,
                                     seed_number = seed_number)

  })

  final_result = do.call('rbind', result)

}

#' @title do_permuttest_random
#' @name do_permuttest_random
#' @description Performs permutation test on subsets of a matrix
#' @keywords internal
do_permuttest = function(expr_values,
                         select_ind, other_ind,
                         n_perm = 1000,
                         adjust_method = 'fdr',
                         mean_method,
                         offset = 0.1,
                         cores = 2,
                         set_seed = TRUE,
                         seed_number = 1234) {

  # data.table variables
  log2fc_diff = log2fc = sel = other = genes = p_higher = p_lower = perm_sel = NULL
  perm_other = perm_log2fc = perm_diff = p.value = p.adj = NULL

  ## original data
  original = do_permuttest_original(expr_values = expr_values,
                                    select_ind = select_ind, other_ind = other_ind,
                                    name = 'orig',
                                    mean_method = mean_method,
                                    offset = offset)

  ## random permutations
  random_perms = do_multi_permuttest_random(expr_values = expr_values,
                                            n = n_perm,
                                            select_ind = select_ind,
                                            other_ind = other_ind,
                                            mean_method = mean_method,
                                            offset = offset,
                                            cores = cores,
                                            set_seed = set_seed,
                                            seed_number = seed_number)

  ##
  random_perms[, log2fc_diff := rep(original$log2fc, n_perm) - log2fc]
  random_perms[, c('perm_sel', 'perm_other', 'perm_log2fc', 'perm_diff') := list(mean(sel), mean(other), mean(log2fc), mean(diff)), by = genes]

  ## get p-values
  random_perms[, p_higher := sum(log2fc_diff > 0), by = genes]
  random_perms[, p_higher := 1-(p_higher/n_perm)]
  random_perms[, p_lower := sum(log2fc_diff < 0), by = genes]
  random_perms[, p_lower := 1-(p_lower/n_perm)]

  ## combine results permutation and original
  random_perms_res = unique(random_perms[,.(genes, perm_sel, perm_other, perm_log2fc, perm_diff, p_higher, p_lower)])
  results_m = data.table::merge.data.table(random_perms_res, original[,.(genes, sel, other, log2fc, diff)], by = 'genes')

  # select lowest p-value and perform p.adj
  results_m[, p.value := ifelse(p_higher <= p_lower, p_higher, p_lower)]
  results_m[, p.adj := stats::p.adjust(p.value, method = adjust_method)]

  results_m = results_m[,.(genes, sel, other, log2fc, diff, p.value, p.adj, perm_sel, perm_other, perm_log2fc, perm_diff)]
  setorder(results_m, p.adj, -log2fc)

  return(results_m)

}


#' @title do_cell_proximity_test
#' @name do_cell_proximity_test
#' @description Performs a selected differential test on subsets of a matrix
#' @keywords internal
do_cell_proximity_test = function(expr_values,
                                  select_ind, other_ind,
                                  diff_test = c('permutation', 'limma', 't.test', 'wilcox'),
                                  mean_method = c('arithmic', 'geometric'),
                                  offset = 0.1,
                                  n_perm = 100,
                                  adjust_method = c("bonferroni","BH", "holm", "hochberg", "hommel",
                                                    "BY", "fdr", "none"),
                                  cores = 2,
                                  set_seed = TRUE,
                                  seed_number = 1234) {

  # get parameters
  diff_test = match.arg(diff_test, choices = c('permutation', 'limma', 't.test', 'wilcox'))
  adjust_method = match.arg(adjust_method, choices = c("bonferroni","BH", "holm", "hochberg", "hommel",
                                                       "BY", "fdr", "none"))
  mean_method = match.arg(mean_method, choices = c('arithmic', 'geometric'))


  if(diff_test == 'permutation') {
    result = do_permuttest(expr_values = expr_values,
                           select_ind = select_ind, other_ind = other_ind,
                           n_perm = n_perm, adjust_method = adjust_method, cores = cores,
                           mean_method = mean_method, offset = offset,
                           set_seed = set_seed,
                           seed_number = seed_number)

  } else if(diff_test == 'limma') {
    result = do_limmatest(expr_values = expr_values,
                          select_ind = select_ind, other_ind = other_ind,
                          mean_method = mean_method, offset = offset)

  } else if(diff_test == 't.test') {
    result = do_ttest(expr_values = expr_values,
                      select_ind = select_ind, other_ind = other_ind,
                      mean_method = mean_method, offset = offset,
                      adjust_method = adjust_method)

  } else if(diff_test == 'wilcox') {
    result = do_wilctest(expr_values = expr_values,
                         select_ind = select_ind, other_ind = other_ind,
                         mean_method = mean_method, offset = offset,
                         adjust_method = adjust_method)

  }

  return(result)

}



#' @title findCellProximityGenes_per_interaction
#' @name findCellProximityGenes_per_interaction
#' @description Identifies genes that are differentially expressed due to proximity to other cell types.
#' @keywords internal
findCellProximityGenes_per_interaction = function(expr_values,
                                                  cell_metadata,
                                                  annot_spatnetwork,
                                                  sel_int,
                                                  cluster_column = NULL,
                                                  minimum_unique_cells = 1,
                                                  minimum_unique_int_cells = 1,
                                                  exclude_selected_cells_from_test = T,
                                                  diff_test = c('permutation', 'limma', 't.test', 'wilcox'),
                                                  mean_method = c('arithmic', 'geometric'),
                                                  offset = 0.1,
                                                  adjust_method = 'bonferroni',
                                                  nr_permutations = 100,
                                                  cores = 1,
                                                  set_seed = TRUE,
                                                  seed_number = 1234) {


  # data.table variables
  unified_int = to_cell_type = from_cell_type = cell_type = int_cell_type = NULL
  nr_select = int_nr_select = nr_other = int_nr_other = unif_int = NULL

  # select test to perform
  diff_test = match.arg(arg = diff_test, choices = c('permutation', 'limma', 't.test', 'wilcox'))

  # select subnetwork
  sub_spatnetwork = annot_spatnetwork[unified_int == sel_int]

  # unique cell types
  unique_cell_types = unique(c(sub_spatnetwork$to_cell_type, sub_spatnetwork$from_cell_type))

  if(length(unique_cell_types) == 2) {

    first_cell_type = unique_cell_types[1]
    second_cell_type = unique_cell_types[2]

    # first cell type ids
    to1 = sub_spatnetwork[to_cell_type == first_cell_type][['to']]
    from1 = sub_spatnetwork[from_cell_type == first_cell_type][['from']]
    cell1_ids = unique(c(to1, from1))

    # second cell type ids
    to2 = sub_spatnetwork[to_cell_type == second_cell_type][['to']]
    from2 = sub_spatnetwork[from_cell_type == second_cell_type][['from']]
    cell2_ids = unique(c(to2, from2))

    ## all cell ids
    all_cell1 = cell_metadata[get(cluster_column) == first_cell_type][['cell_ID']]
    all_cell2 = cell_metadata[get(cluster_column) == second_cell_type][['cell_ID']]

    ## exclude selected
    if(exclude_selected_cells_from_test == TRUE) {
      all_cell1 = all_cell1[!all_cell1 %in% cell1_ids]
      all_cell2 = all_cell2[!all_cell2 %in% cell2_ids]
    }

    ## FOR CELL TYPE 1
    sel_ind1 = which(colnames(expr_values) %in% cell1_ids)
    all_ind1 = which(colnames(expr_values) %in% all_cell1)

    ## FOR CELL TYPE 2
    sel_ind2 = which(colnames(expr_values) %in% cell2_ids)
    all_ind2 = which(colnames(expr_values) %in% all_cell2)


    ## do not continue if too few cells ##
    if(length(sel_ind1) < minimum_unique_cells | length(all_ind1) < minimum_unique_cells |
       length(sel_ind2) < minimum_unique_int_cells) {
      result_cell_1 = NULL
    } else {
      result_cell_1 = do_cell_proximity_test(expr_values = expr_values,
                                             select_ind = sel_ind1,
                                             other_ind = all_ind1,
                                             diff_test = diff_test,
                                             n_perm = nr_permutations,
                                             mean_method = mean_method,
                                             offset = offset,
                                             adjust_method = adjust_method,
                                             cores = cores,
                                             set_seed = set_seed,
                                             seed_number = seed_number)
      result_cell_1[, cell_type := first_cell_type]
      result_cell_1[, int_cell_type := second_cell_type]
      result_cell_1[, nr_select := length(sel_ind1)]
      result_cell_1[, int_nr_select := length(sel_ind2)]
      result_cell_1[, nr_other := length(all_ind1)]
      result_cell_1[, int_nr_other := length(all_ind2)]

    }


    ## do not continue if too few cells ##
    if(length(sel_ind2) < minimum_unique_cells | length(all_ind2) < minimum_unique_cells |
       length(sel_ind1) < minimum_unique_int_cells) {
      result_cell_2 = NULL
    } else {
      result_cell_2 = do_cell_proximity_test(expr_values = expr_values,
                                             select_ind = sel_ind2, other_ind = all_ind2,
                                             diff_test = diff_test,
                                             n_perm = nr_permutations,
                                             mean_method = mean_method,
                                             offset = offset,
                                             adjust_method = adjust_method,
                                             cores = cores,
                                             set_seed = set_seed,
                                             seed_number = seed_number)
      result_cell_2[, cell_type := second_cell_type]
      result_cell_2[, int_cell_type := first_cell_type]
      result_cell_2[, nr_select := length(sel_ind2)]
      result_cell_2[, int_nr_select := length(sel_ind1)]
      result_cell_2[, nr_other := length(all_ind2)]
      result_cell_2[, int_nr_other := length(all_ind1)]

    }


    ## COMBINE

    if(is.null(result_cell_1) & is.null(result_cell_2)) {
      return(NULL)
    } else {
      result_cells = rbind(result_cell_1, result_cell_2)
    }

  } else if(length(unique_cell_types) == 1) {

    first_cell_type = unique_cell_types[1]

    # first cell type ids
    to1 = sub_spatnetwork[to_cell_type == first_cell_type][['to']]
    from1 = sub_spatnetwork[from_cell_type == first_cell_type][['from']]
    cell1_ids = unique(c(to1, from1))

    ## all cell ids
    all_cell1 = cell_metadata[get(cluster_column) == first_cell_type][['cell_ID']]

    ## exclude selected
    if(exclude_selected_cells_from_test == TRUE) {
      all_cell1 = all_cell1[!all_cell1 %in% cell1_ids]
    }

    ## FOR CELL TYPE 1
    sel_ind1 = which(colnames(expr_values) %in% cell1_ids)
    all_ind1 = which(colnames(expr_values) %in% all_cell1)


    ## do not continue if too few cells ##
    if(length(sel_ind1) < minimum_unique_cells | length(all_ind1) < minimum_unique_cells) {
      return(NULL)
    }

    result_cells = do_cell_proximity_test(expr_values = expr_values,
                                          select_ind = sel_ind1, other_ind = all_ind1,
                                          diff_test = diff_test,
                                          n_perm = nr_permutations,
                                          mean_method = mean_method,
                                          offset = offset,
                                          adjust_method = adjust_method,
                                          cores = cores,
                                          set_seed = set_seed,
                                          seed_number = seed_number)

    result_cells[, cell_type := first_cell_type]
    result_cells[, int_cell_type := first_cell_type]
    result_cells[, nr_select := length(sel_ind1)]
    result_cells[, int_nr_select := length(sel_ind1)]
    result_cells[, nr_other := length(all_ind1)]
    result_cells[, int_nr_other := length(all_ind1)]

  }

  result_cells[, unif_int := sel_int]

  return(result_cells)

}




#' @title findInteractionChangedGenes
#' @name findInteractionChangedGenes
#' @description Identifies cell-to-cell Interaction Changed Genes (ICG),
#' i.e. genes that are differentially expressed due to proximity to other cell types.#'
#' @param gobject giotto object
#' @param expression_values expression values to use
#' @param selected_genes subset of selected genes (optional)
#' @param cluster_column name of column to use for cell types
#' @param spatial_network_name name of spatial network to use
#' @param minimum_unique_cells minimum number of target cells required
#' @param minimum_unique_int_cells minimum number of interacting cells required
#' @param diff_test which differential expression test
#' @param mean_method method to use to calculate the mean
#' @param offset offset value to use when calculating log2 ratio
#' @param adjust_method which method to adjust p-values
#' @param nr_permutations number of permutations if diff_test = permutation
#' @param exclude_selected_cells_from_test exclude interacting cells other cells
#' @param do_parallel run calculations in parallel with mclapply
#' @param cores number of cores to use if do_parallel = TRUE
#' @param set_seed set a seed for reproducibility
#' @param seed_number seed number
#' @return cpgObject that contains the Interaction Changed differential gene scores
#' @details Function to calculate if genes are differentially expressed in cell types
#'  when they interact (approximated by physical proximity) with other cell types.
#'  The results data.table in the cpgObject contains - at least - the following columns:
#' \itemize{
#'  \item{genes:}{ All or selected list of tested genes}
#'  \item{sel:}{ average gene expression in the interacting cells from the target cell type }
#'  \item{other:}{ average gene expression in the NOT-interacting cells from the target cell type }
#'  \item{log2fc:}{ log2 fold-change between sel and other}
#'  \item{diff:}{ spatial expression difference between sel and other}
#'  \item{p.value:}{ associated p-value}
#'  \item{p.adj:}{ adjusted p-value}
#'  \item{cell_type:}{ target cell type}
#'  \item{int_cell_type:}{ interacting cell type}
#'  \item{nr_select:}{ number of cells for selected target cell type}
#'  \item{int_nr_select:}{ number of cells for interacting cell type}
#'  \item{nr_other:}{ number of other cells of selected target cell type}
#'  \item{int_nr_other:}{ number of other cells for interacting cell type}
#'  \item{unif_int:}{ cell-cell interaction}
#' }
#' @export
findInteractionChangedGenes = function(gobject,
                                       expression_values = 'normalized',
                                       selected_genes = NULL,
                                       cluster_column,
                                       spatial_network_name = 'Delaunay_network',
                                       minimum_unique_cells = 1,
                                       minimum_unique_int_cells = 1,
                                       diff_test = c('permutation', 'limma', 't.test', 'wilcox'),
                                       mean_method = c('arithmic', 'geometric'),
                                       offset = 0.1,
                                       adjust_method = c("bonferroni","BH", "holm", "hochberg", "hommel",
                                                         "BY", "fdr", "none"),
                                       nr_permutations = 1000,
                                       exclude_selected_cells_from_test = T,
                                       do_parallel = TRUE,
                                       cores = NA,
                                       set_seed = TRUE,
                                       seed_number = 1234) {



  # expression values to be used
  values = match.arg(expression_values, c('normalized', 'scaled', 'custom'))
  expr_values = select_expression_values(gobject = gobject, values = values)

  ## test selected genes ##
  if(!is.null(selected_genes)) {
    expr_values = expr_values[rownames(expr_values) %in% selected_genes, ]
  }

  ## stop test selected genes ##

  # difference test
  diff_test = match.arg(diff_test, choices = c('permutation', 'limma', 't.test', 'wilcox'))

  # p.adj test
  adjust_method = match.arg(adjust_method, choices = c("bonferroni","BH", "holm", "hochberg", "hommel",
                                                       "BY", "fdr", "none"))
  # how to calculate mean
  mean_method = match.arg(mean_method, choices = c('arithmic', 'geometric'))

  ## metadata
  cell_metadata = pDataDT(gobject)

  ## annotated spatial network
  annot_spatnetwork = annotateSpatialNetwork(gobject,
                                             spatial_network_name = spatial_network_name,
                                             cluster_column = cluster_column)


  all_interactions = unique(annot_spatnetwork$unified_int)

  if(do_parallel == TRUE) {


    fin_result = giotto_lapply(X = all_interactions, cores = cores, fun = function(x) {

      tempres = findCellProximityGenes_per_interaction(expr_values = expr_values,
                                                       cell_metadata = cell_metadata,
                                                       annot_spatnetwork = annot_spatnetwork,
                                                       minimum_unique_cells = minimum_unique_cells,
                                                       minimum_unique_int_cells = minimum_unique_int_cells,
                                                       sel_int = x,
                                                       cluster_column = cluster_column,
                                                       exclude_selected_cells_from_test = exclude_selected_cells_from_test,
                                                       diff_test = diff_test,
                                                       mean_method = mean_method,
                                                       offset = offset,
                                                       adjust_method = adjust_method,
                                                       nr_permutations = nr_permutations,
                                                       cores = 2,
                                                       set_seed = set_seed,
                                                       seed_number = seed_number)


    })


  } else {

    fin_result = list()

    for(i in 1:length(all_interactions)) {

      x = all_interactions[i]
      print(x)

      tempres = findCellProximityGenes_per_interaction(expr_values = expr_values,
                                                       cell_metadata = cell_metadata,
                                                       annot_spatnetwork = annot_spatnetwork,
                                                       minimum_unique_cells = minimum_unique_cells,
                                                       minimum_unique_int_cells = minimum_unique_int_cells,
                                                       sel_int = x,
                                                       cluster_column = cluster_column,
                                                       exclude_selected_cells_from_test = exclude_selected_cells_from_test,
                                                       diff_test = diff_test,
                                                       mean_method = mean_method,
                                                       offset = offset,
                                                       adjust_method = adjust_method,
                                                       nr_permutations = nr_permutations,
                                                       cores = 2,
                                                       set_seed = set_seed,
                                                       seed_number = seed_number)

      fin_result[[i]] = tempres

    }


  }

  final_result = do.call('rbind', fin_result)


  # data.table variables
  spec_int = cell_type = int_cell_type = type_int = NULL

  final_result[, spec_int := paste0(cell_type,'--',int_cell_type)]
  final_result[, type_int := ifelse(cell_type == int_cell_type, 'homo', 'hetero')]


  #return(final_result)

  permutation_test = ifelse(diff_test == 'permutation', nr_permutations, 'no permutations')

  cpgObject = list(CPGscores = final_result,
                   Giotto_info = list('values' = values,
                                      'cluster' = cluster_column,
                                      'spatial network' = spatial_network_name),
                   test_info = list('test' = diff_test,
                                    'p.adj' = adjust_method,
                                    'min cells' = minimum_unique_cells,
                                    'min interacting cells' = minimum_unique_int_cells,
                                    'exclude selected cells' = exclude_selected_cells_from_test,
                                    'perm' = permutation_test))
  class(cpgObject) = append(class(cpgObject), 'cpgObject')
  return(cpgObject)

}


#' @title findCellProximityGenes
#' @description Identifies cell-to-cell Interaction Changed Genes (ICG),
#' i.e. genes that are differentially expressed due to proximity to other cell types.
#' @inheritDotParams findInteractionChangedGenes
#' @seealso \code{\link{findInteractionChangedGenes}}
#' @export
findCellProximityGenes <- function(...) {

  .Deprecated(new = "findInteractionChangedGenes")

  findInteractionChangedGenes(...)

}


#' @title findICG
#' @name findICG
#' @description Identifies cell-to-cell Interaction Changed Genes (ICG),
#' i.e. genes that are differentially expressed due to proximity to other cell types.
#' @param gobject giotto object
#' @param expression_values expression values to use
#' @param selected_genes subset of selected genes (optional)
#' @param cluster_column name of column to use for cell types
#' @param spatial_network_name name of spatial network to use
#' @param minimum_unique_cells minimum number of target cells required
#' @param minimum_unique_int_cells minimum number of interacting cells required
#' @param diff_test which differential expression test
#' @param mean_method method to use to calculate the mean
#' @param offset offset value to use when calculating log2 ratio
#' @param adjust_method which method to adjust p-values
#' @param nr_permutations number of permutations if diff_test = permutation
#' @param exclude_selected_cells_from_test exclude interacting cells other cells
#' @param do_parallel run calculations in parallel with mclapply
#' @param cores number of cores to use if do_parallel = TRUE
#' @param set_seed set a seed for reproducibility
#' @param seed_number seed number
#' @return cpgObject that contains the differential gene scores
#' @details Function to calculate if genes are differentially expressed in cell types
#'  when they interact (approximated by physical proximity) with other cell types.
#'  The results data.table in the cpgObject contains - at least - the following columns:
#' \itemize{
#'  \item{genes:}{ All or selected list of tested genes}
#'  \item{sel:}{ average gene expression in the interacting cells from the target cell type }
#'  \item{other:}{ average gene expression in the NOT-interacting cells from the target cell type }
#'  \item{log2fc:}{ log2 fold-change between sel and other}
#'  \item{diff:}{ spatial expression difference between sel and other}
#'  \item{p.value:}{ associated p-value}
#'  \item{p.adj:}{ adjusted p-value}
#'  \item{cell_type:}{ target cell type}
#'  \item{int_cell_type:}{ interacting cell type}
#'  \item{nr_select:}{ number of cells for selected target cell type}
#'  \item{int_nr_select:}{ number of cells for interacting cell type}
#'  \item{nr_other:}{ number of other cells of selected target cell type}
#'  \item{int_nr_other:}{ number of other cells for interacting cell type}
#'  \item{unif_int:}{ cell-cell interaction}
#' }
#' @export
findICG = function(gobject,
                   expression_values = 'normalized',
                   selected_genes = NULL,
                   cluster_column,
                   spatial_network_name = 'Delaunay_network',
                   minimum_unique_cells = 1,
                   minimum_unique_int_cells = 1,
                   diff_test = c('permutation', 'limma', 't.test', 'wilcox'),
                   mean_method = c('arithmic', 'geometric'),
                   offset = 0.1,
                   adjust_method = c("bonferroni","BH", "holm", "hochberg", "hommel",
                                     "BY", "fdr", "none"),
                   nr_permutations = 100,
                   exclude_selected_cells_from_test = T,
                   do_parallel = TRUE,
                   cores = NA,
                   set_seed = TRUE,
                   seed_number = 1234) {


  findInteractionChangedGenes(gobject = gobject,
                              expression_values = expression_values,
                              selected_genes = selected_genes,
                              cluster_column = cluster_column,
                              spatial_network_name = spatial_network_name,
                              minimum_unique_cells = minimum_unique_cells,
                              minimum_unique_int_cells = minimum_unique_int_cells,
                              diff_test = diff_test,
                              mean_method = mean_method,
                              offset = offset,
                              adjust_method = adjust_method,
                              nr_permutations = nr_permutations,
                              exclude_selected_cells_from_test = exclude_selected_cells_from_test,
                              do_parallel = do_parallel,
                              cores = cores,
                              set_seed = set_seed,
                              seed_number = seed_number)


}


#' @title findCPG
#' @description Identifies cell-to-cell Interaction Changed Genes (ICG),
#' i.e. genes that are differentially expressed due to proximity to other cell types.
#' @inheritDotParams findICG
#' @seealso \code{\link{findICG}}
#' @export
findCPG <- function(...) {

  .Deprecated(new = "findICG")

  findICG(...)

}


#' @title filterInteractionChangedGenes
#' @name filterInteractionChangedGenes
#' @description Filter Interaction Changed Gene scores.
#' @param cpgObject ICG (interaction changed gene) score object
#' @param min_cells minimum number of source cell type
#' @param min_cells_expr minimum expression level for source cell type
#' @param min_int_cells minimum number of interacting neighbor cell type
#' @param min_int_cells_expr minimum expression level for interacting neighbor cell type
#' @param min_fdr minimum adjusted p-value
#' @param min_spat_diff minimum absolute spatial expression difference
#' @param min_log2_fc minimum log2 fold-change
#' @param min_zscore minimum z-score change
#' @param zscores_column calculate z-scores over cell types or genes
#' @param direction differential expression directions to keep
#' @return cpgObject that contains the filtered differential gene scores
#' @export
filterInteractionChangedGenes = function(cpgObject,
                                    min_cells = 4,
                                    min_cells_expr = 1,
                                    min_int_cells = 4,
                                    min_int_cells_expr = 1,
                                    min_fdr = 0.1,
                                    min_spat_diff = 0.2,
                                    min_log2_fc = 0.2,
                                    min_zscore = 2,
                                    zscores_column = c('cell_type', 'genes'),
                                    direction = c('both', 'up', 'down')) {

  # data.table variables
  nr_select = int_nr_select = zscores = log2fc = sel = other = p.adj = NULL

  if(!'cpgObject' %in% class(cpgObject)) {
    stop('\n cpgObject needs to be the output from findCellProximityGenes() or findCPG() \n')
  }

  zscores_column = match.arg(zscores_column, choices = c('cell_type', 'genes'))

  CPGscore = copy(cpgObject[['CPGscores']])

  # other parameters
  direction = match.arg(direction, choices = c('both', 'up', 'down'))


  ## sequential filter steps ##
  # 1. minimum number of source and target cells
  selection_scores = CPGscore[nr_select >= min_cells & int_nr_select >= min_int_cells]

  # 2. create z-scores for log2fc per cell type
  selection_scores[, zscores := scale(log2fc), by = c(zscores_column)]

  # 3. filter based on z-scores and minimum levels
  comb_DT = rbind(selection_scores[zscores >= min_zscore & abs(diff) >= min_spat_diff & log2fc >= min_log2_fc & sel >= min_cells_expr],
                  selection_scores[zscores <= -min_zscore & abs(diff) >= min_spat_diff & log2fc <= -min_log2_fc & other >= min_int_cells_expr])

  # 4. filter based on adjusted p-value (fdr)
  comb_DT = comb_DT[p.adj < min_fdr]


  if(direction == 'both') {
    selection_scores = selection_scores
  } else if(direction == 'up') {
    selection_scores = selection_scores[log2fc >= min_log2_fc]
  } else if(direction == 'down') {
    selection_scores = selection_scores[log2fc <= -min_log2_fc]
  }


  newobj = copy(cpgObject)
  newobj[['CPGscores']] = comb_DT

  return(newobj)

}


#' @title filterCellProximityGenes
#' @description Filter Interaction Changed Gene scores.
#' @inheritDotParams findICG
#' @seealso \code{\link{findICG}}
#' @export
filterCellProximityGenes <- function(...) {

  .Deprecated(new = "filterInteractionChangedGenes")

  filterInteractionChangedGenes(...)

}


#' @title filterICG
#' @name filterICG
#' @description Filter Interaction Changed Gene scores.
#' @param cpgObject ICG (interaction changed gene) score object
#' @param min_cells minimum number of source cell type
#' @param min_cells_expr minimum expression level for source cell type
#' @param min_int_cells minimum number of interacting neighbor cell type
#' @param min_int_cells_expr minimum expression level for interacting neighbor cell type
#' @param min_fdr minimum adjusted p-value
#' @param min_spat_diff minimum absolute spatial expression difference
#' @param min_log2_fc minimum log2 fold-change
#' @param min_zscore minimum z-score change
#' @param zscores_column calculate z-scores over cell types or genes
#' @param direction differential expression directions to keep
#' @return cpgObject that contains the filtered differential gene scores
#' @export
filterICG = function(cpgObject,
                     min_cells = 4,
                     min_cells_expr = 1,
                     min_int_cells = 4,
                     min_int_cells_expr = 1,
                     min_fdr = 0.1,
                     min_spat_diff = 0.2,
                     min_log2_fc = 0.2,
                     min_zscore = 2,
                     zscores_column = c('cell_type', 'genes'),
                     direction = c('both', 'up', 'down')) {

  filterInteractionChangedGenes(cpgObject = cpgObject,
                                min_cells = min_cells,
                                min_cells_expr = min_cells_expr,
                                min_int_cells = min_int_cells,
                                min_int_cells_expr = min_int_cells_expr,
                                min_fdr = min_fdr,
                                min_spat_diff = min_spat_diff,
                                min_log2_fc = min_log2_fc,
                                min_zscore = min_zscore,
                                zscores_column = zscores_column,
                                direction = direction)

}



#' @title filterCPG
#' @description Filter Interaction Changed Gene scores.
#' @inheritDotParams filterICG
#' @seealso \code{\link{filterICG}}
#' @export
filterCPG <- function(...) {

  .Deprecated(new = "filterICG")

  filterICG(...)

}




# * ####
# GTG gene-to-gene (pairs of CPG) ####

#' @title combineCellProximityGenes_per_interaction
#' @name combineCellProximityGenes_per_interaction
#' @description Combine CPG scores per interaction
#' @keywords internal
combineCellProximityGenes_per_interaction =  function(cpgObject,
                                                      sel_int,
                                                      selected_genes = NULL,
                                                      specific_genes_1 = NULL,
                                                      specific_genes_2 = NULL,
                                                      min_cells = 5,
                                                      min_int_cells = 3,
                                                      min_fdr = 0.05,
                                                      min_spat_diff = 0,
                                                      min_log2_fc = 0.5) {

  # data.table variables
  unif_int = genes = cell_type = p.adj = nr_select = int_nr_select = log2fc = sel = NULL
  other = p.value = perm_sel = perm_other = perm_log2fc = perm_diff = NULL
  int_cell_type = nr_other = genes_combo = genes_1 = genes_2 = type_int = NULL

  if(!'cpgObject' %in% class(cpgObject)) {
    stop('\n cpgObject needs to be the output from findCellProximityGenes() or findCPG() \n')
  }

  CPGscore = copy(cpgObject[['CPGscores']])

  test_used = cpgObject[['test_info']][['test']]

  ## subset on selected interaction
  subset = CPGscore[unif_int == sel_int]

  ##  type of interactions
  type_interaction = unique(subset[['type_int']])

  ## first filtering CPGscores on genes
  if((!is.null(specific_genes_1) & !is.null(specific_genes_2))) {
    if(length(specific_genes_1) != length(specific_genes_2)) {
      stop('\n specific_genes_1 must have the same length as specific_genes_2')
    }
    subset = subset[genes %in% c(specific_genes_1, specific_genes_2)]
  } else if(!is.null(selected_genes)) {
    subset = subset[genes %in% c(selected_genes)]
  }

  ## find number of unique cell types
  unique_cell_types = unique(c(subset$cell_type, subset$int_cell_type))

  if(length(unique_cell_types) == 2) {

    ## CELL TYPE 1
    subset_cell_1 = subset[cell_type == unique_cell_types[1]]

    if(nrow(subset_cell_1) == 0) {

      if(test_used == 'permutation') {

        subset_cell_1 = data.table::data.table('genes_1' = subset[['genes']],
                                               'sel_1' = NA,
                                               'other_1' = NA,
                                               'log2fc_1' = NA,
                                               'diff_1' = NA,
                                               'p.value_1' = NA,
                                               'p.adj_1' = NA,
                                               'perm_sel_1' = NA,
                                               'perm_other_1' = NA,
                                               'perm_log2fc_1' = NA,
                                               'perm_diff_1' = NA,
                                               'cell_type_1' = unique_cell_types[1],
                                               'int_cell_type_1' = unique_cell_types[2],
                                               'nr_select_1' = NA,
                                               'nr_other_1' = NA,
                                               'unif_int' = subset[['unif_int']])

      } else {

        subset_cell_1 = data.table::data.table('genes_1' = subset[['genes']],
                                               'sel_1' = NA,
                                               'other_1' = NA,
                                               'log2fc_1' = NA,
                                               'diff_1' = NA,
                                               'p.value_1' = NA,
                                               'p.adj_1' = NA,
                                               'cell_type_1' = unique_cell_types[1],
                                               'int_cell_type_1' = unique_cell_types[2],
                                               'nr_select_1' = NA,
                                               'nr_other_1' = NA,
                                               'unif_int' = subset[['unif_int']])
      }


    } else {


      # filter on statistics
      subset_cell_1 = subset_cell_1[p.adj <= min_fdr]
      subset_cell_1 = subset_cell_1[nr_select >= min_cells]
      subset_cell_1 = subset_cell_1[int_nr_select >= min_int_cells]
      subset_cell_1 = subset_cell_1[abs(log2fc) >= min_log2_fc]
      subset_cell_1 = subset_cell_1[abs(diff) >= min_spat_diff]

      if(test_used == 'permutation') {
        # make it specific
        subset_cell_1 = subset_cell_1[,.(genes, sel, other, log2fc, diff, p.value, p.adj,
                                         perm_sel, perm_other, perm_log2fc, perm_diff,
                                         cell_type, int_cell_type, nr_select, nr_other, unif_int)]
        data.table::setnames(subset_cell_1, old = c('genes', 'sel', 'other', 'log2fc', 'diff', 'p.value', 'p.adj',
                                                    'perm_sel', 'perm_other', 'perm_log2fc', 'perm_diff',
                                                    'cell_type', 'int_cell_type', 'nr_select', 'nr_other'),
                             new = c('genes_1', 'sel_1', 'other_1', 'log2fc_1', 'diff_1', 'p.value_1', 'p.adj_1',
                                     'perm_sel_1', 'perm_other_1',  'perm_log2fc_1', 'perm_diff_1',
                                     'cell_type_1', 'int_cell_type_1', 'nr_select_1', 'nr_other_1'))

      } else {
        # make it specific
        subset_cell_1 = subset_cell_1[,.(genes, sel, other, log2fc, diff, p.value, p.adj, cell_type, int_cell_type, nr_select, nr_other, unif_int)]
        data.table::setnames(subset_cell_1, old = c('genes', 'sel', 'other', 'log2fc', 'diff', 'p.value', 'p.adj', 'cell_type', 'int_cell_type', 'nr_select', 'nr_other'),
                             new = c('genes_1', 'sel_1', 'other_1', 'log2fc_1', 'diff_1', 'p.value_1', 'p.adj_1', 'cell_type_1', 'int_cell_type_1', 'nr_select_1', 'nr_other_1'))

      }


    }




    ## CELL TYPE 2
    subset_cell_2 = subset[cell_type == unique_cell_types[2]]

    if(nrow(subset_cell_2) == 0) {

      if(test_used == 'permutation') {

        subset_cell_2 = data.table::data.table('genes_2' = subset[['genes']],
                                               'sel_2' = NA,
                                               'other_2' = NA,
                                               'log2fc_2' = NA,
                                               'diff_2' = NA,
                                               'p.value_2' = NA,
                                               'p.adj_2' = NA,
                                               'perm_sel_2' = NA,
                                               'perm_other_2' = NA,
                                               'perm_log2fc_2' = NA,
                                               'perm_diff_2' = NA,
                                               'cell_type_2' = unique_cell_types[2],
                                               'int_cell_type_2' = unique_cell_types[1],
                                               'nr_select_2' = NA,
                                               'nr_other_2' = NA,
                                               'unif_int' = subset[['unif_int']])
      } else {

        subset_cell_2 = data.table::data.table('genes_2' = subset[['genes']],
                                               'sel_2' = NA,
                                               'other_2' = NA,
                                               'log2fc_2' = NA,
                                               'diff_2' = NA,
                                               'p.value_2' = NA,
                                               'p.adj_2' = NA,
                                               'cell_type_2' = unique_cell_types[2],
                                               'int_cell_type_2' = unique_cell_types[1],
                                               'nr_select_2' = NA,
                                               'nr_other_2' = NA,
                                               'unif_int' = subset[['unif_int']])
      }



    } else {

      # filter on statistics
      subset_cell_2 = subset_cell_2[p.adj <= min_fdr]
      subset_cell_2 = subset_cell_2[nr_select >= min_cells]
      subset_cell_2 = subset_cell_2[int_nr_select >= min_int_cells]
      subset_cell_2 = subset_cell_2[abs(log2fc) >= min_log2_fc]
      subset_cell_2 = subset_cell_2[abs(diff) >= min_spat_diff]

      if(test_used == 'permutation') {

        subset_cell_2 = subset_cell_2[,.(genes, sel, other, log2fc, diff, p.value, p.adj,
                                         perm_sel, perm_other, perm_log2fc, perm_diff,
                                         cell_type, int_cell_type, nr_select, nr_other, unif_int)]
        data.table::setnames(subset_cell_2, old = c('genes', 'sel', 'other', 'log2fc', 'diff', 'p.value', 'p.adj',
                                                    'perm_sel', 'perm_other', 'perm_log2fc', 'perm_diff',
                                                    'cell_type', 'int_cell_type', 'nr_select', 'nr_other'),
                             new = c('genes_2', 'sel_2', 'other_2', 'log2fc_2', 'diff_2', 'p.value_2', 'p.adj_2',
                                     'perm_sel_2', 'perm_other_2', 'perm_log2fc_2', 'perm_diff_2',
                                     'cell_type_2', 'int_cell_type_2', 'nr_select_2', 'nr_other_2'))


      } else {
        subset_cell_2 = subset_cell_2[,.(genes, sel, other, log2fc, diff, p.value, p.adj, cell_type, int_cell_type, nr_select, nr_other, unif_int)]
        data.table::setnames(subset_cell_2, old = c('genes', 'sel', 'other', 'log2fc', 'diff', 'p.value', 'p.adj', 'cell_type', 'int_cell_type', 'nr_select', 'nr_other'),
                             new = c('genes_2', 'sel_2', 'other_2', 'log2fc_2', 'diff_2', 'p.value_2', 'p.adj_2', 'cell_type_2', 'int_cell_type_2', 'nr_select_2', 'nr_other_2'))

      }



    }

    merge_subsets = data.table::merge.data.table(subset_cell_1, subset_cell_2, by = c('unif_int'), allow.cartesian = TRUE)

  } else if(length(unique_cell_types) == 1) {

    ## CELL TYPE 1
    subset_cell_1 = subset[cell_type == unique_cell_types[1]]

    # filter on statistics
    subset_cell_1 = subset_cell_1[p.adj <= min_fdr]
    subset_cell_1 = subset_cell_1[nr_select >= min_cells]
    subset_cell_1 = subset_cell_1[int_nr_select >= min_int_cells]
    subset_cell_1 = subset_cell_1[abs(log2fc) >= min_log2_fc]
    subset_cell_1 = subset_cell_1[abs(diff) >= min_spat_diff]

    # make it specific

    if(test_used == 'permutation') {
      subset_cell_1A = subset_cell_1[,.(genes, sel, other, log2fc, diff, p.value, p.adj,
                                        perm_sel, perm_other, perm_log2fc, perm_diff,
                                        cell_type, int_cell_type, nr_select, nr_other, unif_int)]
      data.table::setnames(subset_cell_1A, old = c('genes', 'sel', 'other', 'log2fc', 'diff', 'p.value', 'p.adj',
                                                   'perm_sel', 'perm_other', 'perm_log2fc', 'perm_diff',
                                                   'cell_type', 'int_cell_type', 'nr_select', 'nr_other'),
                           new = c('genes_1', 'sel_1', 'other_1', 'log2fc_1', 'diff_1', 'p.value_1', 'p.adj_1',
                                   'perm_sel_1', 'perm_other_1', 'perm_log2fc_1', 'perm_diff_1',
                                   'cell_type_1', 'int_cell_type_1', 'nr_select_1', 'nr_other_1'))

    } else {
      subset_cell_1A = subset_cell_1[,.(genes, sel, other, log2fc, diff, p.value, p.adj, cell_type, int_cell_type, nr_select, nr_other, unif_int)]
      data.table::setnames(subset_cell_1A, old = c('genes', 'sel', 'other', 'log2fc', 'diff', 'p.value', 'p.adj', 'cell_type', 'int_cell_type', 'nr_select', 'nr_other'),
                           new = c('genes_1', 'sel_1', 'other_1', 'log2fc_1', 'diff_1', 'p.value_1', 'p.adj_1', 'cell_type_1', 'int_cell_type_1', 'nr_select_1', 'nr_other_1'))

    }


    ## CELL TYPE 2

    if(test_used == 'permutation') {
      subset_cell_1B = subset_cell_1[,.(genes, sel, other, log2fc, diff, p.value, p.adj,
                                        perm_sel, perm_other, perm_log2fc, perm_diff,
                                        cell_type, int_cell_type, nr_select, nr_other, unif_int)]
      data.table::setnames(subset_cell_1B, old = c('genes', 'sel', 'other', 'log2fc', 'diff', 'p.value', 'p.adj',
                                                   'perm_sel', 'perm_other', 'perm_log2fc', 'perm_diff',
                                                   'cell_type', 'int_cell_type', 'nr_select', 'nr_other'),
                           new = c('genes_2', 'sel_2', 'other_2', 'log2fc_2', 'diff_2', 'p.value_2', 'p.adj_2',
                                   'perm_sel_2', 'perm_other_2', 'perm_log2fc_2', 'perm_diff_2',
                                   'cell_type_2', 'int_cell_type_2', 'nr_select_2', 'nr_other_2'))

    } else {
      subset_cell_1B = subset_cell_1[,.(genes, sel, other, log2fc, diff, p.value, p.adj, cell_type, int_cell_type, nr_select, nr_other, unif_int)]
      data.table::setnames(subset_cell_1B, old = c('genes', 'sel', 'other', 'log2fc', 'diff', 'p.value', 'p.adj', 'cell_type', 'int_cell_type', 'nr_select', 'nr_other'),
                           new = c('genes_2', 'sel_2', 'other_2', 'log2fc_2', 'diff_2', 'p.value_2', 'p.adj_2', 'cell_type_2', 'int_cell_type_2', 'nr_select_2', 'nr_other_2'))

    }

    merge_subsets = data.table::merge.data.table(subset_cell_1A, subset_cell_1B, by = c('unif_int'), allow.cartesian = TRUE)


  }

  # restrict to gene combinations if needed
  if((!is.null(specific_genes_1) & !is.null(specific_genes_2))) {
    merge_subsets[, genes_combo := paste0(genes_1,'--',genes_2)]
    all_combos = c(paste0(specific_genes_1,'--', specific_genes_2),
                   paste0(specific_genes_2,'--', specific_genes_1))
    merge_subsets = merge_subsets[genes_combo %in% all_combos]
    merge_subsets[, genes_combo := NULL]
  }

  merge_subsets[, type_int := type_interaction]
  return(merge_subsets)

}


#' @title combineInteractionChangedGenes
#' @name combineInteractionChangedGenes
#' @description Combine ICG scores in a pairwise manner.
#' @param cpgObject ICG (interaction changed gene) score object
#' @param selected_ints subset of selected cell-cell interactions (optional)
#' @param selected_genes subset of selected genes (optional)
#' @param specific_genes_1 specific geneset combo (need to position match specific_genes_2)
#' @param specific_genes_2 specific geneset combo (need to position match specific_genes_1)
#' @param min_cells minimum number of target cell type
#' @param min_int_cells minimum number of interacting cell type
#' @param min_fdr minimum adjusted p-value
#' @param min_spat_diff minimum absolute spatial expression difference
#' @param min_log2_fc minimum absolute log2 fold-change
#' @param do_parallel run calculations in parallel with mclapply
#' @param cores number of cores to use if do_parallel = TRUE
#' @param verbose verbose
#' @return cpgObject that contains the filtered differential gene scores
#' @export
combineInteractionChangedGenes = function(cpgObject,
                                     selected_ints = NULL,
                                     selected_genes = NULL,
                                     specific_genes_1 = NULL,
                                     specific_genes_2 = NULL,
                                     min_cells = 5,
                                     min_int_cells = 3,
                                     min_fdr = 0.05,
                                     min_spat_diff = 0,
                                     min_log2_fc = 0.5,
                                     do_parallel = TRUE,
                                     cores = NA,
                                     verbose = T) {

  # data.table variables
  unif_int = gene1_gene2 = genes_1 = genes_2 = comb_logfc = log2fc_1 = log2fc_2 = direction = NULL

  ## check validity
  if(!'cpgObject' %in% class(cpgObject)) {
    stop('\n cpgObject needs to be the output from findCellProximityGenes() or findCPG() \n')
  }
  CPGscore = copy(cpgObject[['CPGscores']])

  if(!is.null(selected_ints)) {
    CPGscore = CPGscore[unif_int %in% selected_ints]
  }

  all_ints = unique(CPGscore[['unif_int']])

  # parallel
  if(do_parallel == TRUE) {

    GTGresults = giotto_lapply(X = all_ints, cores = cores, fun = function(x) {

      tempres =  combineCellProximityGenes_per_interaction(cpgObject = cpgObject,
                                                           sel_int = x,
                                                           selected_genes = selected_genes,
                                                           specific_genes_1 = specific_genes_1,
                                                           specific_genes_2 = specific_genes_2,
                                                           min_cells = min_cells,
                                                           min_int_cells = min_int_cells,
                                                           min_fdr = min_fdr,
                                                           min_spat_diff = min_spat_diff,
                                                           min_log2_fc = min_log2_fc)

    })


  } else {
    # for loop
    GTGresults = list()

    for(i in 1:length(all_ints)) {

      x = all_ints[[i]]

      if(verbose == TRUE) print(x)

      tempres =  combineCellProximityGenes_per_interaction(cpgObject = cpgObject,
                                                           sel_int = x,
                                                           selected_genes = selected_genes,
                                                           specific_genes_1 = specific_genes_1,
                                                           specific_genes_2 = specific_genes_2,
                                                           min_cells = min_cells,
                                                           min_int_cells = min_int_cells,
                                                           min_fdr = min_fdr,
                                                           min_spat_diff = min_spat_diff,
                                                           min_log2_fc = min_log2_fc)
      GTGresults[[i]] = tempres
    }

  }

  final_results = do.call('rbind', GTGresults)

  final_results[, gene1_gene2 := paste0(genes_1,'--',genes_2)]

  final_results = sort_combine_two_DT_columns(final_results,
                                              column1 = 'genes_1', column2 = 'genes_2',
                                              myname = 'unif_gene_gene')

  final_results[, comb_logfc := abs(log2fc_1) + abs(log2fc_2)]
  setorder(final_results, -comb_logfc)
  final_results[, direction := ifelse(log2fc_1 > 0 & log2fc_2 > 0, 'both_up',
                               ifelse(log2fc_1 < 0 & log2fc_2 < 0, 'both_down', 'mixed'))]

  #return(final_results)

  combCpgObject = list(combCPGscores = final_results,
                       Giotto_info = list('values' = cpgObject[['Giotto_info']][['values']],
                                          'cluster' = cpgObject[['Giotto_info']][['cluster']],
                                          'spatial network' = cpgObject[['Giotto_info']][['spatial network']]),
                       test_info = list('test' = cpgObject[['test_info']][['test']],
                                        'p.adj' = cpgObject[['test_info']][['p.adj']],
                                        'min cells' = cpgObject[['test_info']][['min cells']],
                                        'min interacting cells' = cpgObject[['test_info']][['min interacting cells']],
                                        'exclude selected cells' = cpgObject[['test_info']][['exclude selected cells']],
                                        'perm' = cpgObject[['test_info']][['perm']]))
  class(combCpgObject) = append(class(combCpgObject), 'combCpgObject')
  return(combCpgObject)


}


#' @title combineCellProximityGenes
#' @description Combine ICG scores in a pairwise manner.
#' @inheritDotParams combineInteractionChangedGenes
#' @seealso \code{\link{combineInteractionChangedGenes}}
#' @export
combineCellProximityGenes <- function(...) {

  .Deprecated(new = "combineInteractionChangedGenes")

  combineInteractionChangedGenes(...)

}



#' @title combineICG
#' @name combineICG
#' @description Combine ICG scores in a pairwise manner.
#' @param cpgObject ICG (interaction changed gene) score object
#' @param selected_ints subset of selected cell-cell interactions (optional)
#' @param selected_genes subset of selected genes (optional)
#' @param specific_genes_1 specific geneset combo (need to position match specific_genes_2)
#' @param specific_genes_2 specific geneset combo (need to position match specific_genes_1)
#' @param min_cells minimum number of target cell type
#' @param min_int_cells minimum number of interacting cell type
#' @param min_fdr minimum adjusted p-value
#' @param min_spat_diff minimum absolute spatial expression difference
#' @param min_log2_fc minimum absolute log2 fold-change
#' @param do_parallel run calculations in parallel with mclapply
#' @param cores number of cores to use if do_parallel = TRUE
#' @param verbose verbose
#' @return cpgObject that contains the filtered differential gene scores
#' @export
combineICG = function(cpgObject,
                      selected_ints = NULL,
                      selected_genes = NULL,
                      specific_genes_1 = NULL,
                      specific_genes_2 = NULL,
                      min_cells = 5,
                      min_int_cells = 3,
                      min_fdr = 0.05,
                      min_spat_diff = 0,
                      min_log2_fc = 0.5,
                      do_parallel = TRUE,
                      cores = NA,
                      verbose = T) {


  combineInteractionChangedGenes(cpgObject = cpgObject,
                                 selected_ints = selected_ints,
                                 selected_genes = selected_genes,
                                 specific_genes_1 = specific_genes_1,
                                 specific_genes_2 = specific_genes_2,
                                 min_cells = min_cells,
                                 min_int_cells = min_int_cells,
                                 min_fdr = min_fdr,
                                 min_spat_diff = min_spat_diff,
                                 min_log2_fc = min_log2_fc,
                                 do_parallel = do_parallel,
                                 cores = cores,
                                 verbose = verbose)


}

#' @title combineCPG
#' @description Combine ICG scores in a pairwise manner.
#' @inheritDotParams combineICG
#' @seealso \code{\link{combineICG}}
#' @export
combineCPG <- function(...) {

  .Deprecated(new = "combineICG")

  combineICG(...)

}




# * ####
# cell communication ####


#' @title average_gene_gene_expression_in_groups
#' @name average_gene_gene_expression_in_groups
#' @description calculate average expression per cluster
#' @param gobject giotto object to use
#' @param cluster_column cluster column with cell type information
#' @param gene_set_1 first specific gene set from gene pairs
#' @param gene_set_2 second specific gene set from gene pairs
#' @return data.table with average expression scores for each cluster
#' @keywords internal
average_gene_gene_expression_in_groups = function(gobject,
                                                  cluster_column = 'cell_types',
                                                  gene_set_1,
                                                  gene_set_2) {

  average_DT = create_average_DT(gobject = gobject, meta_data_name = cluster_column)

  # change column names back to original
  new_colnames = gsub(pattern = 'cluster_', replacement = '', colnames(average_DT))
  colnames(average_DT) = new_colnames

  # keep order of colnames
  colnames_order = sort(new_colnames)

  # gene_set_1 and gene_set_2 need to have same length and all genes need to be present in data
  if(length(gene_set_1) != length(gene_set_2)) {
    stop('\n length of set1 needs to be the same as that of set2 \n')
  }

  if(!all(c(gene_set_1, gene_set_2) %in% rownames(average_DT) == T)) {
    stop('\n all selected genes from set 1 and 2 need to be present \n')
  }

  LR_pairs = paste0(gene_set_1,'-',gene_set_2)

  # get ligand and receptor information
  ligand_match = average_DT[match(gene_set_1, rownames(average_DT)), ,drop = F]
  receptor_match = average_DT[match(gene_set_2, rownames(average_DT)), ,drop = F]

  # data.table variables
  ligand = LR_comb = receptor = LR_expr = lig_expr = rec_expr = lig_cell_type = rec_cell_type = NULL

  all_ligand_cols = colnames(ligand_match)
  lig_test = data.table::as.data.table(reshape2::melt(ligand_match, measure.vars = all_ligand_cols))
  lig_test[, ligand := rep(rownames(ligand_match), ncol(ligand_match))]
  lig_test[, ligand := strsplit(ligand,'\\.')[[1]][1] , by = 1:nrow(lig_test)]
  lig_test[, LR_comb := rep(LR_pairs, ncol(ligand_match))]
  setnames(lig_test, 'value', 'lig_expr')
  setnames(lig_test, 'variable', 'lig_cell_type')

  all_receptor_cols = colnames(receptor_match)
  rec_test = data.table::as.data.table(reshape2::melt(receptor_match, measure.vars = all_receptor_cols))
  rec_test[, receptor := rep(rownames(receptor_match), ncol(receptor_match))]
  rec_test[, receptor := strsplit(receptor,'\\.')[[1]][1] , by = 1:nrow(rec_test)]
  rec_test[, LR_comb := rep(LR_pairs, ncol(receptor_match))]
  setnames(rec_test, 'value', 'rec_expr')
  setnames(rec_test, 'variable', 'rec_cell_type')

  lig_rec_test = merge(lig_test, rec_test, by = 'LR_comb', allow.cartesian = T)
  lig_rec_test[, LR_expr := lig_expr+rec_expr]


  lig_rec_test[, lig_cell_type := factor(lig_cell_type, levels = colnames_order)]
  lig_rec_test[, rec_cell_type := factor(rec_cell_type, levels = colnames_order)]
  setorder(lig_rec_test, LR_comb, lig_cell_type, rec_cell_type)

  return(lig_rec_test)

}


#' @title exprCellCellcom
#' @name exprCellCellcom
#' @description Cell-Cell communication scores based on expression only
#' @param gobject giotto object to use
#' @param cluster_column cluster column with cell type information
#' @param random_iter number of iterations
#' @param gene_set_1 first specific gene set from gene pairs
#' @param gene_set_2 second specific gene set from gene pairs
#' @param log2FC_addendum addendum to add when calculating log2FC
#' @param detailed provide more detailed information (random variance and z-score)
#' @param adjust_method which method to adjust p-values
#' @param adjust_target adjust multiple hypotheses at the cell or gene level
#' @param set_seed set seed for random simulations (default = TRUE)
#' @param seed_number seed number
#' @param verbose verbose
#' @return Cell-Cell communication scores for gene pairs based on expression only
#' @details Statistical framework to identify if pairs of genes (such as ligand-receptor combinations)
#' are expressed at higher levels than expected based on a reshuffled null distribution of gene expression values,
#' without considering the spatial position of cells.
#' More details will follow soon.
#' @export
exprCellCellcom = function(gobject,
                           cluster_column = 'cell_types',
                           random_iter = 1000,
                           gene_set_1,
                           gene_set_2,
                           log2FC_addendum = 0.1,
                           detailed = FALSE,
                           adjust_method = c("fdr", "bonferroni","BH", "holm", "hochberg", "hommel",
                                             "BY", "none"),
                           adjust_target = c('genes', 'cells'),
                           set_seed = TRUE,
                           seed_number = 1234,
                           verbose = T) {


  # data.table variables
  lig_nr = lig_cell_type = rec_nr = rec_cell_type = rand_expr = av_diff = log2fc = LR_expr = pvalue = NULL
  LR_cell_comb = p.adj = LR_comb = PI = sd_diff = z_score = NULL

  # get parameters
  adjust_method = match.arg(adjust_method, choices = c("fdr", "bonferroni","BH", "holm", "hochberg", "hommel",
                                                       "BY", "none"))
  adjust_target = match.arg(adjust_target, choices = c('genes', 'cells'))

    # get information about number of cells
  cell_metadata = pDataDT(gobject)
  nr_cell_types = cell_metadata[,.N, by = c(cluster_column)]
  nr_cells = nr_cell_types$N
  names(nr_cells) = nr_cell_types$cluster_column


  comScore = average_gene_gene_expression_in_groups(gobject = gobject,
                                                    cluster_column = cluster_column,
                                                    gene_set_1 = gene_set_1, gene_set_2 = gene_set_2)

  comScore[, lig_nr := nr_cells[lig_cell_type]]
  comScore[, rec_nr := nr_cells[rec_cell_type]]

  # prepare for randomized scores
  total_av = rep(0, nrow(comScore))

  if(detailed == FALSE) {
    total_sum = rep(0, nrow(comScore))
  } else {
    total_sum = matrix(nrow = nrow(comScore), ncol = random_iter)
  }

  total_bool = rep(0, nrow(comScore))


  ## parallel option ##
  # not yet available


  for(sim in 1:random_iter) {

    if(verbose == TRUE) cat('simulation ', sim, '\n')


    # create temporary giotto
    tempGiotto = subsetGiotto(gobject = gobject)

    # randomize annoation
    cell_types = cell_metadata[[cluster_column]]
    if(set_seed == TRUE) {
      seed_number = seed_number+sim
      set.seed(seed = seed_number)
    }
    random_cell_types = sample(x = cell_types, size = length(cell_types))
    tempGiotto = addCellMetadata(gobject = tempGiotto, new_metadata = random_cell_types, by_column = F)

    # get random communication scores
    randomScore = average_gene_gene_expression_in_groups(gobject = tempGiotto,
                                                         cluster_column = 'random_cell_types',
                                                         gene_set_1 = gene_set_1, gene_set_2 = gene_set_2)

    # average random score
    total_av = total_av + randomScore[['LR_expr']]

    # difference between observed and random
    difference = comScore[['LR_expr']] - randomScore[['LR_expr']]

    # calculate total difference
    if(detailed == FALSE) {
      total_sum = total_sum+difference
    } else {
      total_sum[,sim] = difference
    }

    # calculate p-values
    difference[difference > 0] = 1
    difference[difference < 0] = -1
    total_bool = total_bool + difference

  }

  comScore[, rand_expr := total_av/random_iter]

  if(detailed == TRUE) {
    av_difference_scores = rowMeans_giotto(total_sum)
    sd_difference_scores = apply(total_sum, MARGIN = 1, FUN = stats::sd)

    comScore[, av_diff := av_difference_scores]
    comScore[, sd_diff := sd_difference_scores]
    comScore[, z_score := (LR_expr - rand_expr)/sd_diff]

  } else {
    comScore[, av_diff := total_sum/random_iter]
  }

  comScore[, log2fc := log2((LR_expr+log2FC_addendum)/(rand_expr+log2FC_addendum))]
  comScore[, pvalue := total_bool/random_iter]
  comScore[, pvalue := ifelse(pvalue > 0, 1-pvalue, 1+pvalue)]
  comScore[, LR_cell_comb := paste0(lig_cell_type,'--',rec_cell_type)]

  if(adjust_target == 'genes') {
    comScore[, p.adj := stats::p.adjust(pvalue, method = adjust_method), by = .(LR_cell_comb)]
  } else if(adjust_target == 'cells'){
    comScore[, p.adj := stats::p.adjust(pvalue, method = adjust_method), by = .(LR_comb)]
  }


  # get minimum adjusted p.value that is not zero
  all_p.adj = comScore[['p.adj']]
  lowest_p.adj = min(all_p.adj[all_p.adj != 0])
  comScore[, PI := ifelse(p.adj == 0, log2fc*(-log10(lowest_p.adj)), log2fc*(-log10(p.adj)))]
  #comScore[, PI := log2fc*(1-p.adj)]

  data.table::setorder(comScore, LR_comb, -LR_expr)

  return(comScore)

}



#' @title create_cell_type_random_cell_IDs
#' @name create_cell_type_random_cell_IDs
#' @description creates randomized cell ids within a selection of cell types
#' @param gobject giotto object to use
#' @param cluster_column cluster column with cell type information
#' @param needed_cell_types vector of cell type names for which a random id will be found
#' @param set_seed set a seed for reproducibility
#' @param seed_number seed number
#' @return list of randomly sampled cell ids with same cell type composition
#' @keywords internal
create_cell_type_random_cell_IDs = function(gobject,
                                            cluster_column = 'cell_types',
                                            needed_cell_types,
                                            set_seed = FALSE,
                                            seed_number = 1234) {

  # subset metadata to choose from
  full_metadata = pDataDT(gobject)
  possible_metadata = full_metadata[get(cluster_column) %in% unique(needed_cell_types)]

  sample_ids = list()

  uniq_types = unique(needed_cell_types)

  for(i in 1:length(uniq_types)) {

    uniq_type = uniq_types[i]
    length_random = length(needed_cell_types[needed_cell_types == uniq_type])
    if(set_seed == TRUE) {
      set.seed(seed = seed_number)
    }
    sub_sample_ids = possible_metadata[get(cluster_column) == uniq_type][sample(x = 1:.N, size = length_random)][['cell_ID']]
    sample_ids[[i]] = sub_sample_ids

  }
  return(unlist(sample_ids))
}




#' @title specificCellCellcommunicationScores
#' @name specificCellCellcommunicationScores
#' @description Specific Cell-Cell communication scores based on spatial expression of interacting cells
#' @param gobject giotto object to use
#' @param spatial_network_name spatial network to use for identifying interacting cells
#' @param cluster_column cluster column with cell type information
#' @param random_iter number of iterations
#' @param cell_type_1 first cell type
#' @param cell_type_2 second cell type
#' @param gene_set_1 first specific gene set from gene pairs
#' @param gene_set_2 second specific gene set from gene pairs
#' @param log2FC_addendum addendum to add when calculating log2FC
#' @param min_observations minimum number of interactions needed to be considered
#' @param detailed provide more detailed information (random variance and z-score)
#' @param adjust_method which method to adjust p-values
#' @param adjust_target adjust multiple hypotheses at the cell or gene level
#' @param set_seed set a seed for reproducibility
#' @param seed_number seed number
#' @param verbose verbose
#' @return Cell-Cell communication scores for gene pairs based on spatial interaction
#' @details Statistical framework to identify if pairs of genes (such as ligand-receptor combinations)
#' are expressed at higher levels than expected based on a reshuffled null distribution
#' of gene expression values in cells that are spatially in proximity to eachother.
#' \itemize{
#'  \item{LR_comb:}{Pair of ligand and receptor}
#'  \item{lig_cell_type:}{ cell type to assess expression level of ligand }
#'  \item{lig_expr:}{ average expression of ligand in lig_cell_type }
#'  \item{ligand:}{ ligand name }
#'  \item{rec_cell_type:}{ cell type to assess expression level of receptor }
#'  \item{rec_expr:}{ average expression of receptor in rec_cell_type}
#'  \item{receptor:}{ receptor name }
#'  \item{LR_expr:}{ combined average ligand and receptor expression }
#'  \item{lig_nr:}{ total number of cells from lig_cell_type that spatially interact with cells from rec_cell_type }
#'  \item{rec_nr:}{ total number of cells from rec_cell_type that spatially interact with cells from lig_cell_type }
#'  \item{rand_expr:}{ average combined ligand and receptor expression from random spatial permutations }
#'  \item{av_diff:}{ average difference between LR_expr and rand_expr over all random spatial permutations }
#'  \item{sd_diff:}{ (optional) standard deviation of the difference between LR_expr and rand_expr over all random spatial permutations }
#'  \item{z_score:}{ (optinal) z-score }
#'  \item{log2fc:}{ log2 fold-change (LR_expr/rand_expr) }
#'  \item{pvalue:}{ p-value }
#'  \item{LR_cell_comb:}{ cell type pair combination }
#'  \item{p.adj:}{ adjusted p-value }
#'  \item{PI:}{ significanc score: log2fc * -log10(p.adj) }
#' }
#' @export
specificCellCellcommunicationScores = function(gobject,
                                               spatial_network_name = 'Delaunay_network',
                                               cluster_column = 'cell_types',
                                               random_iter = 100,
                                               cell_type_1 = 'astrocyte',
                                               cell_type_2 = 'endothelial',
                                               gene_set_1,
                                               gene_set_2,
                                               log2FC_addendum = 0.1,
                                               min_observations = 2,
                                               detailed = FALSE,
                                               adjust_method = c("fdr", "bonferroni","BH", "holm", "hochberg", "hommel",
                                                                 "BY", "none"),
                                               adjust_target = c('genes', 'cells'),
                                               set_seed = FALSE,
                                               seed_number = 1234,
                                               verbose = T) {



  # data.table variables
  from_to = cell_ID = lig_cell_type = rec_cell_type = lig_nr = rec_nr = rand_expr = NULL
  av_diff = log2fc = LR_expr = pvalue = LR_cell_comb = p.adj = LR_comb = PI = NULL
  sd_diff = z_score = NULL

  # get parameters
  adjust_method = match.arg(adjust_method, choices = c("fdr", "bonferroni","BH", "holm", "hochberg", "hommel",
                                                       "BY", "none"))
  adjust_target = match.arg(adjust_target, choices = c('genes', 'cells'))

  # metadata
  cell_metadata = pDataDT(gobject = gobject)

  # get annotated spatial network
  annot_network = annotateSpatialNetwork(gobject, spatial_network_name = spatial_network_name, cluster_column = cluster_column)

  cell_direction_1 = paste0(cell_type_1,'-',cell_type_2)
  cell_direction_2 = paste0(cell_type_2,'-',cell_type_1)

  subset_annot_network = annot_network[from_to %in% c(cell_direction_1, cell_direction_2)]

  # make sure that there are sufficient observations
  if(nrow(subset_annot_network) <= min_observations) {

    return(NULL)

  } else {


    # subset giotto object to only interacting cells
    subset_ids = unique(c(subset_annot_network$to, subset_annot_network$from))
    subsetGiotto = subsetGiotto(gobject = gobject, cell_ids = subset_ids)

    # get information about number of cells
    temp_meta = pDataDT(subsetGiotto)
    nr_cell_types = temp_meta[cell_ID %in% subset_ids][,.N, by = c(cluster_column)]
    nr_cells = nr_cell_types$N
    names(nr_cells) = nr_cell_types$cell_types

    # get average communication scores
    comScore = average_gene_gene_expression_in_groups(gobject = subsetGiotto,
                                                      cluster_column = cluster_column,
                                                      gene_set_1 = gene_set_1,
                                                      gene_set_2 = gene_set_2)
    comScore = comScore[(lig_cell_type == cell_type_1 & rec_cell_type == cell_type_2) |
                          (lig_cell_type == cell_type_2 & rec_cell_type == cell_type_1)]

    comScore[, lig_nr := nr_cells[lig_cell_type]]
    comScore[, rec_nr := nr_cells[rec_cell_type]]

    # prepare for randomized scores
    total_av = rep(0, nrow(comScore))

    if(detailed == FALSE) {
      total_sum = rep(0, nrow(comScore))
    } else {
      total_sum = matrix(nrow = nrow(comScore), ncol = random_iter)
    }

    total_bool = rep(0, nrow(comScore))

    # identify which cell types you need
    subset_metadata = cell_metadata[cell_ID %in% subset_ids]
    needed_cell_types = subset_metadata[[cluster_column]]



    ## simulations ##
    for(sim in 1:random_iter) {

      if(verbose == TRUE) cat('simulation ', sim, '\n')

      # get random ids and subset
      if(set_seed == TRUE) {
        seed_number = seed_number+sim
        set.seed(seed = seed_number)
      }
      random_ids = create_cell_type_random_cell_IDs(gobject = gobject,
                                                    cluster_column = cluster_column,
                                                    needed_cell_types = needed_cell_types,
                                                    set_seed = set_seed,
                                                    seed_number = seed_number)
      tempGiotto = subsetGiotto(gobject = gobject, cell_ids = random_ids)

      # get random communication scores
      randomScore = average_gene_gene_expression_in_groups(gobject = tempGiotto,
                                                           cluster_column = cluster_column,
                                                           gene_set_1 = gene_set_1,
                                                           gene_set_2 = gene_set_2)
      randomScore = randomScore[(lig_cell_type == cell_type_1 & rec_cell_type == cell_type_2) |
                                  (lig_cell_type == cell_type_2 & rec_cell_type == cell_type_1)]




      # average random score
      total_av = total_av + randomScore[['LR_expr']]

      # difference between observed and random
      difference = comScore[['LR_expr']] - randomScore[['LR_expr']]

      # calculate total difference
      if(detailed == FALSE) {
        total_sum = total_sum+difference
      } else {
        total_sum[,sim] = difference
      }

      # calculate p-values
      difference[difference > 0] = 1
      difference[difference < 0] = -1
      total_bool = total_bool + difference

    }

    comScore[, rand_expr := total_av/random_iter]

    if(detailed == TRUE) {
      av_difference_scores = rowMeans_giotto(total_sum)
      sd_difference_scores = apply(total_sum, MARGIN = 1, FUN = stats::sd)

      comScore[, av_diff := av_difference_scores]
      comScore[, sd_diff := sd_difference_scores]
      comScore[, z_score := (LR_expr - rand_expr)/sd_diff]

    } else {
      comScore[, av_diff := total_sum/random_iter]
    }


    comScore[, log2fc := log2((LR_expr+log2FC_addendum)/(rand_expr+log2FC_addendum))]
    comScore[, pvalue := total_bool/random_iter]
    comScore[, pvalue := ifelse(pvalue > 0, 1-pvalue, 1+pvalue)]
    comScore[, LR_cell_comb := paste0(lig_cell_type,'--',rec_cell_type)]

    if(adjust_target == 'genes') {
      comScore[, p.adj := stats::p.adjust(pvalue, method = adjust_method), by = .(LR_cell_comb)]
    } else if(adjust_target == 'cells'){
      comScore[, p.adj := stats::p.adjust(pvalue, method = adjust_method), by = .(LR_comb)]
    }

    # get minimum adjusted p.value that is not zero
    all_p.adj = comScore[['p.adj']]
    lowest_p.adj = min(all_p.adj[all_p.adj != 0])
    comScore[, PI := ifelse(p.adj == 0, log2fc*(-log10(lowest_p.adj)), log2fc*(-log10(p.adj)))]

    return(comScore)

  }
}


#' @title spatCellCellcom
#' @name spatCellCellcom
#' @description Spatial Cell-Cell communication scores based on spatial expression of interacting cells
#' @param gobject giotto object to use
#' @param spatial_network_name spatial network to use for identifying interacting cells
#' @param cluster_column cluster column with cell type information
#' @param random_iter number of iterations
#' @param gene_set_1 first specific gene set from gene pairs
#' @param gene_set_2 second specific gene set from gene pairs
#' @param log2FC_addendum addendum to add when calculating log2FC
#' @param min_observations minimum number of interactions needed to be considered
#' @param detailed provide more detailed information (random variance and z-score)
#' @param adjust_method which method to adjust p-values
#' @param adjust_target adjust multiple hypotheses at the cell or gene level
#' @param do_parallel run calculations in parallel with mclapply
#' @param cores number of cores to use if do_parallel = TRUE
#' @param set_seed set a seed for reproducibility
#' @param seed_number seed number
#' @param verbose verbose
#' @return Cell-Cell communication scores for gene pairs based on spatial interaction
#' @details Statistical framework to identify if pairs of genes (such as ligand-receptor combinations)
#' are expressed at higher levels than expected based on a reshuffled null distribution
#' of gene expression values in cells that are spatially in proximity to eachother..
#' \itemize{
#'  \item{LR_comb:}{Pair of ligand and receptor}
#'  \item{lig_cell_type:}{ cell type to assess expression level of ligand }
#'  \item{lig_expr:}{ average expression of ligand in lig_cell_type }
#'  \item{ligand:}{ ligand name }
#'  \item{rec_cell_type:}{ cell type to assess expression level of receptor }
#'  \item{rec_expr:}{ average expression of receptor in rec_cell_type}
#'  \item{receptor:}{ receptor name }
#'  \item{LR_expr:}{ combined average ligand and receptor expression }
#'  \item{lig_nr:}{ total number of cells from lig_cell_type that spatially interact with cells from rec_cell_type }
#'  \item{rec_nr:}{ total number of cells from rec_cell_type that spatially interact with cells from lig_cell_type }
#'  \item{rand_expr:}{ average combined ligand and receptor expression from random spatial permutations }
#'  \item{av_diff:}{ average difference between LR_expr and rand_expr over all random spatial permutations }
#'  \item{sd_diff:}{ (optional) standard deviation of the difference between LR_expr and rand_expr over all random spatial permutations }
#'  \item{z_score:}{ (optinal) z-score }
#'  \item{log2fc:}{ log2 fold-change (LR_expr/rand_expr) }
#'  \item{pvalue:}{ p-value }
#'  \item{LR_cell_comb:}{ cell type pair combination }
#'  \item{p.adj:}{ adjusted p-value }
#'  \item{PI:}{ significanc score: log2fc * -log10(p.adj) }
#' }
#' @export
spatCellCellcom = function(gobject,
                           spatial_network_name = 'Delaunay_network',
                           cluster_column = 'cell_types',
                           random_iter = 1000,
                           gene_set_1,
                           gene_set_2,
                           log2FC_addendum = 0.1,
                           min_observations = 2,
                           detailed = FALSE,
                           adjust_method = c("fdr", "bonferroni","BH", "holm", "hochberg", "hommel",
                                             "BY", "none"),
                           adjust_target = c('genes', 'cells'),
                           do_parallel = TRUE,
                           cores = NA,
                           set_seed = TRUE,
                           seed_number = 1234,
                           verbose = c('a little', 'a lot', 'none')) {

  verbose = match.arg(verbose, choices = c('a little', 'a lot', 'none'))

  ## check if spatial network exists ##
  spat_networks = showNetworks(gobject = gobject, verbose = F)
  if(!spatial_network_name %in% spat_networks) {
    stop(spatial_network_name, ' is not an existing spatial network \n',
         'use showNetworks() to see the available networks \n',
         'or create a new spatial network with createSpatialNetwork() \n')
  }


  cell_metadata = pDataDT(gobject)

  ## get all combinations between cell types
  all_uniq_values = unique(cell_metadata[[cluster_column]])
  same_DT = data.table(V1 = all_uniq_values, V2 = all_uniq_values)
  combn_DT = as.data.table(t(combn(all_uniq_values, m = 2)))
  combn_DT = rbind(same_DT, combn_DT)

  ## parallel option ##
  if(do_parallel == TRUE) {


    savelist = giotto_lapply(X = 1:nrow(combn_DT), cores = cores, fun = function(row) {

      cell_type_1 = combn_DT[row][['V1']]
      cell_type_2 = combn_DT[row][['V2']]

      specific_scores = specificCellCellcommunicationScores(gobject = gobject,
                                                            cluster_column = cluster_column,
                                                            random_iter = random_iter,
                                                            cell_type_1 = cell_type_1,
                                                            cell_type_2 = cell_type_2,
                                                            gene_set_1 = gene_set_1,
                                                            gene_set_2 = gene_set_2,
                                                            spatial_network_name = spatial_network_name,
                                                            log2FC_addendum = log2FC_addendum,
                                                            min_observations = min_observations,
                                                            detailed = detailed,
                                                            adjust_method = adjust_method,
                                                            adjust_target = adjust_target,
                                                            set_seed = set_seed,
                                                            seed_number = seed_number)

    })


  } else {

    ## for loop over all combinations ##
    savelist = list()
    countdown = nrow(combn_DT)

    for(row in 1:nrow(combn_DT)) {

      cell_type_1 = combn_DT[row][['V1']]
      cell_type_2 = combn_DT[row][['V2']]

      if(verbose == 'a little' | verbose == 'a lot') cat('\n\n PROCESS nr ', countdown,': ', cell_type_1, ' and ', cell_type_2, '\n\n')

      if(verbose %in% c('a little', 'none')) {
        specific_verbose = F
      } else {
        specific_verbose = T
      }

      specific_scores = specificCellCellcommunicationScores(gobject = gobject,
                                                            cluster_column = cluster_column,
                                                            random_iter = random_iter,
                                                            cell_type_1 = cell_type_1,
                                                            cell_type_2 = cell_type_2,
                                                            gene_set_1 = gene_set_1,
                                                            gene_set_2 = gene_set_2,
                                                            spatial_network_name = spatial_network_name,
                                                            log2FC_addendum = log2FC_addendum,
                                                            min_observations = min_observations,
                                                            detailed = detailed,
                                                            adjust_method = adjust_method,
                                                            adjust_target = adjust_target,
                                                            set_seed = set_seed,
                                                            seed_number = seed_number,
                                                            verbose = specific_verbose)
      savelist[[row]] = specific_scores
      countdown = countdown - 1
    }

  }

  finalDT = do.call('rbind', savelist)

  # data.table variables
  LR_comb = LR_expr = NULL

  data.table::setorder(finalDT, LR_comb, -LR_expr)

  return(finalDT)
}




#' @title combCCcom
#' @name combCCcom
#' @description Combine spatial and expression based cell-cell communication data.tables
#' @param spatialCC spatial cell-cell communication scores
#' @param exprCC expression cell-cell communication scores
#' @param min_lig_nr minimum number of ligand cells
#' @param min_rec_nr minimum number of receptor cells
#' @param min_padj_value minimum adjusted p-value
#' @param min_log2fc minimum log2 fold-change
#' @param min_av_diff minimum average expression difference
#' @param detailed detailed option used with \code{\link{spatCellCellcom}} (default = FALSE)
#' @return combined data.table with spatial and expression communication data
#' @export
combCCcom = function(spatialCC,
                     exprCC,
                     min_lig_nr = 3,
                     min_rec_nr = 3,
                     min_padj_value = 1,
                     min_log2fc = 0,
                     min_av_diff = 0,
                     detailed = FALSE) {


  # data.table variables
  lig_nr = rec_nr = p.adj = log2fc = av_diff = NULL

  spatialCC = spatialCC[lig_nr >= min_lig_nr & rec_nr >= min_rec_nr &
                          p.adj <= min_padj_value & abs(log2fc) >= min_log2fc & abs(av_diff) >= min_av_diff]


  if(detailed == TRUE) {
    old_detailed = c('sd_diff', 'z_score')
    new_detailed = c('sd_diff_spat', 'z_score_spat')
  } else {
    old_detailed = NULL
    new_detailed = NULL
  }


  data.table::setnames(x = spatialCC,
                       old = c('lig_expr', 'rec_expr', 'LR_expr', 'lig_nr', 'rec_nr',
                               'rand_expr', 'av_diff', old_detailed, 'log2fc', 'pvalue', 'p.adj', 'PI'),
                       new = c('lig_expr_spat', 'rec_expr_spat', 'LR_expr_spat', 'lig_nr_spat', 'rec_nr_spat',
                               'rand_expr_spat', 'av_diff_spat', new_detailed, 'log2fc_spat', 'pvalue_spat', 'p.adj_spat', 'PI_spat'))

  merge_DT = data.table::merge.data.table(spatialCC, exprCC, by = c('LR_comb', 'LR_cell_comb',
                                                                    'lig_cell_type', 'rec_cell_type',
                                                                    'ligand', 'receptor'))

  # data.table variables
  LR_expr_rnk = LR_expr = LR_comb = LR_spat_rnk = LR_expr_spat = exprPI_rnk = PI = spatPI_rnk = PI_spat = NULL

  # rank for expression levels
  merge_DT[, LR_expr_rnk := rank(-LR_expr), by = LR_comb]
  merge_DT[, LR_spat_rnk := rank(-LR_expr_spat), by = LR_comb]

  # rank for differential activity levels
  merge_DT[, exprPI_rnk := rank(-PI), by = LR_comb]
  merge_DT[, spatPI_rnk := rank(-PI_spat), by = LR_comb]

  return(merge_DT)

}