prepare_data_for_plotting: Prepares data for plotting cell frequency and MSI

View source: R/plot_batch_effect.R

prepare_data_for_plotting    R Documentation

Prepares data for plotting cell frequency and MSI


Performs dimensionality reduction and constructs a data frame for plotting cell frequencies and MSI per cluster and metacluster, as obtained from the extract_pctgs_msi_per_flowsom function. For the MSI, only values with MSI > 1 are taken.


prepare_data_for_plotting(
  frequency_msi_list,
  matrix_type,
  n_neighbours = 15,
  seed = NULL
)



frequency_msi_list: List containing matrices with cell frequency and MSI, obtained in the step extract_pctgs_msi_per_flowsom.


matrix_type: The name of the matrix to be plotted.


n_neighbours: The size of the local neighborhood in the UMAP analysis; defaults to 15, as in uwot::umap(). It is recommended to set it to the number of files in each batch.


seed: Numeric seed, set to obtain reproducible results; default NULL.


A data frame for plotting.


# NOTE: `dir` is not defined in this example; it is assumed to hold the path
# to the analysis directory that contains the "Gated" and "CytoNormed"
# sub-folders. Set it before running the code below.

# Define files before normalization.
# The pattern is an anchored regular expression so that only names ending in
# ".fcs" are listed (an unescaped "." would match any character).
gate_dir <- file.path(dir, "Gated")
files_before_norm <- list.files(gate_dir,
                                pattern = "\\.fcs$",
                                full.names = TRUE)

# Define files after normalization
norm_dir <- file.path(dir, "CytoNormed")
files_after_norm <- list.files(norm_dir,
                               pattern = "\\.fcs$",
                               full.names = TRUE)

# Files need to be in the same order; check and order if needed.
# The "Norm_" prefix is stripped from the normalized file names so they can be
# compared with the names of the files before normalization.
# NOTE(review): the second argument was missing from the rendered example; the
# before-normalization basenames are the evident comparison target -- confirm
# against the package source.
test_match_order(x = basename(gsub("Norm_", "", files_after_norm)),
                 basename(files_before_norm))

# Batch label of each file: the first "day<digits>" token in its file name
# (NA when a name contains no such token).
before_norm_names <- basename(files_before_norm)
batch_labels <- stringr::str_extract(before_norm_names, "day[0-9]*")

# Build the list of cell-frequency and MSI matrices from the files before and
# after normalization; this list is the input of prepare_data_for_plotting().
# NOTE(review): the functional_markers vector was cut off after "TGF" in the
# rendered example (its closing parenthesis was missing); further markers may
# have been listed in the original -- confirm against the package source.
mx <- extract_pctgs_msi_per_flowsom(files_after_norm = files_after_norm,
                                    files_before_norm = files_before_norm,
                                    nCells = 50000,
                                    phenotyping_markers = c("CD", "HLA", "IgD"),
                                    functional_markers = c("MIP", "MCP", "IL",
                                                           "IFNa", "TNF", "TGF"),
                                    xdim = 10,
                                    ydim = 10,
                                    n_metaclusters = 35,
                                    out_dir = norm_dir,
                                    arcsine_transform = TRUE,
                                    save_matrix = TRUE,
                                    seed = 343)
# Create the list to store the plots: one plot per matrix name found in the
# first element of mx.
plots <- list()
for (name in names(mx[[1]])) {
  df_plot <- prepare_data_for_plotting(frequency_msi_list = mx,
                                       matrix_type = name,
                                       n_neighbours = 11, seed = 1)

  # Annotations are parsed from the row names of df_plot.
  batch <- stringr::str_match(rownames(df_plot), "day[0-9]*")[, 1]
  samples_id <- ifelse(grepl("p1", rownames(df_plot)), "p1",
                       ifelse(grepl("p2", rownames(df_plot)), "p2", "ref"))
  # Not used in the call below; kept from the original example as an
  # alternative annotation.
  stimulation <- stringr::str_match(rownames(df_plot),
                                    "UNS|RSQ|IMQ|LPS|CPG")[, 1]

  plots[[name]] <- plot_batch_using_freq_msi(df_plot = df_plot, fill = batch,
                                             shape = samples_id, color = batch,
                                             split_by_normalization = TRUE,
                                             title = name)
}


# Arrange the collected plots on a 2 x 2 grid with the legend on the right.
gg_a <- grid_arrange_common_legend(
  plot_lists = plots,
  nrow = 2,
  ncol = 2,
  position = "right"
)

# Write the arranged figure as a 22 x 14 cm PNG at 300 dpi into norm_dir.
ggplot2::ggsave(
  filename = "batch_effect_frequency_MSI.png",
  plot = gg_a,
  device = "png",
  path = norm_dir,
  width = 22,
  height = 14,
  units = "cm",
  dpi = 300
)

prybakowska/CytoQP documentation built on June 28, 2022, 12:36 a.m.