knitr::opts_chunk$set( collapse = TRUE, comment = "#>", echo = TRUE, # Change to TRUE to see code fig.height = 5.5, fig.width = 7.5 )
# library(idepGolem)
devtools::load_all()
idep_data <- get_idep_data() DATABASE <- Sys.getenv("IDEP_DATABASE")[1] YOUR_DATA_PATH <- paste0(DATABASE, "data_go/BcellGSE71176_p53.csv") YOUR_EXPERIMENT_PATH <- paste0(DATABASE, "data_go/BcellGSE71176_p53_sampleInfo.csv") expression_file <- data.frame( datapath = YOUR_DATA_PATH ) experiment_file <- data.frame( datapath = YOUR_EXPERIMENT_PATH ) load_data <- input_data( expression_file = expression_file, experiment_file = experiment_file, go_button = FALSE, demo_data_file = idep_data$demo_data_file, demo_metadata_file = idep_data$demo_metadata_file ) converted <- convert_id( query = rownames(load_data$data), idep_data = idep_data, select_org = "BestMatch" ) all_gene_info <- gene_info( converted = converted, select_org = "BestMatch", idep_data = idep_data ) converted_data <- convert_data( converted = converted, no_id_conversion = FALSE, data = load_data$data, multiple_map = "mean" ) gene_names <- get_all_gene_names( mapped_ids = converted_data$mapped_ids, all_gene_info = all_gene_info )
processed_data <- pre_process( data = converted_data$data, missing_value = "geneMedian", data_file_format = 1, low_filter_fpkm = NULL, n_min_samples_fpkm = NULL, log_transform_fpkm = NULL, log_start_fpkm = NULL, min_counts = .5, n_min_samples_count = 1, counts_transform = 1, counts_log_start = 4, no_fdr = FALSE )
total_counts_ggplot( counts_data = processed_data$raw_counts, sample_info = load_data$sample_info )
eda_scatter( processed_data = processed_data$data, plot_yaxis = colnames(processed_data$data)[1], plot_xaxis = colnames(processed_data$data)[2] )
eda_boxplot( processed_data = processed_data$data, sample_info = load_data$sample_info )
eda_density( processed_data = processed_data$data, sample_info = load_data$sample_info )
merged_raw_counts <- merge_data( all_gene_names = gene_names, data = processed_data$raw_counts, merge_ID = "ensembl_ID" ) merged_data <- merge_data( all_gene_names = gene_names, data = processed_data$data, merge_ID = "ensembl_ID" )
PCA_plot( data = processed_data$data, sample_info = load_data$sample_info, PCAx = 1, PCAy = 2, selected_color = colnames(load_data$sample_info)[[1]], selected_shape = colnames(load_data$sample_info)[[1]] ) t_SNE_plot( data = processed_data$data, sample_info = load_data$sample_info, selected_color = colnames(load_data$sample_info)[[1]], selected_shape = colnames(load_data$sample_info)[[1]] ) pc_factor_correlation( data = processed_data$data, sample_info = load_data$sample_info ) MDS_plot( data = processed_data$data, sample_info = load_data$sample_info, selected_color = colnames(load_data$sample_info)[[1]], selected_shape = colnames(load_data$sample_info)[[1]] )
PCA_biplot( data = processed_data$data, sample_info = load_data$sample_info, select_gene_id = "symbol", all_gene_names = gene_names ) PCA_Scree(processed_data = processed_data$data) PCAtools_eigencorplot( processed_data = processed_data$data, sample_info = load_data$sample_info )
n_genes <- 20 dist_funs <- dist_functions() hclust_funs <- hcluster_functions() heatmap_data <- process_heatmap_data( data = processed_data$data, n_genes_max = n_genes, gene_centering = TRUE, gene_normalize = FALSE, sample_centering = FALSE, sample_normalize = FALSE, all_gene_names = gene_names, select_gene_id = "ensembl_ID" ) heat_map <- heatmap_main( data = heatmap_data, cluster_meth = 1, heatmap_cutoff = 3, sample_info = as.data.frame(load_data$sample_info), select_factors_heatmap = "All factors", dist_function = 1, dist_funs = dist_funs, hclust_function = "average", sample_clustering = F, heatmap_color_select = c("red", "white", "blue"), row_dend = T, k_clusters = 5, re_run = 5, selected_genes = NULL )
gene_sets <- read_pathway_sets( all_gene_names_query = gene_names[1:500, ], converted = converted, go = "GOBP", select_org = "BestMatch", gmt_file = NULL, idep_data = idep_data, gene_info = all_gene_info ) pathway_info <- find_overlap( pathway_table = gene_sets$pathway_table, query_set = gene_sets$query_set, total_genes = gene_sets$total_genes, processed_data = processed_data$data, gene_info = all_gene_info, go = "GOBP", idep_data = idep_data use_filtered_background = TRUE, select_org = "BestMatch", reduced = .75 ) pathway_info
list_factors <- list_factors_ui( sample_info = load_data$sample_info, data_file_format = "1", counts_deg_method = "3" ) list_factors$choices factors <- c(list_factors$choices[[1]], list_factors$choices[[2]]) block_factors <- list_block_factors_ui( sample_info = load_data$sample_info, select_factors_model = factors ) block_factors list_model_comparisons <- list_model_comparisons_ui( load_data$sample_info, factors, load_data$data ) list_model_comparisons$choices model_comparisons <- list_model_comparisons$choices$`p53: NULL vs. WT` interaction_terms <- list_interaction_terms_ui( sample_info = load_data$sample_info, select_factors_model = factors ) interaction_terms list_reference_levels <- select_reference_levels_ui( sample_info = load_data$sample_info, select_factors_model = factors, data_file_format = "1", counts_deg_method = "3" ) list_reference_levels reference_level <- c( paste0(names(list_reference_levels)[1], ":", list_reference_levels[1][[1]][[1]]), paste0(names(list_reference_levels)[2], ":", list_reference_levels[2][[1]][[1]]) )
limma <- limma_value( data_file_format = "1", counts_deg_method = "3", raw_counts = processed_data$raw_counts, limma_p_val = .1, limma_fc = 2, select_model_comprions = model_comparisons, sample_info = load_data$sample_info, select_factors_model = factors, select_interactions = interaction_terms, select_block_factors_model = block_factors, factor_reference_levels = reference_level, processed_data = processed_data$data, counts_log_start = 4, p_vals = processed_data$p_vals, descr = processed_data$descr )
contrast_samples <- find_contrast_samples( select_contrast = limma$comparisons[[1]], all_sample_names = colnames(processed_data$data), sample_info = load_data$sample_info, select_factors_model = factors, select_model_comprions = model_comparisons, reference_levels = reference_level, counts_deg_method = "3", data_file_format = "1" ) deg_heat <- deg_heat_data( limma = limma, select_contrast = limma$comparisons[[1]], processed_data = processed_data$data, contrast_samples = contrast_samples ) up_reg_data <- pathway_info up_reg_data <- as.data.frame(up_reg_data) up_reg_data$direction <- rep("Up", nrow(up_reg_data)) # up_reg_data down_reg_data <- deg_heat$genes[deg_heat$bar == -1, ] down_reg_data <- as.data.frame(down_reg_data) down_reg_data$direction <- rep("Down", nrow(down_reg_data)) # down_reg_data main_heat_deg <- deg_heatmap( df = deg_heat$genes, bar = deg_heat$bar, heatmap_color_select = c("green", "black", "red"), cluster_rows = TRUE ) ComplexHeatmap::draw(main_heat_deg)
volcano_data <- volcano_data( select_contrast = limma$comparisons[[1]], comparisons = limma$comparisons, top_genes = limma$top_genes, limma_p_val = .1, limma_fc = 2, processed_data = processed_data$data, contrast_samples = contrast_samples ) vol_labels <- volcano_data$data |> dplyr::filter(upOrDown != "None") |> dplyr::filter(abs(Fold) > 2.5 & abs(-log10(FDR)) > 5) |> dplyr::pull(Row.names) plot_volcano( data = volcano_data$data, anotate_genes = vol_labels, plot_colors = c("red", "grey", "blue") ) ma_labels <- volcano_data$data |> dplyr::filter(upOrDown != "None") |> dplyr::filter(abs(Average) > 5 & abs(-log10(FDR)) > 5) |> dplyr::pull(Row.names) plot_ma( data = volcano_data$data, plot_colors = c("red", "grey", "blue"), anotate_genes = ma_labels )
gene_sets <- read_gene_sets( converted = converted, all_gene_names = gene_names, go = "GOBP", select_org = "BestMatch", idep_data = idep_data, my_range = c(15, 2000) ) gage <- gage_data( select_go = "GOBP", select_contrast = limma$comparisons[[2]], min_set_size = 15, max_set_size = 2000, limma = limma, gene_p_val_cutoff = 1, gene_sets = gene_sets$gene_lists, absolute_fold = FALSE, pathway_p_val_cutoff = .1, n_pathway_show = 30 ) gage[, 2] contrast_samples_pgsea <- find_contrast_samples( select_contrast = limma$comparisons[[2]], all_sample_names = colnames(processed_data$data), sample_info = load_data$sample_info, select_factors_model = factors, select_model_comprions = model_comparisons, reference_levels = reference_level, counts_deg_method = "3", data_file_format = "1" ) plot_pgsea( my_range = c(15, 2000), processed_data = processed_data$data, contrast_samples = contrast_samples_pgsea, gene_sets = gene_sets$gene_lists, pathway_p_val_cutoff = .1, n_pathway_show = 30 ) fgsea <- fgsea_data( select_contrast = limma$comparisons[[2]], my_range = c(15, 2000), limma = limma, gene_p_val_cutoff = 1, gene_sets = gene_sets$gene_lists, absolute_fold = FALSE, pathway_p_val_cutoff = .1, n_pathway_show = 30 ) reactom_data <- reactome_data( select_contrast = limma$comparisons[[2]], my_range = c(15, 2000), limma = limma, gene_p_val_cutoff = 1, converted = converted, idep_data = idep_data, pathway_p_val_cutoff = .1, n_pathway_show = 30, absolute_fold = FALSE ) pgsea_plot_all( go = "GOBP", my_range = c(15, 2000), data = processed_data$data, select_contrast = limma$comparisons[[2]], gene_sets = gene_sets$gene_lists, pathway_p_val_cutoff = .1, n_pathway_show = 30 ) pgsea_plot_data <- get_pgsea_plot_data( my_range = c(15, 2000), data = processed_data$data, select_contrast = limma$comparisons[[2]], gene_sets = gene_sets$gene_lists, sample_info = load_data$sample_info, select_factors_model = factors, select_model_comprions = model_comparisons, pathway_p_val_cutoff = .1, n_pathway_show = 30 ) select_data <- pathway_select_data( sig_pathways = "RNA processing ", gene_sets = gene_sets$gene_lists, contrast_samples = contrast_samples_pgsea, data = processed_data$data, select_org = "BestMatch", all_gene_names = gene_names ) basic_heatmap( data = select_data, c("green", "black", "red") ) pathway_list_data <- get_pathway_list_data( pathway_method = 1, gage_pathway_data = gage, fgsea_pathway_data = NULL, pgsea_plot_data = NULL, pgsea_plot_all_samples_data = NULL, go = "GOBP", select_org = "BestMatch", gene_info = all_gene_info, gene_sets = gene_sets$gene_lists ) idepGolem::enrichment_tree_plot( go_table = pathway_list_data, group = "All Groups" ) kegg_pathway( go = "KEGG", gage_pathway_data = gage, sig_pathways = "Oocyte meiosis", select_contrast = "I:p53_NULL.Treatment_IR", limma = limma, converted = converted, idep_data = idep_data, select_org = "BestMatch" )
all_gene_info <- get_gene_info( converted = converted, select_org = "BestMatch", gene_info_files = idep_data$gene_info_files ) chromosome_plotly( limma = limma, select_contrast = limma$comparisons[[2]], all_gene_info = all_gene_info, ignore_non_coding = FALSE, limma_p_val_viz = .1, limma_fc_viz = 4, label_gene_symbol = FALSE, ma_window_size = 6, ma_window_steps = 2, ch_region_p_val = .01, hide_chr = TRUE, hide_patches = TRUE ) genome_plot_data_pre <- get_genome_plot_data_pre( select_contrast = limma$comparisons[[2]], limma = limma, all_gene_info = all_gene_info ) genome_plot_data <- get_genome_plot_data( genome_plot_data_pre = genome_plot_data_pre, all_gene_info = all_gene_info, select_contrast = limma$comparisons[[2]], limma = limma, regions_p_val_cutoff = .01, statistic_cutoff = .5 ) get_genome_plot( genome_plot_data = genome_plot_data, regions_p_val_cutoff = .01, statistic_cutoff = .5 )
# "BCCC()" biclust # "BCQU()" QUBIC # "BCUnibic()" runibic # "BCXmotifs()" biclust # "BCPlaid()" biclust # "BCSpectral()" biclust # "BCBimax()" biclust # "BCQuest()" biclust biclustering <- get_biclustering( data = processed_data$data, n_genes = 1000, biclust_method = "biclust::BCCC()" ) biclust_data <- biclust::bicluster(biclustering$data, biclustering$res, 1)[[1]] basic_heatmap( data = biclust_data, c("green", "black", "red") ) biclust_gene_names <- merge_data( all_gene_names = gene_names, data = biclust_data, merge_ID = "ensembl_ID" ) # Only keep the gene names and scrap the data gene_names_query <- dplyr::select_if(biclust_gene_names, is.character) gene_sets <- read_pathway_sets( all_gene_names_query = gene_names_query, converted = converted, go = "GOBP", select_org = "BestMatch", gmt_file = NULL, idep_data = idep_data, gene_info = all_gene_info ) pathway_info <- find_overlap( pathway_table = gene_sets$pathway_table, query_set = gene_sets$query_set, total_genes = gene_sets$total_genes, processed_data = processed_data$data, gene_info = all_gene_info, go = "GOBP", idep_data = idep_data, select_org = "GOBP" use_filtered_background = FALSE, reduced = FALSE ) biclust_table <- get_biclust_table_data( res = biclustering$res, biclust_data = biclust_data, select_org = "BestMatch", all_gene_info = all_gene_info )
wgcna <- get_wgcna( data = processed_data$data, n_genes = 1000, soft_power = 5, min_module_size = 20 ) get_wgcna_modules(wgcna) network_plot <- get_network_plot( select_wgcna_module = "1. turquoise (495 genes)", wgcna = wgcna, top_genes_network = 10, select_org = "BestMatch", all_gene_info = all_gene_info, edge_threshold = .4 ) network_plot() network_query <- network_enrich_data( select_wgcna_module = "1. turquoise (495 genes)", wgcna = wgcna ) gene_query <- dplyr::filter(gene_names, ensembl_ID %in% network_query) gene_sets <- read_pathway_sets( all_gene_names_query = gene_query, converted = converted, go = "GOBP", select_org = "BestMatch", gmt_file = NULL, idep_data = idep_data, gene_info = all_gene_info ) pathway_info <- find_overlap( pathway_table = gene_sets$pathway_table, query_set = gene_sets$query_set, total_genes = gene_sets$total_genes, processed_data = processed_data$data, gene_info = all_gene_info, go = "GOBP", idep_data = idep_data, select_org = "BestMatch" use_filtered_background = TRUE, reduced = FALSE ) plot_scale_independence( wgcna = wgcna ) plot_mean_connectivity( wgcna = wgcna )
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.