Nothing
## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>",
eval = FALSE, #this makes so that chunks are not rerun
cache = TRUE,
warning = FALSE)
## -----------------------------------------------------------------------------
# # install.packages("devtools")
# devtools::install_github("dzhakparov/GeneSelectR", build_vignettes = FALSE)
#
## -----------------------------------------------------------------------------
# GeneSelectR::configure_environment()
## -----------------------------------------------------------------------------
# GeneSelectR::set_reticulate_python()
# library(GeneSelectR)
# # rest of your code
## -----------------------------------------------------------------------------
# data("UrbanRandomSubset")
# head(UrbanRandomSubset[,1:10])
## -----------------------------------------------------------------------------
# GeneSelectR::set_reticulate_python()
# library(GeneSelectR)
# library(dplyr)
# # the rest of the code
## -----------------------------------------------------------------------------
# X <- UrbanRandomSubset %>% dplyr::select(-treatment) # get the feature matrix
# y <- UrbanRandomSubset['treatment'] # store the data point label in a separate vector
## -----------------------------------------------------------------------------
# y <- as.factor(y)
# y <- as.integer(y)
## -----------------------------------------------------------------------------
# y <- as.vector(y)
## -----------------------------------------------------------------------------
# selection_results <- GeneSelectR::GeneSelectR(X = X,
# y = y,
# njobs = -1) # all cores will be used
# selection_results
## -----------------------------------------------------------------------------
# fs_param_grids <- list(
# "Lasso" = list(
# "feature_selector__estimator__C" = c(0.01, 0.1, 1L, 10L),
# "feature_selector__estimator__solver" = c('liblinear','saga')
# ),
# "Univariate" = list(
# "feature_selector__param" = seq(50L, 200L, by = 50L)
# ),
# "boruta" = list(
# "feature_selector__perc" = seq(80L, 100L, by = 10L),
# 'feature_selector__n_estimators' = c(50L, 100L, 250L, 500L)
# ),
# "RandomForest" = list(
# "feature_selector__estimator__n_estimators" = seq(100L, 500L,by = 50L),
# "feature_selector__estimator__max_depth" = c(10L, 20L, 30L),
# "feature_selector__estimator__min_samples_split" = c(2L, 5L, 10L),
# "feature_selector__estimator__min_samples_leaf" = c(1L, 2L, 4L),
# "feature_selector__estimator__bootstrap" = c(TRUE, FALSE)
# )
# )
## -----------------------------------------------------------------------------
# selection_results <- GeneSelectR::GeneSelectR(X = X,
# y = y,
# njobs = -1,
# search_type = 'grid')
## -----------------------------------------------------------------------------
# selection_results <- GeneSelectR::GeneSelectR(X = X,
# y = y,
# njobs = -1,
# search_type = 'bayesian')
## -----------------------------------------------------------------------------
# # sklearn is already imported when the library is loaded
# # define the feature selection submodule and wanted methods with an estimator
# feature_selection <- sklearn$feature_selection
# select_from_model <- feature_selection$SelectFromModel
# RFE <- feature_selection$RFE
# rf <- sklearn$ensemble$RandomForestClassifier
#
# # feature selection methods of your choice
# my_methods <- list('RFE' = RFE(estimator = rf(), n_features_to_select = 100L),
# 'SelectFromModel' = select_from_model(estimator = rf()))
#
## -----------------------------------------------------------------------------
# # params for the feature selection methods of your choice
# my_params <- list('RFE' = list('feature_selector__step' = seq(0.1, 0.001, 1, 10)),
# 'SelectFromModel' = list('feature_selector__estimator__n_estimators' = c(50L, 100L, 250L, 500L),
# "feature_selector__estimator__max_depth" = c(10L, 20L, 30L),
# "feature_selector__estimator__bootstrap" = c(TRUE, FALSE))
# )
## -----------------------------------------------------------------------------
# selection_results <- GeneSelectR::GeneSelectR(X = X,
# y = y,
# njobs = -1,
# feature_selection_methods=my_methods,
# fs_param_grids = my_params)
## -----------------------------------------------------------------------------
# minmax <- sklearn$preprocessing$MinMaxScaler()
# varthr <- sklearn$feature_selection$VarianceThreshold()
# preprocessing <- list( 'MinMaxScaler' = minmax,
# 'VarianceThreshold' = varthr)
#
# selection_results <- GeneSelectR::GeneSelectR(X = X,
# y = y,
# njobs = -1,
# feature_selection_methods=my_methods,
# fs_param_grids = my_params,
# preprocessing_steps = preprocessing)
## -----------------------------------------------------------------------------
# # import xgboost
# # NOTE: it should be installed in the working conda env
# xgb <- reticulate::import('xgboost')
# xgb.classifier <- xgb$XGBClassifier()
#
# selection_results <- GeneSelectR::GeneSelectR(X = X,
# y = y,
# njobs = -1,
# classifier = xgb.classifier)
## -----------------------------------------------------------------------------
# xgb <- reticulate::import('xgboost')
# xgb.classifier <- xgb$XGBClassifier()
#
# xgb_param_grid <- list(
# "classifier__learning_rate" = c(0.01, 0.05, 0.1),
# "classifier__n_estimators" = c(100L, 200L, 300L),
# "classifier__max_depth" = c(3L, 5L, 7L),
# "classifier__min_child_weight" = c(1L, 3L, 5L),
# "classifier__gamma" = c(0, 0.1, 0.2),
# "classifier__subsample" = c(0.8, 1.0),
# "classifier__colsample_bytree" = c(0.8, 1.0)
# )
#
# selection_results <- GeneSelectR::GeneSelectR(X = X,
# y = y,
# njobs = -1,
# classifier = xgb.classifier,
# classifier_grid = xgb_param_grid)
## -----------------------------------------------------------------------------
# plot_feature_importance(selection_results, top_n_features = 10)
## -----------------------------------------------------------------------------
# plot_metrics(selection_results)
#
# # or access it as a dataframe
# selection_results$test_metrics
# selection_results$cv_mean_score
## -----------------------------------------------------------------------------
# overlap <- calculate_overlap_coefficients(selection_results)
# overlap
## -----------------------------------------------------------------------------
# plot_overlap_heatmaps(overlap)
## -----------------------------------------------------------------------------
# custom_list <- list(custom_list = c('char1','char2','char3','char4','char5'),
# custom_list2 = c('char1','char2','char3','char4','char5'))
# overlap1 <- calculate_overlap_coefficients(selection_results, custom_lists = custom_list)
# plot_overlap_heatmaps(overlap1)
## -----------------------------------------------------------------------------
# upset_upet(selection_results)
#
# # plot upset with custom lists
# upset_upet(selection_results, custom_lists = custom_list)
#
## -----------------------------------------------------------------------------
# # Get the annotations for the genes in the analysis
# # The example dataset contains sequencing from humans
# ah <- AnnotationHub::AnnotationHub()
# human_ens <- AnnotationHub::query(ah, c("Homo sapiens", "EnsDb"))
# human_ens <- human_ens[['AH98047']]
# annotations_ahb <- ensembldb::genes(human_ens, return.type = "data.frame") %>%
# dplyr::select(gene_id,gene_name,entrezid,gene_biotype)
#
# annotations_df <- annotate_gene_lists(pipeline_results = selection_results,
# annotations_ahb = annotations_ahb,
# format = 'ENSEMBL')
# annotations_df
## -----------------------------------------------------------------------------
# # Formal class 'AnnotatedGeneLists' [package "GeneSelectR"] with 2 slots
# # ..@ inbuilt :List of 4
# # .. ..$ Lasso :Formal class 'GeneList' [package "GeneSelectR"] with 3 slots
# # .. .. .. ..@ SYMBOL : chr [1:54] "MCOLN3" "AGL" "NBPF26" "EXO1" ...
# # .. .. .. ..@ ENSEMBL : chr [1:54] "ENSG00000055732" "ENSG00000162688" "ENSG00000273136" "ENSG00000174371" ...
# # .. .. .. ..@ ENTREZID: chr [1:54] "55283" "178" "101060684" "9156" ...
# # .. ..$ Univariate :Formal class 'GeneList' [package "GeneSelectR"] with 3 slots
# # .. .. .. ..@ SYMBOL : chr [1:74] "CDA" "BMP8B" "MCOLN3" "AGL" ...
# # .. .. .. ..@ ENSEMBL : chr [1:74] "ENSG00000158825" "ENSG00000116985" "ENSG00000055732" "ENSG00000162688" ...
# # .. .. .. ..@ ENTREZID: chr [1:74] "978" "656" "55283" "178" ...
# # .. ..$ RandomForest:Formal class 'GeneList' [package "GeneSelectR"] with 3 slots
# # .. .. .. ..@ SYMBOL : chr [1:73] "CDA" "BMP8B" "MCOLN3" "NBPF26" ...
# # .. .. .. ..@ ENSEMBL : chr [1:73] "ENSG00000158825" "ENSG00000116985" "ENSG00000055732" "ENSG00000273136" ...
# # .. .. .. ..@ ENTREZID: chr [1:73] "978" "656" "55283" "101060684" ...
# # .. ..$ boruta :Formal class 'GeneList' [package "GeneSelectR"] with 3 slots
# # .. .. .. ..@ SYMBOL : chr [1:36] "MCOLN3" "AGL" "NBPF26" "BNIPL" ...
# # .. .. .. ..@ ENSEMBL : chr [1:36] "ENSG00000055732" "ENSG00000162688" "ENSG00000273136" "ENSG00000163141" ...
# # .. .. .. ..@ ENTREZID: chr [1:36] "55283" "178" "101060684" "149428" ...
# # ..@ permutation:List of 4
# # .. ..$ Lasso :Formal class 'GeneList' [package "GeneSelectR"] with 3 slots
# # .. .. .. ..@ SYMBOL : chr [1:4] "HBB" "" "ABCC12" "LTF"
# # .. .. .. ..@ ENSEMBL : chr [1:4] "ENSG00000244734" "ENSG00000259529" "ENSG00000140798" "ENSG00000012223"
# # .. .. .. ..@ ENTREZID: chr [1:4] "3043" "NA" "94160" "4057"
# # .. ..$ Univariate :Formal class 'GeneList' [package "GeneSelectR"] with 3 slots
# # .. .. .. ..@ SYMBOL : chr [1:19] "KAZN" "RGS16" "IGFN1" "CHI3L1" ...
# # .. .. .. ..@ ENSEMBL : chr [1:19] "ENSG00000189337" "ENSG00000143333" "ENSG00000163395" "ENSG00000133048" ...
# # .. .. .. ..@ ENTREZID: chr [1:19] "23254" "6004" "91156" "1116" ...
# # .. ..$ RandomForest:Formal class 'GeneList' [package "GeneSelectR"] with 3 slots
# # .. .. .. ..@ SYMBOL : chr [1:16] "ZBED6" "CYP2E1" "HBB" "OTOGL" ...
# # .. .. .. ..@ ENSEMBL : chr [1:16] "ENSG00000257315" "ENSG00000130649" "ENSG00000244734" "ENSG00000165899" ...
# # .. .. .. ..@ ENTREZID: chr [1:16] "100381270" "1571" "3043" "283310" ...
# # .. ..$ boruta :Formal class 'GeneList' [package "GeneSelectR"] with 3 slots
# # .. .. .. ..@ SYMBOL : chr [1:8] "CHI3L1" "HBB" "IDH1" "PTPRN" ...
# # .. .. .. ..@ ENSEMBL : chr [1:8] "ENSG00000133048" "ENSG00000244734" "ENSG00000138413" "ENSG00000054356" ...
# # .. .. .. ..@ ENTREZID: chr [1:8] "1116" "3043" "3417" "5798" ...
## -----------------------------------------------------------------------------
# print(annotated_df@inbuilt$RandomForest@ENTREZID)
## -----------------------------------------------------------------------------
# custom_list <- list(background = c('gene1', 'gene2', 'gene3', 'gene4'),
# DEGs = c('gene5','gene6','gene7'))
# annotations_df_with_custom <- annotate_gene_lists(pipeline_results = selection_results,
# annotations_ahb = annotations_ahb,
# format = 'ensembl_gene_name',
# custom_lists = custom_list)
# annotations_df_with_custom
## -----------------------------------------------------------------------------
# annotated_GO <- GO_enrichment_analysis(annotated_df)
## -----------------------------------------------------------------------------
# annotated_GO <- GO_enrichment_analysis(annotated_df,
# list_type = 'permutation', #run GO enrichment on permutation based selected features
# keyType = 'ENSEMBL', # run analysis with ENSEMBLIDs
# ont = 'BP') # run BP ontology
# annotated_GO
## -----------------------------------------------------------------------------
# annot_child_fractions <- compute_GO_child_term_metrics(GO_data = annotated_GO,
# GO_terms = c("GO:0002376", "GO:0044419"),
# plot = FALSE)
## -----------------------------------------------------------------------------
# str(annot_child_fractions)
# # 'data.frame': 12 obs. of 6 variables:
# # $ feature_list : chr "Lasso" "Univariate" "RandomForest" "boruta" ...
# # $ all_terms_number : int 5413 5421 5981 4643 1308 1752 5413 5421 5981 4643 ...
# # $ offspring_nodes_number: int 432 449 495 379 196 170 112 110 128 100 ...
# # $ offspring_terms : chr "GO:0001767;GO:0001768;GO:0001771;GO:0001773;GO:0001774;GO:0001776;GO:0001779;GO:0001780;GO:0001782;GO:0001787;G"| __truncated__ "GO:0001767;GO:0001768;GO:0001771;GO:0001773;GO:0001774;GO:0001776;GO:0001780;GO:0001782;GO:0001787;GO:0001909;G"| __truncated__ "GO:0001767;GO:0001768;GO:0001771;GO:0001773;GO:0001774;GO:0001776;GO:0001779;GO:0001782;GO:0001787;GO:0001865;G"| __truncated__ "GO:0001767;GO:0001768;GO:0001771;GO:0001773;GO:0001774;GO:0001776;GO:0001779;GO:0001780;GO:0001782;GO:0001865;G"| __truncated__ ...
# # $ fraction : num 7.98 8.28 8.28 8.16 14.98 ...
# # $ GO_term : chr "GO:0002376" "GO:0002376" "GO:0002376" "GO:0002376" ...
## -----------------------------------------------------------------------------
# # simplifyEnrichment produces a heatmap
# hmap <- run_simplify_enrichment(annotated_GO,
# method = 'lovain',
# measure = 'Resnik')
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.