doc/reflect.R

## ---- echo = FALSE, message = FALSE--------------------------------------
knitr::opts_chunk$set(collapse = T, comment = "#>")
knitr::opts_chunk$set(fig.width = 4, fig.height = 4) 
options(tibble.print_min = 4L, tibble.print_max = 4L)
library(reflect)

## ------------------------------------------------------------------------
library(reflect)

# data of a EGFR-mutant cohort
egfr_data

## ------------------------------------------------------------------------
mat_value <- egfr_data$mat_value
df_sample <- egfr_data$df_sample
df_feature <- egfr_data$df_feature

## ----fig1, fig.height = 5, fig.width = 6---------------------------------
# plot tumor types of patients and cell lines
plot_bar_tumortypes_stack_tcga_ccl(df_sample)

## ------------------------------------------------------------------------
# Calculate gap statistic profile along a sequence of wbound and get the best wbound
# wbounds <- seq(1.1, sqrt(ncol(mat_value)), 100)
# gapstat_bestwbound <- get_best_wbound(mat_value, wbounds = wbounds)

# load a pre-computed result
gapstat_bestwbound <- egfr_result$gapstat_bestwbound
names(gapstat_bestwbound)

# plot gap statistic profile as a function of the number of features
# and gap statistic profile as a function of the tunning parameter wbound
plot_gapstat(gapstat_bestwbound$df_gapstat, plot_tuning_parameter = TRUE)

## ------------------------------------------------------------------------
# get the optimal wbound
best_wbound <- gapstat_bestwbound$best_wbound
best_wbound

## ------------------------------------------------------------------------
# do sparse hierarchical clustering
shc <- sparse_hclust(mat_value, best_wbound)

# plot weights of discriminant features
df_weight <- data.frame(Feature = names(shc$weight), 
                        Weight = shc$weight)
plot_bar_weights(df_weight)

## ----fig2, fig.height = 5, fig.width = 7.2-------------------------------
# load pre-defined color sets for tumor types
df_tcga_color <- reflect_color$df_tcga_color
df_ccl_color <- reflect_color$df_ccl_color

# load pre-defined color sets for categories
# here the categories are common variants and rare variants of samples
df_category_color <- reflect_color$df_category_color

# get the dendrogram generated by the sparse hierarchical clustering
cluster_rows <- as.dendrogram(egfr_result$shc$hc)

# plot heatmap
plot_heatmap(mat_value,
             df_sample = df_sample,
             df_weight = df_weight,
             cluster_rows = cluster_rows,
             df_tcga_color = df_tcga_color,
             df_ccl_color = df_ccl_color,
             df_category_color = df_category_color,
             category_name = "Variants",
             show_row_names = FALSE)

## ------------------------------------------------------------------------
# get the expression matrix that has been clustered 
mat_value_clustered <- shc$mat_value_clustered

# calculate recurrence P values
mat_recur_pval <- get_recur_pval(mat_value_clustered, df_feature)

mat_recur_pval[1:3, 1:3]

## ------------------------------------------------------------------------
# get the expression matrix that has been clustered 
recur_actionable <- get_recur_actionable_features(mat_value, 
                                                  mat_recur_pval, 
                                                  pval_threshold = 0.05,
                                                  df_feature = df_feature)

# recurrent and actionable features for each sample
df_recur_actionable <- recur_actionable$df_recur_actionable
df_recur_actionable[1:3, ]

## ------------------------------------------------------------------------
# recurrent and actionable features for each sample
df_coaltered_targets <- get_coaltered_targets(df_sample, 
                                              df_recur_actionable)
df_coaltered_targets[1:3, ]

## ------------------------------------------------------------------------
# run an end-to-end reflect pipeline
# res <- reflect_pipeline(mat_value, df_sample, df_feature)
# take take a long time, skip here

# load a pre-computed result
res <- egfr_result
names(res)

## ------------------------------------------------------------------------
# tunning parameter selection
# gapstat_bestwbound <- get_best_wbound(mat_value)
# take take a long time, skip here
best_wbound <- gapstat_bestwbound$best_wbound

# run a REFLECT pipeline given a precomputed tunning parameter
res <- reflect_pipeline2(best_wbound, mat_value, df_sample, df_feature)
names(res)
korkutlab/reflect documentation built on July 5, 2021, 7:38 a.m.