#' Gene selection based on variability
#'
#' @param disease_component_tumors A matrix with the disease component data of the entire dataset.
#' @param percent Percentile for gene selection.
#'
#' @return A vector containing the names of the selected genes.
#' @export
#'
#' @examples
#' \dontrun{
#' gene_selection(disease_component_tumors,0.99)
#' }
gene_selection <- function(disease_component_tumors,percent = 0.85){
MaxAbs595 <- apply(disease_component_tumors,1,function(x) base::max(base::abs(stats::quantile(x,c(0.05,0.95)))))
selected_genes <- names(MaxAbs595[MaxAbs595 > stats::quantile(MaxAbs595,percent)])
return(selected_genes)
}
#' gene_selection_surv
#'
#' Selects markers for mapper based on the product of standard deviation plus one times the Z score obtained thorugh fitting a cox proportional hazard model to the expression of each gene.
#'
#' @param D_Comp Disease component object
#' @param p_Data Data.frame with the phenotype data.
#' @param Status_Col_Name Column for sample filtering.
#' @param Status_Value Value for the sample filtering co-variate.
#' @param Cox_All Output from the cox_all_genes function.
#' @param n_top Number of markers presenting the maximum values for the product.
#' @param type_sel Select top from bottom and top or from absolute value.
#'
#' @return
#' @export
#'
#' @examples
#' \dontrun{
#' gene_selection_surv(D_Comp,p_Data,Status_Col_Name,Status_Value,Cox_All,n_top)
#' }
gene_selection_surv <- function(D_Comp,p_Data,Status_Col_Name,Status_Value,Cox_All,n_top,type_sel = c("Top_Bot","Abs")){
Cox_All <- Cox_All[rownames(D_Comp),]
if(type_sel == "Top_Bot"){
print("Is Top_Bot")
probes_test <- (apply(D_Comp[,p_Data[,Status_Col_Name] == Status_Value],1,sd)+1) * Cox_All[,4]
if(n_top %% 2 == 0){
n_top <- n_top / 2
print(n_top)
}else{
n_top <- (n_top + 1)/2
print(n_top)
}
selected_probes <- names(c(probes_test[order(probes_test,decreasing = T)][1:n_top],probes_test[order(probes_test,decreasing = F)][1:n_top]))
return(selected_probes)
}else if(type_sel == "Abs"){
print("Is Abs")
probes_test <- (apply(D_Comp[,p_Data[,Status_Col_Name] == Status_Value],1,sd)+1) * abs(Cox_All[,4])
selected_probes <- names(probes_test[order(probes_test,decreasing = T)])[1:n_top]
return(selected_probes)
}
}
#' Survival analysis based on gene expression levels.
#'
#' Carries out univariate cox protportional hazard models for the expression levels of each gene included in the dataset and its link with relapse-free or overall survival.
#'
#' @param eData Expression data for disease samples
#' @param time_vector Vector including time to relapse or time to death information
#' @param event_vector Numeric vector indicating if relapse or death have been produced.
#'
#' @return A matrix with the results of the application of proportional hazard models using the expression levels of eahc gene as covariate.
#' @export
#'
#' @examples
#' \dontrun{
#' cox_all_genes(expression_vector_disease,time_vector,event_vector)
#' }
cox_all_genes <- function(eData,time_vector,event_vector){
pb <- utils::txtProgressBar(min = 0, max = nrow(eData), style = 3)
list_out <- list()
for(i in 1:nrow(eData)){
utils::setTxtProgressBar(pb, i)
temp <- summary(survival::coxph(survival::Surv(time_vector,as.numeric(event_vector))~eData[i,]))$coefficients[1,]
list_out[[i]] <- temp
}
df_out <- data.frame(do.call("rbind",list_out))
colnames(df_out) <- c("coef","exp_coef","se_coef","z","Pr_z")
rownames(df_out) <-rownames(eData)
df_out <- as.matrix(df_out)
return(df_out)
}
#' Gene selector based on association to survival.
#'
#' @param cox_all A matrix output from the cox_all_genes function
#' @param percent A two element vector indicating the bottom and top quantiles for gene selection.
#'
#' @return Returns a list of genes that present z-values above or below the selected quantile thresholds.
#' @export
#'
#' @examples
#' \dontrun{
#' get_survival_related_genes(cox_all,percent)
#' }
get_survival_related_genes <- function(cox_all,percent = c(0.05,0.95)){
genes_asso_surv <- rownames(cox_all[cox_all[,"z"] < stats::quantile(cox_all[,"z"],probs = percent[1]) | cox_all[,"z"] > stats::quantile(cox_all[,"z"],probs = percent[2]),])
return(genes_asso_surv)
}
#' surivival_analysis_multiple_groups
#'
#' Survival analysis and plottding of the clustering results. https://www.reneshbedre.com/blog/survival-analysis.html
#'
#' @param pheno_data data.frame with phenotype data. Must include a columns called pCh_DFS_E, pCh_DFS_T, and pCh_Status including the event, time to relapse, and the status of the sample T: Disease NT: healthy
#' @param out_one_D Mapper output.
#' @param thr_groups Minimum number of samples to include a node in the analysis.
#' @param ylim_val y-axis limits for survival plot.
#' @param xlim_val x-axis limits for survival plot.
#' @param type Divide by node or by pam50 in the latter case a column pam50_frma containing the pam50 classification will be requested.
#' @param selected_nodes_2 subset of nodes to compare.
#'
#' @return
#' @export
#'
#' @examples
#' \dontrun{
#' surivival_analysis_multiple_groups(pheno_data,out_one_D,thr_groups = 50,ylim_val = c(0.5, 1),xlim_val = c(0,200),type = "node")
#' }
surivival_analysis_multiple_groups <- function(pheno_data,out_one_D,thr_groups = 50,ylim_val = c(0.5, 1),xlim_val = c(0,200),type = "node",selected_nodes_2 = ""){
univoq_group <- out_one_D$Unique_Samp_Node
p_merged <- merge(pheno_data,univoq_group,by.x = 1,by.y = 1 )
if(type == "node"){
group_colors <- ggplotColours(length(unique(p_merged$unique_cluster)))
print("Printing group colors: ")
print(group_colors)
names_for_group_colors <- unique(p_merged$unique_cluster)[order(as.numeric(unique(gsub("Node_","",p_merged$unique_cluster))))]
print("Printing names for group colors: ")
print(names_for_group_colors)
names(group_colors) <- names_for_group_colors
print("Printing named group colors: ")
print(group_colors)
print("Selecting nodes: ")
p_merged$unique_cluster <- factor(p_merged$unique_cluster,levels = names_for_group_colors)
p_merged <- p_merged[p_merged$pCh_Status == "T",]
p_merged <- p_merged[!(is.na(p_merged$pCh_DFS_E) | is.na(p_merged$pCh_DFS_T)),]
selected_nodes <- names(table(p_merged$unique_cluster) > thr_groups)[table(p_merged$unique_cluster) > thr_groups]
p_merged <- p_merged[p_merged$unique_cluster %in% selected_nodes,]
p_merged$pCh_DFS_T <- as.numeric(p_merged$pCh_DFS_T)
p_merged$pCh_DFS_E <- as.numeric(p_merged$pCh_DFS_E)
group_colors <- group_colors[selected_nodes]
p_merged <<- p_merged
fit <- survival::survfit(survival::Surv(time = p_merged$pCh_DFS_T, event = p_merged$pCh_DFS_E)~p_merged$unique_cluster)
surv = survival::Surv(time = as.numeric(p_merged$pCh_DFS_T), event = as.numeric(p_merged$pCh_DFS_E))
log_rank_test <- survival::survdiff(formula = survival::Surv(time = as.numeric(p_merged$pCh_DFS_T), event = as.numeric(p_merged$pCh_DFS_E)) ~ p_merged$unique_cluster)
plot_out <- survminer::ggsurvplot(fit = fit,data = p_merged, pval = TRUE, surv.median.line = "hv", xlab = "Survival time", ylab = "Survival probability",ylim = ylim_val,xlim = xlim_val,color = "unique_cluster",palette = unname(group_colors))
}
else if(type == "node_sub"){
group_colors <- ggplotColours(length(unique(p_merged$unique_cluster)))
print("Printing group colors: ")
print(group_colors)
names_for_group_colors <- unique(p_merged$unique_cluster)[order(as.numeric(unique(gsub("Node_","",p_merged$unique_cluster))))]
print("Printing names for group colors: ")
print(names_for_group_colors)
names(group_colors) <- names_for_group_colors
print("Printing named group colors: ")
print(group_colors)
print("Selecting nodes: ")
p_merged$unique_cluster <- factor(p_merged$unique_cluster,levels = names_for_group_colors)
p_merged <- p_merged[p_merged$pCh_Status == "T",]
p_merged <- p_merged[!(is.na(p_merged$pCh_DFS_E) | is.na(p_merged$pCh_DFS_T)),]
selected_nodes <- names(table(p_merged$unique_cluster) > thr_groups)[table(p_merged$unique_cluster) > thr_groups]
p_merged <- p_merged[p_merged$unique_cluster %in% selected_nodes,]
p_merged$pCh_DFS_T <- as.numeric(p_merged$pCh_DFS_T)
p_merged$pCh_DFS_E <- as.numeric(p_merged$pCh_DFS_E)
group_colors <- group_colors[selected_nodes_2]
print("Printing group colors for the selected nodes: ")
print(group_colors)
p_merged <- p_merged[p_merged$unique_cluster %in% selected_nodes_2,]
p_merged <<- p_merged
fit <- survival::survfit(survival::Surv(time = p_merged$pCh_DFS_T, event = p_merged$pCh_DFS_E)~p_merged$unique_cluster)
surv = survival::Surv(time = as.numeric(p_merged$pCh_DFS_T), event = as.numeric(p_merged$pCh_DFS_E))
log_rank_test <- survival::survdiff(formula = survival::Surv(time = as.numeric(p_merged$pCh_DFS_T), event = as.numeric(p_merged$pCh_DFS_E)) ~ p_merged$unique_cluster)
plot_out <- survminer::ggsurvplot(fit = fit,data = p_merged, pval = TRUE, surv.median.line = "hv", xlab = "Survival time", ylab = "Survival probability",ylim = ylim_val,xlim = xlim_val,color = "unique_cluster",palette = unname(group_colors))
}
else if(type == "pam"){
group_colors <- ggplotColours(length(unique(p_merged$pam50_frma)))
names(group_colors) <- c("Normal","LumA","LumB","Basal","Her2")
p_merged <- p_merged[p_merged$pam50_frma %in% c("Normal","LumA","LumB","Basal","Her2"),]
p_merged$pCh_DFS_T <- as.numeric(p_merged$pCh_DFS_T)
p_merged$pCh_DFS_E <- as.numeric(p_merged$pCh_DFS_E)
p_merged$pam50_frma <- factor(p_merged$pam50_frma,levels = c("Normal", "LumA", "LumB","Basal","Her2"))
p_merged <<- p_merged
fit <- survival::survfit(survival::Surv(time = p_merged$pCh_DFS_T, event = p_merged$pCh_DFS_E)~p_merged$pam50_frma)
surv = survival::Surv(time = as.numeric(p_merged$pCh_DFS_T), event = as.numeric(p_merged$pCh_DFS_E))
log_rank_test <- survival::survdiff(formula = survival::Surv(time = as.numeric(p_merged$pCh_DFS_T), event = as.numeric(p_merged$pCh_DFS_E)) ~ p_merged$pam50_frma)
plot_out <- survminer::ggsurvplot(fit = fit,data = p_merged, pval = TRUE, surv.median.line = "hv", xlab = "Survival time", ylab = "Survival probability",ylim = ylim_val,xlim = xlim_val,color = "pam50_frma",palette = unname(group_colors))
}
p_merged_temp <- p_merged
rm(list = "p_merged",envir = globalenv())
return(list(fit,p_merged_temp,surv,plot_out,log_rank_test))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.