#' @export
#' @importFrom Rfast colRanks
#' @importFrom dplyr summarise id desc
#' @importFrom plotly group_by
#' @import octad.db
#' @importFrom ExperimentHub ExperimentHub
#' @importFrom octad.db get_ExperimentHub_data
#' @importFrom utils write.csv data txtProgressBar read.csv2 head read.csv
#' @importFrom grDevices pdf
#### runsRGES #######
runsRGES <- function(dz_signature = NULL, choose_fda_drugs = FALSE, max_gene_size = 500, cells = NULL, output = FALSE,
outputFolder = NULL, weight_cell_line = NULL, permutations = 10000) {
if (missing(dz_signature)) {
stop("Disease signature input not found")
}
if (is.null(dz_signature$Symbol) | is.null(dz_signature$log2FoldChange)) {
stop("Either Symbol or log2FoldChange collumn in Disease signature is missing")
}
# output check
if (output == TRUE & is.null(outputFolder)) {
outputFolder <- tempdir()
message("outputFolder is NULL, writing output to tempdir()")
} else if (output == TRUE & !is.null(outputFolder)) {
if (output == TRUE & !dir.exists(outputFolder)) {
dir.create(outputFolder)
} else if (output == TRUE & dir.exists(outputFolder)) {
warning("Existing directory ", outputFolder, " found, containtment might be overwritten")
}
}
message("Started sRGES computation. Average computation time ~1-3mins.")
start_time <- Sys.time()
# write paths
if (output == TRUE) {
output_path <- file.path(outputFolder, paste0("/all_", paste(cells, collapse = "_"), "_lincs_score.csv"))
sRGES_output_path <- file.path(outputFolder, paste0("sRGES", cells, ".csv", collapse = "_"))
sRGES_output_path_drug <- file.path(outputFolder, "sRGES_FDAapproveddrugs.csv")
dz_sig_output_path <- file.path(outputFolder, "dz_sig_used.csv")
}
lincs_sig_info <- get_ExperimentHub_data("EH7270") # bioconductor addition
getsRGES <- function(RGES, cor, pert_dose, pert_time, diff, max_cor) {
sRGES <- RGES
pert_time <- ifelse(pert_time < 24, "short", "long")
pert_dose <- ifelse(pert_dose < 10, "low", "high")
if (pert_time == "short" & pert_dose == "low") {
sRGES <- sRGES + diff[4]
}
if (pert_dose == "low" & pert_time == "long") {
sRGES <- sRGES + diff[2]
}
if (pert_dose == "high" & pert_time == "short") {
sRGES <- sRGES + diff[1]
}
return(sRGES * cor/max_cor)
}
cmap_score_ultimate <- function(sig_up, sig_down, drug_signature) {
num_genes <- length(drug_signature)
ks_up <- 0
ks_down <- 0
connectivity_score <- 0
drug_signature <- rank(drug_signature)
up_tags_rank <- drug_signature[as.vector(sig_up)]
down_tags_rank <- drug_signature[as.vector(sig_down)]
up_tags_position <- sort(up_tags_rank)
down_tags_position <- sort(down_tags_rank)
num_tags_up <- length(up_tags_position)
num_tags_down <- length(down_tags_position)
if (num_tags_up > 1) {
a_up <- 0
b_up <- 0
a_up <- max(vapply(seq_len(num_tags_up), function(j) {
j/num_tags_up - up_tags_position[j]/num_genes
}, FUN.VALUE = numeric(1)))
b_up <- max(vapply(seq_len(num_tags_up), function(j) {
up_tags_position[j]/num_genes - (j - 1)/num_tags_up
}, FUN.VALUE = numeric(1)))
if (a_up > b_up) {
ks_up <- a_up
} else {
ks_up <- -b_up
}
} else {
ks_up <- 0
}
if (num_tags_down > 1) {
a_down <- 0
b_down <- 0
a_down <- max(vapply(seq_len(num_tags_down), function(j) {
j/num_tags_down - down_tags_position[j]/num_genes
}, FUN.VALUE = numeric(1)))
b_down <- max(vapply(seq_len(num_tags_down), function(j) {
down_tags_position[j]/num_genes - (j - 1)/num_tags_down
}, FUN.VALUE = numeric(1)))
if (a_down > b_down) {
ks_down <- a_down
} else {
ks_down <- -b_down
}
} else {
ks_down <- 0
}
if (ks_up == 0 & ks_down != 0) {
connectivity_score <- -ks_down
} else if (ks_up != 0 & ks_down == 0) {
connectivity_score <- ks_up
} else if (sum(sign(c(ks_down, ks_up))) == 0) {
connectivity_score <- ks_up - ks_down
} else {
connectivity_score <- ks_up - ks_down
}
return(connectivity_score)
}
if (!missing(cells)) {
lincs_sig_info$cell_id <- toupper(lincs_sig_info$cell_id)
lincs_sig_info <- subset(lincs_sig_info, lincs_sig_info$cell_id %in% cells)
} else if (!missing(cells) & nrow(lincs_sig_info) == 0) {
stop("Wrong cell line name. List of possible cell lines is available via command unique(octad.db::lincs_sig_info$cell_id)")
} else if (missing(cells)) {
cells <- "" # plug for older code version
}
lincs_signatures <- get_ExperimentHub_data("EH7271")
if (choose_fda_drugs) {
fda_drugs <- get_ExperimentHub_data("EH7269")
lincs_sig_info_FDA <- subset(lincs_sig_info, id %in% colnames(lincs_signatures) & tolower(lincs_sig_info$pert_iname) %in%
tolower(fda_drugs$pert_iname))
# FDAdf <- select(lincs_sig_info_FDA, lincs_sig_info_FDA$pert_id, lincs_sig_info_FDA$pert_iname)
FDAdf <- select(lincs_sig_info_FDA, c("pert_id", "pert_iname")) #the code above does not work
FDAdf <- unique(FDAdf) #[, seq_len(2)])
if (output == TRUE) {
write.csv(FDAdf, file = file.path(outputFolder, "FDA_approved_drugs.csv"), row.names = FALSE)
}
lincs_sig_info <- subset(lincs_sig_info, id %in% colnames(lincs_signatures))
} else {
lincs_sig_info <- subset(lincs_sig_info, id %in% colnames(lincs_signatures))
}
# remove duplicate instances
lincs_sig_info <- lincs_sig_info[!duplicated(lincs_sig_info$id), ]
sig.ids <- lincs_sig_info$id
#### compute RGES####
gene.list <- toupper(rownames(lincs_signatures))
dz_signature <- subset(dz_signature, dz_signature$Symbol %in% gene.list)
dz_genes_up <- subset(dz_signature, dz_signature$log2FoldChange > 0)
dz_genes_up <- dz_genes_up[order(dz_genes_up$log2FoldChange, decreasing = TRUE), ]
dz_genes_down <- subset(dz_signature, dz_signature$log2FoldChange < 0)
dz_genes_down <- dz_genes_down[order(dz_genes_down$log2FoldChange, decreasing = TRUE), ]
############### compute RGES caps gene selection to max gene size
if (nrow(dz_genes_up) > max_gene_size) {
dz_genes_up <- dz_genes_up %>%
head(max_gene_size)
}
if (nrow(dz_genes_down) > max_gene_size) {
dz_genes_down <- dz_genes_down %>%
head(max_gene_size)
}
if (output == TRUE) {
write.csv(rbind(dz_genes_up, dz_genes_down), dz_sig_output_path)
}
dz_cmap_scores <- NULL
cmap_exp_sig <- Rfast::colRanks(-1 * lincs_signatures, method = "max")
names.list <- list(rownames(lincs_signatures), colnames(lincs_signatures))
dimnames(cmap_exp_sig) <- names.list
pb <- txtProgressBar(min = 1, max = permutations, style = 3) # set progressbar
i <- 0
time_vector <- 0
cmap_exp_signature <- data.frame(ids = gene.list, rank = cmap_exp_sig[, as.vector(sig.ids)])
cmap_exp_sig <- as.data.frame(cmap_exp_sig)
cmap_exp_sig$ids <- NULL
dz_cmap_scores <- apply(cmap_exp_sig[as.vector(sig.ids)], 2, FUN = function(x) {
cmap_score_ultimate(dz_genes_up$Symbol, dz_genes_down$Symbol, drug_signature = x)
})
# random scores
lincs_signatures <- get_ExperimentHub_data("EH7271")
random_sig_ids <- sample(colnames(lincs_signatures), permutations, replace = TRUE)
random_cmap_scores <- NULL
cmap_exp_signature <- as.data.frame(Rfast::colRanks(-1 * lincs_signatures[, as.character(random_sig_ids)],
method = "max"))
rm(lincs_signatures) # free some memory
random_cmap_scores <- apply(cmap_exp_signature, 2, FUN = function(x) {
cmap_score_ultimate(sample(seq_len(length(dz_genes_up$Symbol)), replace = TRUE), sample(seq_len(length(dz_genes_down$Symbol)),
replace = TRUE), drug_signature = x)
})
p <- vapply(dz_cmap_scores, function(score) {
sum(random_cmap_scores < score)/length(random_cmap_scores)
}, FUN.VALUE = numeric(1))
padj <- p.adjust(p, "fdr")
results <- data.frame(id = sig.ids, cmap_score = dz_cmap_scores, p, padj)
results <- merge(results, lincs_sig_info, by = "id")
results <- results[order(results$cmap_score), ]
#################### bug plug
if (output == FALSE) {
output_path <- file.path(tempdir(), paste0("/all_", paste(cells, collapse = "_"), "_lincs_score.csv"))
write.csv(results, output_path) #bugfix, otherwise empty input passed to lincs_drug_prediction, fix ASAP
lincs_drug_prediction <- read.csv(output_path)
unlink(output_path) #delete csv plug
} else if (output == TRUE) {
write.csv(results, output_path)
lincs_drug_prediction <- read.csv(output_path)
}
#################### summarize RGES lincs_drug_prediction=results
lincs_drug_prediction_subset <- subset(lincs_drug_prediction, lincs_drug_prediction$pert_dose > 0 & lincs_drug_prediction$pert_time %in%
c(6, 24))
# pairs that share the same drug and cell id
lincs_drug_prediction_pairs <- merge(lincs_drug_prediction_subset, lincs_drug_prediction_subset, by = c("pert_iname",
"cell_id"))
# x is the reference
lincs_drug_prediction_pairs <- subset(lincs_drug_prediction_pairs, lincs_drug_prediction_pairs$id.x != lincs_drug_prediction_pairs$id.y &
lincs_drug_prediction_pairs$pert_time.x == 24 & lincs_drug_prediction_pairs$pert_dose.x == 10) # , select <- c('cmap_score.x', 'cmap_score.y', 'pert_dose.y', 'pert_time.y'))
# difference of RGES to the reference
lincs_drug_prediction_pairs$cmap_diff <- lincs_drug_prediction_pairs$cmap_score.x - lincs_drug_prediction_pairs$cmap_score.y
lincs_drug_prediction_pairs$dose <- round(log(lincs_drug_prediction_pairs$pert_dose.y, 2), 1)
# estimate difference
lincs_drug_prediction_pairs$dose_bin <- ifelse(lincs_drug_prediction_pairs$pert_dose.y < 10, "low", "high")
diff <- tapply(lincs_drug_prediction_pairs$cmap_diff, paste(lincs_drug_prediction_pairs$dose_bin, lincs_drug_prediction_pairs$pert_time.y),
mean)
# ignore weighting cell lines
if (!missing(weight_cell_line)) {
lincs_cell_line_weight <- weight_cell_line
pred <- merge(lincs_drug_prediction, lincs_cell_line_weight, by.x = "cell_id", by.y = 0)
} else {
pred <- lincs_drug_prediction
pred$medcor <- 1
}
pred$RGES <- vapply(seq_len(nrow(pred)), function(id) {
getsRGES(pred$cmap_score[id], pred$medcor[id], pred$pert_dose[id], pred$pert_time[id], diff, max(pred$medcor))
}, FUN.VALUE = numeric(1))
cmpd_freq <- table(pred$pert_iname)
pred <- subset(pred, pred$pert_iname %in% names(cmpd_freq[cmpd_freq > 0]))
pred_merged <- pred %>%
group_by(pred$pert_iname) %>%
dplyr::summarise(mean = mean(RGES), n = length(RGES), median = median(RGES), sd = sd(RGES)) #replace pred$RGES with RGES
pred_merged$sRGES <- pred_merged$mean
pred_merged <- pred_merged[order(pred_merged$sRGES), ]
# rename first column
colnames(pred_merged)[1] <- "pert_iname"
if (output == TRUE) {
write.csv(pred_merged, sRGES_output_path)
}
if (choose_fda_drugs) {
pred_merged_drug <- merge(fda_drugs, pred_merged, by = "pert_iname")
pred_merged_drug <- pred_merged_drug[order(pred_merged_drug$sRGES), ]
if (output == TRUE) {
write.csv(pred_merged_drug, sRGES_output_path_drug)
}
}
message("Finished computations in", round(Sys.time() - start_time, 2), units(Sys.time() - start_time), ",writing output")
return(pred_merged)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.