# -*- tab-width:2;indent-tabs-mode:t;show-trailing-whitespace:t;rm-trailing-spaces:t -*-
# vi: set ts=2 noet:
library(plyr)
library(dplyr)
library(readr)
library(fdrtool)
library(BioChemPantry)
library(SEAR)
staging_directory <- get_staging_directory("chembl21")
target_info <- readr::read_tsv(
paste0(staging_directory, "/data/target_info.tsv"),
col_types = readr::cols(
.default = col_character(),
component_id = col_integer(),
tax_id = col_integer(),
iuphar_ids = col_integer(),
entrez_id = col_integer()))
sea_wrapper_fname <- paste0(getwd(), "/scripts/sea-wrapper.sh")
#' Submit a cluster job to compute all target vs. target SEA scores
submit_compute_sea_network <- function(
library_id,
fp_type,
config_files=NULL,
target_range="all",
verbose=TRUE,
dry_run=FALSE
){
sea_library_fname <- paste0(staging_directory, "/data/", library_id, ".sea")
sea_library <- unpack.library(
sea_library_fname, verbose=T)
sea_library <- sea_library$targets %>%
left_join(
sea_library$molecules,
by=c("compound"))
run_base <- paste0(
staging_directory, "/data/runs/",
library_id, "_vs_", library_id)
unlink(run_base, recursive=T, force=T)
dir.create(run_base)
if(verbose){
cat(
"Computing SEA target vs. target for all targets in library:\n",
"\tlibrary_id: ", library_id, "\n",
"\tfp_type: ", fp_type, "\n",
"\tconfig_files: ", paste(names(config_files), config_files, collapse=", ", sep=": "), "\n",
"\trun_base: ", run_base, "\n",
"\tsea_library_fname: ", sea_library_fname, "\n",
"\tdry_run: ", ifelse(dry_run, "yes", "no"), "\n",
sep="")
}
n_targets <- sea_library %>% distinct(target) %>% nrow
if(target_range == 'all'){
target_range <- paste0("1-", n_targets)
}
if(verbose){
cat(
"\tn_targets: ", n_targets, "\n",
"\ttarget_range: ", target_range, "\n",
sep="")
}
#### prepare inputs
if(verbose){
cat("Preparing run directories and data...\n")
}
if(!is.null(config_files)){
do.call(SEAR::write.config_files, c(dir=run_base, config_files))
}
outputs_path <- paste0(run_base, "/outputs")
unlink(outputs_path, recursive=T, force=T)
dir.create(outputs_path)
logs_path <-paste0(run_base, "/logs")
unlink(logs_path, recursive=T, force=T)
dir.create(logs_path)
inputs_path <- paste0(run_base, "/inputs")
unlink(inputs_path, recursive=T, force=T)
dir.create(inputs_path, recursive=T)
sea_library %>%
d_ply(c("target"), function(df){
target <- df$target[1]
target_fname <- paste0(inputs_path, "/", target, ".csv")
df %>%
select(compound, smiles) %>%
write.table(
target_fname,
quote=F,
sep=",",
row.names=F,
col.names=T)
})
cmd <- paste0(
sea_wrapper_fname, " ",
run_base, " ",
sea_library_fname, " ",
fp_type)
script <- paste0("
cd ", logs_path, "
qsub -t ", target_range, " ", cmd, "\n")
if(verbose){
cat("script:\n", script, sep="")
}
if(!dry_run){
system(paste0(script, "\n"))
if(verbose){
cat("Run submitted, to check if it is done do 'qstat'\n\n", sep="")
}
} else {
if(verbose){
cat("To submit run copy and paste in shell, then to check if it is done do 'qstat'\n\n", sep="")
}
}
run_base
}
collect_compute_sea_network <- function(
staging_directory,
library_id,
run_base,
target_info
){
scores <- list.files(paste0(run_base, "/outputs")) %>%
plyr::ldply(function(target){
df <- readr::read_csv(
paste0(run_base, "/outputs/", target, "/", target, ".csv.out.csv"),
col_types=cols(
`Query ID` = col_character(),
`Target ID` = col_character(),
`Affinity Threshold (nM)` = col_integer(),
`P-Value` = col_double(),
`Max Tc` = col_double(),
`Z-Score` = col_double(),
Name = col_character(),
Description = col_character())) %>%
dplyr::transmute(
target1 = `Target ID`,
target2 = `Query ID`,
affinity = `Affinity Threshold (nM)`,
Pvalue = `P-Value`,
MaxTC = `Max Tc`,
Zscore = `Z-Score`)
})
fdr <- expand.grid(
target1 = target_info$uniprot_entry,
target2 = target_info$uniprot_entry) %>%
dplyr::left_join(
scores %>% dplyr::select(target1, target2, Pvalue),
by=c("target1", "target2")) %>%
dplyr::mutate(
Pvalue = ifelse(is.na(Pvalue), runif(n()), Pvalue))
a <- fdrtool(fdr$Pvalue, statistic="pvalue")
fdr <- fdr %>%
dplyr::mutate(
Qvalue=a$qval,
Evalue=Pvalue * ((nrow(target_info) * nrow(target_info))/2 - nrow(target_info)) ) %>%
dplyr::select(-Pvalue)
# this is slow because there are lot of fdr values
scores <- scores %>%
left_join(fdr, by=c("target1", "target2"))
scores <- scores %>%
left_join(
target_info %>%
dplyr::select(
target1 = uniprot_entry,
entrez_id1 = entrez_id,
gene_name1 = gene_name,
description1 = description,
target1_class_1 = class_1,
target1_class_2 = class_2,
target1_class_3 = class_3,
target1_class_4 = class_4,
target1_class_5 = class_5,
target1_class_6 = class_6),
by=c("target1")) %>%
left_join(
target_info %>%
dplyr::select(
target2 = uniprot_entry,
entrez_id2 = entrez_id,
gene_name2 = gene_name,
description2 = description,
target2_class_1 = class_1,
target2_class_2 = class_2,
target2_class_3 = class_3,
target2_class_4 = class_4,
target2_class_5 = class_5,
target2_class_6 = class_6),
by=c("target2")) %>%
dplyr::select(
target1,
target2,
MaxTC,
Qvalue,
entrez_id1,
gene_name1,
description1,
target1_class_1,
target1_class_2,
target1_class_3,
target1_class_4,
target1_class_5,
target1_class_6,
entrez_id2,
gene_name2,
description2,
target2_class_1,
target2_class_2,
target2_class_3,
target2_class_4,
target2_class_5,
target2_class_6,
Zscore,
Pvalue,
Evalue)
scores_fname <- paste0(staging_directory, "/data/", library_id, ".scores.csv")
scores %>% write_csv(scores_fname)
scores
}
run_base <- submit_compute_sea_network(
library_id="chembl21_ECFC6",
target_range='all',
fp_type='rdkit_ecfc',
config_files=list(fpcore="[rdkit_ecfc]\ncircle_radius=3\n"),
verbose=TRUE,
dry_run=TRUE)
collect_compute_sea_network(
staging_directory=staging_directory,
library_id="chembl21_ECFC6",
run_base=run_base,
target_info=target_info)
#######
run_base <- submit_compute_sea_network(
library_id="chembl21_APDP",
target_range='all',
fp_type='rdkit_apdp',
config_files=NULL,
verbose=TRUE,
dry_run=TRUE)
collect_compute_sea_network(
staging_directory=staging_directory,
library_id="chembl21_ECFC6",
run_base=run_base,
target_info=target_info)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.