#' DrugPrediction Test
#'
#' version 2
#' @param path_to_bcclsmmart (string):path to FINAL_norm.txt run through normalizeRUV in SMMARTFunctions.R
#' @param cell_line_response (string):cell_line_response_threshold_0.50_large_and_small_screen.RData
#' @param state (string): path to "smmart_trained_machine_learning_model.RData" from drugPredTrain
#' @param path_to_targetid (string): path to target_id.txt
#' @param path_to_trusight (string): path to gene list Trusight.csv
#' @param threshold (numeric): 0.25, 0.5, 0.75
#' @return Y_predicted(data.frame): prediction
#'
#' @export
drugPredTest<- function(path_to_bcclsmmart,
cell_line_response = "cell_line_response_threshold_0.50_large_and_small_screen.RData",
state = "smmart_trained_machine_learning_model.RData",
kbmtl_train ="kbmtl_semisupervised_classification_variational_train.R",
kbmtl_test = "kbmtl_semisupervised_classification_variational_test.R",
path_to_targetid = "target_id.txt",
path_to_trusight = "Trusight_genes.csv",
threshold = 0.5){
###########TESTING
normalization_type <- "none"
classification_type <- "trusight"
threshold <- threshold
#Gene Names
target_id <- read.csv(path_to_targetid, header = TRUE, sep = "\t", stringsAsFactors = FALSE)
target_id <- target_id[which(target_id$X.7 == "protein_coding"),]
target_id <- unique(target_id[, c("X.1", "X.5")])
rownames(target_id) <- target_id$X.1
#Drug Screen data, creates cell_line_response matrix
load(cell_line_response)
JWGray_GR50 <- cell_line_response
#Training
JWGray_RNASeq <- read.csv(path_to_bcclsmmart, row.names = 1, check.names = FALSE, sep = "\t")
JWGray_RNASeq <- JWGray_RNASeq[, grepl("ACXX|AAXX", colnames(JWGray_RNASeq))]
colnames(JWGray_RNASeq) <- toupper(sapply(colnames(JWGray_RNASeq), function(name) {strsplit(name, split = "_", fixed = TRUE)[[1]][1]}))
colnames(JWGray_RNASeq)[1:7] <- substr(colnames(JWGray_RNASeq)[1:7], 2, nchar(colnames(JWGray_RNASeq)[1:7]))
JWGray_RNASeq <- t(JWGray_RNASeq)
JWGray_RNASeq <- matrix(as.numeric(JWGray_RNASeq), nrow = nrow(JWGray_RNASeq), ncol = ncol(JWGray_RNASeq), dimnames = list(rownames(JWGray_RNASeq), colnames(JWGray_RNASeq)))
#Commmon Drug Screen and Cell lines
common_cell_lines <- intersect(rownames(JWGray_RNASeq), rownames(JWGray_GR50))
JWGray_RNASeq <- JWGray_RNASeq[common_cell_lines,]
JWGray_GR50 <- JWGray_GR50[common_cell_lines,]
#Test Data
SMMART_RNASeq <- read.csv(path_to_bcclsmmart, row.names = 1, check.names = FALSE, sep = "\t")
SMMART_RNASeq = SMMART_RNASeq[, !grepl("ACXX|AAXX", colnames(SMMART_RNASeq))]
SMMART_RNASeq = SMMART_RNASeq[, !grepl("RNA170328LH|RNA170329LH|^X170322_NS500642", colnames(SMMART_RNASeq))]
SMMART_RNASeq <- t(SMMART_RNASeq)
SMMART_RNASeq <- matrix(as.numeric(SMMART_RNASeq), nrow = nrow(SMMART_RNASeq), ncol = ncol(SMMART_RNASeq), dimnames = list(rownames(SMMART_RNASeq), colnames(SMMART_RNASeq)))
SMMART_RNASeq <- SMMART_RNASeq[grepl(rownames(SMMART_RNASeq), pattern = "UHR|horizon") == FALSE,]
hits <- which(rownames(target_id) %in% colnames(JWGray_RNASeq))
JWGray_RNASeq <- JWGray_RNASeq[,rownames(target_id)[hits]]
SMMART_RNASeq <- SMMART_RNASeq[,rownames(target_id)[hits]]
colnames(JWGray_RNASeq) <- target_id$X.5[hits]
colnames(SMMART_RNASeq) <- target_id$X.5[hits]
JWGray_RNASeq <- log2(JWGray_RNASeq + 1)
SMMART_RNASeq <- log2(SMMART_RNASeq + 1)
X_train <- scale(JWGray_RNASeq)
Y_train <- JWGray_GR50
frequencies <- colSums(Y_train == 1, na.rm = TRUE) / colSums(is.na(Y_train) == FALSE)
Y_train <- Y_train[,which(frequencies > 0.05 & frequencies < 0.95)]
X_test <- scale(SMMART_RNASeq)
valid_genes <- which(colSums(is.na(X_train)) == 0)
X_train <- X_train[, valid_genes]
X_test <- X_test[, valid_genes]
X_test[is.na(X_test)] <- 0
if (classification_type == "trusight") {
trusight_genes <- read.csv(path_to_trusight, header = FALSE, stringsAsFactors = FALSE)[,1]
X_train <- X_train[, colnames(X_train) %in% trusight_genes]
X_test <- X_test[, colnames(X_test) %in% trusight_genes]
}
K_train <- X_train %*% t(X_train)
K_test <- X_train %*% t(X_test)
normalizer <- max(abs(K_train))
K_train <- K_train / normalizer
K_test <- K_test / normalizer
###########TRAINING
load(file=state)
prediction <- kbmtl_semisupervised_classification_variational_test(K_test, state)
Y_predicted <- as.data.frame(prediction$P)
colnames(Y_predicted) <- colnames(Y_train)
return(Y_predicted)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.