#' Train an SVM classifier for miRNA prediction.
#'
#' \code{eMIRNA.Train} stacks the positive and negative feature tables, labels
#' their rows \code{"miRNA"} and \code{"Other"}, optionally rebalances the two
#' classes, splits the result 80/20 into training and testing partitions and
#' fits a linear SVM (\code{caret} method \code{"svmLinear"}) on the training
#' partition with 10-fold cross-validation over
#' \code{C = seq(0.01, 1, 0.1)}.
#'
#' The function writes three files into \code{~/eMIRNA/SVM_Results/} (the
#' directory is created when missing): \code{training.csv}, \code{testing.csv}
#' and \code{SVM.rds} (the fitted model). The working directory of the calling
#' session is left untouched. The random seed is fixed with
#' \code{set.seed(1234)} before resampling and partitioning.
#'
#' @param pos Table (data frame or matrix, one row per sequence) with the
#' calculated Features from known positive miRNA sequences, filtered by length
#' and Secondary Folding Structure. The table is used directly through
#' \code{rbind()} and \code{nrow()}; this function does not read files itself.
#'
#' @param neg Table with the calculated Features from other known negative
#' non-coding sequences, filtered by length and Secondary Folding Structure.
#' Must have the same columns as \code{pos}.
#'
#' @param imbalance Imbalance correction algorithm for equilibrating positive
#' and negative sequences. One of \code{"none"} (default, no resampling),
#' \code{"smote"}, \code{"adasyn"}, \code{"bdlsmote1"}, \code{"bdlsmote2"},
#' \code{"mwmote"}, \code{"ros"}, \code{"rwo"} or \code{"slsmote"}. Every
#' option other than \code{"none"} runs \code{bimba::sampling_sequence()} with
#' \code{"NRAS"} followed by the named algorithm; \code{"bdlsmote1"} and
#' \code{"bdlsmote2"} use \code{"BDLSMOTE"} with \code{borderline = 1} and
#' \code{borderline = 2} respectively.
#'
#' @return The fitted \code{caret} \code{train} object. If \code{imbalance} is
#' not one of the supported values, a message is emitted, nothing is written
#' and \code{NULL} is returned invisibly.
#'
#' @examples
#' \dontrun{
#' eMIRNA.Train(Positive, Negative, imbalance = "smote")
#' }
#'
#' @import caret
#' @import bimba
#' @import LiblineaR
#' @importFrom stats predict
#' @importFrom utils write.table
#' @export
eMIRNA.Train <- function(pos, neg, imbalance = "none") {
  message("Training SVM model")

  # One entry per supported `imbalance` value. `algorithms` is the sequence
  # handed to sampling_sequence(); `parameters` is supplied only where the
  # algorithm needs a non-default setting. An empty entry means "no resampling".
  samplers <- list(
    smote     = list(algorithms = c("NRAS", "SMOTE")),
    adasyn    = list(algorithms = c("NRAS", "ADASYN")),
    bdlsmote1 = list(algorithms = c("NRAS", "BDLSMOTE"),
                     parameters = list(list(), list(borderline = 1))),
    bdlsmote2 = list(algorithms = c("NRAS", "BDLSMOTE"),
                     parameters = list(list(), list(borderline = 2))),
    mwmote    = list(algorithms = c("NRAS", "MWMOTE")),
    ros       = list(algorithms = c("NRAS", "ROS")),
    rwo       = list(algorithms = c("NRAS", "RWO")),
    slsmote   = list(algorithms = c("NRAS", "SLSMOTE")),
    none      = list()
  )

  # Reject unsupported choices before touching the file system or the data.
  is_supported <- is.character(imbalance) && length(imbalance) == 1L &&
    !is.na(imbalance) && imbalance %in% names(samplers)
  if (!is_supported) {
    message("SVM Training Failed. Please provide a correct algorithm for class imbalance resolution.")
    return(invisible(NULL))
  }

  # Create the output directory without changing the caller's working
  # directory; every output below is written through an explicit path.
  out_dir <- file.path("~", "eMIRNA", "SVM_Results")
  dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)

  # Convert to a data frame BEFORE attaching the labels: cbind()-ing a
  # character vector onto a numeric matrix would turn every feature into text.
  final.table <- as.data.frame(rbind(pos, neg))
  final.table$class <- factor(
    c(rep("miRNA", nrow(pos)), rep("Other", nrow(neg)))
  )

  # Fixed seed so resampling, partitioning and CV folds are reproducible.
  set.seed(1234)

  sampler <- samplers[[imbalance]]
  if (!is.null(sampler[["algorithms"]])) {
    if (is.null(sampler[["parameters"]])) {
      final.table <- sampling_sequence(final.table,
                                       algorithms = sampler[["algorithms"]])
    } else {
      final.table <- sampling_sequence(final.table,
                                       algorithms = sampler[["algorithms"]],
                                       parameters = sampler[["parameters"]])
    }
  }

  # 80/20 split on the class label.
  intrain <- createDataPartition(y = final.table$class, p = 0.8, list = FALSE)
  training <- final.table[intrain, ]
  testing <- final.table[-intrain, ]

  # SVM Linear: 10-fold CV over the cost grid.
  grid_lineal <- expand.grid(C = seq(0.01, 1, 0.1))
  trctrl <- trainControl(method = "cv", number = 10,
                         savePredictions = TRUE, classProbs = TRUE)
  svm_Linear <- train(class ~ ., data = training, method = "svmLinear",
                      trControl = trctrl,
                      tuneGrid = grid_lineal,
                      tuneLength = 10)

  # Persist both partitions and the fitted model.
  write.table(training, file.path(out_dir, "training.csv"),
              sep = ",", quote = FALSE, col.names = NA)
  write.table(testing, file.path(out_dir, "testing.csv"),
              sep = ",", quote = FALSE, col.names = NA)
  saveRDS(svm_Linear, file.path(out_dir, "SVM.rds"))

  svm_Linear
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.