# R/eMIRNA.Train.R

#' Train an SVM classifier for miRNA prediction.
#'
#' \code{eMIRNA.Train} stacks the positive and negative feature tables, labels
#' their rows \code{"miRNA"} and \code{"Other"}, optionally rebalances the
#' classes, splits the data 80/20 into training and testing sets, and tunes a
#' linear SVM (caret method \code{"svmLinear"}) over
#' \code{C = seq(0.01, 1, 0.1)} with 10-fold cross-validation.
#'
#' The training set, the testing set and the fitted model are written to
#' \code{~/eMIRNA/SVM_Results/} as \code{training.csv}, \code{testing.csv} and
#' \code{SVM.rds}; the directory is created when missing. The random seed is
#' set to 1234 before resampling and partitioning. The working directory of
#' the calling session is left untouched.
#'
#' @param pos Data frame (or matrix) of features calculated from known
#' positive miRNA sequences, one row per sequence. It must have the same
#' columns as \code{neg}. A file path is not read by this function: load the
#' feature table first and pass the resulting object.
#'
#' @param neg Data frame (or matrix) of features calculated from known
#' negative non-coding sequences, one row per sequence, with the same columns
#' as \code{pos}.
#'
#' @param imbalance Imbalance correction algorithm for equilibrating positive
#' and negative sequences. One of \code{"none"} (default, no resampling),
#' \code{"smote"}, \code{"adasyn"}, \code{"bdlsmote1"}, \code{"bdlsmote2"},
#' \code{"mwmote"}, \code{"ros"}, \code{"rwo"} or \code{"slsmote"}. Every
#' option other than \code{"none"} runs \code{sampling_sequence} with
#' \code{"NRAS"} followed by the selected algorithm; \code{"bdlsmote1"} and
#' \code{"bdlsmote2"} run \code{"BDLSMOTE"} with \code{borderline = 1} and
#' \code{borderline = 2} respectively.
#'
#' @return The fitted model object returned by \code{train}. When
#' \code{imbalance} is not one of the supported values a message is emitted,
#' nothing is written, and \code{NULL} is returned invisibly.
#'
#' @examples
#' \dontrun{
#' eMIRNA.Train(Positive, Negative, imbalance = "smote")
#' }
#'
#' @import caret
#' @import bimba
#' @import LiblineaR
#' @importFrom stats predict
#' @importFrom utils write.table
#' @export
eMIRNA.Train <- function(pos, neg, imbalance = "none") {
  message("Training SVM model")

  # Oversampling step that follows "NRAS" for each supported `imbalance`
  # value, plus the extra arguments handed to that step (if any).
  oversamplers <- list(
    smote     = list(algorithm = "SMOTE"),
    adasyn    = list(algorithm = "ADASYN"),
    bdlsmote1 = list(algorithm = "BDLSMOTE",
                     parameters = list(borderline = 1)),
    bdlsmote2 = list(algorithm = "BDLSMOTE",
                     parameters = list(borderline = 2)),
    mwmote    = list(algorithm = "MWMOTE"),
    ros       = list(algorithm = "ROS"),
    rwo       = list(algorithm = "RWO"),
    slsmote   = list(algorithm = "SLSMOTE")
  )

  # Validate the option before touching the file system or the data, so an
  # unsupported value has no side effects beyond the message.
  supported <- is.character(imbalance) && length(imbalance) == 1L &&
    !is.na(imbalance) && imbalance %in% c("none", names(oversamplers))
  if (!supported) {
    message("SVM Training Failed. Please provide a correct algorithm for class imbalance resolution.")
    return(invisible(NULL))
  }

  # Non-tabular input (e.g. a file path) has no rows to label; fail with a
  # clear message instead of an obscure error further down.
  if (is.null(nrow(pos)) || is.null(nrow(neg))) {
    stop("`pos` and `neg` must be feature tables (data frames or matrices).",
         call. = FALSE)
  }

  # All outputs use this absolute location, so the caller's working directory
  # never needs to change.
  out_dir <- "~/eMIRNA/SVM_Results"
  dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)

  # Stack the two feature tables and append the class label as the last
  # column. Assigning the column directly keeps the features numeric even
  # when the inputs are matrices.
  final.table <- as.data.frame(rbind(pos, neg))
  final.table$class <- factor(
    c(rep("miRNA", nrow(pos)), rep("Other", nrow(neg)))
  )

  # Fixed seed so resampling, partitioning and CV folds are reproducible.
  set.seed(1234)

  if (imbalance != "none") {
    spec <- oversamplers[[imbalance]]
    algorithms <- c("NRAS", spec$algorithm)
    if (is.null(spec$parameters)) {
      final.table <- sampling_sequence(final.table, algorithms = algorithms)
    } else {
      # One parameter list per algorithm: none for "NRAS", then the
      # oversampler's own settings.
      final.table <- sampling_sequence(
        final.table,
        algorithms = algorithms,
        parameters = list(list(), spec$parameters)
      )
    }
  }

  # 80/20 split into training and testing rows.
  intrain <- createDataPartition(y = final.table$class, p = 0.8, list = FALSE)
  training <- final.table[intrain, ]
  testing <- final.table[-intrain, ]

  # SVM Linear: tune the cost parameter with 10-fold cross-validation.
  # The original call also passed tuneLength = 10; it is omitted here because
  # the explicit tuneGrid already defines the candidate values.
  grid_lineal <- expand.grid(C = seq(0.01, 1, 0.1))
  trctrl <- trainControl(method = "cv", number = 10,
                         savePredictions = TRUE, classProbs = TRUE)
  svm_Linear <- train(class ~ ., data = training, method = "svmLinear",
                      trControl = trctrl,
                      tuneGrid = grid_lineal)

  # Persist both partitions and the fitted model.
  write.table(training, file.path(out_dir, "training.csv"),
              sep = ",", quote = FALSE, col.names = NA)
  write.table(testing, file.path(out_dir, "testing.csv"),
              sep = ",", quote = FALSE, col.names = NA)
  saveRDS(svm_Linear, file.path(out_dir, "SVM.rds"))

  svm_Linear
}
# emarmolsanchez/eMIRNA_Rmodules documentation built on May 14, 2019, 5 a.m.