pIR: pIR: Estimating Isoelectric Point (pI) of Peptide and Proteins from Amino Acid Sequence.

Documented in aaIndex defaultPeptideTrainData loadSVMModel pISVMpeptide pISVMsequences svmPIBuildData svmPIData transformData

#' defaultPeptideTrainData
#' This function acces to the data to trace train the SVM method

defaultPeptideTrainData <- function(){
    filePath <- system.file("extdata", "svmDataDefault.csv", package = "pIR")
    data <- read.table(file = filePath, sep = ",", header = TRUE)
    return(data)
}


#' loadSVMModel
#'
#' This function acces to a SVM model trained
#' @param model The model to be loaded
#'
#'
loadSVMModel <- function(model = "default"){

  if(model=="default"){
    filePath <- system.file("extdata", "svmModelDefault.rda", package = "pIR")
    svm <- load(file = filePath, .GlobalEnv)
    return(svm)
  }
  if(model=="heller"){
    filePath <- system.file("extdata", "svmModelHeller.rda", package = "pIR")
    svm <- load(file = filePath, .GlobalEnv)
    return(svm)
  }
  if(model=="branca"){
    filePath <- system.file("extdata", "svmModelBranca.rda", package = "pIR")
    svm <- load(file = filePath, .GlobalEnv)
    return(svm)
  }
  #otherwise return default trained model
  filePath <- system.file("extdata", "svmModelDefault.rda", package = "pIR")
  svm <- load(file = filePath, .GlobalEnv)
  return(svm)
}



#' transformData
#'
#' This function transform a new data (dataframe) using center and scale transformation.
#' This transformation is necesary previous to use svm method.
#'
#' @param instances The data to be transformed
#' @param defaultTrainingSet A flag to get the training set used in the transformation (if TRUE, default training set will be used)
#' @param trainingSet The training set used to apply transformation.
#'
#' @details If trainingSet flag is not "default", "heller" or "branca", the current data will be used to apply the transformation requeried
#' @details before use a SVM model, the set supply must contains the following variables: calibrated, expasy and aaindex.
#'
transformData <- function(instances = instance, trainingSet = "default"){

  if(trainingSet=="default"){
    #load default dataset
    filePath <- system.file("extdata", "svmDataSetDefault.rda", package = "pIR")
    load(filePath)

  }else if(trainingSet=="heller"){
    #load heller dataset
    filePath <- system.file("extdata", "svmHellerDataSet.rda", package = "pIR")
    load(filePath)

  }else if(trainingSet=="branca"){
    #load branca dataset
    filePath <- system.file("extdata", "svmBrancaDataSet.rda", package = "pIR")
    load(filePath)

  }else if(trainingSet=="current"){
    #otherwise, use the provided dataset to apply transformation...
    data <- instances
  }

   #retrive attributes from dataset. It must be equal to new instance attributes.
   peptides_propeties <- subset(data, select=c("calibrated", "expasy", "aaindex"))

   #create preProcess object
   preObject <- preProcess(peptides_propeties, method = c("center", "scale"))

   #process new instances using (trainingSet) option
   preData <- predict(preObject, newdata = instances)

   return(preData)
}




#' pISVMsequences
#'
#' This function predict the pI from multiple sequences contained into dataframe.
#'
#' @param df The dataset with sequences. It must contains the variables: "calibrated", "expasy" and "aaindex".
#' @param model The SVM-based model to be used in the prediction (use "default", "heller" or "branca" options)
#' @param newModel A flag enabling the posibility to choose a new model to be used.
#'
#' @details By default, this method use a svm-model from the setting of the "model" parameter and keeping
#' @details the parameter "newModel" = FALSE. However, it is possible to build a new svm model from the current dataset
#' @details setting "newModel"=TRUE. To do it, The input dataframe must contains the variables requeried to train
#' @details a svm model: "calibrated", "expasy" and "aaindex".
#'
pISVMsequences <- function(df = dataframe, model = "default", newModel = FALSE){

   if(!newModel){

    #loading default svm model
    svm <- loadSVMModel(model)

    #processing data...requeried previous to use svm model
    processedData <- transformData(instances = df, trainingSet = model)

    #predicting pI with svm model
     pI <- predict(svmModel, newdata = processedData)

    #build dataframe with new pI values predicted
    df$pIsvm <- as.vector(pI, mode = "any")

  }else{

    #build new svm model from current data
    svmModel <- svmPIData(data = df)

    #read varibles from dataframe
    sequences_propeties <- subset(df, select=c("calibrated", "expasy", "aaindex"))

    #processing data...requeried previous to use svm model
    processedData <- transformData(instances = sequences_propeties, trainingSet = "current")

    #predicting pI with svm model
    pI <- predict(svmModel, newdata = processedData)

    #build dataframe with new pI values predicted
    df$pIsvm <- as.vector(pI, mode = "any")

  }

  return(df)
}


#' pISVMpeptide
#'
#' This function predict the pI of a single sequence.
#'
#' @param sequence The sequence to be used
#' @param model The SVM-based model to be used in the prediction (use "default", "heller" or "branca" options)
#'
#'
pISVMpeptide <- function(sequence, model = "default"){
    #computing the features requeried to vectorize the peptide/protein
    aaindex      <- aaIndex(sequence = sequence)
    calibrated   <- pIBjell(sequence = sequence, pkSetMethod = "calibrated")
    expasy       <- pIBjell(sequence = sequence, pkSetMethod = "expasy")

    #loading svm-model
    svm  <- loadSVMModel(model = model)

    #Building sequence as a dataframe
    seq <- data.frame(calibrated, expasy, aaindex)

    #pre-processing new instance using the same model training-dataset processing
    seq <- transformData(instances = seq, trainingSet = model)

    pI <- predict(svmModel, newdata = seq)
    return(pI)
}

# dafaultTrainData
# This function acces to the data to trace train the SVM method

defaultProteinTrainData <- function(){
    filePath <- system.file("extdata", "svmProteinData.csv", package = "pIR")
    data <- read.table(file = filePath, sep = ",", header = TRUE)
    return(data)
}

# svmPIDefault
#
# This function train the original dataset from the SVM dataset a get the model

svmBuildPeptideData <- function(defaultModel = FALSE, method = "svmRadial", numberIter = 2){
    data <- defaultPeptideTrainData()
    if(defaultModel){
        loadDefaultModel()
        svmModel <- svmModel
    }else{
        load("data/svmDataSetDefault.rda")
        peptides_properties <- subset(data, select=c("calibrated", "expasy", "aaindex"))
        peptides_experimental <- subset(data, select=c("pIExp"))
        svmModel <- svmProfile(dfExp = peptides_experimental, dfProp = peptides_properties, method = method, numberIter = numberIter)

    }
    return(svmModel)
}

svmBuildProteinData <- function(loadData = FALSE, method = "svmRadial", numberIter = 2){
    data <- defaultProteinTrainData()
    if(loadData){
        data <- svmPIBuildSVM(originalData = data)
        save(data,file="data/svmProteinData.rda")
    }else{
        load("data/svmProteinData.rda")
    }
    peptides_properties <- subset(data, select=c("expasy", "skoog", "calibrated", "solomon", "rodwell", "emboss", "lehninger", "grimsley", "patrickios", "DtaSelect", "aaindex"))
    peptides_experimental <- subset(data, select=c("pIExp"))
    svmProfileValue <- svmProfile(dfExp = peptides_experimental, dfProp = peptides_properties, method = method, numberIter = numberIter)
    return(svmProfileValue)
}


#' svmPIBuildSVMData
#'
#' This function take a data frame in the way of: sequence pIExp. It return a dataframe with features to train a svm model
#' @param originalData The original dra frame
#'
svmPIBuildData <- function(originalData){

    data <- originalData

    colnames(data) <-c("sequence", "pIExp")

    #Add all the bjell methods and pk Sets
    data <- mdply(data, function(sequence, pIExp) { pIBjell(sequence = sequence, pkSetMethod = "calibrated") })

    colnames(data) <-c("sequence", "pIExp", "calibrated")

    #Add all the bjell methods and pk Sets
    data <- mdply(data, function(sequence, pIExp, calibrated) { pIBjell(sequence = sequence, pkSetMethod = "expasy") })

    colnames(data) <-c("sequence", "pIExp", "calibrated", "expasy")

    #Add all the bjell methods and pk Sets
    data <- mdply(data, function(sequence, pIExp, calibrated, expasy) { pIBjell(sequence = sequence, pkSetMethod = "skoog") })

    colnames(data) <-c("sequence", "pIExp", "calibrated", "expasy", "skoog")

    #Add all the bjell methods and pk Sets
    data <- mdply(data, function(sequence, pIExp, calibrated, expasy, skoog) { aaIndex(sequence = sequence) })

    colnames(data) <-c("sequence", "pIExp", "calibrated", "expasy", "skoog", "aaindex")

    #write.table(data, file = "data.csv", sep = ",", col.names = NA, qmethod = "double")

    #print(data)

    return(data)
}

#' svmPIData
#'
#' This function takes a data frame an return a model with the best variables.
#' The training process could be time-consuming.
#'
#' @param data The df
#'

svmPIData <- function(data){
    peptides_propeties <- subset(data, select=c("calibrated", "expasy", "aaindex"))
    peptides_experimental <- subset(data, select=c("pIExp"))
    svmProfileValue <- svmProfile(dfExp = peptides_experimental, dfProp = peptides_propeties, method = "svmRadial", numberIter = 2)
    return(svmProfileValue)
}



svmProfile <- function(dfExp, dfProp, method = "svmRadial", numberIter = 2){

    #load Data
    # This is the data file with the descriptors:
    peptides_desc <- as.matrix(dfProp);
    # This is the Data File with the Experimental Isoelectric Point
    peptides_class<-as.matrix(dfExp);

    #Scale and center data
    peptides_desc<- scale(peptides_desc,center=TRUE,scale=TRUE);

    #Divide the dataset in train and test sets

    # Create an index of the number to train
    inTrain <- createDataPartition(peptides_class, p = 3/4, list = FALSE)[,1];

    #Create the Training Dataset for Descriptors
    trainDescr <- peptides_desc[inTrain,];

    # Create the Testing dataset for Descriptors
    testDescr <- peptides_desc[-inTrain,];

    trainClass <- peptides_class[inTrain];
    testClass <- peptides_class[-inTrain];

    mod <- getModelInfo("svmRadial", regex = FALSE)[[1]]

    mod$predict <- function(modelFit, newdata, submodels = NULL) {
        svmPred <- function(obj, x) {
            hasPM <- !is.null(unlist(obj@prob.model))
            if(hasPM) {
                pred <- lev(obj)[apply(predict(obj, x, type = "probabilities"), 1, which.max)]
            } else pred <- predict(obj, x)
            pred
        }
        out <- try(svmPred(modelFit, newdata), silent = TRUE)
        if(is.character(lev(modelFit))) {
            if(class(out)[1] == "try-error") {
                warning("kernlab class prediction calculations failed; returning NAs")
                out <- rep("", nrow(newdata))
                out[seq(along = out)] <- NA
            }
        } else {
            if(class(out)[1] == "try-error") {
                warning("kernlab prediction calculations failed; returning NAs")
                out <- rep(NA, nrow(newdata))
            }
        }
        if(is.matrix(out)) out <- out[,1]
        out
    }

    #Support Vector Machine Object
    svmProfileValue <- rfe(trainDescr,trainClass, sizes = (1:3),rfeControl = rfeControl(functions = caretFuncs,number = numberIter, verbose = TRUE),method = mod);

    return (svmProfileValue)
}

#'aaIndex
#'
#' This function compute the isoelectric point using the aa Index
#' @param sequence The peptide sequence
#'
aaIndex <- function(sequence){

    sequence <- reformat(seq = sequence)
    sequence <- toupper(sequence)

    #aaV <- strsplit(sequence, "", fixed = TRUE)
    #aaNTerm <- aaV[[1]][1]
    #aaCTerm <- aaV[[1]][nchar(sequence)]

    aaZimmermanDes <- data.frame(key    <- c("A",  "L",  "R",  "K",   "N",   "M",   "D",  "F", "C",  "P",  "Q",  "S", "E",  "T",  "G",  "W",  "H",  "Y",  "I",  "V"),
                                 value  <- c( 6.00, 5.98, 10.76, 9.74, 5.41, 5.74, 2.77, 5.48, 5.05, 6.30, 5.65, 5.68,3.22, 5.66, 5.97, 5.89, 7.59, 5.66, 6.02, 5.96))
    colnames(aaZimmermanDes) <- c("key", "value")
    #count <- 2;

    lev <- c("A",  "L",  "R", "K", "N", "M", "D", "F", "C","P", "Q", "S", "E", "T", "G", "W", "H", "Y", "I", "V")

    aaTable <- table(factor(prot <- strsplit(sequence, "")[[1]], levels = lev))

    pKNValues <- loadNTermPK(pkSet = "calibrated")
    pKCValues <- loadCTermPK(pkSet = "calibrated")

    pKNTerm <- retrievePKValue(prot[1], pKNValues)
    pKCTerm <- retrievePKValue(prot[length(prot)], pKCValues)

    oldw <- getOption("warn")
    options(warn = -1)

    temp <- aaTable*aaZimmermanDes

    options(warn = oldw)
    zimm <- pKNTerm + pKCTerm + sum(temp$value)

    zimm = zimm/(length(prot)+2)

    return (as.numeric(zimm))
}

ypriverol/pIR documentation built on May 4, 2019, 5:33 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

ypriverol/pIR
pIR: Estimating Isoelectric Point (pI) of Peptide and Proteins from Amino Acid Sequence.

R/pISVM.R
In ypriverol/pIR: pIR: Estimating Isoelectric Point (pI) of Peptide and Proteins from Amino Acid Sequence.

Defines functions defaultPeptideTrainData loadSVMModel transformData pISVMsequences pISVMpeptide defaultProteinTrainData svmBuildPeptideData svmBuildProteinData svmPIBuildData svmPIData svmProfile aaIndex

Documented in aaIndex defaultPeptideTrainData loadSVMModel pISVMpeptide pISVMsequences svmPIBuildData svmPIData transformData

R Package Documentation

Browse R Packages

We want your feedback!

ypriverol/pIR pIR: Estimating Isoelectric Point (pI) of Peptide and Proteins from Amino Acid Sequence.

R/pISVM.R In ypriverol/pIR: pIR: Estimating Isoelectric Point (pI) of Peptide and Proteins from Amino Acid Sequence.

Defines functions defaultPeptideTrainData loadSVMModel transformData pISVMsequences pISVMpeptide defaultProteinTrainData svmBuildPeptideData svmBuildProteinData svmPIBuildData svmPIData svmProfile aaIndex

Documented in aaIndex defaultPeptideTrainData loadSVMModel pISVMpeptide pISVMsequences svmPIBuildData svmPIData transformData

R Package Documentation

Browse R Packages

We want your feedback!

ypriverol/pIR
pIR: Estimating Isoelectric Point (pI) of Peptide and Proteins from Amino Acid Sequence.

R/pISVM.R
In ypriverol/pIR: pIR: Estimating Isoelectric Point (pI) of Peptide and Proteins from Amino Acid Sequence.