R/addPrepro.R

Defines functions addPrepro

Documented in addPrepro

#' Additional preprocessing of raw data
#'
#' Interactively post-processes every table stored in the list `raw.data`.
#' `raw.data` is not an argument: it is read as a free variable from the
#' enclosing environments, and the processed list is written back to
#' `raw.data` in the global environment.
#'
#' Two steps are offered through `readline()` prompts. A step is only run when
#' the answer is exactly `"ja"`; any other answer skips it.
#'
#' 1. Asked once for all tables: in tables that contain both `Protein.names`
#'    and `Fasta.headers`, protein names that are empty (`""`) or `NA` are
#'    replaced by the FASTA header of the same row.
#' 2. Asked once per table: the first `;`-separated entry of `Protein.IDs`
#'    becomes the row name, and the first entries of `Protein.names` and
#'    `Gene.names` are stored in the new columns `Protein.names1` and
#'    `Gene.names1`. Rows without a gene name get the placeholders
#'    `"MissingNo.1"`, `"MissingNo.2"`, ... in `Gene.names1`.
#'
#' Columns that are absent from a table are reported with `message()` and
#' skipped.
#'
#' @return Invisibly, the processed list of tables. The function is called
#'   mainly for its side effect of assigning that list to `raw.data` in the
#'   global environment.
#' @export
#'
#'
addPrepro <- function() {

  # First ";"-separated entry of every element of x. Empty strings and NA
  # yield NA. as.character() makes this work for factor columns as well.
  firstEntry <- function(x) {
    vapply(
      strsplit(as.character(x), split = ";", fixed = TRUE),
      function(parts) parts[1],
      character(1),
      USE.NAMES = FALSE
    )
  }

  # Work on a local copy; it is written back to the global environment at the
  # very end, so an error in between leaves the global object untouched.
  tables <- raw.data


  # Fill missing protein names with FASTA headers (asked once)

  if (readline("Fill missing protein names with FASTA headers? (ja/nein) ") == "ja") {

    # seq_along() instead of 1:length(): an empty list is simply skipped.
    for (i in seq_along(tables)) {

      columns <- names(tables[[i]])
      has.names <- "Protein.names" %in% columns
      has.fasta <- "Fasta.headers" %in% columns

      if (has.names && has.fasta) {

        protein.names <- as.character(tables[[i]][["Protein.names"]])
        fasta.headers <- as.character(tables[[i]][["Fasta.headers"]])

        # NA is treated as missing too; a plain `== ""` test inside if()
        # would stop with an error on NA.
        empty <- is.na(protein.names) | protein.names == ""
        protein.names[empty] <- fasta.headers[empty]
        tables[[i]][["Protein.names"]] <- protein.names

        message(paste0("Done with table ", i, "."))

      } else if (has.names) {
        message(paste0("Couldn't find FASTA headers column in table ", i, "."))

      } else if (has.fasta) {
        message(paste0("Couldn't find protein names column in table ", i, "."))

      } else {
        # Neither column exists, so say that instead of "unknown error".
        message(paste0("Couldn't find protein names and FASTA headers columns in table ", i, "."))
      }
    }
  }


  # Unification of protein IDs, protein names and gene names
  # (the question is asked again for every table)

  for (i in seq_along(tables)) {

    if (readline("Unify protein IDs, protein names and gene names? (ja/nein) ") == "ja") {

      columns <- names(tables[[i]])

      if ("Protein.IDs" %in% columns) {
        # Add first protein ID as rownames. All row names are set in a single
        # assignment: setting them one by one can clash with a default row
        # name that has not been replaced yet.
        rownames(tables[[i]]) <- firstEntry(tables[[i]][["Protein.IDs"]])
      } else {
        message("Protein.IDs column not found.")
      }


      if ("Protein.names" %in% columns) {
        # Single protein names
        tables[[i]][["Protein.names1"]] <- firstEntry(tables[[i]][["Protein.names"]])
      } else {
        message("Protein.names column not found.")
      }


      if ("Gene.names" %in% columns) {
        # Single gene names
        gene.names <- firstEntry(tables[[i]][["Gene.names"]])

        # Number the rows without a gene name. The guard matters because
        # paste0() recycles a zero-length argument to "" instead of
        # returning an empty vector.
        unnamed <- is.na(gene.names)
        if (any(unnamed)) {
          gene.names[unnamed] <- paste0("MissingNo.", seq_len(sum(unnamed)))
        }

        tables[[i]][["Gene.names1"]] <- gene.names
      } else {
        message("Gene.names column not found.")
      }
    }
  }


  # Publish the processed tables under the name the rest of the workflow uses.
  assign("raw.data", tables, pos = .GlobalEnv)

  invisible(tables)
}
nicohuttmann/htmnanalysis documentation built on Dec. 6, 2020, 3:02 a.m.