R/setColRowNames.R

Defines functions setColRowNames

Documented in setColRowNames

#' Add universal identifier columns and set row names interactively
#'
#' Prints the column names of `data` and then asks on the console which
#' columns hold the protein identifiers, the gene names and the protein names.
#' From each chosen column only the first `;`-separated entry of every row is
#' kept and stored in three new leading columns named `protein`, `gene` and
#' `name`. Missing protein identifiers are replaced by `id1`, `id2`, ...;
#' missing gene or protein names fall back to the protein identifier of the
#' same row. Finally a column is chosen for the row names; additional columns
#' are appended (joined by `.`) until the row names are unique and free of
#' `NA`. The whole dialogue is repeated, always starting from the unmodified
#' input, until the user confirms the result.
#'
#' @param data A table-like object (presumably a data frame) that supports
#'   `colnames()`, `nrow()`, `data[, i]` and `cbind()`.
#'
#' @return `data` with the columns `protein`, `gene` and `name` prepended and
#'   the selected identifiers set as row names.
#' @export
#'
#'
setColRowNames <- function(data) {

  # First ";"-separated entry of every element; NA where nothing is left.
  # Coerces to character so factor or numeric columns work as well.
  first_entry <- function(values) {
    vapply(
      strsplit(as.character(values), split = ";"),
      FUN = function(parts) {
        if (length(parts) > 0) parts[[1]] else NA_character_
      },
      FUN.VALUE = character(1)
    )
  }

  # Asks for a column number until a valid index of `data` is entered.
  ask_column <- function(prompt) {
    repeat {
      index <- suppressWarnings(as.numeric(readline(prompt)))
      if (length(index) == 1 && !is.na(index)) {
        index <- trunc(index)
        if (index >= 1 && index <= ncol(data)) {
          return(index)
        }
      }
      # readline() returns "" immediately outside interactive sessions, so
      # re-prompting there would never terminate.
      if (!interactive()) {
        stop("No valid column number could be read.", call. = FALSE)
      }
      message(paste0("Please enter a column number between 1 and ",
                     ncol(data), "."))
    }
  }

  printn(colnames(data))

  confirmed <- FALSE

  while (!confirmed) {

    # Protein IDs
    column <- ask_column("Column of protein identifiers? ")
    message(paste0("Using <", colnames(data)[column],
                   "> as protein identifier."))
    protein <- first_entry(data[, column])

    # Gene names
    column <- ask_column("Column of gene names? ")
    message(paste0("Using <", colnames(data)[column], "> as gene names."))
    gene <- first_entry(data[, column])

    # Protein names
    column <- ask_column("Column of protein names? ")
    message(paste0("Using <", colnames(data)[column], "> as protein names."))
    name <- first_entry(data[, column])

    # Placeholder ids for rows without a protein identifier
    no_protein <- is.na(protein)
    protein[no_protein] <- paste0("id", seq_len(sum(no_protein)))

    # Missing gene / protein names fall back to the protein identifier
    no_gene <- is.na(gene)
    gene[no_gene] <- protein[no_gene]
    no_name <- is.na(name)
    name[no_name] <- protein[no_name]

    addition <- data.frame(protein = protein,
                           gene = gene,
                           name = name,
                           stringsAsFactors = FALSE)

    # Row names
    column <- ask_column("Column to be used for rownames? ")
    row_ids <- first_entry(data[, column])
    message(paste0("Using <", colnames(data)[column], "> as rownames."))

    # Row names must be unique and must not contain NA; keep appending
    # columns until both hold (paste() turns NA into the string "NA").
    while (anyNA(row_ids) || anyDuplicated(row_ids) > 0) {

      column <- ask_column("Add an additional column. ")
      row_ids <- paste(row_ids, first_entry(data[, column]), sep = ".")
      message(paste0("Using <", colnames(data)[column],
                     "> as additonal identifier."))

    }

    # Build the result separately so that a rejected attempt does not leave
    # extra columns in `data` for the next round.
    result <- cbind(addition, data)
    rownames(result) <- row_ids

    # ok() is a helper defined elsewhere; presumably it asks the question and
    # returns TRUE/FALSE -- TODO confirm.
    confirmed <- ok("Names ok?")

  }

  result

}
nicohuttmann/htmnanalysis documentation built on Dec. 6, 2020, 3:02 a.m.