R/unused/featureClean.R

## NOT IMPORTANT ANYMORE



featureClean <- function(DT_featureLibrary,
                         nameLibrary){

    ## Important library column names other than Mass Info and sample runs
    ## Same as in |featureLibrary|
    libNames <- c("GeneralID",
                  "Family",
                  "Q1_Modification_Value",
                  "Barcode",
                  "Structural.ID.by.EPI-IDA",
                  "MolecularID",
                  "TotalCarbonID",
                  "ExactMass")

    ## Subset 'DT_featureLibrary' into runs (like 'libCols' in |featureLibrary|)
    {
        ## Assumption that first run starts after exact mass
        minimumCol <- grep("ExactMass", colnames(DT_featureLibrary)) + 1
        libCols <- DT_featureLibrary[, minimumCol:ncol(DT_featureLibrary)]

        ## Proper rounding of feature values and conversion of "NA" to <NA>
        for (j in seq_along(libCols)){
            set(libCols, j = j, value = sprintf("%.2f", as.numeric(round(as.numeric(sprintf("%.7f", libCols[[j]])), 2))))
            #set(libCols, which(libCols[[j]] == "NA"), j, NA)
        }
    }

    ## Copy original column run names for re-replacing at the end
    nameDT_featureLibrary <- colnames(libCols)

    ## Copy column names of 'libCols' for library matching
    {
        matchDT_featureLibrary <- colnames(libCols)
        matchDT_featureLibrary <- strsplit(matchDT_featureLibrary, ";_;")
        matchDT_featureLibrary <- unlist(lapply(matchDT_featureLibrary, function(x) x[-(2:4)]))
    }

    ## Replace column names in libCols with matchable ones.
    colnames(libCols) <- matchDT_featureLibrary

    ## Convert data table of sample run features to a matrix
    libCols <- as.matrix(libCols)

    ## Load library and match column indices to DT_featureLibrary by sample run
    {
        library <- read.xlsx(nameLibrary,
                             skipEmptyCols = T,
                             skipEmptyRows = F)

        ## Copy of original library names
        nameLibCol <- colnames(library)

        ## Convert special characters and remove multiple whitespace/periods
        colnames(library) <- gsub("\\s+", ".", colnames(library))

        temp <- vector(mode = "character", length = ncol(libCols))
        for(i in 1:length(temp)){
            if(length(grep(colnames(libCols)[i], colnames(library))) == 0){
                stop("Weirdly, I can't match the uncleaned library of features based on the Qsessions to the original library of RTs.")
            } else{
                temp[i] <- grep(colnames(libCols)[i], colnames(library))
            }
        }

        ## Remove helper rows in library
        dropRow <- 0
        while(any(grep("/", library[1,])) == F){
            dropRow <- dropRow + 1
            library <- library[-1,]
        }
    }

    ## Sample run column indices in DT_featureLibrary ordered by library sample run
    nameLibCol <- nameLibCol[as.numeric(temp)]

    ## Only retain library columns matching 'DT_featureLibrary/libCols'
    {
        library <- as.data.table(library)
        library <- library[, as.numeric(temp), with=F]

        if(any(dim(library) != dim(libCols))){
            stop("Dimensions of library and DT_featureLibrary don't match.")
        }

        ## Proper rounding of feature values and conversion of "NA" to <NA>
        for (j in seq_along(library)){
            set(library, j = j, value = sprintf("%.2f", as.numeric(round(as.numeric(sprintf("%.7f", library[[j]])), 2))))
            #set(library, which(library[[j]] == "NA"), j, NA)
        }
    }

    ## Match 'library' and 'libCols' columns and check for retention times that do
    ## not map (will be NA in 'libCols')
    ## Any retention time that maps is converted to an NA in 'libCols'
    {
        library <- as.matrix(library)

        #temp <- replace(libCols, libCols != library, "NA")

        temp <- libCols != library
        temp <- as.vector(temp)
        temp <- which(temp == T)

        tempLibCols <- as.vector(libCols)
        tempLibCols[temp] <- -1

        tempLibCols <- matrix(tempLibCols, nrow = nrow(libCols), ncol = ncol(libCols), byrow = F)

        # expect_equal(dim(library), dim(libCols)) ## Always true!
        # expect_equal(libCols, library) ## Potentially not true unless cleaned and "NA" -> <NA>
    }











}
jchitpin/blistR documentation built on July 8, 2019, 6:29 p.m.