## R/helpers.R

## Checks the row and column names of a tab-separated table file
##
## The file 'filCa' located in the directory 'dirCa' is read and:
##   - an error is raised if names in the first column (row names) or in the
##     header line (column names, first field excluded) are duplicated;
##   - when 'vrbLa' is TRUE, the names which differ from the output of
##     make.names(unique = TRUE) are printed next to the suggested name.
##
## dirCa: character; directory containing the file
## filCa: named character; file name; its name (e.g. 'sampleMetadata') labels
##        the table in the messages and decides whether the rows are reported
##        as samples or as variables
## vrbLa: logical; should the non-standard names be printed?
##
## The function is called for its side effects (error or printed report).
.checkRformatF <- function(dirCa, filCa, vrbLa) {

    pthC <- file.path(dirCa, filCa)
    tabC <- names(filCa)

    ## rows are reported as samples for the sampleMetadata only; the labels
    ## are swapped for the column names
    samMetL <- identical(tabC, "sampleMetadata")
    rowLabC <- if(samMetL) "sample" else "variable"
    colLabC <- if(samMetL) "variable" else "sample"

    ## row names: first column of the table
    rowVc <- read.table(pthC,
                        check.names = FALSE,
                        header = TRUE,
                        sep = "\t",
                        stringsAsFactors = FALSE)[, 1]

    ## column names: the header line is read as a data row (no 'header'
    ## argument) and its first field is dropped
    colVc <- unlist(read.table(pthC,
                               check.names = FALSE,
                               nrows = 1,
                               sep = "\t",
                               stringsAsFactors = FALSE))[-1]

    if(any(duplicated(rowVc)))
        stop("The following ",
             rowLabC,
             " name(s) is/are duplicated in the ",
             tabC,
             ": '",
             paste(rowVc[duplicated(rowVc)], collapse = "', '"), "'",
             call. = FALSE)

    if(any(duplicated(colVc)))
        stop("The following ",
             colLabC,
             " name(s) is/are duplicated in the ",
             tabC,
             ": '",
             paste(colVc[duplicated(colVc)], collapse = "', '"), "'",
             call. = FALSE)

    rowMakVc <- make.names(rowVc, unique = TRUE)

    ## read.table converts the string "NA" into a missing value: such names
    ## are flagged explicitly, otherwise the comparison returns NA and the
    ## 'if' below fails
    rowDifVl <- is.na(rowVc) | rowVc != rowMakVc

    if(any(rowDifVl)) {
        rowDifDF <- data.frame(row = seq_along(rowVc),
                               actual = rowVc,
                               preferred = rowMakVc)
        rowDifDF <- rowDifDF[rowDifVl, , drop = FALSE]
        if(vrbLa) {
            cat("\n\nWarning: The following ",
                rowLabC,
                " name(s) of the ",
                tabC,
                " is/are not in the standard R format, which may result in errors when loading the data:\n", sep = "")
            print(rowDifDF)
        }
    }

    colMakVc <- make.names(colVc, unique = TRUE)

    ## same NA-safe comparison as for the row names
    colDifVl <- is.na(colVc) | colVc != colMakVc

    if(any(colDifVl)) {
        colDifDF <- data.frame(col = seq_along(colVc),
                               actual = colVc,
                               preferred = colMakVc)
        colDifDF <- colDifDF[colDifVl, , drop = FALSE]
        if(vrbLa) {
            cat("\n\nWarning: The following ",
                colLabC,
                " name(s) of the ",
                tabC,
                " is/are not in the standard R format, which may result in errors when loading the data:\n", sep = "")
            print(colDifDF)
        }
    }
}


## Checks that the names of the three tables are consistent
##
## The row names of 'datMN' are compared to the row names of 'samDF', and the
## column names of 'datMN' to the row names of 'varDF' (the messages refer to
## the dataMatrix "columns" for samples and "rows" for variables, i.e. to the
## transposed layout of 'datMN'). For each mismatch, a diagnostic is printed:
## names found in one table only, different numbers of names, 'X' prefix
## (sample names only), different order, or different names.
##
## datMN: matrix with samples as rows and variables as columns
## samDF: data frame of sample metadata (one row per sample)
## varDF: data frame of variable metadata (one row per variable)
##
## Returns TRUE if both sets of names are identical (same names, same order),
## FALSE otherwise.
.checkW4mFormatF <- function(datMN, samDF, varDF) {

    chkL <- TRUE

    datSamVc <- rownames(datMN)
    datVarVc <- colnames(datMN)
    samVc <- rownames(samDF)
    varVc <- rownames(varDF)

    if(!identical(datSamVc, samVc)) {
        ## checking sample names

        chkL <- FALSE

        datSamDifVc <- setdiff(datSamVc, samVc)

        if(length(datSamDifVc) > 0) {
            cat("\nThe following samples were found in the dataMatrix column names but not in the sampleMetadata row names:\n", sep="")
            ## match() gives the (first) position of each name, and does not
            ## fail when a name occurs several times
            print(cbind.data.frame(col = match(datSamDifVc, datSamVc),
                                   name = datSamDifVc))
        }

        samDatDifVc <- setdiff(samVc, datSamVc)

        if(length(samDatDifVc) > 0) {
            cat("\n\nThe following samples were found in the sampleMetadata row names but not in the dataMatrix column names:\n", sep="")
            print(cbind.data.frame(row = match(samDatDifVc, samVc),
                                   name = samDatDifVc))
        }

        if(nrow(datMN) != nrow(samDF)) {
            cat("\n\nThe dataMatrix has ", nrow(datMN), " columns (ie samples) whereas the sampleMetadata has ", nrow(samDF), " rows\n", sep="")
        } else if(identical(gsub("^X", "", datSamVc), samVc)) {
            cat("\n\nThe dataMatrix column names start with an 'X' but not the sampleMetadata row names\n", sep="")
        } else if(identical(gsub("^X", "", samVc), datSamVc)) {
            cat("\n\nThe sampleMetadata row names start with an 'X' but not the dataMatrix column names\n", sep="")
        } else {
            ## same table in both cases: only the heading differs
            if(identical(sort(datSamVc), sort(samVc))) {
                cat("\n\nThe dataMatrix column names and the sampleMetadata row names are not in the same order:\n", sep="")
            } else {
                cat("\n\nThe dataMatrix column names and the sampleMetadata row names are not identical:\n", sep="")
            }
            print(cbind.data.frame(indice = seq_len(nrow(datMN)),
                                   dataMatrix_columnnames = datSamVc,
                                   sampleMetadata_rownames = samVc)[datSamVc != samVc, , drop = FALSE])
        }

    }

    if(!identical(datVarVc, varVc)) {
        ## checking variable names

        chkL <- FALSE

        datVarDifVc <- setdiff(datVarVc, varVc)

        if(length(datVarDifVc) > 0) {
            cat("\nThe following variables were found in the dataMatrix row names but not in the variableMetadata row names:\n", sep="")
            print(cbind.data.frame(row = match(datVarDifVc, datVarVc),
                                   name = datVarDifVc))

        }

        varDatDifVc <- setdiff(varVc, datVarVc)

        if(length(varDatDifVc) > 0) {
            cat("\n\nThe following variables were found in the variableMetadata row names but not in the dataMatrix row names:\n", sep="")
            print(cbind.data.frame(row = match(varDatDifVc, varVc),
                                   name = varDatDifVc))
        }

        if(ncol(datMN) != nrow(varDF)) {
            ## the variables are the columns of 'datMN': their number is
            ## ncol(datMN), not nrow(datMN)
            cat("\n\nThe dataMatrix has ", ncol(datMN), " rows (ie variables) whereas the variableMetadata has ", nrow(varDF), " rows\n", sep="")
        } else {
            ## same table in both cases: only the heading differs
            if(identical(sort(datVarVc), sort(varVc))) {
                cat("\n\nThe dataMatrix row names and the variableMetadata row names are not in the same order:\n", sep="")
            } else {
                cat("\n\nThe dataMatrix row names and the variableMetadata row names are not identical:\n", sep="")
            }
            print(cbind.data.frame(row = seq_len(ncol(datMN)),
                                   dataMatrix_rownames = datVarVc,
                                   variableMetadata_rownames = varVc)[datVarVc != varVc, , drop = FALSE])
        }
    }

    return(chkL)

}
## ethevenot/r-w4m2bioc documentation built on May 16, 2019, 9:06 a.m.