R/gl.compliance.check.r

Defines functions gl.compliance.check

Documented in gl.compliance.check

#' @name gl.compliance.check
#' @title Checks a genlight object to see if it complies with dartR
#'  expectations and amends it to comply if necessary
#'  @family environment

#' @description
#' This function will check to see that the genlight object conforms to
#' expectation in regard to dartR requirements (see details), and if it does
#' not, will rectify it.
#' 
#' @param x Name of the input genlight object [required].
#' @param verbose Verbosity: 0, silent or fatal errors; 1, begin and end; 2,
#' progress log ; 3, progress and results summary; 5, full report
#' [default 2 or as specified using gl.set.verbosity].
#' 
#' @details
#' A genlight object used by dartR has a number of requirements that allow
#' functions within the package to operate correctly. The genlight object
#' comprises:
#' \enumerate{
#' \item The SNP genotypes or Tag Presence/Absence data (SilicoDArT);
#' \item An associated dataframe (gl@other$loc.metrics) containing the locus
#' metrics (e.g. Call Rate, Repeatability, etc);
#' \item An associated dataframe (gl@other$ind.metrics) containing the
#' individual/sample metrics (e.g. sex, latitude (=lat), longitude(=lon), etc);
#' \item A specimen identity field (indNames(gl)) with the unique labels applied
#' to each individual/sample;
#' \item A population assignment (popNames) for each individual/specimen;
#' \item Flags that indicate whether or not calculable locus metrics have been
#' updated.
#' }
#' 
#' @author Custodian: Luis Mijangos -- Post to
#'  \url{https://groups.google.com/d/forum/dartr}
#'  
#' @examples
#' x <- gl.compliance.check(testset.gl)
#' x <- gl.compliance.check(testset.gs)
#' 
#' @export
#' @return A genlight object that conforms to the expectations of dartR

gl.compliance.check <- function(x,
                                verbose = NULL) {
    # SET VERBOSITY
    verbose <- gl.check.verbosity(verbose)
    
    # FLAG SCRIPT START
    funname <- match.call()[[1]]
    utils.flag.start(func = funname,
                     build = "v.2023.2",
                     verbose = verbose)
    
    # DO THE JOB
    
    # if ploidy is null
    if (is.null(x@ploidy)) {
        # if diploid
        if (unique(ploidy(x)) == 2) {
            ploidy(x) <- rep(2, nInd(x))
            # if haploid
        } else if (unique(ploidy(x)) == 1) {
            ploidy(x) <- rep(1, nInd(x))
        } else {
          # allow polyploidy
          cat(warn("  The ploidy level > 2, check your data if it is not what you expect"))
          ploidy(x) <- ploidy(x)
        }
    }
    
    # CHECKS DATATYPE
    datatype <- utils.check.datatype(x, verbose = verbose)
    
    if(is(x,"genlight")){
      
      if (!is(x, "dartR")) {
        class(x) <- "dartR"  
        if (verbose>2) {
          cat(warn("Warning: Standard adegenet genlight object encountered. Converted to compatible dartR genlight object\n"))
          cat(warn("                    Should you wish to convert it back to an adegenet genlight object for later use outside dartR, 
                 please use function dartR2gl\n"))
        }
      }
    }
    
    # if slot loc.all is empty
    if(datatype == "SNP" & is.null(x@loc.all)){
      x$loc.all <- rep("A/C",nLoc(x)) 
      if(verbose >= 2){
      cat(warn("  The slot loc.all, which stores allele name for each locus, is empty. Creating a dummy variable (A/C) to insert in this slot. \n"))
      }
    }
    
    # if loci have no name
    if (is.null(locNames(x))) {
        locNames(x) <- paste0("Loc", 1:nLoc(x))
    }
    
    # Check that the data exist, and that they are restricted to the
    # appropriate values
    
    if (datatype == "SNP") {
        mat <- as.matrix(x)
        scores <- c(0, 1, 2, NA)
        if (verbose >= 2) {
            cat(report("  Checking coding of SNPs\n"))
        }
        if (max(mat) %in% scores) {
            if (verbose >= 1) {
                cat(report("    SNP data scored NA, 0, 1 or 2 confirmed\n"))
            }
        } else {
            if (verbose >= 1) {
                cat(
                    error(
                        "    Error: SNP data must be scored NA, 0 or 1 or 2, 
                        revisit data input\n"
                    )
                )
            }
        }
    } else {
        mat <- as.matrix(x)
        scores <- c(0, 1, NA)
        if (verbose >= 2) {
            cat(report("  Checking coding of Tag P/A data\n"))
        }
        if (max(mat) %in% scores) {
            if (verbose >= 1) {
                cat(
                    report(
                        "    Tag P/A data (SilicoDArT) scored 1, 0 (present or 
                        absent) confirmed\n"
                    )
                )
            }
        } else {
            if (verbose >= 1) {
                cat(
                    error(
               "    Error: Tag P/A data (SilicoDArT) must be scored NA
              for missing, 0 for absent or 1 for present, revisit data input\n"
                    )
                )
            }
        }
    }
    
    # Check that the population variable exists, and if it does not, create it
    # with a single population 'pop1'
    
    if (verbose >= 2) {
      cat(report("  Checking for population assignments\n"))
    }
    if (is.null(pop(x)) |
        is.na(length(pop(x))) | length(pop(x)) <= 0) {
      if (verbose >= 1) {
        cat(
          warn(
            "  Population assignments not detected, individuals assigned
                    to a single population labelled 'pop1'\n"
          )
        )
      }
      pop(x) <- array("pop1", dim = nInd(x))
      pop(x) <- as.factor(pop(x))
    } else {
      if (verbose >= 1) {
        cat(report("    Population assignments confirmed\n"))
      }
    }
    
    # Check for the locus metrics, and create if they do not exist.
    # Check for the locus metrics flags, and create if they do not exist.
    # Check for the verbosity flag, and create if it does not exist.
    
    if (verbose >= 2) {
        cat(report("  Checking locus metrics and flags\n"))
    }
    x <- utils.reset.flags(x, set = FALSE, verbose = 0)
    
    # Calculate locus metrics
    if (verbose >= 2) {
        cat(report("  Recalculating locus metrics\n"))
    }
    x <- gl.recalc.metrics(x, verbose = 0)
    
    # Check for monomorphic loci
    if (verbose >= 2) {
        cat(report("  Checking for monomorphic loci\n"))
    }
    x2 <- gl.filter.monomorphs(x, verbose = 0)
    if (nLoc(x2) == nLoc(x)) {
        if (verbose >= 1) {
            cat(report("    No monomorphic loci detected\n"))
        }
        x@other$loc.metrics.flags$monomorphs <- TRUE
    } else {
        if (verbose >= 1) {
            cat(warn("    Dataset contains monomorphic loci\n"))
        }
        x@other$loc.metrics.flags$monomorphs <- FALSE
    }
    
    # Check for loci with all NAs
    if (verbose >= 2) {
        cat(report("  Checking for loci with all missing data\n"))
    }
    x2 <- gl.filter.allna(x, verbose = 0)
    if (nLoc(x2) == nLoc(x)) {
        if (verbose >= 1) {
            cat(report("    No loci with all missing data detected\n"))
        }
        x@other$loc.metrics.flags$allna <- TRUE
    } else {
        if (verbose >= 1) {
            cat(warn("    Dataset contains loci with all missing dat\n"))
        }
        x@other$loc.metrics.flags$allna <- FALSE
    }
    
    # Check that the number of values in the loc.metrics dataframe is the same
    # as the number of loci
    if (nLoc(x) != nrow(x@other$loc.metrics)) {
        cat(
            warn(
                "  The number of rows in the loc.metrics table does not match 
                the number of loci! This is potentially a major problem if there
                is a mismatch of the loci with the metadata. Trace back to 
                identify the cause.\n"
            )
        )
    }
    
    # check that individual names are unique, and if not, add underscore and
    # letters
    if (verbose >= 2) {
        cat(report("  Checking whether individual names are unique.\n"))
    }
    
    if (any(duplicated(indNames(x)))) {
        if (verbose >= 2) {
            cat(
                warn(
                    "  Individual names are not unique. Appending an extra 
                    number to make them unique.\n"
                )
            )
        }
        indNames(x) <- make.unique(indNames(x), sep = '_')
    }
    
    # Check that the individual metrics exist, and if not, create the df
    
    if (verbose >= 2) {
        cat(report("  Checking for individual metrics\n"))
    }
    if (is.null(x@other$ind.metrics)) {
        if (verbose >= 1) {
            cat(warn("  Warning: Creating a slot for individual metrics\n"))
        }
        x@other$ind.metrics <- as.data.frame(matrix(nrow= nInd(x),ncol = 1))
        colnames(x@other$ind.metrics) <- "id"
        x@other$ind.metrics$id <- indNames(x)
    } else {
        if (verbose >= 1) {
            cat(report("    Individual metrics confirmed\n"))
        }
    }
    
    # convert the ind.metric slot into a dataframe
    x@other$ind.metrics <- as.data.frame(x@other$ind.metrics)
    
    # check if coordinates are in the right place and not misspell
    if (!is.null(x@other$latlong)) {
        x@other$latlon <- x@other$latlong
    }
    
    if (!is.null(x@other$latlon$long)) {
        x@other$latlon$lon <- x@other$latlon$long
    }
    
    # remove misspelt columns if they exist...
    x@other$latlong <- NULL
    x@other$latlon$long <- NULL
    if (verbose >= 2) {
        cat(report(
            "  Spelling of coordinates checked and changed if necessary to 
            lat/lon\n"
        ))
    }
    
    # Check SNP position
    if(is.null(x$position) & "SnpPosition" %in% names(x$other$loc.metrics)){
      x$position <- x$other$loc.metrics$SnpPosition
      if (verbose >= 2) {
        cat(report(
          "  Assigning SNP position using slot gl$other$loc.metrics$SnpPosition \n"
        ))
      }
    }
    
    # ADD TO HISTORY
    nh <- length(x@other$history)
    x@other$history[[nh + 1]] <- match.call()
    
    # FLAG SCRIPT END
    
    if (verbose > 0) {
        cat(report("Completed:", funname, "\n"))
    }
    
    invisible(x)
}

Try the dartR.base package in your browser

Any scripts or data that you put into this service are public.

dartR.base documentation built on April 4, 2025, 2:45 a.m.