R/int.genind.R
In EcoGenetics: Management and Exploratory Analysis of Spatial Data in Landscape Genetics

Documented in int.df2genind int.gendata2genind int.genind int.genind2df int.genind2gendata

####################---INT.GENIND AND GENDATA CLASSES----#######################
# A product of the genind class for internal processing of ecogen class.
#                    ----------o----------

# The present code is a modified version of that written by T. Jombart
# for genind class of the package Adegenet. 
# The internal genind object is used only as an interface between 
# data frames <-> ecogen objects, and A slot <-> G slot,
# without S4 methods or validations. Additional slots were added for handling
# coding/missing data information and were removed the slots loc.nall, loc.names
# and ind.names. The int.genind objects are temporal (they only exist as
# intermediate stages of the information in several 
# processes): they are unfolded 
# into a frequency matrix and a class "int.gendata" with pure information 
# about the matrix with the function "int.genind2gendata".
# These two objects are joined together in temporal operations with the function
# "int.gendata2genind".
# The information of int.gendata objects, used in internal processing,
# is stored in the slot INT, invisible at user level.
# There are also defined: an int.genind constructor, and 
# an importation - exportation mechanism to data frame (both also modified
# versions of the original ones). 
# With the exception of the first importation of the genetic data
# into ecogen information, the importation-exportation mechanism 
# is used in the internal processes of some ecogen methods (e.g., resizing). 
# The conversion into allelic frequencies preserves options of previous 
# Adegenet versions, and the ploidy of the input data must be unique. 
# Leandro Roser, July 2015.

#------------------------------------------------------------------------------#
#---------------------------CLASSES DEFINITIONS--------------------------------#
#------------------------------------------------------------------------------#
#' int.gendata
#' @name int.gendata-class
#' @slot loc.fac locus of each allele
#' @slot all.names alleles names
#' @slot  ploidy ploidy
#' @slot type type of data ("codominant" or "dominant")
#' @slot NA.char NA character
#' @slot sep separator
#' @slot ncod number of digits coding each allele (codominant data)
#' @slot  missing missing character
#' @slot  missing cells character
#' @slot removed.image removed columns (coded as 2) or rows (coded as 1)
#' @keywords internal

setClass("int.gendata", 
         representation(loc.fac = "factorORnull",
                        all.names = "characterORnull",
                        ploidy = "numeric",
                        type = "character",
                        NA.char = "character",
                        sep = "characterORnull",
                        ncod = "intORnull",
                        missing = "character",
                        missing.cells = "intORnull",
                        removed.image = "list"), 
         
         prototype(loc.fac = NULL,
                   all.names = NULL,
                   ploidy = 2,
                   type = "codominant",
                   NA.char = "NA",
                   sep = "",
                   ncod = NULL,
                   missing = "0",
                   missing.cells = NULL,
                   removed.image = list())
)


#' int.genind
#' @name int.genind-class
#' @param tab allelic frequency matrix
#' @keywords internal

setClass("int.genind", 
         representation(tab = "matrix"),
         contains =  "int.gendata")


#------------------------------------------------------------------------------#
#-TWO CONVERSORS TO UNFOLD/ JOIN A FREQUENCY MATRIX AND ITS INT.GENDATA OBJECT-#
#------------------------------------------------------------------------------#

#' int.genind2gendata
#' @param input int.genind object
#' @keywords internal

int.genind2gendata <- function(input) {
  out <- new("int.gendata")
  out@loc.fac <- input@loc.fac
  out@all.names <- input@all.names
  out@ploidy <- input@ploidy
  out@type <-  input@type
  out@NA.char <- input@NA.char
  out@sep <-   input@sep
  out@ncod <- input@ncod
  out@missing <-  input@missing
  out@missing.cells <- input@missing.cells
  out@removed.image <-  input@removed.image
  out
}

#' int.gendata2genind
#' @param tab frequency matrix
#' @param input int.gendata object
#' @keywords internal

int.gendata2genind <- function(tab, input) {
  out <- new("int.genind")
  out@tab <- as.matrix(tab)
  out@loc.fac <- input@loc.fac
  out@all.names <- input@all.names
  out@ploidy <- input@ploidy
  out@type <-  input@type
  out@NA.char <- input@NA.char
  out@sep <-   input@sep
  out@ncod <- input@ncod
  out@missing <-  input@missing
  out@missing.cells <- input@missing.cells
  out@removed.image <-  input@removed.image
  out
}

#------------------------------------------------------------------------------#
#--------------------------INT.GENIND CONSTRUCTOR------------------------------#
#------------------------------------------------------------------------------#
#' constructor
#' @keywords internal

int.genind <- function(X, 
                       ploidy = 2, 
                       type = c("codominant", "dominant"),
                       NA.char = "NA",
                       missing = c("0", "NA", "MEAN"),
                       missing.cells = integer(0),
                       sep = "",
                       ncod = NULL,
                       removed.image) {
  
  
  #----GENERAL CONFIGURATION---------------------------------------------------#
  
  type <- match.arg(type)
  missing <- match.arg(missing)
  
  if(class(X)[1] != "data.frame" && class(X)[1] != "matrix") {
    stop("X is not of class <data.frame> or <matrix>")
  }
  
  # X configuration
  X <- as.matrix(X, rownames.force = TRUE)
  nind <- nrow(X)
  nloc <- ncol(X)
  
  # labels configuration 
  ## column names configuration
  temp <- colnames(X)
  temp <- gsub("[.][^.]*$", "", temp)
  loc.names <- aue.rmspaces(temp)
  loc.names <- unique(loc.names)
  
  ## ind.names configuration. If duplicated or no names
  ## present, create generic labels
  X <- int.check.rownames(X)
  ind.names <- rownames(X)
  
  
  # MARKER SPECIFIC CONFIGURATION----------------------------------------------#
  
  #------------------------------
  # CODOMINANT MARKERS
  #------------------------------
  if(type == "codominant"){
    
    ## loc.fac
    loc.nall <-  table(temp)[match(loc.names, names(table(temp)))]
    loc.nall <- as.integer(loc.nall)
    loc.fac <- factor(rep(loc.names, loc.nall), levels = loc.names)
    
    ## alleles name
    temp <- colnames(X)
    temp <- gsub("^.*[.]","",temp)
    all.names <- aue.rmspaces(temp)
    names(all.names) <- loc.fac
    # all.names is now a vector. It can be splitted into a list with:
    # all.names <- split(as.vector(all.names), loc.fac)
    # all.names <- all.names[loc.names]
    
    # END CODOMINANT---------------
    
    
    #------------------------------
    # DOMINANT MARKERS
    #------------------------------
  } else { # end if type=="codominant" <=> if type=="dominant"
    loc.fac <- as.factor(loc.names)
    all.names <- NULL
  }
  # END DOMINANT----------------
  
  # OUTPUT CREATION------------------------------------------------------------#
  
  res <- new("int.genind")
  res@tab <- X
  res@loc.fac <- loc.fac
  res@all.names <- all.names
  
  if(ploidy < as.integer(1)) {
    stop("ploidy inferior to 1")
  }
  res@ploidy <- as.integer(ploidy)
  
  res@type <- as.character(type)
  res@NA.char <- NA.char
  res@sep <- sep
  res@ncod <- as.integer(ncod)
  res@missing <- missing
  res@missing.cells <- as.integer(missing.cells)
  res@removed.image <- removed.image
  
  res
  
} 

#------------------------------------------------------------------------------#
#---------------------------IMPORTER-------------------------------------------#
#------------------------------------------------------------------------------#
#' importer
#' @keywords internal

int.df2genind <- function(indata, 
                          sep = "", 
                          ncod = NULL,
                          NA.char = "NA",
                          ploidy = 2, 
                          type = c("codominant","dominant"),
                          missing = c("0", "NA", "MEAN"),
                          rm.empty.ind = FALSE,
                          poly.level = 5,
                          lock.rows = TRUE) {
  
  
  # DATA CHECK-----------------------------------------------------------------#
  
  type <- match.arg(type)
  missing <- match.arg(missing)
  
  # check ploidy
  if(ploidy < 1L) {
    stop("ploidy can not be less than 1")
  }
  
  # check format and type congruence
  X.format <- class(indata)
  
  if(!any(c("data.frame","matrix") %in% X.format)) {
    stop("data has not a valid format (<data.frame> or <matrix>).
         Check the class of your data.")
  }
  
  #if data has not rows, return an empty int.genind object
  if(dim(indata)[1] == 0) {
    return(new("int.genind"))
  } 
  
  #if data has not columns but n rows, return an int.genind object with 
  # a matrix of dimension n x 0
  if(dim(indata)[2] == 0) {
    temp <- new("int.genind")
    temp@tab <- matrix(nrow = dim(indata)[1], ncol = dim(indata)[2])
    rownames(temp@tab) <- rownames(indata)
    rownames(temp@tab) <- rownames(indata)
    return(temp)
  }
  
  
  # GENERAL INPUT CONFIGURATION------------------------------------------------#
  
  # X configuration
  X <- as.matrix(indata, rownames.force = TRUE)
  mode(X) <- "character"
  X <- aue.rmspaces(X)
  
  nind <- nrow(X)
  nloc <- ncol(X)
  
  # check row and column names
  X <- int.check.rownames(X)
  X <- int.check.colnames(X)
  loc.names <- colnames(X)
  ind.names <- rownames(X)
  
  
  # NA CONFIGURATION---------------------------------------------------------#
  
  # NA translated into character
  if(is.na(NA.char)) {
    X[is.na(X)] <- "NA"
    NA.char <- "NA"
  } 
  
  # find NA strings
  NA.list <- unlist(lapply(unique(ploidy), 
                           function(nrep) {
                             paste(rep(NA.char, nrep),  collapse = sep)
                           }))
  
  NA.list <- unique(c(NA.list, NA.char))
  
  # replace NAs
  X[X %in% NA.list] <- NA
  
  
  # HANDLING MISSING DATA----------------------------------------------------#
  # REMOTION OF DATA IS REVERSIBLE
  
  # image of the columns and rows that will be removed tagged as 1 and 2
  removed.temp <- is.na(X)
  mode(removed.temp) <- "numeric"
  removed.image <- removed.temp - removed.temp
  col.rem <- apply(removed.temp, 2, sum) == nrow(removed.temp)
  removed.image[, col.rem] <- 1 
  row.rem <- apply(removed.temp, 1, sum) == ncol(removed.temp)
  removed.image[row.rem, ] <- 2
  
  # erase entirely non-type loci
  remove.loc <- which(colSums(is.na(X)) == nrow(X))
  
  if(length(remove.loc) > 0) {
    
    ## preserve information about the original matrix
    ## reset if the cells contain a 2
    removed.image[, remove.loc] <- 1
    
    ## remove non informative loci
    X <- X[, -remove.loc, drop = FALSE]
    loc.names <- colnames(X)
    nloc <- ncol(X)
    message("Note: removed noninformative loci -pure NAs column(s)- from slots G and A")
    
  }
  
  # erase entirely non-type individuals
  remove.ind <- which(rowSums(is.na(X)) == ncol(X))
  if(length(remove.ind) > 0) {
    
    ## preserve information about the matrix, for restoring
    ## NA individuals. In case of rm.empty.ind = FALSE,
    ## reset removed image for these individuals 
    ## (because removed image is defined as is.na(X))
    
    if(rm.empty.ind && lock.rows) {
     stop("Remotion of empty individuals only available if the paramter lock.rows = FALSE")  
    }
    
    if(!rm.empty.ind) {
      removed.image[remove.ind, ] <- 0
    } 
    

    ## preserve order
    old.order.row <- seq(nrow(indata))
    new.order.ind <- old.order.row[-remove.ind]
    ind.oldnames <- ind.names
    
    ## remove individuals
    X <- X[-remove.ind, ]
    ind.names <- rownames(X)
    nind <- nrow(X)
  }
  
  # MARKER SPECIFIC CONFIGURATION--------------------------------------------#
  
  #----------------------------
  # DOMINANT MARKERS 
  #----------------------------
  
  if(type == "dominant") {
    
    out <- X
    
    # check that data values are "0", "1" and NA
    if(!all(out %in% c(NA, "1", "0"))) { 
      stop("dominant data must be binary (0 for absence, 
           1 for presence")
    }
    
    
    # restore missing individuals if required
    if(!rm.empty.ind && length(remove.ind) > 0) {
      temp <- matrix(nrow = nrow(indata), ncol = ncol(out))
      temp[new.order.ind, ] <- out
      rownames(temp) <- ind.oldnames
      colnames(temp) <- colnames(out)
      out <- temp
    } else if(rm.empty.ind && length(remove.ind) > 0) {
      message("Note: removed noninformative individuals -pure NAs row(s)- from slots G and A")
    }
    
    # remove non polymorphic data
    mode(out) <- "integer"
    isPoly <- aue.is.poly(out, poly.level)
    out <- X[,  isPoly,  drop = FALSE]
    
    loc.names <- colnames(out)
    nloc <- ncol(out)
    
    if(ncol(removed.image) > ncol(out)) {
      if(poly.level == 0) {
        message("Note: non-polymorphic marker(s) deleted")
      } else {
        message(paste("Note: marker(s) with polymorphism level <", 
                      paste(poly.level, "%", sep = ""), "deleted"))
      }  
    }
    # save data image
    removed.image[, !isPoly] <- 1
    }
  
  # END DOMINANT----------------
  
  
  #----------------------------
  # CODOMINANT MARKERS 
  #----------------------------
  
  if(type == "codominant") {
    
    # ncod control 
    ncod <- int.check.ncod(X, ploidy = ploidy, ncod = ncod, sep = sep)
    
    # Handling separators
    
    if(ploidy > 1) {
    if(sep == "") {
      
      ## add "/" as separator
      X <- gsub(paste("([[:alnum:]]{",ncod,"})", sep = ""), "\\1/", X)
      X <- sub("/$", "", X)
      sep <- "/"
      
      ## non missing case, checking if <sep> is metacharacter
    } else  {
      X <- gsub(meta2char(sep),"/",X)
      sep <- "/"
    }
  }
    
    
    # Translate data into allelic frequencies 
    
    ## unfold data for each cell of the table
    if (ploidy > 1) {
      allele.data <- strsplit(X, "/")
      n.items <- vapply(allele.data, length, numeric(1))
      locus.data <- rep(rep(loc.names, each = nind), n.items)
      ind.data <- rep(rep(ind.names,ncol(X)), n.items)
      allele.data <- unlist(allele.data)
    } else {
      n.items <- rep(1, length(X))
      locus.data <- rep(rep(loc.names, each = nind), n.items)
      ind.data <- rep(rep(ind.names, ncol(X)), n.items)
      allele.data <- unlist(X)
    }
    
    ## identify NAs
    NA.posi <- which(is.na(allele.data))
    NA.ind <- ind.data[NA.posi]
    NA.locus <- locus.data[NA.posi]
    
    ## remove NAs
    if(length(NA.posi) > 0){
      allele.data <- allele.data[-NA.posi]
      locus.data <- locus.data[-NA.posi]
      ind.data <- ind.data[-NA.posi]
    }
    
    ## get matrix with frequencies
    allele.data <- paste(locus.data, allele.data, sep = ".")
    
    # sort allele names   4/14/2018 L.R
    my_names <- unique(allele.data)
    if(length(my_names) > 1) {
      pre <- gsub("(^.+)([.])(.+$)", "\\1", my_names)
      names_order <- character(length(my_names))
      j <- 1
      names_order[1] <- 1
      for(i in seq(2, length(my_names), 1)) {
        if(pre[i] != pre[i-1]) j <- j +1
        names_order[i] <- j
      }
    }

    my_names <- unname(unlist(tapply(my_names, as.integer(names_order), sort)))
   
    allele.data <- factor(allele.data, levels = my_names)
    out <- table(ind.data, allele.data)
    out <- out[ind.names, , drop = FALSE] # table sorts rownames alphabetically. This resets.

    
    #out <- out/2
    ## force type 'matrix'
    class(out) <- NULL
    dimnames(out) <- list(rownames(out), colnames(out))
    
    ## restore NAs
    if(length(NA.posi) > 0) {
      out.colnames <- colnames(out)
      for(i in 1:length(NA.ind)) {
        loc <- paste0(NA.locus[i], "\\.")
        out[NA.ind[i], grep(loc, out.colnames)] <- NA
      }
    }
    
  }
  
  # END CODOMINANT---------------
  
  
  # GENERAL OUTPUT CONFIGURATION-----------------------------------------------#
  
  # restore missing individuals if required
  if(!rm.empty.ind && length(remove.ind) > 0) {
    temp <- matrix(nrow = nrow(indata), ncol = ncol(out))
    temp[new.order.ind, ] <- out
    rownames(temp) <- ind.oldnames
    colnames(temp) <- colnames(out)
    out <- temp
  } else if(rm.empty.ind && length(remove.ind) > 0) {
    message("Note: removed noninformative individuals -pure NAs row(s)- from slots G and A")
  }
  
  # missing data manipulation
  
  missing.cells <-  which(is.na(out))
  if(length(missing.cells) == 0) {
    missing.cells <- integer(0)
  }
  
  ## 0 case
  if (missing == "0") {
    out[is.na(out)] <- 0
  }
  
  ## mean case
  if (missing == "MEAN") {
    mode(out) <- "numeric"
    moy <- round(apply(out, 2, function(x) mean(x, na.rm = TRUE)), 3)
    
    for (j in 1:ncol(out)) {
      out[, j][is.na(out[, j])] <- moy[j]
    }
  }  
  
  # removed image configuration
  rem.cols <- which(apply(removed.image, 2, function(x) any(x == 1)))
  rem.rows <- which(apply(removed.image, 1, function(x) any(x == 2)))
  removed.image <- list(init.dim = dim(indata), 
                        rem.rows = as.vector(rem.rows), 
                        rem.cols = as.vector(rem.cols),
                        names.rows = rownames(indata), 
                        names.cols = colnames(indata))
  
  # numeric matrix // old ecogen object, now integer, L.R. 9/12/2016
  # mode(out) <- "numeric"
  out <- as.matrix(out)
  mode(out) <- "integer"
  
  # OUTPUT CREATION------------------------------------------------------------#
  
  #dominant configuration
  if(type == "dominant") {
    sep <- NULL
    ncod <- NULL
  }
  
  res <- int.genind(X = out,
                    ploidy = ploidy,
                    type = type,
                    NA.char = NA.char,
                    sep = sep,
                    ncod = ncod,
                    missing = as.character(missing),
                    missing.cells = as.integer(missing.cells),
                    removed.image = removed.image)
  
  res
  
  }

#------------------------------------------------------------------------------#
#---------------------------EXPORTER-------------------------------------------#
#------------------------------------------------------------------------------#

#' export
#' @keywords internal

int.genind2df <- function(x, sep = "",                   #the product is a matrix
                          NA.char = "NA", 
                          restore.removed = FALSE) {
  
  # restore missing cells
  output <- x@tab
  loc.names <- levels(x@loc.fac)
  row.names <- rownames(output)
  
  #--(1/2)---dominant case-----------------------------------------------------#
  if(x@type == "dominant"){
    return(output) 
  }
  
  #--(2/2)---codominant case---------------------------------------------------#
  # make separate tables
  kX <- list()
  loc.fac <- as.character(x@loc.fac)
  for(i in loc.names){
    kX[[i]] <- output[, i == loc.fac, drop = FALSE]
  }
  #kX <- lapply(kX, function(X) round(X * x@ploidy))
  
  # function to recode a genotype in form 
  # "A1[sep]...[sep]Ak" from frequencies--------#
  recod <- function(vec, lab){
    if(any(is.na(vec))) {
      return(NA)
    }
    res <- paste(rep(lab, vec), collapse = sep)
    return(res)
  }
  #---------------------------------------------#
  
  # kGen is a list of nloc vectors of genotypes--------------------------------#
  # all.names is splitted into a list 
  all.names <- x@all.names
  all.names <- split(as.vector(all.names), loc.fac)
  all.names <- all.names[loc.names]
  
  kGen <- lapply(1:length(kX), 
                 function(i) { 
                   apply(kX[[i]], 1, recod, all.names[[i]])
                 }
  )
  
  names(kGen) <- loc.names
  
  ## build the final data.frame
  res <- do.call(cbind, kGen)
  res[res == ""] <- NA
  
  
  # RESTORE REMOVED DATA CONFIGURATION-----------------------------------------#
  
  if(restore.removed) {
    
    # info restoration slots
    restore.info <- x@removed.image
    rem.rows <- x@rem.rows
    rem.cols <- x@rem.cols
    
    temp <- matrix(0,nrow = restore.info[[1]][1], ncol = restore.info[[1]][2])
    temp[restore.info[[2]], ] <- NA
    temp[, restore.info[[3]]] <- NA
    l.rr <- length(restore.info[[2]]) != 0 
    l.rc <- length(restore.info[[3]]) != 0
    
    # both cols and rows removed
    if(l.rc && l.rr) {
      temp[-rem.rows, -rem.cols] <- res
      rownames(temp) <- restore.info[[4]]
      colnames(temp) <- restore.info[[5]]
      return(temp)
    } # only columns
    if(l.rc && !l.rr) {
      temp[, -rem.cols] <- res
      rownames(temp) <- restore.info[[4]]
      colnames(temp) <- restore.info[[5]]
      return(temp)
    } # only rows
    if(!l.rc && l.rr) {
      temp[-rem.rows, ] <- res
      rownames(temp) <- restore.info[[4]]
      colnames(temp) <- restore.info[[5]]
      return(temp)
    }
  }
  
  # OUTPUT CREATION------------------------------------------------------------#
  
  rownames(res) <- row.names
  colnames(res) <- loc.names
  
  res
}

#########################END INT.GENIND#########################################
Any scripts or data that you put into this service are public.
EcoGenetics documentation built on July 8, 2020, 5:46 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
EcoGenetics
Management and Exploratory Analysis of Spatial Data in Landscape Genetics

R/int.genind.R
In EcoGenetics: Management and Exploratory Analysis of Spatial Data in Landscape Genetics

Defines functions int.genind2df int.df2genind int.genind int.gendata2genind int.genind2gendata

Documented in int.df2genind int.gendata2genind int.genind int.genind2df int.genind2gendata

Try the EcoGenetics package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

EcoGenetics Management and Exploratory Analysis of Spatial Data in Landscape Genetics

R/int.genind.R In EcoGenetics: Management and Exploratory Analysis of Spatial Data in Landscape Genetics

Defines functions int.genind2df int.df2genind int.genind int.gendata2genind int.genind2gendata

Documented in int.df2genind int.gendata2genind int.genind int.genind2df int.genind2gendata

Try the EcoGenetics package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

EcoGenetics
Management and Exploratory Analysis of Spatial Data in Landscape Genetics

R/int.genind.R
In EcoGenetics: Management and Exploratory Analysis of Spatial Data in Landscape Genetics