R/ecogen.6OF6.converters.R
In EcoGenetics: Management and Exploratory Analysis of Spatial Data in Landscape Genetics

#' Creating input data for Geneland with an ecogen object
#' 
#' @description This function creates four data frames (XY.txt, NAMES.txt, P.txt, G.txt) 
#' in the indicated directory (default: working directory), which can be loadedin Geneland.
#' @param eco Object of class "ecogen"
#' @param dir output path. Default = "" (current directory).
#' @param ncod Number of digits coding each allele
#'  (e.g., 1: x, 2: xx, 3: xxx, etc.).
#' @param ploidy Ploidy of the data.
#' @param to_numeric Recode the genetic data into numeric format? If TRUE, 
#' the functions performs the correction via \code{\link{eco.format}}.
#' Additional formatting parameters can be passed to this function.
#' @param recode Recode mode when to_numeric = TRUE: "all" for recoding
#' the data considering all the individuals values at once (e.g., protein data), 
#' "column" for recoding the values by column (e.g., microsatellite data), "paired" 
#' for passing the values of allelic states and corresponding replacement values, using 
#' the replace_in and replace_out arguments (e.g. replace_in = c("A", "T", "C", "G"),
#' replace_out = c(1,2,3,4)).
#' @param replace_in vector with states of the data matrix to be replaced, when recode = "paired".
#' This argument must be used in conjunction with the argument "replace_out".
#' @param replace_out vector with states of the data matrix used for replacement, when recode = "paired".
#' This argument must be used in conjunction with the argument "replace_in".
#' @param nout Number of digits in the output when to_numeric = TRUE.
#' @param ... Additional parameters passed to \code{\link{eco.format} when to_numeric = TRUE}
#' @return XY.txt Matrix with coordinates.
#' @return NAMES.txt Matrix with row names.
#' @return P.txt Matrix with phenotypic data.
#' @return G.txt Matrix with genotypic data.
#' @examples
#' 
#' \dontrun{
#' 
#' data(eco.test)
#' ecogen2geneland(eco, dir = "", ncod=1)
#' 
#' }
#' 
#' @author Leandro Roser \email{learoser@@gmail.com}
#' @export


setGeneric("ecogen2geneland", 
           function(eco, dir = "", ncod = NULL, ploidy = 2,  
                    to_numeric = FALSE, nout = 3, 
                    recode = c("all", "column", "paired"),
                    replace_in = NULL,
                    replace_out =NULL, ...) {
             
             recode <- match.arg(recode)
            
              if(dir != "") {
                #add "/" to the end if path is "xxx/xxx"
               if(!grep("/$", dir)) {
               dir <- paste0(dir, "/")
               }
              }
             
             # check numeric format in G
             G_temp <- int.check.to_numeric(eco@G, to_numeric = to_numeric, 
                                            nout = nout, recode = recode, 
                                            ploidy = eco@INT@ploidy,
                                            ncod = eco@INT@ncod,
                                            ...)
             
             write.table(int.loc2al(G_temp,  ncod = ncod,  ploidy = ploidy), paste0(dir, "G.txt"),
                         quote = FALSE, row.names = FALSE, col.names = FALSE)
             
             write.table(eco@XY, paste0(dir, "XY.txt"), quote = FALSE,
                         row.names = FALSE, col.names = FALSE)
             
             write.table(rownames(eco@XY), paste0(dir, "NAMES.txt"), quote = FALSE, 
                         row.names = FALSE, col.names = FALSE)
             
             write.table(eco@P, paste0(dir, "P.txt"), quote = FALSE, row.names = FALSE, 
                         col.names = FALSE)
             
    
            
              if(dir == "") {
               return(paste0("Files written to: ", getwd()))
             } else {
               return(paste0("Files written to: ", gsub("/$", "", dir)))
             }
             
           })


#' Exporting an ecogen genetic data frame into Genepop format
#' 
#' @description This function converts the genetic 
#' data of an ecogen object into a Genepop input file. 
#' @param eco Object of class "ecogen".
#' @param dir output path. Default = "" (current directory).
#' @param outName The name of the output file.
#' @param grp The name of the S slot column with groups in which the sample
#' must be divided (e.g., populations). If groups are not given (grp = NULL),
#' all individuals will be assigned to a single one.
#' @param nout Number of digits in the output file.
#' @param sep Character separating alleles.
#' @param recode Recode mode: "none" for no recoding (defalut), "all" for recoding
#' the data considering all the individuals values at once (e.g., protein data), 
#' "column" for recoding the values by column (e.g., microsatellite data), "paired" 
#' for passing the values of allelic states and corresponding replacement values, using 
#' the replace_in and replace_out arguments (e.g. replace_in = c("A", "T", "C", "G"),
#' replace_out = c(1,2,3,4)).
#' @param replace_in vector with states of the data matrix to be replaced, when recode = "paired".
#' This argument must be used in conjunction with the argument "replace_out".
#' @param replace_out vector with states of the data matrix used for replacement, when recode = "paired".
#' This argument must be used in conjunction with the argument "replace_in".
#' @param ... Additional parameters passed to \code{\link{eco.format}}
#' @return A Genepop file in the working directory.
#' @examples 
#' 
#' \dontrun{
#' 
#' data(eco.test)
#' ecogen2genepop(eco, dir = "", outName = "infile.genepop.txt", grp = "pop")
#' # an output file "infile.genepop.txt" is generated in the working directory
#' 
#' }
#' 
#' @author Leandro Roser \email{learoser@@gmail.com}
#' @export


setGeneric("ecogen2genepop", 
           function(eco, dir = "", outName = "infile.genepop.txt", 
                    grp = NULL, nout = 3, sep = "",   
                    recode = c("none", "all", "column", "paired"),
                    replace_in = NULL,
                    replace_out =NULL,
                    ...) {
             
             if(dir != "") {
               #add "/" to the end if path is "xxx/xxx"
               if(!grep("/$", dir)) {
                 dir <- paste0(dir, "/")
               }
             }
             
             nout <- ceiling(nout)
             nout <- c(1,2,3)[c(1,2,3) %in% nout]
             if(length(nout) != 1)  {
               stop("nout must be 1, 2 or 3")
             }

             #check group consistency
             structures <- int.check.group(eco@S, grp = grp, exp.l = nrow(eco@G))
             structures <- as.factor(as.numeric(structures)) #recoding levels
             
             
             X <- eco.format(eco@G, ncod = eco@INT@ncod, 
                             nout = nout,
                             ploidy = eco@INT@ploidy,
                             fill.mode = "first",
                             recode = recode,
                             ...)
             
             X <- cbind(rep(0, nrow(X)), X)
             X[, 1] <- paste(rownames(X), ",")
             
             lista <- list()
             grp <- rep(" ", ncol(eco@G) + 1)
             grp <- as.matrix(t(grp))
             grp[1] <- "POP"
             matriz <- matrix(nrow = 0, ncol = ncol(eco@G)+1)
             for(i in seq_along(levels(structures))) {
               lista[[i]] <- X[structures == i, ]
               lista[[i]] <- rbind(grp, lista[[i]])
               matriz <- rbind(matriz, lista[[i]])
             }
             matriz <- rbind(matriz, rep("", ncol(matriz)))
             
             matriz[, 1] <- as.character(matriz[, 1])
             nombres <- rep("", (ncol(X)) ^ 2)
             nombres <- as.data.frame(matrix(nombres, ncol(X), ncol(X)))
             nombres[, 1] <- c("Data exported from EcoGenetics", colnames(X[, -1]))
             colnames(nombres) <- colnames(matriz)
             matriz <- rbind(as.matrix(nombres), matriz) 
             
             out <- ifelse(dir == "", paste0(getwd(), "/", outName), paste0(dir, outName))
             
             write.table(matriz, file = out, row.names = FALSE,
                         col.names = FALSE, quote = FALSE)
             
             print(paste0("File written to: ", out))
             
           })



#' Importing a Genepop file
#' 
#' @description This function converts a Genepop file into an object 
#' with a genetic matrix (G) and a structures matrix (S).
#' 
#' @param genefile Genepop file.
#' 
#' @return A list with the objects G (genetic matrix) and S (structures matrix).
#'
#' @examples 
#' \dontrun{
#' # ingpop, file with Genepop format in the folder "/extdata" of the package
#' 
#' ecopath <- paste(path.package("EcoGenetics"), "/extdata/ingpop", sep = "")
#' ingpop <- genepop2ecogen(ecopath)
#' ingpop
#' }
#' 
#' @export
#' @author Leandro Roser \email{learoser@@gmail.com}
#  Code adapted from Emiel van Loon and Scott Davis


setGeneric("genepop2ecogen", 
           function(genefile = NULL) {
             
             # read the file into memory
             con <- file(description = genefile, open = "rb")
             lines <- readLines(con)
             close(con)
             fileLength = length(lines)
             
             # checking that the file ends with a newline
             endline <- lines[fileLength]
             endline <- strsplit(endline, " ")[[1]]
             if(any(endline != "")) {
               stop("the file does not end with a newline")
             }
             
             #pop locations ignoring first 'title' line
             popLocations <- grep("POP", lines[2:fileLength], ignore.case=TRUE)
             popLocations <- popLocations + 1 
             
             # get title from first line
             # title <- lines[1]
             
             #get Loci Column names
             lociNames <- lines[2:(popLocations[1]-1)]
             lociNames <- gsub("\t","",lociNames)
             
             # number of individuals and populations
             npop <- length(popLocations)
             initial <- popLocations[1] 
             end <- length(lines)
             rownumber <- end - initial - npop
             
             nombres.loci <- unlist(strsplit( lociNames, '[[:space:]]+'))
             colnumber <- length(nombres.loci)
             
             out <- matrix(0, ncol = colnumber, nrow = rownumber)
             estructuras <- rep(0, rownumber)
             nombres <- character()
             
             #parsing of data between pops into genepop
             counter <- 1
             
             for(i in 1:npop) {
               beginLine <- popLocations[i] + 1
               endLine <- 0
               
               if( i == npop) {  
                 endLine <- length(lines) - 1
                 
               } else {  
                 endLine <- popLocations[i+1] - 1
               }
               
               #parse individual line
               
               for(line in lines[beginLine:endLine]) {
                 
                 #split id & alleles apart
                 individuo <- unlist(strsplit(line, ","))
                 nombres[counter]<- individuo[1]
                 estructuras[counter] <- i
                 haplotipo <- individuo[2]
                 
                 #split alleles apart on whitespace
                 loci <- unlist(strsplit(haplotipo, '[[:space:]]+'))
                 loci <- loci[2:length(loci)]
                 
                 #saving non zero lines into out
                 
                 out[counter, ] <- loci
                 counter <- counter + 1 
               }
             }
             out <- data.frame(out)
             rownames(out) <- nombres
             colnames(out) <- nombres.loci
             estructuras <- data.frame(factor(estructuras))
             colnames(estructuras)[1] <- "pop"
             rownames(estructuras) <- nombres
             list("G" = out, "S" = estructuras)
             
             ecogen(G=out, S=estructuras)
             
           })


#' Conversion form ecogen to genind and genind to ecogen
#' 
#' @description These functions export from ecogen to genind and viceversa
#' @param from Object of class "ecogen" / "genind"
#' @rdname ecogen2genind
#' @examples
#' 
#' \dontrun{
#' 
#' data(eco.test)
#' 
#' # ecogen to genind
#' outGenind <- ecogen2genind(eco)
#' outGenind
#' 
#' # genind to ecogen
#' outEco <- genind2ecogen(outGenind)
#' 
#' }
#' 
#' @author Leandro Roser \email{learoser@@gmail.com}
#' @export



setGeneric("ecogen2genind", function(from) { 
  
  if(!require(adegenet)) stop("Please install the adegenet package first")
  
  to <- adegenet::genind()
  
  if(!any(dim(from@XY) == 0)) {
    to@other$xy <- from@XY
  }
  
  if(!any(dim(from@A) == 0)) {
    to@tab <- from@A
    to@loc.fac <- from@INT@loc.fac
    nomloc <- levels(from@INT@loc.fac)
    temp<- tapply(rep(1, length(from@INT@loc.fac)), from@INT@loc.fac, sum)
    #array to list ("as.list()" do not works with the array)
    temp <- as(temp, "numeric")
    names(temp) <- nomloc
    to@loc.n.all <- temp
    temp <-  tapply(from@INT@all.names, names(from@INT@all.names), function(x) return(unname(x)),simplify = FALSE)
    #reorder temp an convert to list
    temp <- temp[pmatch(nomloc, names(temp))]
    nomloc <- names(temp)
    temp <- as(temp, "list")
    names(temp) <- nomloc
    to@all.names <- temp
    to@ploidy <- rep(from@INT@ploidy, nrow(from@A))
    to@type <- ifelse(from@INT@type == "codominant", "codom", "PA")
  }
  #to@other
  #to@pop
  if(!any(dim(from@S) == 0)) {
    to@strata <- from@S
  }
  #to@hierarchy
  
  to
})

#' genind2ecogen
#' @rdname ecogen2genind
#' @export

setGeneric("genind2ecogen", function(from) { 
  
  if(!require(adegenet)) stop("Please install the adegenet package first")
  
  to <- adegenet::genind2df(from, usepop = FALSE)
  to[to == ""] <- NA
  to <- ecogen(G = to)
 
  if(!is.null(from@other$xy)) {
    if(nrow(from@other$xy) != nrow(to@G)) {
    message("Note: other$xy slot with a different number of rows 
    than the present in the genetic data table. Skipping this data\n")  
    } else {
    ecoslot.XY(to) <- from@other$xy
    }
  }
 
  if(!is.null(from@strata)) {
    if(nrow(from@strata) != nrow(to@G)) {
      message("Note: strata slot with a different number of rows 
      than the present in the genetic data table. Skipping this data\n")  
    } else {
    ecoslot.S(to) <- from@strata
    }
  }
  to
})



#' Conversion from ecogen to gstudio and gstudio to ecogen
#' 
#' @description These functions converts the genetic 
#' data of an ecogen object in a gstudio data frame or viceversa.
#' @param from Input object of class "ecogen" or "gstudio" (depending the direction of conversion)
#' @param type The type of data:  "codominant" (for codominant data); "dominant" for presence - absence data. 
#' 
#' @examples
#' 
#' \dontrun{
#' 
#' data(eco.test)
#' togstudio <- ecogen2gstudio(eco, type = "codominant")
#' togstudio
#' toeco <- gstudio2ecogen(togstudio, ID = "ID", lat = "Latitude", 
#' lon = "Longitude", struct = "pop")
#' toeco
#' # as ID, Latitude and Longitude are column names in the <togstudio> data frame 
#' # (that match default parameter values for gstudio2ecogen), 
#' # the latter is identical to this:
#' toeco <- gstudio2ecogen(togstudio, struct = "pop")
#' toeco
#' }
#' 
#' @author Leandro Roser \email{learoser@@gmail.com}
#' @export

setGeneric("ecogen2gstudio", 
           function(from, type = c("codominant", "dominant")) {
             
             require(gstudio)
             
             type <- match.arg(type)
             if(type == "codominant") {
               dat <- eco.convert(from@G, "matrix", sep.out = ":")
               dat <- as.data.frame(dat, stringsAsFactors = FALSE)
               for(i in 1:ncol(dat)) {
                 class(dat[, i]) <- "locus"
                 #out[[i]] <- gstudio::locus(dat[, i, drop = FALSE], type = "separated")
               }
               dat[is.na(dat)] <- locus(NA)
               colnames(dat) <- colnames(from@G)
               
               #dominant case
             } else {
               dat<-eco@G
               dat <- as.data.frame(dat, stringsAsFactors = FALSE)
               for(i in 1:ncol(dat)) {
                 class(dat[, i]) = "locus"
               }
               dat[is.na(dat)] <- locus(NA)
               colnames(dat) <- colnames(from@G)
             }
             
             #create output data frame
             if(all(dim(from@XY) != 0)) {
               to <- data.frame(ID = rownames(from@XY), Latitude = from@XY[,2],  Longitude = from@XY[,1])
             }
             if(all(dim(from@S) != 0)) {
               to <- data.frame(to, from@S)
             }
             to <- data.frame(to, dat)
             
             to
           })


#' Conversion from gstudio to ecogen
#' 
#' @param ID name of the column with ID (default "ID") 
#' @param lat name of the column with latitude (default "Latitude")
#' @param lon name of the column with longitude (default "Longitude") 
#' @param struct vector with name of the columns with structures (default NULL)
#' @rdname ecogen2gstudio
#' @export


setGeneric("gstudio2ecogen", function(from, ID = "ID", 
                                      lat = "Latitude", lon = "Longitude",
                                      struct = NULL) {
  
  myID <- myLat <- myLon <- NULL
  
  myID <- match(ID, colnames(from))
  if(is.na(myID)) {
    message(paste0("Note: <", ID,"> does not match the name of any column. Creating generic labels"))
    myID <- paste0("I.", seq_len(nrow(from)))
  } else {
    myID <- from[, myID]
  }

    myLat <- match(lat, colnames(from))
    if(is.na(myLat)) {
      message(paste0("Note: <", lat, "> does not match the name of any column"))
    }
  
    myLon <- match(lon, colnames(from))
    if(is.na(myLon)) {
      message(paste0("Note: <", lon, "> does not match the name of any column"))
    }

  if(!is.null(struct)) {
    myStruct <- match(struct, colnames(from))
    if(!is.na(myStruct) && length(myStruct) < length(struct)) {
      stop("some of all names of structures are not matching with the name of columns in 'from'")
    }
  } 
  
  if(is.na(myLat) && !is.na(myLon) || !is.na(myLat) && is.na(myLon)) {
    stop("Longitude requires Latitude data an viceversa, one of both is NA")
  }
  
  to<- ecogen()
  
  cuales <- sapply(from, function(x) class(x)[1])
  myloc <- from[, cuales == "locus"]
  myloc <- as.matrix(myloc)
  rownames(myloc) <- myID
  myloc[myloc == ""] <- NA
  #G <- eco.convert(myloc, sep.in = ":", sep.out = "")
  ecoslot.G(to, sep =":") <- myloc
  
  if(!is.na(myLat) && !is.na(myLon)) {
    XY <- data.frame(Longitude = from[, myLon], Latitude = from[, myLat])
    rownames(XY) <- myID
    ecoslot.XY(to) <- XY
  }
  
  if(!is.null(struct)) {
    S <- data.frame(from[, myStruct])
    rownames(S) <- myID
    ecoslot.S(to) <- S
  }
  
  to@ATTR$names <- myID
  validObject(to)
  to
  
})



#' Converting an ecogen genetic data frame into a hierfstat data frame
#' 
#' @description This function converts the genetic 
#' data of an ecogen object into a hierfstat data frame. 
#' @param eco Object of class "ecogen".
#' @param pop The name of the S slot column with the groups 
#' for the hierfstat data frame.
#' @param to_numeric Recode the genetic data into numeric format? If TRUE, 
#' the functions performs the correction via \code{\link{eco.format}}.
#' Additional formatting parameters can be passed to this function.
#' @param nout Number of digits in the output when to_numeric = TRUE.
#' @param recode Recode mode when to_numeric = TRUE: "all" for recoding
#' the data considering all the individuals values at once (e.g., protein data), 
#' "column" for recoding the values by column (e.g., microsatellite data), "paired" 
#' for passing the values of allelic states and corresponding replacement values, using 
#' the replace_in and replace_out arguments (e.g. replace_in = c("A", "T", "C", "G"),
#' replace_out = c(1,2,3,4)).
#' @param replace_in vector with states of the data matrix to be replaced, when recode = "paired".
#' This argument must be used in conjunction with the argument "replace_out".
#' @param replace_out vector with states of the data matrix used for replacement, when recode = "paired".
#' This argument must be used in conjunction with the argument "replace_in".
#' @param ... Additional parameters passed to \code{\link{eco.format}} when to_numeric = TRUE
#' 
#' @examples
#' 
#' \dontrun{
#' 
#' data(eco.test)
#' hiereco <- ecogen2hierfstat(eco, "pop", to_numeric = TRUE)
#' require("hierfstat")
#' basic.stats(hiereco)
#' 
#' }
#' 
#' @author Leandro Roser \email{learoser@@gmail.com}
#' @export


setGeneric("ecogen2hierfstat", 
           function(eco, pop = NULL, to_numeric = FALSE, nout = 3, 
                    recode = c("all", "column", "paired"),
                    replace_in = NULL,
                    replace_out =NULL,
                    ...) {
             
             recode <- match.arg(recode)
             
             u <- eco@G
             
             # check that the data is in numeric format, using the first <= 20 columns
            
             u <- int.check.to_numeric(u, to_numeric = to_numeric, 
                                       nout = nout, recode = recode, 
                                       replace_in = replace_in, replace_out = replace_out,
                                       ploidy = eco@INT@ploidy,
                                       ncod = eco@INT@ncod, ...)
             
             groups <- eco@S
             
             if(is.null(pop))
             {
               factord <- as.data.frame(rep(1, nrow(u)))
               cnom <- "pop"
               rnom <- rownames(eco@G)
               Gord <- u
             } else {
               
               pop <- match(pop, colnames(groups), nomatch = 0)
               pop <- pop[pop != 0]
               if(length(pop) == 0) {
                 stop("incorrect factor name")
               }
               orden <- order(groups[, pop])
               Gord <- u[orden,]
               factord <- groups[orden, pop]
               factord <- as.numeric(factord)
               cnom <- colnames(groups[pop])
               rnom <- rownames(eco@G)[orden]
             }
             
             datahier <- data.frame(factord, Gord)
             colnames(datahier)[1] <- cnom
             rownames(datahier) <- rnom
             datahier
             
             #class control
             clases <- character()
             j <- 1
             for(i in 2:ncol(datahier)) {
               clases[j] <- class(datahier[, i])[1]
               j <- j + 1
             }
             if(any(clases != "numeric" | clases != "integer")) {
               datahier <- as.matrix(datahier)
               colhier <- ncol(datahier)
               rowhier <- nrow(datahier)
               datahier <- matrix(as.numeric(datahier), ncol = colhier, nrow= rowhier)
               datahier <- as.data.frame(datahier)
               datahier[, 1] <- as.factor(datahier[, 1])
             }
             
             rownames(datahier) <- rownames(eco@G)
             colnames(datahier)[1] <- "Pop"
             colnames(datahier)[-1] <- colnames(eco@G)
             
             datahier
             
           })


#' Exporting an ecogen genetic data frame into SPAGeDi format
#' 
#' @description This function converts the genetic data of an ecogen object 
#' in a SPAGeDi input file.  
#' When distance classes are required, they can be constructed by combining 
#' the parameters "int", "smin", "smax", "nclass", "seqvec" and "size", as
#' described in the function \code{\link{eco.lagweight}}.
#' A distance matrix can also be included using the "distmat" parameter.
#' Missing data must be coded as a single "NA" in the G data frame. 
#' @param eco Object of class "ecogen".
#' @param dir output path. Default = "" (current directory).
#' @param pop The name of the S slot column with the groups 
#' for the output data. The default option includes all the individuals into 
#' a single group.
#' @param ndig Number of digits coding each allele in the output file
#'  (e.g., 1: x, 2: xx, or 3: xxx). If NULL, the vale will be deduced from
#'  the number of digits used for coding alleles in the ecogen object.
#' @param outName The name of the output file.
#' @param int Distance interval in the units of the XY slot data.
#' @param smin Minimum class distance in the units of the XY slot data.
#' @param smax Maximum class distance in the units of the XY slot data.
#' @param nclass Number of classes.
#' @param seqvec Vector with breaks in the units of the XY slot data.
#' @param size Number of individuals per class.
#' @param bin Rule for constructing intervals when a partition parameter (int, 
#' nclass or size) is not given. Default is Sturge's rule (Sturges, 1926). Other
#' option is Freedman-Diaconis method (Freedman and Diaconis, 1981).
#' @param distmat Distance matrix to include (optional).
#' @param latlon Are the coordinates in decimal degrees format? Defalut FALSE. If TRUE,
#' the coordinates must be in a matrix/data frame with the longitude in the first
#' column and latitude in the second. The position is projected onto a plane in
#' meters with the function \code{\link[SoDA]{geoXY}}.
#' @param to_numeric Recode the genetic data into numeric format? If TRUE, 
#' the functions performs the correction via \code{\link{eco.format}}.
#' Additional formatting parameters can be passed to this function.
#' @param nout Number of digits in the output when to_numeric = TRUE.
#' @param recode Recode mode when to_numeric = TRUE: "all" for recoding
#' the data considering all the individuals values at once (e.g., protein data), 
#' "column" for recoding the values by column (e.g., microsatellite data), "paired" 
#' for passing the values of allelic states and corresponding replacement values, using 
#' the replace_in and replace_out arguments (e.g. replace_in = c("A", "T", "C", "G"),
#' replace_out = c(1,2,3,4)).
#' @param replace_in vector with states of the data matrix to be replaced, when recode = "paired".
#' This argument must be used in conjunction with the argument "replace_out".
#' @param replace_out vector with states of the data matrix used for replacement, when recode = "paired".
#' This argument must be used in conjunction with the argument "replace_in".
#' @param ... Additional parameters passed to \code{\link{eco.format} when to_numeric = TRUE}
#' @examples
#' 
#' \dontrun{
#' 
#' data(eco.test)
#' ecogen2spagedi(eco, dir = "", pop = "pop", ndig = 1,int=2, smax=6, outName="infile.spagedi.txt")
#' 
#' }
#' 
#' @references 
#' 
#' Freedman D., and P. Diaconis. 1981. On the histogram as a density estimator: 
#' L 2 theory. Probability theory and related fields, 57: 453-476.
#' 
#' Hardy O. and X Vekemans. 2002. SPAGeDi: a versatile computer program 
#' to analyse spatial genetic structure at the individual or population levels. 
#' Molecular ecology notes, 2: 18-620.
#' 
#' Sturges  H. 1926. The choice of a class interval. Journal of the American 
#' Statistical Association, 21: 65-66.
#' 
#' @author Leandro Roser \email{learoser@@gmail.com}
#' @export


setGeneric("ecogen2spagedi", 
                        function(eco, 
                         pop = NULL, 
                         ndig = NULL,
                         dir = "",
                         outName = "infile.spagedi.txt", 
                         smin = 0,
                         smax= NULL,
                         int = NULL, 
                         nclass = NULL,
                         seqvec = NULL,
                         size = NULL,
                         bin = c("sturges", "FD"),
                         distmat = NULL,
                         latlon = FALSE,
                         to_numeric = FALSE,
                         nout = 3, 
                         recode = c("all", "column", "paired"),
                         replace_in = NULL,
                         replace_out =NULL,
                         ...) {
                          
 if(dir != "") {
 #add "/" to the end if path is "xxx/xxx"
 if(!grep("/$", dir)) {
 dir <- paste0(dir, "/")
  }
}
  
                          
  recode <- match.arg(recode)                        
  bin <- match.arg(bin)
  if(is.null(pop)) {
    pop <- rep(1, nrow(eco@XY))
  } else {
    pop <- as.numeric(eco@S[, which(colnames(eco@S)== pop)])
  }
  
  if(is.null(ndig)) {
  ndig <-eco@INT@ncod
  }
  
  if(sum(pop == 0)) {
    stop("non matching S column name")
  }
  
  # check that the data is in numeric format, using the first <= 20 columns
  
  gmat <- int.check.to_numeric(eco@G, to_numeric = to_numeric, 
                            nout = ndig, recode = recode, ploidy = eco@INT@ploidy,
                            ncod = eco@INT@ncod, ...)
  gmat <- as.matrix(gmat)
  
  ploidy <- eco@INT@ploidy
  gmat[gmat == "NA" | is.na(gmat)] <- paste(rep("0", ploidy), collapse="")
  
  matriz <- data.frame(rownames(eco@G), pop, eco@XY, gmat)
  matriz <- as.matrix(matriz)
  colnames(matriz) <- c("Individual", "Population",
                        colnames(eco@XY), colnames(eco@G))
  
  arriba <- rep("", ncol(matriz))
  arriba[1] <- nrow(matriz)
  arriba[2] <- max(pop)
  arriba[3] <- ncol(eco@XY)
  arriba[4] <- ncol(eco@G)
  arriba[5] <- ndig
  arriba[6] <- eco@INT@ploidy
  
  if(!is.null(smax) | !is.null(seqvec)) {
    
    xy <- eco@XY[,1:2]
    if(latlon) {
      dist(SoDA::geoXY(xy[,2], xy[,1], unit=1))
    }
    
    input <- int.break(XY = xy, 
                       int = int, 
                       smin = smin,
                       smax = smax,
                       size = size,
                       nclass = nclass,
                       seqvec = seqvec,
                       latlon = latlon,
                       bin = bin)
    
    breakpoints <- input$breakpoints
    
  } else  {
    breakpoints <- NULL
  }
  
  final <- rep("", ncol(matriz))
  final[1] <- "END"
  
  # start to write file here
  out <- ifelse(dir == "", paste0(getwd(), "/", outName), paste0(dir, outName))
  sink(out)
  cat(arriba, sep = "\t")
  cat("\n")
  if(!is.null(breakpoints)) {
    cat(length(breakpoints), breakpoints, sep = "\t")
    cat("\n")
  } else {
    cat(0)
  }
  cat("\n")
  cat(colnames(matriz), sep = "\t")
  cat("\n")
  write.table(matriz, sep = "\t", quote = FALSE, row.names = FALSE, 
              col.names = FALSE)
  cat(final, sep = "\t")
  
  if(!is.null(distmat)) {
    if(class(distmat)[1] == "dist") {
      distmat <- as.matrix(distmat)
    } 
    if(class(distmat)[1] != "matrix" & class(distmat)[1] != "data.frame") {
      stop("invalid distance matrix format (It should be of class: dist, matrix or data.frame")
    }
    distnames <- rownames(distmat)
    distmat<-data.frame(distnames, distmat)
    colnames(distmat)[1]<-paste("M", nrow(eco@XY), sep="")
    cat("\n")
    write.table(distmat, sep = "\t", quote = FALSE, row.names = FALSE, 
                col.names = TRUE)
    cat("END")
  }
  
  sink()
  
  # output message
  print(paste0("File written to: ", out))
  
})


#' Importing a SPAGeDi file, via conversion to ecogen
#' 
#' @description This function converts a SPAGeDi file into a ecogen object

#' @param infile Path to the SPAGeDi file.
#' @param sep Character separating alleles (codominant data). 
#' Default option is no character separating alleles.
#' @param missCode characters to represent missing genotypes in codominant markers. 
#' If NULL, is computed as "0" times the numer of characters coding alleles.
#' @param type Marker type: "codominant" or "dominant".
#' @param ... additional arguments passed to \code{ecogen}

#' @examples
#' 
#' \dontrun{
#' 
#' data(eco.test)
#' ecogen2spagedi(eco, dir = "", pop = "pop", ndig = 1,int=2, smax=6, outName="infile.spagedi.txt")
#' spagedi2ecogen("infile.spagedi.txt", sep = "")
#' }
#' 
#' 
#' @author Leandro Roser \email{learoser@@gmail.com}
#' @export



setGeneric("spagedi2ecogen", 
           function(infile, sep = "", missCode = NULL, 
                    type = c("codominant", "dominant"), 
                    ...) {
  
  type <- match.arg(type)
  # get lines
  Lines <- readLines(infile)
  Lines <- Lines[Lines != ""]
  # remove comments (comments start with //)
  com <-sapply(Lines, substring, first=1, last=2)
  Lines<-Lines[com != "//"]
  
  # get data from the first line
  fileheader <- as.integer(strsplit(Lines[1], "\t")[[1]])
  
  # number of individuals
  nind<-fileheader[1] 
  # Number of categories
  ncat<-fileheader[2] 
  # Number of spatial coordinates
  nXY<- fileheader[3]
  # Number of digits to represent alleles
  ndig<-fileheader[5] 
  
  # Read the rest of the file as a table and converto to ecogen
  myLines <- Lines[3:(3+nind)]
  myLines <- strsplit(myLines, "\t")
  myLines <- do.call("rbind", myLines)
  colnames(myLines) <- myLines[1, ]
  myLines <- myLines[-1, ]
  myLines <- gsub(" ", "", myLines)
  rownames(myLines) <- myLines[, 1]
  myLines <- myLines[, -1, drop = FALSE]
  
  if(ncat != 0) {
    S <-myLines[, 1, drop = FALSE]
    G <- myLines[, -1, drop = FALSE]
  } else  {
    S <- data.frame()
  }
  
  if(nXY > 0) {
    XY <- G[, seq_len(nXY), drop = FALSE]
    G <- G[, -seq_len(nXY), drop = FALSE]
  } else {
    XY <- data.frame()
  }
  
  
  # if is null missing, missing is 0 times the number of characters coding alleles
  if(type == "codominant") {
    if(is.null(missCode)) {
      miss <- paste(rep(0, ndig), collapse = "")
    }
    
    # remove missing individuals
    G[as.numeric(G) == 0] <- miss
    
    # replace incomplete individuals by missing data
    G <- gsub(paste0("(^", miss, ".*)|(.*", miss, "$)"), miss, G)
  }
  
  ecogen(XY = XY, G = G, S = S, missing = miss, type = type, ...)
  
})
Any scripts or data that you put into this service are public.
EcoGenetics documentation built on July 8, 2020, 5:46 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
EcoGenetics
Management and Exploratory Analysis of Spatial Data in Landscape Genetics

R/ecogen.6OF6.converters.R
In EcoGenetics: Management and Exploratory Analysis of Spatial Data in Landscape Genetics

Try the EcoGenetics package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

EcoGenetics Management and Exploratory Analysis of Spatial Data in Landscape Genetics

R/ecogen.6OF6.converters.R In EcoGenetics: Management and Exploratory Analysis of Spatial Data in Landscape Genetics

Try the EcoGenetics package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

EcoGenetics
Management and Exploratory Analysis of Spatial Data in Landscape Genetics

R/ecogen.6OF6.converters.R
In EcoGenetics: Management and Exploratory Analysis of Spatial Data in Landscape Genetics