R/DeltaDcorplot.R

Defines functions DeltaDcorplot

Documented in DeltaDcorplot

##### Plot corplot for pairwise differentiation.1 (Delta_D.1)

DeltaDcorplot = function(x, ncode) {
    #diveRsity::readGenepop

  arp2gen=function (infile)
  {
    flForm <- strsplit(infile, split = "\\.")[[1]]
    if (substr(infile, 1, 2) == "./") {
      flForm <- flForm[-1]
    }
    else if (substr(infile, 1, 3) == "../") {
      flForm <- flForm[-(1:2)]
    }
    if (length(flForm) > 3) {
      stop("There were multiple '.' characters in your file name!")
    }
    tstfile <- paste(flForm[1], ".gen", sep = "")
    if (!file.exists(tstfile)) {
      fastScan <- function(fname) {
        s <- file.info(fname)$size
        buf <- readChar(fname, s, useBytes = TRUE)
        if (length(grep("\r", buf)) != 0L) {
          buf <- gsub("\r", "\n", buf)
          buf <- gsub("\n\n", "\n", buf)
        }
        return(strsplit(buf, "\n", fixed = TRUE, useBytes = TRUE)[[1]])
      }
      dat <- fastScan(infile)
      dat <- gsub("^\\s+|\\s+$", "", dat)
      dataType <- grep("*datatype=*", tolower(dat))
      if (strsplit(dat[dataType], "=")[[1]][2] != "MICROSAT") {
        stop("Data are not in 'MICROSAT' format!")
      }
      missDataLine <- grep("*missingdata=*", tolower(dat))
      missData <- noquote(substr(dat[missDataLine], nchar(dat[missDataLine]) -
                                   1, nchar(dat[missDataLine]) - 1))
      sampSizeLine <- grep("*samplesize=*", tolower(dat))
      if (length(sampSizeLine) > 1) {
        sampNpos <- sapply(sampSizeLine, function(i) {
          return(regexpr("=", dat[i])[1])
        })
      }
      popSizes <- as.numeric(substr(dat[sampSizeLine], start = sampNpos +
                                      1, stop = nchar(dat[sampSizeLine])))
      npops <- length(popSizes)
      sampStrt <- grep("*sampledata=*", tolower(dat))
      strts <- sapply(sampStrt, function(x) {
        if (dat[(x + 1)] == "") {
          return(x + 2)
        }
        else {
          return(x + 1)
        }
      })
      ends <- strts + ((popSizes * 2) - 1)
      nloci <- length(strsplit(dat[strts[1]], split = "\\s+")[[1]]) -
        2
      popGeno <- lapply(seq_along(strts), function(i) {
        return(dat[strts[i]:ends[i]])
      })
      popSzcheck <- sapply(popGeno, function(x) length(x)/2)
      if (!all(identical(popSzcheck, popSizes))) {
        stop("Failed! Please make sure that your file is formatted correctly.")
      }
      popIdx <- lapply(popGeno, function(x) {
        return(seq(1, length(x), 2))
      })
      popList <- lapply(seq_along(popGeno), function(i) {
        al1 <- matrix(unlist(strsplit(popGeno[[i]][popIdx[[i]]],
                                      split = "\\s+")), nrow = popSizes[i], byrow = TRUE)[,
                                                                                          -(1:2)]
        al2 <- matrix(unlist(strsplit(popGeno[[i]][(popIdx[[i]] +
                                                      1)], split = "\\s+")), nrow = popSizes[i], byrow = TRUE)
        tst <- matrix(paste(al1, al2, sep = ""), nrow = popSizes[i])
        tst <- cbind(paste(rep("pop", nrow(tst)), i, " ,",
                           sep = ""), tst)
        rm(al1, al2)
        z <- gc()
        rm(z)
        if (nchar(tst[1, 2]) == 4) {
          tst[tst == paste(missData, missData, sep = "")] <- "0000"
        }
        else {
          tst[tst == paste(missData, missData, sep = "")] <- "000000"
        }
        out <- apply(tst, 1, function(x) {
          return(paste(x, collapse = "\t"))
        })
        out <- c("POP", out)
        rm(tst)
        z <- gc()
        rm(z)
        return(out)
      })
      outfile <- strsplit(infile, "\\.")[[1]]
      if (length(outfile) >= 2) {
        outfile <- paste(outfile[-length(outfile)], collapse = ".")
      }
      else {
        outfile <- outfile[1]
      }
      loci <- paste("locus", 1:nloci, sep = "")
      loci <- c(paste(outfile, "_gen_converted", sep = ""),
                loci)
      of <- c(loci, unlist(popList))
      out <- file(paste(outfile, ".gen", sep = ""), "w")
      for (i in 1:length(of)) {
        cat(of[i], "\n", file = out, sep = "")
      }
      close(out)
      return(TRUE)
    }
    else {
      return(NULL)
    }
  }
  fileReader=function (infile)
  {
    if (typeof(infile) == "list") {
      return(infile)
    }
    else if (typeof(infile) == "character") {
      flForm <- strsplit(infile, split = "\\.")[[1]]
      ext <- flForm[[length(flForm)]]
      if (ext == "arp") {
        convRes <- arp2gen(infile)
        if (!is.null(convRes)) {
          cat("Arlequin file converted to genepop format! \n")
          infile <- paste(flForm[1], ".gen", sep = "")
        }
        else {
          infile <- paste(flForm[1], ".gen", sep = "")
        }
      }
      dat <- scan(infile, sep = "\n", what = "character", quiet = TRUE)
      if (length(strsplit(dat[4], split = "\\s+")[[1]][-1]) >
          1) {
        locs <- strsplit(dat[2], split = "\\s+")[[1]]
        if (length(locs != 1)) {
          locs <- strsplit(dat[2], split = ",")[[1]]
        }
        locs <- as.character(sapply(locs, function(x) {
          x <- strsplit(x, split = "")[[1]]
          if (is.element(",", x)) {
            x <- x[-(which(x == ","))]
          }
          return(paste(x, collapse = ""))
        }))
        dat <- c(dat[1], locs, dat[-(1:2)])
      }
      popLoc <- grep("^([[:space:]]*)pop([[:space:]]*)$", tolower(dat))
      no_col <- popLoc[1] - 1
      if (popLoc[1] == 3) {
        locs <- unlist(strsplit(dat[2], split = c("\\,",
                                                  "\\s+")))
        dat <- c(dat[1], locs, dat[3:length(dat)])
      }
      popLoc <- grep("^([[:space:]]*)pop([[:space:]]*)$", tolower(dat))
      no_col <- popLoc[1] - 1
      dat1 <- sapply(dat, function(x) {
        x <- unlist(strsplit(x, split = "\\s+"))
        if (is.element("", x)) {
          x <- x[-(which(x == ""))]
        }
        if (is.element(",", x)) {
          x <- x[-(which(x == ","))]
        }
        if (length(x) != 1 && length(x) != no_col) {
          x <- paste(x, collapse = "")
        }
        if (length(x) < no_col) {
          tabs <- paste(rep(NA, (no_col - length(x))),
                        sep = "\t", collapse = "\t")
          line <- paste(x, tabs, sep = "\t")
          line <- unlist(strsplit(line, split = "\t"))
          return(line)
        }
        else {
          return(x)
        }
      })
    }
    out <- as.data.frame(t(dat1))
    rownames(out) <- NULL
    return(out)
  }
  readGenepop=function (infile = NULL, gp = 3, bootstrap = FALSE)
  {
    gp = gp
    infile = infile
    bootstrap = bootstrap
    data1 <- fileReader(infile)
    if (gp == 3) {
      data1[data1 == 0] <- NA
      data1[data1 == "999999"] <- NA
      data1[data1 == "000000"] <- NA
      data1[data1 == "NANA"] <- NA
    }
    else if (gp == 2) {
      data1[data1 == 0] <- NA
      data1[data1 == "9999"] <- NA
      data1[data1 == "0000"] <- NA
      data1[data1 == "NA"] <- NA
    }
    raw_data <- data1
    npops <- length(which(toupper(data1[, 1]) == "POP"))
    pop_pos <- c(which(toupper(data1[, 1]) == "POP"), (nrow(data1) +
                                                         1))
    pop_sizes <- vector()
    for (i in 1:npops) {
      pop_sizes[i] <- pop_pos[(i + 1)] - pop_pos[i] - 1
    }
    pop_names <- as.character(data1[(pop_pos[1:npops] + 1), 1])
    pop_weights <- 1/pop_sizes
    n_harmonic <- npops/sum(pop_weights)
    N <- pop_sizes
    nloci <- (pop_pos[1] - 2)
    if (nloci != (ncol(raw_data) - 1)) {
      stop("Check your input file for formatting errors!")
    }
    loci_names <- as.vector(data1[2:(pop_pos[1] - 1), 1])
    pop_list <- list()
    for (i in 1:npops) {
      pop_list[[i]] <- as.matrix(data1[(pop_pos[i] + 1):(pop_pos[(i +
                                                                    1)] - 1), 2:(nloci + 1)])
    }
    extCheck <- sapply(1:length(pop_list), function(i) {
      sum(is.na(pop_list[[i]])) == nloci * pop_sizes[i]
    })
    if (sum(extCheck) > 0) {
      npops <- npops - sum(extCheck)
      pop_list <- pop_list[-(which(extCheck == TRUE))]
      pop_sizes <- pop_sizes[-(which(extCheck == TRUE))]
      pop_names <- pop_names[-(which(extCheck == TRUE))]
      pop_weights <- pop_weights[-(which(extCheck == TRUE))]
      N <- N[-(which(extCheck == TRUE))]
      noPop <- which(extCheck == TRUE)
      indexer <- lapply(noPop, function(i) {
        (pop_pos[i] + 1):(pop_pos[(i + 1)])
      })
      indexer <- unlist(indexer)
      raw_data <- raw_data[-(indexer), ]
    }
    if (gp == 3) {
      plMake <- function(x) {
        out <- matrix(sprintf("%06g", as.numeric(x)), nrow = nrow(x),
                      ncol = ncol(x))
        if (Sys.info()["sysname"] == "Darwin") {
          out[out == "0000NA"] <- "    NA"
        }
        return(out)
      }
    }
    else if (gp == 2) {
      plMake <- function(x) {
        out <- matrix(sprintf("%04g", as.numeric(x)), nrow = nrow(x),
                      ncol = ncol(x))
        if (Sys.info()["sysname"] == "Darwin") {
          out[out == "00NA"] <- "  NA"
        }
        return(out)
      }
    }
    suppressWarnings(pop_list <- lapply(pop_list, plMake))
    if (gp == 3) {
      for (i in 1:npops) {
        pop_list[[i]][pop_list[[i]] == "    NA"] <- NA
      }
    }
    else if (gp == 2) {
      for (i in 1:npops) {
        pop_list[[i]][pop_list[[i]] == "  NA"] <- NA
      }
    }
    if (bootstrap == T) {
      bs <- function(x) {
        return(matrix(x[sample(nrow(x), replace = TRUE),
                        ], ncol = ncol(x)))
      }
      pop_list <- lapply(pop_list, bs)
    }
    lps <- function(x) {
      lsp_count <- as.vector(colSums(!is.na(x)))
      return(lsp_count)
    }
    pre_loci_pop_sizes <- lapply(pop_list, lps)
    pls <- matrix(ncol = nloci, nrow = npops)
    for (i in 1:length(pre_loci_pop_sizes)) {
      pls[i, ] <- pre_loci_pop_sizes[[i]]
    }
    loci_pop_sizes <- split(pls, col(pls))
    pre_loc_weights <- 1/pls
    loci_pop_weights1 <- split(pre_loc_weights, col(pre_loc_weights))
    loci_harm_N <- npops/colSums(pre_loc_weights)
    if (gp == 3) {
      pl_ss <- function(x) {
        pl <- list()
        pl[[1]] <- matrix(substr(x, 1, 3), ncol = nloci)
        pl[[2]] <- matrix(substr(x, 4, 6), ncol = nloci)
        return(pl)
      }
    }
    else {
      pl_ss <- function(x) {
        pl <- list()
        pl[[1]] <- matrix(substr(x, 1, 2), ncol = nloci)
        pl[[2]] <- matrix(substr(x, 3, 4), ncol = nloci)
        return(pl)
      }
    }
    pop_alleles <- lapply(pop_list, pl_ss)
    alln <- function(x) {
      res <- list()
      for (i in 1:ncol(x[[1]])) {
        res[i] <- list(sort(unique(c(x[[1]][, i], x[[2]][,
                                                         i])), decreasing = F))
      }
      return(res)
    }
    allele_names <- lapply(pop_alleles, alln)
    loci_combi <- allele_names[[1]]
    for (j in 1:nloci) {
      for (i in 2:npops) {
        loci_combi[[j]] <- c(loci_combi[[j]], allele_names[[i]][[j]])
      }
    }
    aaList <- function(x) {
      return(sort(unique(x, decreasing = FALSE)))
    }
    all_alleles <- lapply(loci_combi, aaList)
    aa <- all_alleles
    aa <- lapply(aa, FUN = list, npops)
    afMatrix <- function(x) {
      np <- x[[2]]
      z <- matrix(rep(0, (np * length(x[[1]]))), ncol = np,
                  nrow = length(x[[1]]))
      rownames(z) <- x[[1]]
      return(z)
    }
    allele_freq <- lapply(aa, afMatrix)
    parbind <- function(x) {
      rbind(x[[1]], x[[2]])
    }
    pa1 <- lapply(pop_alleles, parbind)
    afTab <- function(x) {
      lapply(1:ncol(x), function(i) {
        return(table(x[, i]))
      })
    }
    actab <- lapply(pa1, afTab)
    afs <- function(x) {
      afsint <- function(y) {
        length(na.omit(y))/2
      }
      apply(x, 2, afsint)
    }
    indtyppop <- lapply(pa1, afs)
    afCalcpop <- lapply(1:length(actab), function(x) {
      lapply(1:length(actab[[x]]), function(y) {
        actab[[x]][[y]]/(indtyppop[[x]][y] * 2)
      })
    })
    obs_count <- allele_freq
    for (i in 1:npops) {
      for (j in 1:nloci) {
        allele_freq[[j]][names(afCalcpop[[i]][[j]]), i] <- afCalcpop[[i]][[j]]
        obs_count[[j]][names(actab[[i]][[j]]), i] <- actab[[i]][[j]]
      }
    }
    indtyp <- list()
    for (i in 1:nloci) {
      indtyp[[i]] <- vector()
    }
    for (i in 1:npops) {
      for (j in 1:nloci) {
        indtyp[[j]][i] <- indtyppop[[i]][j]
      }
    }
    if (bootstrap == T) {
      ind_vectors <- list()
      for (i in 1:npops) {
        ind_vectors[[i]] <- noquote(paste(rep(i, pop_sizes[i]),
                                          ",", sep = ""))
      }
      pre_data <- matrix(rep("\t", ((nloci + 1) * (nloci +
                                                     1))), ncol = (nloci + 1))
      pre_data[1, ] <- c("Title", rep("\t", nloci))
      for (i in 2:(nloci + 1)) {
        pre_data[i, 1] <- loci_names[(i - 1)]
      }
      pop_data <- list()
      for (i in 1:npops) {
        pop_data[[i]] <- matrix(rbind(c("POP", as.vector(rep("\t",
                                                             nloci))), cbind(ind_vectors[[i]], pop_list[[i]])),
                                ncol = (nloci + 1))
      }
      bs_data_file <- matrix(rbind(pre_data, pop_data[[1]]),
                             ncol = (nloci + 1))
      for (i in 2:npops) {
        bs_data_file <- matrix(rbind(bs_data_file, pop_data[[i]]),
                               ncol = (nloci + 1))
      }
      bs_data_file <- data.frame(bs_data_file)
    }
    nalleles <- vector()
    for (i in 1:nloci) {
      nalleles[i] <- nrow(allele_freq[[i]])
    }
    if (bootstrap == T) {
      list(npops = npops, nloci = nloci, pop_alleles = pop_alleles,
           pop_list = pop_list, loci_names = loci_names, pop_pos = pop_pos,
           pop_sizes = pop_sizes, allele_names = allele_names,
           all_alleles = all_alleles, allele_freq = allele_freq,
           raw_data = raw_data, loci_harm_N = loci_harm_N, n_harmonic = n_harmonic,
           pop_names = pop_names, indtyp = indtyp, nalleles = nalleles,
           bs_file = bs_data_file, obs_allele_num = obs_count)
    }
    else if (bootstrap == F) {
      list(npops = npops, nloci = nloci, pop_alleles = pop_alleles,
           pop_list = pop_list, loci_names = loci_names, pop_pos = pop_pos,
           pop_sizes = pop_sizes, allele_names = allele_names,
           all_alleles = all_alleles, allele_freq = allele_freq,
           raw_data = raw_data, loci_harm_N = loci_harm_N, n_harmonic = n_harmonic,
           pop_names = pop_names, indtyp = indtyp, nalleles = nalleles,
           obs_allele_num = obs_count)
    }
  }
    gp = ncode
    fr = readGenepop(x, gp, bootstrap = FALSE)
    af = fr$allele_freq
    DeltaD = function(abun, struc) {
        ## Chao et al, 2017
        n = sum(abun)
        N = ncol(abun)
        ga = rowSums(abun)
        gp = ga[ga > 0]/n
        G = sum(-gp * log(gp))
        H = nrow(struc)
        A = numeric(H - 1)
        W = numeric(H - 1)
        Diff = numeric(H - 1)
        wi = colSums(abun)/n
        W[H - 1] = -sum(wi[wi > 0] * log(wi[wi > 0]))
        pi = sapply(1:N, function(k) abun[, k]/sum(abun[, k]))
        Ai = sapply(1:N, function(k) -sum(pi[, k][pi[, k] > 0] * log(pi[, k][pi[, k] > 0])))
        A[H - 1] = sum(wi * Ai)
        if (H > 2) {
            for (i in 2:(H - 1)) {
                I = unique(struc[i, ])
                NN = length(I)
                ai = matrix(0, ncol = NN, nrow = nrow(abun))
                c
                for (j in 1:NN) {
                  II = which(struc[i, ] == I[j])
                  if (length(II) == 1) {
                    ai[, j] = abun[, II]
                  } else {
                    ai[, j] = rowSums(abun[, II])
                  }
                }
                pi = sapply(1:NN, function(k) ai[, k]/sum(ai[, k]))
                wi = colSums(ai)/sum(ai)
                W[i - 1] = -sum(wi * log(wi))
                Ai = sapply(1:NN, function(k) -sum(pi[, k][pi[, k] > 0] * log(pi[, k][pi[, k] > 0])))
                A[i - 1] = sum(wi * Ai)
            }
        }
        Diff[1] = (G - A[1])/W[1]
        if (H > 2) {
            for (i in 2:(H - 1)) {
                Diff[i] = (A[i - 1] - A[i])/(W[i] - W[i - 1])
            }
        }
        Diff = Diff
        out = matrix(c(Diff), ncol = 1)
        return(out)
    }

    v1 = c("ecosystem", "region1", "pop1")
    v2 = c("ecosystem", "region1", "pop2")
    str = data.frame(v1, v2)
    str = as.matrix(str)
    npops = fr$npops
    nloci = fr$nloci
    Dmat = list()
    for (l in 1:nloci) {
        Dmat[[l]] = matrix(data = 0, nrow = npops, ncol = npops)
        for (i in 1:npops) {
            for (j in 1:npops) {
                ## for every loci has one differentiation value
                Dmat[[l]][i, j] = DeltaD((af[[l]][, c(i, j)]), str)[2]  ### select two pops from allelefrequency
            }
        }
    }
    pairwiseDav = Reduce("+", Dmat)/length(Dmat)
    colnames(pairwiseDav) = fr$pop_names
    rownames(pairwiseDav) = fr$pop_names
    GGally::ggcorr
    p = ggcorr(pairwiseDav, label = TRUE)
    return(p)
}
xinghuq/HierDpart documentation built on March 21, 2023, 6:43 p.m.