##### Plot corplot for pairwise differentiation.1 (Delta_D.1)
DeltaDcorplot = function(x, ncode) {
#diveRsity::readGenepop
arp2gen=function (infile)
{
flForm <- strsplit(infile, split = "\\.")[[1]]
if (substr(infile, 1, 2) == "./") {
flForm <- flForm[-1]
}
else if (substr(infile, 1, 3) == "../") {
flForm <- flForm[-(1:2)]
}
if (length(flForm) > 3) {
stop("There were multiple '.' characters in your file name!")
}
tstfile <- paste(flForm[1], ".gen", sep = "")
if (!file.exists(tstfile)) {
fastScan <- function(fname) {
s <- file.info(fname)$size
buf <- readChar(fname, s, useBytes = TRUE)
if (length(grep("\r", buf)) != 0L) {
buf <- gsub("\r", "\n", buf)
buf <- gsub("\n\n", "\n", buf)
}
return(strsplit(buf, "\n", fixed = TRUE, useBytes = TRUE)[[1]])
}
dat <- fastScan(infile)
dat <- gsub("^\\s+|\\s+$", "", dat)
dataType <- grep("*datatype=*", tolower(dat))
if (strsplit(dat[dataType], "=")[[1]][2] != "MICROSAT") {
stop("Data are not in 'MICROSAT' format!")
}
missDataLine <- grep("*missingdata=*", tolower(dat))
missData <- noquote(substr(dat[missDataLine], nchar(dat[missDataLine]) -
1, nchar(dat[missDataLine]) - 1))
sampSizeLine <- grep("*samplesize=*", tolower(dat))
if (length(sampSizeLine) > 1) {
sampNpos <- sapply(sampSizeLine, function(i) {
return(regexpr("=", dat[i])[1])
})
}
popSizes <- as.numeric(substr(dat[sampSizeLine], start = sampNpos +
1, stop = nchar(dat[sampSizeLine])))
npops <- length(popSizes)
sampStrt <- grep("*sampledata=*", tolower(dat))
strts <- sapply(sampStrt, function(x) {
if (dat[(x + 1)] == "") {
return(x + 2)
}
else {
return(x + 1)
}
})
ends <- strts + ((popSizes * 2) - 1)
nloci <- length(strsplit(dat[strts[1]], split = "\\s+")[[1]]) -
2
popGeno <- lapply(seq_along(strts), function(i) {
return(dat[strts[i]:ends[i]])
})
popSzcheck <- sapply(popGeno, function(x) length(x)/2)
if (!all(identical(popSzcheck, popSizes))) {
stop("Failed! Please make sure that your file is formatted correctly.")
}
popIdx <- lapply(popGeno, function(x) {
return(seq(1, length(x), 2))
})
popList <- lapply(seq_along(popGeno), function(i) {
al1 <- matrix(unlist(strsplit(popGeno[[i]][popIdx[[i]]],
split = "\\s+")), nrow = popSizes[i], byrow = TRUE)[,
-(1:2)]
al2 <- matrix(unlist(strsplit(popGeno[[i]][(popIdx[[i]] +
1)], split = "\\s+")), nrow = popSizes[i], byrow = TRUE)
tst <- matrix(paste(al1, al2, sep = ""), nrow = popSizes[i])
tst <- cbind(paste(rep("pop", nrow(tst)), i, " ,",
sep = ""), tst)
rm(al1, al2)
z <- gc()
rm(z)
if (nchar(tst[1, 2]) == 4) {
tst[tst == paste(missData, missData, sep = "")] <- "0000"
}
else {
tst[tst == paste(missData, missData, sep = "")] <- "000000"
}
out <- apply(tst, 1, function(x) {
return(paste(x, collapse = "\t"))
})
out <- c("POP", out)
rm(tst)
z <- gc()
rm(z)
return(out)
})
outfile <- strsplit(infile, "\\.")[[1]]
if (length(outfile) >= 2) {
outfile <- paste(outfile[-length(outfile)], collapse = ".")
}
else {
outfile <- outfile[1]
}
loci <- paste("locus", 1:nloci, sep = "")
loci <- c(paste(outfile, "_gen_converted", sep = ""),
loci)
of <- c(loci, unlist(popList))
out <- file(paste(outfile, ".gen", sep = ""), "w")
for (i in 1:length(of)) {
cat(of[i], "\n", file = out, sep = "")
}
close(out)
return(TRUE)
}
else {
return(NULL)
}
}
fileReader=function (infile)
{
if (typeof(infile) == "list") {
return(infile)
}
else if (typeof(infile) == "character") {
flForm <- strsplit(infile, split = "\\.")[[1]]
ext <- flForm[[length(flForm)]]
if (ext == "arp") {
convRes <- arp2gen(infile)
if (!is.null(convRes)) {
cat("Arlequin file converted to genepop format! \n")
infile <- paste(flForm[1], ".gen", sep = "")
}
else {
infile <- paste(flForm[1], ".gen", sep = "")
}
}
dat <- scan(infile, sep = "\n", what = "character", quiet = TRUE)
if (length(strsplit(dat[4], split = "\\s+")[[1]][-1]) >
1) {
locs <- strsplit(dat[2], split = "\\s+")[[1]]
if (length(locs != 1)) {
locs <- strsplit(dat[2], split = ",")[[1]]
}
locs <- as.character(sapply(locs, function(x) {
x <- strsplit(x, split = "")[[1]]
if (is.element(",", x)) {
x <- x[-(which(x == ","))]
}
return(paste(x, collapse = ""))
}))
dat <- c(dat[1], locs, dat[-(1:2)])
}
popLoc <- grep("^([[:space:]]*)pop([[:space:]]*)$", tolower(dat))
no_col <- popLoc[1] - 1
if (popLoc[1] == 3) {
locs <- unlist(strsplit(dat[2], split = c("\\,",
"\\s+")))
dat <- c(dat[1], locs, dat[3:length(dat)])
}
popLoc <- grep("^([[:space:]]*)pop([[:space:]]*)$", tolower(dat))
no_col <- popLoc[1] - 1
dat1 <- sapply(dat, function(x) {
x <- unlist(strsplit(x, split = "\\s+"))
if (is.element("", x)) {
x <- x[-(which(x == ""))]
}
if (is.element(",", x)) {
x <- x[-(which(x == ","))]
}
if (length(x) != 1 && length(x) != no_col) {
x <- paste(x, collapse = "")
}
if (length(x) < no_col) {
tabs <- paste(rep(NA, (no_col - length(x))),
sep = "\t", collapse = "\t")
line <- paste(x, tabs, sep = "\t")
line <- unlist(strsplit(line, split = "\t"))
return(line)
}
else {
return(x)
}
})
}
out <- as.data.frame(t(dat1))
rownames(out) <- NULL
return(out)
}
readGenepop=function (infile = NULL, gp = 3, bootstrap = FALSE)
{
gp = gp
infile = infile
bootstrap = bootstrap
data1 <- fileReader(infile)
if (gp == 3) {
data1[data1 == 0] <- NA
data1[data1 == "999999"] <- NA
data1[data1 == "000000"] <- NA
data1[data1 == "NANA"] <- NA
}
else if (gp == 2) {
data1[data1 == 0] <- NA
data1[data1 == "9999"] <- NA
data1[data1 == "0000"] <- NA
data1[data1 == "NA"] <- NA
}
raw_data <- data1
npops <- length(which(toupper(data1[, 1]) == "POP"))
pop_pos <- c(which(toupper(data1[, 1]) == "POP"), (nrow(data1) +
1))
pop_sizes <- vector()
for (i in 1:npops) {
pop_sizes[i] <- pop_pos[(i + 1)] - pop_pos[i] - 1
}
pop_names <- as.character(data1[(pop_pos[1:npops] + 1), 1])
pop_weights <- 1/pop_sizes
n_harmonic <- npops/sum(pop_weights)
N <- pop_sizes
nloci <- (pop_pos[1] - 2)
if (nloci != (ncol(raw_data) - 1)) {
stop("Check your input file for formatting errors!")
}
loci_names <- as.vector(data1[2:(pop_pos[1] - 1), 1])
pop_list <- list()
for (i in 1:npops) {
pop_list[[i]] <- as.matrix(data1[(pop_pos[i] + 1):(pop_pos[(i +
1)] - 1), 2:(nloci + 1)])
}
extCheck <- sapply(1:length(pop_list), function(i) {
sum(is.na(pop_list[[i]])) == nloci * pop_sizes[i]
})
if (sum(extCheck) > 0) {
npops <- npops - sum(extCheck)
pop_list <- pop_list[-(which(extCheck == TRUE))]
pop_sizes <- pop_sizes[-(which(extCheck == TRUE))]
pop_names <- pop_names[-(which(extCheck == TRUE))]
pop_weights <- pop_weights[-(which(extCheck == TRUE))]
N <- N[-(which(extCheck == TRUE))]
noPop <- which(extCheck == TRUE)
indexer <- lapply(noPop, function(i) {
(pop_pos[i] + 1):(pop_pos[(i + 1)])
})
indexer <- unlist(indexer)
raw_data <- raw_data[-(indexer), ]
}
if (gp == 3) {
plMake <- function(x) {
out <- matrix(sprintf("%06g", as.numeric(x)), nrow = nrow(x),
ncol = ncol(x))
if (Sys.info()["sysname"] == "Darwin") {
out[out == "0000NA"] <- " NA"
}
return(out)
}
}
else if (gp == 2) {
plMake <- function(x) {
out <- matrix(sprintf("%04g", as.numeric(x)), nrow = nrow(x),
ncol = ncol(x))
if (Sys.info()["sysname"] == "Darwin") {
out[out == "00NA"] <- " NA"
}
return(out)
}
}
suppressWarnings(pop_list <- lapply(pop_list, plMake))
if (gp == 3) {
for (i in 1:npops) {
pop_list[[i]][pop_list[[i]] == " NA"] <- NA
}
}
else if (gp == 2) {
for (i in 1:npops) {
pop_list[[i]][pop_list[[i]] == " NA"] <- NA
}
}
if (bootstrap == T) {
bs <- function(x) {
return(matrix(x[sample(nrow(x), replace = TRUE),
], ncol = ncol(x)))
}
pop_list <- lapply(pop_list, bs)
}
lps <- function(x) {
lsp_count <- as.vector(colSums(!is.na(x)))
return(lsp_count)
}
pre_loci_pop_sizes <- lapply(pop_list, lps)
pls <- matrix(ncol = nloci, nrow = npops)
for (i in 1:length(pre_loci_pop_sizes)) {
pls[i, ] <- pre_loci_pop_sizes[[i]]
}
loci_pop_sizes <- split(pls, col(pls))
pre_loc_weights <- 1/pls
loci_pop_weights1 <- split(pre_loc_weights, col(pre_loc_weights))
loci_harm_N <- npops/colSums(pre_loc_weights)
if (gp == 3) {
pl_ss <- function(x) {
pl <- list()
pl[[1]] <- matrix(substr(x, 1, 3), ncol = nloci)
pl[[2]] <- matrix(substr(x, 4, 6), ncol = nloci)
return(pl)
}
}
else {
pl_ss <- function(x) {
pl <- list()
pl[[1]] <- matrix(substr(x, 1, 2), ncol = nloci)
pl[[2]] <- matrix(substr(x, 3, 4), ncol = nloci)
return(pl)
}
}
pop_alleles <- lapply(pop_list, pl_ss)
alln <- function(x) {
res <- list()
for (i in 1:ncol(x[[1]])) {
res[i] <- list(sort(unique(c(x[[1]][, i], x[[2]][,
i])), decreasing = F))
}
return(res)
}
allele_names <- lapply(pop_alleles, alln)
loci_combi <- allele_names[[1]]
for (j in 1:nloci) {
for (i in 2:npops) {
loci_combi[[j]] <- c(loci_combi[[j]], allele_names[[i]][[j]])
}
}
aaList <- function(x) {
return(sort(unique(x, decreasing = FALSE)))
}
all_alleles <- lapply(loci_combi, aaList)
aa <- all_alleles
aa <- lapply(aa, FUN = list, npops)
afMatrix <- function(x) {
np <- x[[2]]
z <- matrix(rep(0, (np * length(x[[1]]))), ncol = np,
nrow = length(x[[1]]))
rownames(z) <- x[[1]]
return(z)
}
allele_freq <- lapply(aa, afMatrix)
parbind <- function(x) {
rbind(x[[1]], x[[2]])
}
pa1 <- lapply(pop_alleles, parbind)
afTab <- function(x) {
lapply(1:ncol(x), function(i) {
return(table(x[, i]))
})
}
actab <- lapply(pa1, afTab)
afs <- function(x) {
afsint <- function(y) {
length(na.omit(y))/2
}
apply(x, 2, afsint)
}
indtyppop <- lapply(pa1, afs)
afCalcpop <- lapply(1:length(actab), function(x) {
lapply(1:length(actab[[x]]), function(y) {
actab[[x]][[y]]/(indtyppop[[x]][y] * 2)
})
})
obs_count <- allele_freq
for (i in 1:npops) {
for (j in 1:nloci) {
allele_freq[[j]][names(afCalcpop[[i]][[j]]), i] <- afCalcpop[[i]][[j]]
obs_count[[j]][names(actab[[i]][[j]]), i] <- actab[[i]][[j]]
}
}
indtyp <- list()
for (i in 1:nloci) {
indtyp[[i]] <- vector()
}
for (i in 1:npops) {
for (j in 1:nloci) {
indtyp[[j]][i] <- indtyppop[[i]][j]
}
}
if (bootstrap == T) {
ind_vectors <- list()
for (i in 1:npops) {
ind_vectors[[i]] <- noquote(paste(rep(i, pop_sizes[i]),
",", sep = ""))
}
pre_data <- matrix(rep("\t", ((nloci + 1) * (nloci +
1))), ncol = (nloci + 1))
pre_data[1, ] <- c("Title", rep("\t", nloci))
for (i in 2:(nloci + 1)) {
pre_data[i, 1] <- loci_names[(i - 1)]
}
pop_data <- list()
for (i in 1:npops) {
pop_data[[i]] <- matrix(rbind(c("POP", as.vector(rep("\t",
nloci))), cbind(ind_vectors[[i]], pop_list[[i]])),
ncol = (nloci + 1))
}
bs_data_file <- matrix(rbind(pre_data, pop_data[[1]]),
ncol = (nloci + 1))
for (i in 2:npops) {
bs_data_file <- matrix(rbind(bs_data_file, pop_data[[i]]),
ncol = (nloci + 1))
}
bs_data_file <- data.frame(bs_data_file)
}
nalleles <- vector()
for (i in 1:nloci) {
nalleles[i] <- nrow(allele_freq[[i]])
}
if (bootstrap == T) {
list(npops = npops, nloci = nloci, pop_alleles = pop_alleles,
pop_list = pop_list, loci_names = loci_names, pop_pos = pop_pos,
pop_sizes = pop_sizes, allele_names = allele_names,
all_alleles = all_alleles, allele_freq = allele_freq,
raw_data = raw_data, loci_harm_N = loci_harm_N, n_harmonic = n_harmonic,
pop_names = pop_names, indtyp = indtyp, nalleles = nalleles,
bs_file = bs_data_file, obs_allele_num = obs_count)
}
else if (bootstrap == F) {
list(npops = npops, nloci = nloci, pop_alleles = pop_alleles,
pop_list = pop_list, loci_names = loci_names, pop_pos = pop_pos,
pop_sizes = pop_sizes, allele_names = allele_names,
all_alleles = all_alleles, allele_freq = allele_freq,
raw_data = raw_data, loci_harm_N = loci_harm_N, n_harmonic = n_harmonic,
pop_names = pop_names, indtyp = indtyp, nalleles = nalleles,
obs_allele_num = obs_count)
}
}
gp = ncode
fr = readGenepop(x, gp, bootstrap = FALSE)
af = fr$allele_freq
DeltaD = function(abun, struc) {
## Chao et al, 2017
n = sum(abun)
N = ncol(abun)
ga = rowSums(abun)
gp = ga[ga > 0]/n
G = sum(-gp * log(gp))
H = nrow(struc)
A = numeric(H - 1)
W = numeric(H - 1)
Diff = numeric(H - 1)
wi = colSums(abun)/n
W[H - 1] = -sum(wi[wi > 0] * log(wi[wi > 0]))
pi = sapply(1:N, function(k) abun[, k]/sum(abun[, k]))
Ai = sapply(1:N, function(k) -sum(pi[, k][pi[, k] > 0] * log(pi[, k][pi[, k] > 0])))
A[H - 1] = sum(wi * Ai)
if (H > 2) {
for (i in 2:(H - 1)) {
I = unique(struc[i, ])
NN = length(I)
ai = matrix(0, ncol = NN, nrow = nrow(abun))
c
for (j in 1:NN) {
II = which(struc[i, ] == I[j])
if (length(II) == 1) {
ai[, j] = abun[, II]
} else {
ai[, j] = rowSums(abun[, II])
}
}
pi = sapply(1:NN, function(k) ai[, k]/sum(ai[, k]))
wi = colSums(ai)/sum(ai)
W[i - 1] = -sum(wi * log(wi))
Ai = sapply(1:NN, function(k) -sum(pi[, k][pi[, k] > 0] * log(pi[, k][pi[, k] > 0])))
A[i - 1] = sum(wi * Ai)
}
}
Diff[1] = (G - A[1])/W[1]
if (H > 2) {
for (i in 2:(H - 1)) {
Diff[i] = (A[i - 1] - A[i])/(W[i] - W[i - 1])
}
}
Diff = Diff
out = matrix(c(Diff), ncol = 1)
return(out)
}
v1 = c("ecosystem", "region1", "pop1")
v2 = c("ecosystem", "region1", "pop2")
str = data.frame(v1, v2)
str = as.matrix(str)
npops = fr$npops
nloci = fr$nloci
Dmat = list()
for (l in 1:nloci) {
Dmat[[l]] = matrix(data = 0, nrow = npops, ncol = npops)
for (i in 1:npops) {
for (j in 1:npops) {
## for every loci has one differentiation value
Dmat[[l]][i, j] = DeltaD((af[[l]][, c(i, j)]), str)[2] ### select two pops from allelefrequency
}
}
}
pairwiseDav = Reduce("+", Dmat)/length(Dmat)
colnames(pairwiseDav) = fr$pop_names
rownames(pairwiseDav) = fr$pop_names
GGally::ggcorr
p = ggcorr(pairwiseDav, label = TRUE)
return(p)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.