update.matrix <- function(matrix,pop.to.update,update.vector){
matrix[pop.to.update,] <- update.vector
matrix[,pop.to.update] <- update.vector
return(matrix)
}
Update_admixture_target_location <- function(last.params){
# recover()
new.params <- last.params
pop.to.update <- sample(1:last.params$k,1)
population.coordinates_prime <- last.params$population.coordinates
population.coordinates_prime[pop.to.update,] <- propose.new.location(population.coordinates_prime[pop.to.update,1],
population.coordinates_prime[pop.to.update,2],
exp(last.params$lstps$admix_target_location_lstp[pop.to.update]))
prior_prob_admix_target_locations_prime <- Prior_prob_admix_target_locations(population.coordinates_prime[1:last.params$k,],
last.params$spatial.prior.X.coordinates,
last.params$spatial.prior.Y.coordinates,
last.params$target.spatial.prior.scale)
D_prime <- spacemix.dist(population.coordinates_prime[pop.to.update,1:2,drop=FALSE], population.coordinates_prime)
tmp.covariance_prime <- Covariance(last.params$a0,last.params$a1,last.params$a2,D_prime)
covariance_prime <- update.matrix(last.params$covariance,pop.to.update,tmp.covariance_prime)
admixed.covariance_prime <- admixed.Covariance(covariance_prime,last.params$admix.proportions,last.params$nugget,last.params$k,last.params$inv.mean.sample.sizes,last.params$identity.matrix)
transformed.covariance.prime <- transformed.Covariance(admixed.covariance_prime,
last.params$projection.matrix)
LnL_freqs_prime <- likelihood(likelihood.option = last.params$likelihood.option,sample.cov = last.params$sample.cov,par.cov = transformed.covariance.prime,index.matrix = last.params$index.matrix,sd = last.params$sd,n = last.params$loci)
if(metropolis_ratio(LnL_freqs_prime,prior_prob_admix_target_locations_prime,last.params$LnL_freqs,last.params$prior_prob_admix_target_locations)){
new.params$population.coordinates <- population.coordinates_prime
new.params$prior_prob_admix_target_locations <- prior_prob_admix_target_locations_prime
new.params$D <- update.matrix(new.params$D,pop.to.update,D_prime)
new.params$covariance <- covariance_prime
new.params$admixed.covariance <- admixed.covariance_prime
new.params$transformed.covariance <- transformed.covariance.prime
new.params$LnL_freqs <- LnL_freqs_prime
new.params$accept$admix_target_location_accept[pop.to.update] <- new.params$accept$admix_target_location_accept[pop.to.update] + 1
}
new.params$moves$admix_target_location_moves[pop.to.update] <- new.params$moves$admix_target_location_moves[pop.to.update] + 1
new.params$accept_rates$admix_target_location_accept_rate[pop.to.update] <- new.params$accept$admix_target_location_accept[pop.to.update]/new.params$moves$admix_target_location_moves[pop.to.update]
return(new.params)
}
Update_admixture_source_location <- function(last.params){
# recover()
new.params <- last.params
pop.to.update <- sample(1:last.params$k,1)
ghost.to.update <- pop.to.update + last.params$k
population.coordinates_prime <- last.params$population.coordinates
population.coordinates_prime[ghost.to.update,] <- propose.new.location(population.coordinates_prime[ghost.to.update,1],
population.coordinates_prime[ghost.to.update,2],
exp(last.params$lstps$admix_source_location_lstp[pop.to.update]))
prior_prob_admix_source_locations_prime <- Prior_prob_admix_source_locations(population.coordinates_prime[(last.params$k+1):(2*last.params$k),],
last.params$centroid,
last.params$source.spatial.prior.scale)
D_prime <- spacemix.dist(population.coordinates_prime[ghost.to.update,1:2,drop=FALSE], population.coordinates_prime)
tmp.covariance_prime <- Covariance(last.params$a0,last.params$a1,last.params$a2,D_prime)
covariance_prime <- update.matrix(last.params$covariance,ghost.to.update,tmp.covariance_prime)
admixed.covariance_prime <- admixed.Covariance(covariance_prime,last.params$admix.proportions,last.params$nugget,last.params$k,last.params$inv.mean.sample.sizes,last.params$identity.matrix)
transformed.covariance.prime <- transformed.Covariance(admixed.covariance_prime,
last.params$projection.matrix)
LnL_freqs_prime <- likelihood(likelihood.option = last.params$likelihood.option,sample.cov = last.params$sample.cov,par.cov = transformed.covariance.prime,index.matrix = last.params$index.matrix,sd = last.params$sd,n = last.params$loci)
if(metropolis_ratio(LnL_freqs_prime,prior_prob_admix_source_locations_prime,last.params$LnL_freqs,last.params$prior_prob_admix_source_locations)){
new.params$population.coordinates <- population.coordinates_prime
new.params$prior_prob_admix_source_locations <- prior_prob_admix_source_locations_prime
new.params$D <- update.matrix(new.params$D,ghost.to.update,D_prime)
new.params$covariance <- covariance_prime
new.params$admixed.covariance <- admixed.covariance_prime
new.params$transformed.covariance <- transformed.covariance.prime
new.params$LnL_freqs <- LnL_freqs_prime
new.params$accept$admix_source_location_accept[pop.to.update] <- new.params$accept$admix_source_location_accept[pop.to.update] + 1
}
new.params$moves$admix_source_location_moves[pop.to.update] <- new.params$moves$admix_source_location_moves[pop.to.update] + 1
new.params$accept_rates$admix_source_location_accept_rate[pop.to.update] <- new.params$accept$admix_source_location_accept[pop.to.update]/new.params$moves$admix_source_location_moves[pop.to.update]
return(new.params)
}
Update_admixture_proportions <- function(last.params){
new.params <- last.params
pop.to.update <- sample(last.params$k,1)
admix.proportions_prime <- last.params$admix.proportions
admix.proportions_prime[pop.to.update] <- admix.proportions_prime[pop.to.update] + rnorm(1,0,exp(last.params$lstps$admix_proportions_lstp[pop.to.update]))
prior_prob_admix_proportions_prime <- Prior_prob_admix_proportions(admix.proportions_prime)
if(is.finite(prior_prob_admix_proportions_prime)){
admixed.covariance_prime <- admixed.Covariance(last.params$covariance,admix.proportions_prime,last.params$nugget,last.params$k,last.params$inv.mean.sample.sizes,last.params$identity.matrix)
transformed.covariance.prime <- transformed.Covariance(admixed.covariance_prime,
last.params$projection.matrix)
LnL_freqs_prime <- likelihood(likelihood.option = last.params$likelihood.option,sample.cov = last.params$sample.cov,par.cov = transformed.covariance.prime,index.matrix = last.params$index.matrix,sd = last.params$sd,n = last.params$loci)
if(metropolis_ratio(LnL_freqs_prime,prior_prob_admix_proportions_prime,last.params$LnL_freqs,last.params$prior_prob_admix_proportions)){
new.params$admix.proportions <- admix.proportions_prime
new.params$prior_prob_admix_proportions <- prior_prob_admix_proportions_prime
new.params$admixed.covariance <- admixed.covariance_prime
new.params$transformed.covariance <- transformed.covariance.prime
new.params$LnL_freqs <- LnL_freqs_prime
new.params$accept$admix_proportions_accept[pop.to.update] <- new.params$accept$admix_proportions_accept[pop.to.update] + 1
}
}
new.params$moves$admix_proportions_moves[pop.to.update] <- new.params$moves$admix_proportions_moves[pop.to.update] + 1
new.params$accept_rates$admix_proportions_accept_rate[pop.to.update] <- new.params$accept$admix_proportions_accept[pop.to.update]/new.params$moves$admix_proportions_moves[pop.to.update]
return(new.params)
}
Update_a0 <- function(last.params){
new.params <- last.params
a0_prime <- last.params$a0 + rnorm(1,0,exp(last.params$lstps$a0_lstp))
prior_prob_alpha0_prime <- Prior_prob_alpha0(a0_prime)
if(prior_prob_alpha0_prime != -Inf){
covariance_prime <- (last.params$covariance*last.params$a0)/a0_prime
admixed.covariance_prime <- admixed.Covariance(covariance_prime,last.params$admix.proportions,last.params$nugget,last.params$k,last.params$inv.mean.sample.sizes,last.params$identity.matrix)
transformed.covariance.prime <- transformed.Covariance(admixed.covariance_prime,last.params$projection.matrix)
LnL_freqs_prime <- likelihood(likelihood.option = last.params$likelihood.option,sample.cov = last.params$sample.cov,par.cov = transformed.covariance.prime,index.matrix = last.params$index.matrix,sd = last.params$sd,n = last.params$loci)
if(metropolis_ratio(LnL_freqs_prime,prior_prob_alpha0_prime,last.params$LnL_freqs,last.params$prior_prob_alpha0)){
new.params$a0 <- a0_prime
new.params$covariance <- covariance_prime
new.params$admixed.covariance <- admixed.covariance_prime
new.params$transformed.covariance <- transformed.covariance.prime
new.params$prior_prob_alpha0 <- prior_prob_alpha0_prime
new.params$LnL_freqs <- LnL_freqs_prime
new.params$accept$a0_accept <- new.params$accept$a0_accept + 1
}
}
new.params$moves$a0_moves <- new.params$moves$a0_moves + 1
new.params$accept_rates$a0_accept_rate <- new.params$accept$a0_accept/new.params$moves$a0_moves
return(new.params)
}
Update_a1 <- function(last.params){
new.params <- last.params
a1_prime <- last.params$a1 + rnorm(1,0,exp(last.params$lstps$a1_lstp))
prior_prob_alpha1_prime <- Prior_prob_alpha1(a1_prime)
if(prior_prob_alpha1_prime != -Inf){
covariance_prime <- Covariance(last.params$a0,a1_prime,last.params$a2,last.params$D)
admixed.covariance_prime <- admixed.Covariance(covariance_prime,last.params$admix.proportions,last.params$nugget,last.params$k,last.params$inv.mean.sample.sizes,last.params$identity.matrix)
transformed.covariance.prime <- transformed.Covariance(admixed.covariance_prime,last.params$projection.matrix)
LnL_freqs_prime <- likelihood(likelihood.option = last.params$likelihood.option,sample.cov = last.params$sample.cov,par.cov = transformed.covariance.prime,index.matrix = last.params$index.matrix,sd = last.params$sd,n = last.params$loci)
if(metropolis_ratio(LnL_freqs_prime,prior_prob_alpha1_prime,last.params$LnL_freqs,last.params$prior_prob_alpha1)){
new.params$a1 <- a1_prime
new.params$covariance <- covariance_prime
new.params$admixed.covariance <- admixed.covariance_prime
new.params$transformed.covariance <- transformed.covariance.prime
new.params$prior_prob_alpha1 <- prior_prob_alpha1_prime
new.params$LnL_freqs <- LnL_freqs_prime
new.params$accept$a1_accept <- new.params$accept$a1_accept + 1
}
}
new.params$moves$a1_moves <- new.params$moves$a1_moves + 1
new.params$accept_rates$a1_accept_rate <- new.params$accept$a1_accept/new.params$moves$a1_moves
return(new.params)
}
Update_a2 <- function(last.params){
new.params <- last.params
a2_prime <- last.params$a2 + rnorm(1,0,exp(last.params$lstps$a2_lstp))
prior_prob_alpha2_prime <- Prior_prob_alpha2(a2_prime)
if(prior_prob_alpha2_prime != -Inf) {
covariance_prime <- Covariance(last.params$a0,last.params$a1,a2_prime,last.params$D)
admixed.covariance_prime <- admixed.Covariance(covariance_prime,last.params$admix.proportions,last.params$nugget,last.params$k,last.params$inv.mean.sample.sizes,last.params$identity.matrix)
transformed.covariance.prime <- transformed.Covariance(admixed.covariance_prime,last.params$projection.matrix)
LnL_freqs_prime <- likelihood(likelihood.option = last.params$likelihood.option,sample.cov = last.params$sample.cov,par.cov = transformed.covariance.prime,index.matrix = last.params$index.matrix,sd = last.params$sd,n = last.params$loci)
if(metropolis_ratio(LnL_freqs_prime,prior_prob_alpha2_prime,last.params$LnL_freqs,last.params$prior_prob_alpha2)){
new.params$a2 <- a2_prime
new.params$covariance <- covariance_prime
new.params$admixed.covariance <- admixed.covariance_prime
new.params$transformed.covariance <- transformed.covariance.prime
new.params$prior_prob_alpha2 <- prior_prob_alpha2_prime
new.params$LnL_freqs <- LnL_freqs_prime
new.params$accept$a2_accept <- new.params$accept$a2_accept + 1
}
}
new.params$moves$a2_moves <- new.params$moves$a2_moves + 1
new.params$accept_rates$a2_accept_rate <- new.params$accept$a2_accept/new.params$moves$a2_moves
return(new.params)
}
Update_nugget <- function(last.params){
new.params <- last.params
pop.to.update <- sample(1:last.params$k,1)
nugget_prime <- last.params$nugget + c(rep(0,pop.to.update-1),rnorm(1,0,exp(last.params$lstps$nugget_lstp[pop.to.update])),rep(0,last.params$k-pop.to.update))
prior_prob_nugget_prime <- Prior_prob_nugget(nugget_prime)
if(prior_prob_nugget_prime != -Inf){
admixed.covariance_prime <- last.params$admixed.covariance
diag(admixed.covariance_prime) <- diag(admixed.covariance_prime) - last.params$nugget + nugget_prime
transformed.covariance.prime <- transformed.Covariance(admixed.covariance_prime,last.params$projection.matrix)
LnL_freqs_prime <- likelihood(likelihood.option = last.params$likelihood.option,sample.cov = last.params$sample.cov,par.cov = transformed.covariance.prime,index.matrix = last.params$index.matrix,sd = last.params$sd,n = last.params$loci)
if(metropolis_ratio(LnL_freqs_prime, prior_prob_nugget_prime,last.params$LnL_freqs,last.params$prior_prob_nugget)){
new.params$nugget <- nugget_prime
new.params$admixed.covariance <- admixed.covariance_prime
new.params$transformed.covariance <- transformed.covariance.prime
new.params$prior_prob_nugget <- prior_prob_nugget_prime
new.params$LnL_freqs <- LnL_freqs_prime
new.params$accept$nugget_accept[pop.to.update] <- new.params$accept$nugget_accept[pop.to.update] + 1
}
}
new.params$moves$nugget_moves[pop.to.update] <- new.params$moves$nugget_moves[pop.to.update] + 1
new.params$accept_rates$nugget_accept_rate[pop.to.update] <- new.params$accept$nugget_accept[pop.to.update]/new.params$moves$nugget_moves[pop.to.update]
return(new.params)
}
metropolis_ratio <- function(LnL_prime,prior_prime,LnL,prior){
accept <- FALSE
if( exp((LnL_prime + prior_prime) - (LnL+prior)) >= runif(1) ){
accept <- TRUE
}
return(accept)
}
Prior_prob_admix_target_locations <- function(admix_target_locations,spatial.prior.X.coordinates,spatial.prior.Y.coordinates,target.spatial.prior.scale){
prior_prob_admix_target_locations <- sum(-log(2*pi*target.spatial.prior.scale) + (-1/2) *
((admix_target_locations[,1] - spatial.prior.X.coordinates)^2 / target.spatial.prior.scale +
(admix_target_locations[,2] - spatial.prior.Y.coordinates)^2 / target.spatial.prior.scale))
return(prior_prob_admix_target_locations)
}
Prior_prob_admix_source_locations <- function(admix_source_locations,centroid,source.spatial.prior.scale){
prior_prob_admix_source_locations <- sum(-log(2*pi*source.spatial.prior.scale) + (-1/2) *
((admix_source_locations[,1] - centroid[1])^2 / source.spatial.prior.scale +
(admix_source_locations[,2] - centroid[2])^2 / source.spatial.prior.scale))
return(prior_prob_admix_source_locations)
}
Prior_prob_alpha0 <- function(a0){
log(dexp(a0,rate=1/100))
# log(dgamma(a0,1,1))
}
Prior_prob_alpha1 <- function(a1){
log(dexp(a1,rate=1))
}
Prior_prob_alpha2 <- function(a2){
log(dunif(a2,0.1,2))
}
Prior_prob_nugget <- function(nugget){
sum(dexp(nugget,log=TRUE))
}
Prior_prob_admix_proportions <- function(admix_proportions){
admix_proportions <- admix_proportions * 2
sum(dbeta(admix_proportions,shape1=1,shape2=100,log=TRUE))
}
normal.lnL <- function(sample.cov,par.cov,sd,index.matrix){
par.cov <- par.cov[index.matrix]
lnL <- - (sample.cov-par.cov)^2/(2*sd^2)
return(sum(lnL))
}
wishart.lnL <- function(sample.cov,par.cov,n){
A <- solve(par.cov)
lnL <- -0.5 * n * sum( A * sample.cov ) - (n/2)*determinant(par.cov,logarithm=TRUE)$modulus
return(lnL)
}
likelihood <- function(likelihood.option,sample.cov,par.cov,index.matrix=NULL,sd=NULL,n=NULL){
if(likelihood.option == "wishart"){
lnL <- wishart.lnL(sample.cov,par.cov,n)
} else if(likelihood.option == "normal_approx"){
lnL <- normal.lnL(sample.cov,par.cov,sd,index.matrix)
}
return(lnL)
}
total_likelihood_freqs <- function(freqs,covmat,loci) {
cholcov <- chol(covmat)
logsqrtdet <- sum(log(diag(cholcov)))
x <- backsolve(cholcov,freqs,transpose=TRUE)
return(-(1/2)*crossprod(as.vector(x))-loci*logsqrtdet)
}
Covariance <- function(a0,a1,a2,GeoDist) {
covariance <- (1/a0)*exp(-(a1*GeoDist)^a2)
return(covariance)
}
admixed.Covariance <- function(covariance,admix.proportions,nugget,k,inv.mean.sample.sizes,ident.mat){
# recover()
if(any(admix.proportions !=0)){
w_k <- admix.proportions
admixed.Covariance <- tcrossprod((1-w_k),(1-w_k)) * covariance[1:k,1:k] +
tcrossprod((1-w_k),(w_k)) * covariance[1:k,(k+1):(2*k)] +
tcrossprod(w_k,(1-w_k)) * covariance[(k+1):(2*k),1:k] +
tcrossprod(w_k,w_k) * covariance[(k+1):(2*k),(k+1):(2*k)]
admixed.Covariance <- admixed.Covariance + ident.mat * nugget + ident.mat * inv.mean.sample.sizes
} else {
admixed.Covariance <- covariance[1:k,1:k] + ident.mat * nugget + ident.mat * inv.mean.sample.sizes
}
return(admixed.Covariance)
}
transformed.Covariance <- function(covariance,projection.matrix){
transformed.covariance <-
crossprod(projection.matrix,covariance) %*% projection.matrix
return(transformed.covariance)
}
get.mean.sample.size <- function(sample.sizes){
mean.sample.size <- mean(sample.sizes[which(sample.sizes!=0)])
return(mean.sample.size)
}
get.weighted.mean.frequency <- function(sample.frequencies,mean.sample.sizes){
na.pops <- which(is.na(sample.frequencies))
if(any(na.pops != 0)){
sample.frequencies <- sample.frequencies[-na.pops]
mean.sample.sizes <- mean.sample.sizes[-na.pops]
}
weighted.sample.frequencies <- mean.sample.sizes*sample.frequencies
sample.frequency.weighted.mean <- sum(weighted.sample.frequencies)/sum(mean.sample.sizes)
return(sample.frequency.weighted.mean)
}
curate.count.data <- function(counts,sample.sizes,prefix){ #FIX WAY DATA ARE DROPPED
#recover()
k <- nrow(counts)
original.loci <- ncol(counts)
sample.frequencies <- counts/sample.sizes
no.samples <- which(colSums(sample.sizes)==0)
fixed.alleles <- c(which(colSums(sample.frequencies)==0),which(colSums(sample.frequencies)==k))
na.loci <- which(is.na(sample.frequencies),arr.ind=TRUE)[,2]
loci.to.drop <- unique(c(no.samples,fixed.alleles,na.loci))
if(any(loci.to.drop != 0)){
counts <- counts[,-loci.to.drop]
sample.sizes <- sample.sizes[,-loci.to.drop]
sample.frequencies <- sample.frequencies[,-loci.to.drop]
}
#get mean sample sizes, round 1
mean.sample.sizes <- apply(sample.sizes,1,get.mean.sample.size)
#get mean frequencies across loci
mean.frequencies <- apply(sample.frequencies,2,get.weighted.mean.frequency,mean.sample.sizes=mean.sample.sizes)
reduced.loci <- length(mean.frequencies)
if(reduced.loci==0){
stop("you have no loci remaining in your curated dataset.")
}
cat("\t",reduced.loci,"loci out of the original",original.loci,"left in curated dataset. \n")
curated.count.data <- list( "reduced.loci" = reduced.loci,
"counts" = counts,
"sample.sizes" = sample.sizes,
"sample.frequencies" = sample.frequencies,
"mean.sample.sizes" = mean.sample.sizes,
"mean.frequencies" = mean.frequencies)
save(curated.count.data,file=paste(prefix,"_curated.count.data.Robj",sep=''))
return(curated.count.data)
}
curate.frequency.data <- function(sample.frequencies){
k <- nrow(sample.frequencies)
fixed.alleles <- c(which(colSums(sample.frequencies)==0),which(colSums(sample.frequencies)==k))
na.loci <- which(is.na(sample.frequencies),arr.ind=TRUE)[,2]
missing.data <- which(!is.finite(sample.frequencies),arr.ind=TRUE)[,2]
loci.to.drop <- unique(c(fixed.alleles,na.loci,missing.data))
if(any(loci.to.drop != 0)){
sample.frequencies <- sample.frequencies[,-loci.to.drop]
}
if(ncol(sample.frequencies) == 0){
stop("you have no loci remaining in your curated dataset.")
}
return(sample.frequencies)
}
get.projection.matrix <- function(mean.sample.sizes,proj.mat.option=NULL,likelihood.option){
if(likelihood.option == "wishart"){
if(is.null(proj.mat.option)){
k <- length(mean.sample.sizes)
transformation.matrix <- get.transformation.matrix(mean.sample.sizes)
qr.transformation.matrix <- qr(t(transformation.matrix))
projection.matrix <- qr.Q(qr.transformation.matrix)[,1:qr.transformation.matrix$rank]
stopifnot(qr.transformation.matrix$rank == sum(abs(eigen(transformation.matrix)$values - 1) < 1e-2) )
} else if(proj.mat.option == "identity"){
projection.matrix <- diag(length(mean.sample.sizes))
}
} else if(likelihood.option == "normal_approx"){
projection.matrix <- get.transformation.matrix(mean.sample.sizes)
}
return(projection.matrix)
}
get.transformation.matrix <- function(mean.sample.sizes){
k <- length(mean.sample.sizes)
transformation.matrix <- diag(k) - matrix(mean.sample.sizes/(sum(mean.sample.sizes)),nrow=k,ncol=k,byrow=TRUE)
return(transformation.matrix)
}
get.cov.variance <- function(k,mcn.freqs,mean.sample.cov,loci){
var.sample.cov <- matrix(0,nrow=k,ncol=k)
for(i in 1:k){
for(j in 1:k){
var.sample.cov[i,j] <- mean((mcn.freqs[i,]*mcn.freqs[j,] - mean.sample.cov[i,j])^2)
}
}
var.sample.cov <- (1/(loci-1)) * var.sample.cov
return(var.sample.cov)
}
get.cov.standard.error <- function(mcn.freqs){
k <- nrow(mcn.freqs)
loci <- ncol(mcn.freqs)
mean.sample.cov <- cov(t(mcn.freqs))
var.sample.cov <- get.cov.variance(k,mcn.freqs,mean.sample.cov,loci)
std.error <- sqrt(var.sample.cov)/sqrt(loci-1)
return(std.error)
}
project.sample.covariance <- function(sample.covariance,projection.matrix,likelihood.option){
if(likelihood.option == "wishart"){
sample.covariance <- t(projection.matrix) %*% sample.covariance %*% projection.matrix
} else if(likelihood.option == "normal_approx"){
sample.covariance <- sample.covariance
}
return(sample.covariance)
}
mean.center.normalize.frequencies <- function(sample.frequencies,mean.sample.sizes,prefix){
# recover()
k <- nrow(sample.frequencies)
#mean-center, normalize, and project sample frequencies
mean.frequencies <- apply(sample.frequencies,2,get.weighted.mean.frequency,mean.sample.sizes=mean.sample.sizes)
mean.freq.mat <- matrix(mean.frequencies,nrow=k,ncol=length(mean.frequencies),byrow=TRUE)
normalized.sample.frequencies <- sample.frequencies / sqrt(mean.freq.mat * (1 - mean.freq.mat))
mean.centered.sample.frequencies <- sample.frequencies - mean.freq.mat
mean.centered.normalized.sample.frequencies <- mean.centered.sample.frequencies / sqrt(mean.freq.mat * (1 - mean.freq.mat))
MCN.frequencies.list <- list( "mean.sample.sizes" = mean.sample.sizes,
"sample.frequencies" = sample.frequencies,
"normalized.sample.frequencies" = normalized.sample.frequencies,
"mean.centered.sample.frequencies" = mean.centered.sample.frequencies,
"mean.centered.normalized.sample.frequencies" = mean.centered.normalized.sample.frequencies)
save(MCN.frequencies.list,file=paste(prefix,"_MCN.frequencies.list.Robj",sep=''))
return(mean.centered.normalized.sample.frequencies)
}
spacemix.data <- function(data.type,likelihood.option,proj.mat.option=NULL,sample.frequencies=NULL,loci,mean.sample.sizes=NULL,counts=NULL,sample.sizes=NULL,sample.covariance=NULL,cov.standard.error=NULL,prefix){
# recover()
if(data.type == "sample.covariance"){
if(!matrixcalc::is.positive.definite(sample.covariance)){
stop("the input sample.covariance matrix is not a valid covariance matrix")
}
if(is.null(mean.sample.sizes)){
stop("you must specify the mean population sample sizes")
}
sample.frequencies <- NULL
mean.centered.normalized.frequencies <- NULL
} else if(data.type == "sample.frequencies"){
if(is.null(mean.sample.sizes)){
stop("you must specify the mean population sample sizes")
}
sample.frequencies <- curate.frequency.data(sample.frequencies)
mean.centered.normalized.frequencies <- mean.center.normalize.frequencies(sample.frequencies,mean.sample.sizes,prefix)
sample.covariance <- cov(t(mean.centered.normalized.frequencies))
loci <- ncol(mean.centered.normalized.frequencies)
} else if(data.type == "counts"){
curated.count.data.list <- curate.count.data(counts,sample.sizes,prefix)
sample.frequencies <- curate.frequency.data(curated.count.data.list$sample.frequencies)
mean.sample.sizes <- curated.count.data.list$mean.sample.sizes
mean.centered.normalized.frequencies <- mean.center.normalize.frequencies(curated.count.data.list$sample.frequencies,curated.count.data.list$mean.sample.sizes,prefix)
sample.covariance <- cov(t(mean.centered.normalized.frequencies))
loci <- ncol(mean.centered.normalized.frequencies)
}
projection.matrix <- get.projection.matrix(mean.sample.sizes,proj.mat.option,likelihood.option)
sample.covariance <- project.sample.covariance(sample.covariance,projection.matrix,likelihood.option)
sd <- NULL
index.matrix <- NULL
if(likelihood.option == "normal_approx"){
if(is.null(mean.centered.normalized.frequencies) && is.null(cov.standard.error)){
stop("you must either specify the standard error of the sample covariance\n provide SpaceMix with the allele counts or frequencies, so that it can calculate it for itself.")
}
if(is.null(cov.standard.error)){
sd <- get.cov.standard.error(mean.centered.normalized.frequencies)
}
index.matrix <- upper.tri(sd,diag=TRUE)
sd <- sd[index.matrix]
sample.covariance <- sample.covariance[index.matrix]
}
spacemix.data <- list( "sample.frequencies" = sample.frequencies,
"mean.centered.normalized.frequencies" = mean.centered.normalized.frequencies,
"inv.mean.sample.sizes" = 1/mean.sample.sizes,
"projection.matrix" = projection.matrix,
"sample.covariance" = sample.covariance,
"loci" = loci,
"sd" = sd,
"index.matrix" = index.matrix)
save(spacemix.data,file=paste(prefix,"_spacemix.data.Robj",sep=''))
return(spacemix.data)
}
propose.new.location.plane <- function(long,lat,dist.std){
coords_prime <- c(long,lat) + rnorm(n = 2, mean = 0, sd = dist.std)
return(coords_prime)
}
sphere.hop <- function(long,lat,distance,bearing){
new.lat <- asin(sin(lat) * cos(distance) + cos(lat) * sin(distance) * cos(bearing))
dlong <- atan2(sin(bearing)*sin(distance)*cos(lat),cos(distance)-sin(lat)*sin(new.lat))
new.long <- (long - dlong + pi) %% (2*pi) - pi
return(cbind(new.long,new.lat))
}
propose.new.location.sphere <- function(long,lat,dist.std){
long <- degrees2radians(long)
lat <- degrees2radians(lat)
proposed.jump <- abs(rnorm(1,0,dist.std))
jump.bearing <- runif(1,0,2*pi)
coords_prime <- sphere.hop(long=long, lat=lat, distance=proposed.jump, bearing=jump.bearing)
coords_prime <- radians2degrees(coords_prime)
return(coords_prime)
}
degrees2radians <- function(degrees){
radians <- (pi/180) * degrees
return(radians)
}
radians2degrees <- function(radians){
degrees <- (180/pi) * radians
return(degrees)
}
initiate.admix.proportions <- function(k,model.option){
if(model.option == "no_movement"){
admix.proportions <- numeric(k)
prior_prob_admix_proportions <- 0
} else if(model.option == "target"){
admix.proportions <- numeric(k)
prior_prob_admix_proportions <- 0
} else if(model.option == "source"){
admix.proportions <- matrix(rbeta(k,shape1=1,shape2=50),nrow=k,ncol=1) / 2
prior_prob_admix_proportions <- Prior_prob_admix_proportions(admix.proportions)
} else if(model.option == "source_and_target"){
admix.proportions <- matrix(rbeta(k,shape1=1,shape2=50),nrow=k,ncol=1) / 2
prior_prob_admix_proportions <- Prior_prob_admix_proportions(admix.proportions)
}
initiate.admix.proportions.list <- list("admix.proportions" = admix.proportions,
"prior_prob_admix_proportions" = prior_prob_admix_proportions)
return(initiate.admix.proportions.list)
}
initiate.population.coordinates <- function(spatial.prior.X.coordinates,spatial.prior.Y.coordinates,k){
# recover()
population.coordinates <- rbind(
cbind(spatial.prior.X.coordinates,
spatial.prior.Y.coordinates),
cbind( runif(k,
min = min(spatial.prior.X.coordinates),
max = max(spatial.prior.X.coordinates)),
runif(k,
min = min(spatial.prior.Y.coordinates),
max = max(spatial.prior.Y.coordinates)))
)
return(population.coordinates)
}
save.initial.parameters <- function(a0,a1,a2,nugget,admix.proportions,covariance,admixed.covariance,transformed.covariance,population.coordinates,D,projection.matrix,inv.mean.sample.sizes,prefix){
initial.parameters <- list("a0" = a0,"a1" = a1,"a2" = a2,"nugget" = nugget,"admix.proportions" = admix.proportions,
"covariance" = covariance,"admixed.covariance" = admixed.covariance,"transformed.covariance" = transformed.covariance,
"population.coordinates" = population.coordinates,"D" = D,"projection.matrix" = projection.matrix,"inv.mean.sample.sizes"=inv.mean.sample.sizes)
save(initial.parameters,file=paste(prefix,"_Initial.parameters.Robj",sep=''))
return(0)
}
initiate.update.function.list <- function(model.option){
if(model.option == "no_movement"){
Updates <- list(Update_a0,Update_a1,Update_a2,Update_nugget)
} else if(model.option == "target"){
Updates <- list(Update_a0,Update_a1,Update_a2,Update_nugget,Update_admixture_target_location)
} else if(model.option == "source"){
Updates <- list(Update_a0,Update_a1,Update_a2,Update_nugget,Update_admixture_source_location,Update_admixture_proportions)
} else if(model.option == "source_and_target"){
Updates <- list(Update_a0,Update_a1,Update_a2,Update_nugget,Update_admixture_target_location,Update_admixture_source_location,Update_admixture_proportions)
}
return(Updates)
}
initiate.lstps.list <- function(model.option,k,ngen,samplefreq,for.last.params){
if(!for.last.params){
if(model.option == "no_movement"){
lstps <- list("a0_lstp" = numeric(ngen/samplefreq),
"a1_lstp" = numeric(ngen/samplefreq),
"a2_lstp" = numeric(ngen/samplefreq),
"nugget_lstp" = matrix(0,nrow=k,ncol=(ngen/samplefreq)))
} else if(model.option == "target"){
lstps <- list("a0_lstp" = numeric(ngen/samplefreq),
"a1_lstp" = numeric(ngen/samplefreq),
"a2_lstp" = numeric(ngen/samplefreq),
"nugget_lstp" = matrix(0,nrow=k,ncol=(ngen/samplefreq)),
"admix_target_location_lstp" = matrix(0,nrow=k,ncol=(ngen/samplefreq)))
} else if(model.option == "source"){
lstps <- list("a0_lstp" = numeric(ngen/samplefreq),
"a1_lstp" = numeric(ngen/samplefreq),
"a2_lstp" = numeric(ngen/samplefreq),
"nugget_lstp" = matrix(0,nrow=k,ncol=(ngen/samplefreq)),
"admix_source_location_lstp" = matrix(0,nrow=k,ncol=(ngen/samplefreq)),
"admix_proportions_lstp" = matrix(0,nrow=k,ncol=(ngen/samplefreq)))
} else if(model.option == "source_and_target"){
lstps <- list("a0_lstp" = numeric(ngen/samplefreq),
"a1_lstp" = numeric(ngen/samplefreq),
"a2_lstp" = numeric(ngen/samplefreq),
"nugget_lstp" = matrix(0,nrow=k,ncol=(ngen/samplefreq)),
"admix_target_location_lstp" = matrix(0,nrow=k,ncol=(ngen/samplefreq)),
"admix_source_location_lstp" = matrix(0,nrow=k,ncol=(ngen/samplefreq)),
"admix_proportions_lstp" = matrix(0,nrow=k,ncol=(ngen/samplefreq)))
}
} else {
if(model.option == "no_movement"){
lstps <- list("a0_lstp" = 0,
"a1_lstp" = 0,
"a2_lstp" = 0,
"nugget_lstp" = numeric(k))
} else if(model.option == "target"){
lstps <- list("a0_lstp" = 0,
"a1_lstp" = 0,
"a2_lstp" = 0,
"nugget_lstp" = numeric(k),
"admix_target_location_lstp" = numeric(k))
} else if(model.option == "source"){
lstps <- list("a0_lstp" = 0,
"a1_lstp" = 0,
"a2_lstp" = 0,
"nugget_lstp" = numeric(k),
"admix_source_location_lstp" = numeric(k),
"admix_proportions_lstp" = numeric(k))
} else if(model.option == "source_and_target"){
lstps <- list("a0_lstp" = 0,
"a1_lstp" = 0,
"a2_lstp" = 0,
"nugget_lstp" = numeric(k),
"admix_target_location_lstp" = numeric(k),
"admix_source_location_lstp" = numeric(k),
"admix_proportions_lstp" = numeric(k))
}
}
return(lstps)
}
initiate.diagns.list <- function(model.option,k,mixing.diagn.freq){
if(model.option == "no_movement"){
diagns <- list("a0_diagn" = numeric(mixing.diagn.freq),
"a1_diagn" = numeric(mixing.diagn.freq),
"a2_diagn" = numeric(mixing.diagn.freq),
"nugget_diagn" = matrix(0,nrow=k,ncol=mixing.diagn.freq))
} else if(model.option == "target"){
diagns <- list("a0_diagn" = numeric(mixing.diagn.freq),
"a1_diagn" = numeric(mixing.diagn.freq),
"a2_diagn" = numeric(mixing.diagn.freq),
"nugget_diagn" = matrix(0,nrow=k,ncol=(mixing.diagn.freq)),
"admix_target_location_diagn" = matrix(0,nrow=k,ncol=mixing.diagn.freq))
} else if(model.option == "source"){
diagns <- list("a0_diagn" = numeric(mixing.diagn.freq),
"a1_diagn" = numeric(mixing.diagn.freq),
"a2_diagn" = numeric(mixing.diagn.freq),
"nugget_diagn" = matrix(0,nrow=k,ncol=mixing.diagn.freq),
"admix_source_location_diagn" = matrix(0,nrow=k,ncol=mixing.diagn.freq),
"admix_proportions_diagn" = matrix(0,nrow=k,ncol=mixing.diagn.freq))
} else if(model.option == "source_and_target"){
diagns <- list("a0_diagn" = numeric(mixing.diagn.freq),
"a1_diagn" = numeric(mixing.diagn.freq),
"a2_diagn" = numeric(mixing.diagn.freq),
"nugget_diagn" = matrix(0,nrow=k,ncol=mixing.diagn.freq),
"admix_target_location_diagn" = matrix(0,nrow=k,ncol=mixing.diagn.freq),
"admix_source_location_diagn" = matrix(0,nrow=k,ncol=mixing.diagn.freq),
"admix_proportions_diagn" = matrix(0,nrow=k,ncol=mixing.diagn.freq))
}
return(diagns)
}
initiate.moves.list <- function(model.option,k){
if(model.option == "no_movement"){
moves <- list("a0_moves" = 0,
"a1_moves" = 0,
"a2_moves" = 0,
"nugget_moves" = numeric(k))
} else if(model.option == "target"){
moves <- list("a0_moves" = 0,
"a1_moves" = 0,
"a2_moves" = 0,
"nugget_moves" = numeric(k),
"admix_target_location_moves" = numeric(k))
} else if(model.option == "source"){
moves <- list("a0_moves" = 0,
"a1_moves" = 0,
"a2_moves" = 0,
"nugget_moves" = numeric(k),
"admix_source_location_moves" = numeric(k),
"admix_proportions_moves" = numeric(k))
} else if(model.option == "source_and_target"){
moves <- list("a0_moves" = 0,
"a1_moves" = 0,
"a2_moves" = 0,
"nugget_moves" = numeric(k),
"admix_target_location_moves" = numeric(k),
"admix_source_location_moves" = numeric(k),
"admix_proportions_moves" = numeric(k))
}
return(moves)
}
initiate.accept.list <- function(model.option,k){
if(model.option == "no_movement"){
accept <- list("a0_accept" = 0,
"a1_accept" = 0,
"a2_accept" = 0,
"nugget_accept" = numeric(k))
} else if(model.option == "target"){
accept <- list("a0_accept" = 0,
"a1_accept" = 0,
"a2_accept" = 0,
"nugget_accept" = numeric(k),
"admix_target_location_accept" = numeric(k))
} else if(model.option == "source"){
accept <- list("a0_accept" = 0,
"a1_accept" = 0,
"a2_accept" = 0,
"nugget_accept" = numeric(k),
"admix_source_location_accept" = numeric(k),
"admix_proportions_accept" = numeric(k))
} else if(model.option == "source_and_target"){
accept <- list("a0_accept" = 0,
"a1_accept" = 0,
"a2_accept" = 0,
"nugget_accept" = numeric(k),
"admix_target_location_accept" = numeric(k),
"admix_source_location_accept" = numeric(k),
"admix_proportions_accept" = numeric(k))
}
return(accept)
}
initiate.accept.rates.list <- function(model.option,k,ngen,samplefreq,for.last.params){
if(!for.last.params){
if(model.option == "no_movement"){
accept_rates <- list("a0_accept_rate" = numeric(ngen/samplefreq),
"a1_accept_rate" = numeric(ngen/samplefreq),
"a2_accept_rate" = numeric(ngen/samplefreq),
"nugget_accept_rate" = matrix(0,nrow=k,ncol=(ngen/samplefreq)))
} else if(model.option == "target"){
accept_rates <- list("a0_accept_rate" = numeric(ngen/samplefreq),
"a1_accept_rate" = numeric(ngen/samplefreq),
"a2_accept_rate" = numeric(ngen/samplefreq),
"nugget_accept_rate" = matrix(0,nrow=k,ncol=(ngen/samplefreq)),
"admix_target_location_accept_rate" = matrix(0,nrow=k,ncol=(ngen/samplefreq)))
} else if(model.option == "source"){
accept_rates <- list("a0_accept_rate" = numeric(ngen/samplefreq),
"a1_accept_rate" = numeric(ngen/samplefreq),
"a2_accept_rate" = numeric(ngen/samplefreq),
"nugget_accept_rate" = matrix(0,nrow=k,ncol=(ngen/samplefreq)),
"admix_source_location_accept_rate" = matrix(0,nrow=k,ncol=(ngen/samplefreq)),
"admix_proportions_accept_rate" = matrix(0,nrow=k,ncol=(ngen/samplefreq)))
} else if(model.option == "source_and_target"){
accept_rates <- list("a0_accept_rate" = numeric(ngen/samplefreq),
"a1_accept_rate" = numeric(ngen/samplefreq),
"a2_accept_rate" = numeric(ngen/samplefreq),
"nugget_accept_rate" = matrix(0,nrow=k,ncol=(ngen/samplefreq)),
"admix_target_location_accept_rate" = matrix(0,nrow=k,ncol=(ngen/samplefreq)),
"admix_source_location_accept_rate" = matrix(0,nrow=k,ncol=(ngen/samplefreq)),
"admix_proportions_accept_rate" = matrix(0,nrow=k,ncol=(ngen/samplefreq)))
}
} else {
if(model.option == "no_movement"){
accept_rates <- list("a0_accept_rate" = 0,
"a1_accept_rate" = 0,
"a2_accept_rate" = 0,
"nugget_accept_rate" = numeric(k))
} else if(model.option == "target"){
accept_rates <- list("a0_accept_rate" = 0,
"a1_accept_rate" = 0,
"a2_accept_rate" = 0,
"nugget_accept_rate" = numeric(k),
"admix_target_location_accept_rate" = numeric(k))
} else if(model.option == "source"){
accept_rates <- list("a0_accept_rate" = 0,
"a1_accept_rate" = 0,
"a2_accept_rate" = 0,
"nugget_accept_rate" = numeric(k),
"admix_source_location_accept_rate" = numeric(k),
"admix_proportions_accept_rate" = numeric(k))
} else if(model.option == "source_and_target"){
accept_rates <- list("a0_accept_rate" = 0,
"a1_accept_rate" = 0,
"a2_accept_rate" = 0,
"nugget_accept_rate" = numeric(k),
"admix_target_location_accept_rate" = numeric(k),
"admix_source_location_accept_rate" = numeric(k),
"admix_proportions_accept_rate" = numeric(k))
}
}
return(accept_rates)
}
print.mcmc.update <- function(LnL_freqs,prior_prob_admix_proportions,prior_prob_nugget,prior_prob_alpha0,prior_prob_alpha1,prior_prob_alpha2,prior_prob_admix_target_locations,prior_prob_admix_source_locations){
P <- LnL_freqs + prior_prob_admix_proportions + prior_prob_nugget + prior_prob_alpha0 + prior_prob_alpha1 + prior_prob_alpha2 + prior_prob_admix_target_locations + prior_prob_admix_source_locations
return(P)
}
make.update.sampled.accept.rates.function <- function(model.option){
if(model.option == "no_movement"){
update.sampled.accept.rates <<- function(accept_rates,j,new.params){
accept_rates$a0_accept_rate[j] <- new.params$accept_rates$a0_accept_rate
accept_rates$a1_accept_rate[j] <- new.params$accept_rates$a1_accept_rate
accept_rates$a2_accept_rate[j] <- new.params$accept_rates$a2_accept_rate
accept_rates$nugget_accept_rate[,j] <- new.params$accept_rates$nugget_accept_rate
return(accept_rates)
}
} else if(model.option == "target"){
update.sampled.accept.rates <<- function(accept_rates,j,new.params){
accept_rates$a0_accept_rate[j] <- new.params$accept_rates$a0_accept_rate
accept_rates$a1_accept_rate[j] <- new.params$accept_rates$a1_accept_rate
accept_rates$a2_accept_rate[j] <- new.params$accept_rates$a2_accept_rate
accept_rates$nugget_accept_rate[,j] <- new.params$accept_rates$nugget_accept_rate
accept_rates$admix_target_location_accept_rate[,j] <- new.params$accept_rates$admix_target_location_accept_rate
return(accept_rates)
}
} else if(model.option == "source"){
update.sampled.accept.rates <<- function(accept_rates,j,new.params){
accept_rates$a0_accept_rate[j] <- new.params$accept_rates$a0_accept_rate
accept_rates$a1_accept_rate[j] <- new.params$accept_rates$a1_accept_rate
accept_rates$a2_accept_rate[j] <- new.params$accept_rates$a2_accept_rate
accept_rates$nugget_accept_rate[,j] <- new.params$accept_rates$nugget_accept_rate
accept_rates$admix_source_location_accept_rate[,j] <- new.params$accept_rates$admix_source_location_accept_rate
accept_rates$admix_proportions_accept_rate[,j] <- new.params$accept_rates$admix_proportions_accept_rate
return(accept_rates)
}
} else if(model.option == "source_and_target"){
update.sampled.accept.rates <<- function(accept_rates,j,new.params){
accept_rates$a0_accept_rate[j] <- new.params$accept_rates$a0_accept_rate
accept_rates$a1_accept_rate[j] <- new.params$accept_rates$a1_accept_rate
accept_rates$a2_accept_rate[j] <- new.params$accept_rates$a2_accept_rate
accept_rates$nugget_accept_rate[,j] <- new.params$accept_rates$nugget_accept_rate
accept_rates$admix_target_location_accept_rate[,j] <- new.params$accept_rates$admix_target_location_accept_rate
accept_rates$admix_source_location_accept_rate[,j] <- new.params$accept_rates$admix_source_location_accept_rate
accept_rates$admix_proportions_accept_rate[,j] <- new.params$accept_rates$admix_proportions_accept_rate
return(accept_rates)
}
}
return(0)
}
make.update.sampled.lstps.function <- function(model.option){
if(model.option == "no_movement"){
update.sampled.lstps <<- function(lstps,j,new.params){
lstps$a0_lstp[j] <- new.params$lstps$a0_lstp
lstps$a1_lstp[j] <- new.params$lstps$a1_lstp
lstps$a2_lstp[j] <- new.params$lstps$a2_lstp
lstps$nugget_lstp[,j] <- new.params$lstps$nugget_lstp
return(lstps)
}
} else if(model.option == "target"){
update.sampled.lstps <<- function(lstps,j,new.params){
lstps$a0_lstp[j] <- new.params$lstps$a0_lstp
lstps$a1_lstp[j] <- new.params$lstps$a1_lstp
lstps$a2_lstp[j] <- new.params$lstps$a2_lstp
lstps$nugget_lstp[,j] <- new.params$lstps$nugget_lstp
lstps$admix_target_location_lstp[,j] <- new.params$lstps$admix_target_location_lstp
return(lstps)
}
} else if(model.option == "source"){
update.sampled.lstps <<- function(lstps,j,new.params){
lstps$a0_lstp[j] <- new.params$lstps$a0_lstp
lstps$a1_lstp[j] <- new.params$lstps$a1_lstp
lstps$a2_lstp[j] <- new.params$lstps$a2_lstp
lstps$nugget_lstp[,j] <- new.params$lstps$nugget_lstp
lstps$admix_source_location_lstp[,j] <- new.params$lstps$admix_source_location_lstp
lstps$admix_proportions_lstp[,j] <- new.params$lstps$admix_proportions_lstp
return(lstps)
}
} else if(model.option == "source_and_target"){
update.sampled.lstps <<- function(lstps,j,new.params){
lstps$a0_lstp[j] <- new.params$lstps$a0_lstp
lstps$a1_lstp[j] <- new.params$lstps$a1_lstp
lstps$a2_lstp[j] <- new.params$lstps$a2_lstp
lstps$nugget_lstp[,j] <- new.params$lstps$nugget_lstp
lstps$admix_target_location_lstp[,j] <- new.params$lstps$admix_target_location_lstp
lstps$admix_source_location_lstp[,j] <- new.params$lstps$admix_source_location_lstp
lstps$admix_proportions_lstp[,j] <- new.params$lstps$admix_proportions_lstp
return(lstps)
}
}
return(0)
}
get.diagn.step <- function(generation,mixing.diagn.freq){
diagn.step <- generation%%mixing.diagn.freq
if(diagn.step == 0){
diagn.step <- mixing.diagn.freq
}
return(diagn.step)
}
make.update.diagns.function <- function(model.option){
if(model.option == "no_movement"){
update.diagns <<- function(diagns,generation,mixing.diagn.freq,accept_rates){
diagn.step <- get.diagn.step(generation,mixing.diagn.freq)
diagns$a0_diagn[diagn.step] <- accept_rates$a0_accept_rate
diagns$a1_diagn[diagn.step] <- accept_rates$a1_accept_rate
diagns$a2_diagn[diagn.step] <- accept_rates$a2_accept_rate
diagns$nugget_diagn[,diagn.step] <- accept_rates$nugget_accept_rate
return(diagns)
}
} else if(model.option == "target"){
update.diagns <<- function(diagns,generation,mixing.diagn.freq,accept_rates){
diagn.step <- get.diagn.step(generation,mixing.diagn.freq)
diagns$a0_diagn[diagn.step] <- accept_rates$a0_accept_rate
diagns$a1_diagn[diagn.step] <- accept_rates$a1_accept_rate
diagns$a2_diagn[diagn.step] <- accept_rates$a2_accept_rate
diagns$nugget_diagn[,diagn.step] <- accept_rates$nugget_accept_rate
diagns$admix_target_location_diagn[,diagn.step] <- accept_rates$admix_target_location_accept_rate
return(diagns)
}
} else if(model.option == "source"){
update.diagns <<- function(diagns,generation,mixing.diagn.freq,accept_rates){
diagn.step <- get.diagn.step(generation,mixing.diagn.freq)
diagns$a0_diagn[diagn.step] <- accept_rates$a0_accept_rate
diagns$a1_diagn[diagn.step] <- accept_rates$a1_accept_rate
diagns$a2_diagn[diagn.step] <- accept_rates$a2_accept_rate
diagns$nugget_diagn[,diagn.step] <- accept_rates$nugget_accept_rate
diagns$admix_source_location_diagn[,diagn.step] <- accept_rates$admix_source_location_accept_rate
diagns$admix_proportions_diagn[,diagn.step] <- accept_rates$admix_proportions_accept_rate
return(diagns)
}
} else if(model.option == "source_and_target"){
update.diagns <<- function(diagns,generation,mixing.diagn.freq,accept_rates){
diagn.step <- get.diagn.step(generation,mixing.diagn.freq)
diagns$a0_diagn[diagn.step] <- accept_rates$a0_accept_rate
diagns$a1_diagn[diagn.step] <- accept_rates$a1_accept_rate
diagns$a2_diagn[diagn.step] <- accept_rates$a2_accept_rate
diagns$nugget_diagn[,diagn.step] <- accept_rates$nugget_accept_rate
diagns$admix_target_location_diagn[,diagn.step] <- accept_rates$admix_target_location_accept_rate
diagns$admix_source_location_diagn[,diagn.step] <- accept_rates$admix_source_location_accept_rate
diagns$admix_proportions_diagn[,diagn.step] <- accept_rates$admix_proportions_accept_rate
return(diagns)
}
}
return(0)
}
update.lstp <- function(n,lstp,acceptance.fraction){
if(length(lstp) > 1){
for(i in 1:length(lstp)){
if(acceptance.fraction[i] > 0.44){
lstp[i] <- min(lstp[i] + min(0.01,n^(-0.5)),20)
} else if(acceptance.fraction[i] < 0.44){
lstp[i] <- lstp[i] - min(0.01,n^(-0.5))
}
}
} else {
if(acceptance.fraction > 0.44){
lstp <- min(lstp + min(0.01,n^(-0.5)),20)
} else if(acceptance.fraction < 0.44){
lstp <- lstp - min(0.01,n^(-0.5))
}
}
return(lstp)
}
make.update.all.lstps.function <- function(model.option){
if(model.option == "no_movement"){
update.all.lstps <<- function(lstps,n,diagns){
lstps$a0_lstp <- update.lstp(n,lstps$a0_lstp,mean(diagns$a0_diagn))
lstps$a1_lstp <- update.lstp(n,lstps$a1_lstp,mean(diagns$a1_diagn))
lstps$a2_lstp <- update.lstp(n,lstps$a2_lstp,mean(diagns$a2_diagn))
lstps$nugget_lstp <- update.lstp(n,lstps$nugget_lstp,rowMeans(diagns$nugget_diagn))
return(lstps)
}
} else if(model.option == "target"){
update.all.lstps <<- function(lstps,n,diagns){
lstps$a0_lstp <- update.lstp(n,lstps$a0_lstp,mean(diagns$a0_diagn))
lstps$a1_lstp <- update.lstp(n,lstps$a1_lstp,mean(diagns$a1_diagn))
lstps$a2_lstp <- update.lstp(n,lstps$a2_lstp,mean(diagns$a2_diagn))
lstps$nugget_lstp <- update.lstp(n,lstps$nugget_lstp,rowMeans(diagns$nugget_diagn))
lstps$admix_target_location_lstp <- update.lstp(n,lstps$admix_target_location_lstp,rowMeans(diagns$admix_target_location_diagn))
return(lstps)
}
} else if(model.option == "source"){
update.all.lstps <<- function(lstps,n,diagns){
lstps$a0_lstp <- update.lstp(n,lstps$a0_lstp,mean(diagns$a0_diagn))
lstps$a1_lstp <- update.lstp(n,lstps$a1_lstp,mean(diagns$a1_diagn))
lstps$a2_lstp <- update.lstp(n,lstps$a2_lstp,mean(diagns$a2_diagn))
lstps$nugget_lstp <- update.lstp(n,lstps$nugget_lstp,rowMeans(diagns$nugget_diagn))
lstps$admix_source_location_lstp <- update.lstp(n,lstps$admix_source_location_lstp,rowMeans(diagns$admix_source_location_diagn))
lstps$admix_proportions_lstp <- update.lstp(n,lstps$admix_proportions_lstp,rowMeans(diagns$admix_proportions_diagn))
return(lstps)
}
} else if(model.option == "source_and_target"){
update.all.lstps <<- function(lstps,n,diagns){
lstps$a0_lstp <- update.lstp(n,lstps$a0_lstp,mean(diagns$a0_diagn))
lstps$a1_lstp <- update.lstp(n,lstps$a1_lstp,mean(diagns$a1_diagn))
lstps$a2_lstp <- update.lstp(n,lstps$a2_lstp,mean(diagns$a2_diagn))
lstps$nugget_lstp <- update.lstp(n,lstps$nugget_lstp,rowMeans(diagns$nugget_diagn))
lstps$admix_target_location_lstp <- update.lstp(n,lstps$admix_target_location_lstp,rowMeans(diagns$admix_target_location_diagn))
lstps$admix_source_location_lstp <- update.lstp(n,lstps$admix_source_location_lstp,rowMeans(diagns$admix_source_location_diagn))
lstps$admix_proportions_lstp <- update.lstp(n,lstps$admix_proportions_lstp,rowMeans(diagns$admix_proportions_diagn))
return(lstps)
}
}
return(0)
}
initiate.last.params <- function(model.option,likelihood.option,samplefreq,ngen,spacemix.data,population.coordinates,admix.proportions,a0,a1,a2,nugget,covariance,admixed.covariance,transformed.covariance,
k,LnL_freqs,prior_prob_alpha0,prior_prob_alpha1,prior_prob_alpha2,prior_prob_nugget,prior_prob_admix_proportions,prior_prob_admix_target_locations,prior_prob_admix_source_locations,
D,spatial.prior.X.coordinates,spatial.prior.Y.coordinates,target.spatial.prior.scale,source.spatial.prior.scale,centroid){
last.params <- list("sample.covariance" = spacemix.data$sample.covariance,
"projection.matrix" = spacemix.data$projection.matrix,
"population.coordinates" = population.coordinates,
"admix.proportions" = admix.proportions,
"a0" = a0,"a1" = a1,"a2" = a2,"nugget" = nugget,
"covariance" = covariance,"admixed.covariance" = admixed.covariance,
"transformed.covariance" = transformed.covariance,
"lstps" = initiate.lstps.list(model.option,k,ngen,samplefreq,for.last.params=TRUE),
"k" = k,"LnL_freqs" = LnL_freqs,
"prior_prob_alpha0" = prior_prob_alpha0,
"prior_prob_alpha1" = prior_prob_alpha1,
"prior_prob_alpha2" = prior_prob_alpha2,
"prior_prob_nugget" = prior_prob_nugget,
"prior_prob_admix_proportions" = prior_prob_admix_proportions,
"prior_prob_admix_target_locations" = prior_prob_admix_target_locations,
"prior_prob_admix_source_locations" = prior_prob_admix_source_locations,
"accept_rates" = initiate.accept.rates.list(model.option,k,ngen,samplefreq,for.last.params=TRUE),
"moves" = initiate.moves.list(model.option,k),"accept" = initiate.accept.list(model.option,k),
"loci" = spacemix.data$loci,"D" = D,
"inv.mean.sample.sizes" = spacemix.data$inv.mean.sample.sizes,
"spatial.prior.X.coordinates" = spatial.prior.X.coordinates,
"spatial.prior.Y.coordinates" = spatial.prior.Y.coordinates,
"target.spatial.prior.scale" = target.spatial.prior.scale,
"source.spatial.prior.scale" = source.spatial.prior.scale,
"centroid" = centroid,
"sd" = spacemix.data$sd,
"index.matrix" = spacemix.data$index.matrix,
"likelihood.option" = likelihood.option,
"identity.matrix" = diag(k))
return(last.params)
}
MCMC <- function(model.option,
data.type,
sample.frequencies = NULL,
mean.sample.sizes = NULL,
counts = NULL,
sample.sizes = NULL,
sample.covariance = NULL,
target.spatial.prior.scale = NULL,
source.spatial.prior.scale = NULL,
spatial.prior.X.coordinates,
spatial.prior.Y.coordinates,
initial.parameters = NULL,
round.earth,
k,
loci,
ngen,
printfreq,
samplefreq,
mixing.diagn.freq = 50,
savefreq,
prefix=""){
# recover()
proj.mat.option <- NULL
likelihood.option <- "wishart"
spacemix.data <- spacemix.data(data.type = data.type,
proj.mat.option = proj.mat.option,
likelihood.option = likelihood.option,
sample.frequencies = sample.frequencies,
loci = loci,
mean.sample.sizes = mean.sample.sizes,
counts = counts,
sample.sizes = sample.sizes,
sample.covariance = sample.covariance,
prefix = prefix)
if(round.earth){
propose.new.location <<- propose.new.location.sphere
spacemix.dist <<- fields::rdist.earth
} else {
propose.new.location <<- propose.new.location.plane
spacemix.dist <<- fields::rdist
}
#Declare variables
LnL_freqs <- numeric(ngen/samplefreq)
Prob <- numeric(ngen/samplefreq)
population.coordinates <- vector("list",ngen/samplefreq)
distances <- vector("list",ngen/samplefreq)
transformed.covariance.list <- vector("list",ngen/samplefreq)
admix.proportions <- matrix(0,nrow=k,ncol=ngen/samplefreq)
nugget <- matrix(0,nrow=k,ncol=ngen/samplefreq)
a0 <- numeric(ngen/samplefreq)
a1 <- numeric(ngen/samplefreq)
a2 <- numeric(ngen/samplefreq)
accept_rates <- initiate.accept.rates.list(model.option,k,ngen,samplefreq,for.last.params=FALSE)
lstps <- initiate.lstps.list(model.option,k,ngen,samplefreq,for.last.params=FALSE)
diagns <- initiate.diagns.list(model.option,k,mixing.diagn.freq)
seed <- sample(0:1e6,1)
save(seed,file=paste(prefix,"_seed.Robj",sep=''))
set.seed(seed)
# if(!continue) {
#INITIALIZE MCMC
Prob[1] <- -Inf
covariance <- matrix(0,nrow=k*2,ncol=k*2)
admixed.covariance <- matrix(0,nrow=k,ncol=k)
transformed.covariance <- transformed.Covariance(admixed.covariance,spacemix.data$projection.matrix)
badness.counter <- 0
while(!is.finite(Prob[1]) && badness.counter < 100){
if(!is.null(initial.parameters$nugget)){
nugget[,1] <- initial.parameters$nugget
} else { nugget[,1] <- rexp(k) }
if(!is.null(initial.parameters$a0)){
a0[1] <- initial.parameters$a0
} else { a0[1] <- rexp(1,1/100) }
if(!is.null(initial.parameters$a1)){
a1[1] <- initial.parameters$a1
} else { a1[1] <- rexp(1,1) }
if(!is.null(initial.parameters$a2)){
a2[1] <- initial.parameters$a2
} else { a2[1] <- runif(1,0.1,2) }
if(!is.null(initial.parameters$population.coordinates)){
population.coordinates[[1]] <- initial.parameters$population.coordinates
} else { population.coordinates[[1]] <- initiate.population.coordinates(spatial.prior.X.coordinates,spatial.prior.Y.coordinates,k) }
if(!is.null(initial.parameters$admix.proportions)){
admix.proportions[,1] <- initial.parameters$admix.proportions
prior_prob_admix_proportions <- Prior_prob_admix_proportions(admix.proportions[,1])
} else { initiate.admix.proportions.list <- initiate.admix.proportions(k,model.option)
admix.proportions[,1] <- initiate.admix.proportions.list$admix.proportions
prior_prob_admix_proportions <- initiate.admix.proportions.list$prior_prob_admix_proportions}
distances[[1]] <- spacemix.dist(population.coordinates[[1]])
centroid <- c(mean(spatial.prior.X.coordinates),mean(spatial.prior.Y.coordinates))
if(is.null(target.spatial.prior.scale)){
target.spatial.prior.scale <- mean(spacemix.dist(cbind(spatial.prior.X.coordinates,spatial.prior.Y.coordinates))) / 2
}
if(is.null(source.spatial.prior.scale)){
source.spatial.prior.scale <- mean(spacemix.dist(cbind(spatial.prior.X.coordinates,spatial.prior.Y.coordinates))) * 2
}
covariance <- Covariance(a0[1],a1[1],a2[1],distances[[1]])
admixed.covariance <- admixed.Covariance(covariance,admix.proportions[,1],nugget[,1],k,spacemix.data$inv.mean.sample.sizes,diag(k))
transformed.covariance <- transformed.Covariance(admixed.covariance,spacemix.data$projection.matrix)
tmp <- save.initial.parameters(a0[1],a1[1],a2[1],nugget[,1],admix.proportions[,1],covariance,admixed.covariance,transformed.covariance,population.coordinates[[1]],distances[[1]],spacemix.data$projection.matrix,spacemix.data$inv.mean.sample.sizes,prefix)
LnL_freqs[1] <- likelihood(likelihood.option,spacemix.data$sample.covariance, transformed.covariance,spacemix.data$index.matrix,spacemix.data$sd,spacemix.data$loci)
cat("LnL: ",LnL_freqs[1],"\n")
prior_prob_alpha0 <- Prior_prob_alpha0(a0[1])
cat("Pr(a0): ",prior_prob_alpha0,"\n")
prior_prob_alpha1 <- Prior_prob_alpha1(a1[1])
cat("Pr(a1): ",prior_prob_alpha1,"\n")
prior_prob_alpha2 <- Prior_prob_alpha2(a2[1])
cat("Pr(a2): ",prior_prob_alpha2,"\n")
prior_prob_nugget <- Prior_prob_nugget(nugget[,1])
cat("Pr(nugget): ",prior_prob_nugget,"\n")
prior_prob_admix_target_locations <- Prior_prob_admix_target_locations(population.coordinates[[1]][1:k,],spatial.prior.X.coordinates,spatial.prior.Y.coordinates,target.spatial.prior.scale)
cat("Pr(admix_target_locations): ",prior_prob_admix_target_locations,"\n")
prior_prob_admix_source_locations <- Prior_prob_admix_source_locations(population.coordinates[[1]][(k+1):(2*k),],centroid,source.spatial.prior.scale)
cat("Pr(admix_source_locations): ",prior_prob_admix_source_locations,"\n")
cat("Pr(admix_proportions): ",prior_prob_admix_proportions,"\n")
Prob[1] <- LnL_freqs[1] + prior_prob_admix_proportions + prior_prob_nugget + prior_prob_alpha0 + prior_prob_alpha1 + prior_prob_alpha2 + prior_prob_admix_target_locations + prior_prob_admix_source_locations
cat("Prob: ",Prob[1],"\n")
badness.counter <- badness.counter + 1
if(badness.counter > 99){
if(!is.finite(Prob[1])){
stop("Initial probability of model is NEGATIVE INFINITY! Please attempt to initiate chain again.")
} else {
stop("Something has gone wrong with the attempt to initialize the MCMC! Please take a close look at your data and try again")
}
}
}
last.params <- initiate.last.params(model.option = model.option,likelihood.option = likelihood.option,samplefreq = samplefreq,ngen = ngen,spacemix.data = spacemix.data,population.coordinates = population.coordinates[[1]],admix.proportions = admix.proportions[,1],
a0[1],a1[1],a2[1],nugget[,1],covariance,admixed.covariance,transformed.covariance,k = k,LnL_freqs = LnL_freqs[1],
prior_prob_alpha0 = prior_prob_alpha0,prior_prob_alpha1 = prior_prob_alpha1,prior_prob_alpha2 = prior_prob_alpha2,
prior_prob_nugget = prior_prob_nugget,prior_prob_admix_proportions = prior_prob_admix_proportions,
prior_prob_admix_target_locations = prior_prob_admix_target_locations,prior_prob_admix_source_locations = prior_prob_admix_source_locations,
D = distances[[1]],spatial.prior.X.coordinates = spatial.prior.X.coordinates,spatial.prior.Y.coordinates = spatial.prior.Y.coordinates,
target.spatial.prior.scale = target.spatial.prior.scale,source.spatial.prior.scale = source.spatial.prior.scale,
centroid = centroid)
last.ngen <- 0
tmp <- make.update.sampled.accept.rates.function(model.option)
tmp <- make.update.sampled.lstps.function(model.option)
tmp <- make.update.diagns.function(model.option)
tmp <- make.update.all.lstps.function(model.option)
#Run the MCMC
Updates <- initiate.update.function.list(model.option)
for(i in 2:ngen) {
x <- sample(c(1:length(Updates)),1)
new.params <- Updates[[x]](last.params)
if(i%%samplefreq == 0){
j <- i/samplefreq
population.coordinates[[j]] <- new.params$population.coordinates
distances[[j]] <- new.params$D
transformed.covariance.list[[j]] <- new.params$transformed.covariance
admix.proportions[,j] <- new.params$admix.proportions
nugget[,j] <- new.params$nugget
a0[j] <- new.params$a0
a1[j] <- new.params$a1
a2[j] <- new.params$a2
LnL_freqs[j] <- new.params$LnL_freqs
Prob[j] <- LnL_freqs[j] +
new.params$prior_prob_admix_proportions +
new.params$prior_prob_nugget +
new.params$prior_prob_alpha0 +
new.params$prior_prob_alpha1 +
new.params$prior_prob_alpha2 +
new.params$prior_prob_admix_target_locations +
new.params$prior_prob_admix_source_locations
accept_rates <- update.sampled.accept.rates(accept_rates,j,new.params)
lstps <- update.sampled.lstps(lstps,j,new.params)
}
diagns <- update.diagns(diagns,i,mixing.diagn.freq,new.params$accept_rates)
if(i%%mixing.diagn.freq == 0){
n <- (i + last.ngen) %/% mixing.diagn.freq
new.params$lstps <- update.all.lstps(new.params$lstps,n,diagns)
}
last.params <- new.params
if(i%%printfreq == 0){
P <- print.mcmc.update(new.params$LnL_freqs,new.params$prior_prob_admix_proportions,
new.params$prior_prob_nugget,new.params$prior_prob_alpha0,new.params$prior_prob_alpha1,
new.params$prior_prob_alpha2,new.params$prior_prob_admix_target_locations,new.params$prior_prob_admix_source_locations)
cat(i," ---- ",P,"\n")
}
if(i%%savefreq == 0){
save(last.params,
LnL_freqs,Prob,distances,population.coordinates,
transformed.covariance.list,admix.proportions,a0,a1,a2,nugget,samplefreq,ngen,
accept_rates,lstps,diagns,
target.spatial.prior.scale,source.spatial.prior.scale,
file=paste(prefix,sprintf("_space_MCMC_output%d.Robj",1),sep=''))
}
}
return(paste("Output",i,"runs to",paste(prefix,"_MCMC_output*.Robj",sep=''),"."))
}
query.MCMC.output <- function(MCMC.output,param.names=NULL,last.param.names=NULL){
if(!is.null(param.names)){
param.list <- vector("list",length=length(param.names))
names(param.list) <- param.names
load(MCMC.output)
for(i in 1:length(param.names)){
param.list[[param.names[i]]] <- get(param.names[i])
}
}
if(!is.null(last.param.names)){
last.param.list <- vector("list",length=length(last.param.names))
names(last.param.list) <- last.param.names
load(MCMC.output)
for(i in 1:length(last.param.names)){
last.param.list[[last.param.names[i]]] <- get(last.param.names[i],last.params)
}
}
if(!is.null(param.names) & !is.null(last.param.names)){
param.list <- c(param.list,last.param.list)
}
if(is.null(param.names) & !is.null(last.param.names)){
param.list <- last.param.list
}
return(param.list)
}
check.model.options <- function(fast,long){
if(fast == "source" & long == "target" |
fast == "target" & long == "source" |
fast == "source_and_target" & long == "target" |
fast == "source_and_target" & long == "source" |
long == "no_movement" & fast != "no_movement"){
stop("the specified fast.model.option is not the same as, or nested within, the long.model.option\n\n")
}
return(invisible(0))
}
fast.run.init.coords <- function(fast.model.option,x.coords,y.coords){
k <- length(x.coords)
if(fast.model.option == "no_movement" |
fast.model.option == "source"){
init.coords <- cbind(c(x.coords,runif(k,min(x.coords),max(x.coords))),
c(y.coords,runif(k,min(y.coords),max(y.coords))))
} else {
init.coords <- cbind(runif(2*k,min(x.coords),max(x.coords)),
runif(2*k,min(y.coords),max(y.coords)))
}
return(init.coords)
}
#' Runs a SpaceMix analysis
#'
#' This function runs a Markov chain Monte Carlo to estimate a geogenetic map of your genotyped samples.
#' A SpaceMix analysis can be run with any of 4 separate models:
#' \enumerate{
#' \item "no_movement" - populations do not choose their own locations, nor can they draw admixture.
#' The only parameters to be estimated are: the alpha parameters of the spatial
#' covariance function and the nugget parameters
#' \item "source" - populations do not choose their own locations, but they do draw admixture.
#' The parameters to be estimated are: the alpha parameters of the spatial
#' covariance function, the nugget parameters, the locations of the sources of admixture,
#' the strength of that admixture.
#' \item "target" - populations choose their own locations, but no admixture.
#' The parameters to be estimated are: the alpha parameters of the spatial
#' covariance function, the nugget parameters, the population locations.
#' \item "source_and_target" - populations choose their own locations AND they draw admixture.
#' The parameters to be estimated are: the alpha parameters of the spatial
#' covariance function, the nugget parameters, the population locations,
#' the locations of the sources of admixture, and the strength of that admixture.
#' }
#'
#' The algorithm proceeds by running a user-specified number of fast initial runs
#' from random locations in parameter space to find a generally good area, then one
#' long run from the final location in parameter space from the best fast run,
#' the results of which are what the user cares about. The user can also choose
#' to run only a single long analysis, for which the initial parameters may be specified.
#' If the user runs one or more fast runs, the \code{fast.model.option} specified must
#' be a model that is the same as, or nested within, the \code{long.model.option}.
#' For example, a user may not specify "source" for the fast runs and "target" for the
#' long runs.
#'
#' @param n.fast.reps The number of short initial runs to perform.
#' @param fast.MCMC.ngen The number of generations to run each initial MCMC analysis.
#' @param fast.model.option The model to be used in the short runs: may be "no_movement","source","target","source_and_target".
#' @param long.model.option The model to be used in the long run: may be "no_movement","source","target","source_and_target".
#' @param data.type The data type to be used. May be "sample.covariance","sample.frequencies","counts".
#' Please see the vignette for a discussion of what these different data elements should look like.
#' @param sample.frequencies Data to be specified if "sample.frequencies" is chosen as data.type.
#' @param mean.sample.sizes Data to be specified if "sample.frequencies" or "sample.covariance" are chosen as data.type.
#' @param counts Data to be specified if "counts" is chosen as data.type.
#' @param sample.sizes Data to be specified if "counts" is chosen as data.type.
#' @param sample.covariance Data to be specified if "sample.covariance" is chosen as data.type.
#' @param target.spatial.prior.scale The variance on the spatial prior on population locations, default is half the pairwise observed distance.
#' @param source.spatial.prior.scale The variance on the spatial prior on sources of admixture, default is twice the pairwise observed distance.
#' @param spatial.prior.X.coordinates 'Observed' sample longitude, or, if you want to examine the influence of the prior, random values.
#' @param spatial.prior.Y.coordinates 'Observed' sample latitude, or, if you want to examine the influence of the prior, random values.
#' @param round.earth Option of whether you want to estimate locations on a plane (round.earth = FALSE) or a sphere (round.earth = TRUE).
#' @param long.run.initial.parameters List of parameter values that can be passed directly to the long run MCMC as initial parameter values.
#' The list should include values for \code{a0}, \code{a1}, \code{a2}, \code{population.coordinates}, \code{admix.proportions},
#' and \code{nugget}, and each element of the list should named for the corresponding parameter
#' (e.g., \code{list("a0" = 1.07, "a1" = 0.5, ...)}).
#' @param k Number of samples.
#' @param loci Number of loci.
#' @param ngen Number of MCMC gnereations for the long MCMC.
#' @param printfreq Frequency with which updates are printed. The updates consist of the current MCMC iteration followed by the posterior probability.
#' @param samplefreq Frequency with which samples are logged from the MCMC (basically the thinning).
#' @param mixing.diagn.freq Frequency of adaptive Metropolis-within-Gibbs updates do the tuning parameters of the proposal distributions.
#' Default value is every 50 MCMC iterations.
#' @param savefreq Frequency with which MCMC_output object is saved.
#' @param directory Directory into which you want output to be saved. If no directory is specified, a random directory name will be generated and
#' that directory will be created.
#' @param prefix Prefix to be attached to all output files.
#'
#' @return This function saves an output R object (".Robj") which contains the results of the analysis.
#' The components of this R object are:
#' \itemize{
#' \item a0 - The posterior distribution on parameter \eqn{\alpha_0}.
#' \item a1 - The posterior distribution on parameter \eqn{\alpha_1}.
#' \item a2 - The posterior distribution on parameter \eqn{\alpha_2}.
#' \item accept_rates - The list of acceptance rates of different parameters over the course of the MCMC.
#' The total number of elements in each element of the list is equal to the number of sampled
#' MCMC iterations (i.e., the total number of generations divided by the sample frequency).
#' \item admix.proportions - The posterior distribution on admixture proportions. This is a matrix
#' in which the \eqn{i}th column is the vector of estimated admixture proportions from the
#' \eqn{i}th sampled generation of the MCMC.
#' \item diagns - The list of acceptance rates for each parameter over the last 50 MCMC iterations.
#' \item distances - The list of pairwise distances between all samples and their sources of admixture over the
#' course of the MCMC. Each element of the list is a pairwise distance matrix of dimension \eqn{2*K} by
#' \eqn{2*K}, where K is the number of samples. The total number of elements in the list is equal to the
#' number of sampled MCMC iterations (i.e., the total number of generations divided by the sample frequency).
#' \item last.params - The list of values passed between each iteration of the MCMC,
#' sampled at the last iteration of the MCMC (i.e., the location in parameter space from the
#' very end of the analysis, along with other quantities passed between parameter update functions).
#' \item LnL_freqs - The vector of likelihood values sampled over the course of the MCMC.
#' \item lstps - A list giving the log of the scale of the tuning parameters, updated via an
#' adaptive MCMC procedure, for each model parameter. The total number of elements in each
#' element of the list is equal to the number of sampled MCMC iterations
#' (i.e., the total number of generations divided by the sample frequency).
#' \item ngen - The user-specified number of generations of the MCMC.
#' \item nugget - The posterior distribution on nugget parameters. This is a matrix
#' in which the \eqn{i}th column is the vector of estimated nuggets from the \eqn{i}th sampled
#' generation of the MCMC.
#' \item population.coordinates - The posterior distribution on sample coordinates in geogenetic space. Each
#' element of the list is a matrix with 2 columns (Eastings and Northings, which correspond to Long and Lat
#' in the geogenetic space and \eqn{2*K} rows, where \eqn{K} is the number of samples in the dataset. The first
#' \eqn{K} rows correspond to the geogenetic coordinates of the samples themselves, and the \eqn{K+1}:\eqn{2*K}
#' rows give the geogenetic coordinates of the source of admixture for each sample.
#' \item Prob - The vector of posterior probability values sampled over the course of the MCMC.
#' \item samplefreq - The number of iterations between each time the MCMC is sampled. A higher frequency (lower \code{samplefreq})
#' result in more sampled iterations per analysis, with a higher autocorrelation between sampled parameter estimates.
#' \item source.spatial.prior.scale - The variance of the prior distribution on admixture source geogenetic locations.
#' \item target.spatial.prior.scale - The variance of the prior distribution on sample geogenetic locations.
#' \item transformed.covariance.list - The posterior distribution of the mean-centered and projected parametric covariance matrix.
#' This is of dimension \eqn{K-1} by \eqn{K-1}, where \eqn{K} is the number of samples.
#' }
#'
#' @examples
#' # load example dataset
#' data(spacemix.example.dataset)
#'
#' # run example analysis
#' run.spacemix.analysis(n.fast.reps = 2,
#' fast.MCMC.ngen = 100,
#' fast.model.option = "source_and_target",
#' long.model.option = "source_and_target",
#' data.type = "counts",
#' sample.frequencies = NULL,
#' mean.sample.sizes = NULL,
#' counts = spacemix.example.dataset$allele.counts,
#' sample.sizes = spacemix.example.dataset$sample.sizes,
#' sample.covariance = NULL,
#' target.spatial.prior.scale = NULL,
#' source.spatial.prior.scale = NULL,
#' spatial.prior.X.coordinates = spacemix.example.dataset$population.coordinates[,1],
#' spatial.prior.Y.coordinates = spacemix.example.dataset$population.coordinates[,2],
#' round.earth = FALSE,
#' long.run.initial.parameters = NULL,
#' k = nrow(spacemix.example.dataset$allele.counts),
#' loci = ncol(spacemix.example.dataset$allele.counts),
#' ngen = 5000,
#' printfreq = 50,
#' samplefreq = 5,
#' mixing.diagn.freq = 50,
#' savefreq = 5000,
#' directory = NULL,
#' prefix = "example_run")
run.spacemix.analysis <- function(n.fast.reps,
fast.MCMC.ngen,
fast.model.option,
long.model.option,
data.type,
sample.frequencies=NULL,
mean.sample.sizes=NULL,
counts=NULL,
sample.sizes=NULL,
sample.covariance=NULL,
target.spatial.prior.scale=NULL,
source.spatial.prior.scale=NULL,
spatial.prior.X.coordinates,
spatial.prior.Y.coordinates,
round.earth,
long.run.initial.parameters=NULL,
k,
loci,
ngen,
printfreq,
samplefreq,
mixing.diagn.freq = 50,
savefreq,
directory=NULL,
prefix="MyRun"){
# recover()
if(n.fast.reps !=0){
check.model.options(fast.model.option,long.model.option)
if(is.null(directory)){
directory <- sprintf("run_%0.0f",1e6*runif(1))
dir.create(directory)
}
setwd(directory)
fast.run.dirs <- unlist(lapply(1:n.fast.reps,FUN=function(i){paste("fast_run_",i,sep="")}))
for(i in 1:n.fast.reps){
dir.create(fast.run.dirs[i])
setwd(fast.run.dirs[i])
initial.population.coordinates <- fast.run.init.coords(fast.model.option,
spatial.prior.X.coordinates,
spatial.prior.Y.coordinates)
initial.parameters <- list("population.coordinates" = initial.population.coordinates)
tryCatch({
MCMC(model.option = fast.model.option,
data.type = data.type,
sample.frequencies = sample.frequencies,
mean.sample.sizes = mean.sample.sizes,
counts = counts,
sample.sizes = sample.sizes,
sample.covariance = sample.covariance,
target.spatial.prior.scale = target.spatial.prior.scale,
source.spatial.prior.scale = source.spatial.prior.scale,
spatial.prior.X.coordinates = spatial.prior.X.coordinates,
spatial.prior.Y.coordinates = spatial.prior.Y.coordinates,
round.earth = round.earth,
initial.parameters = initial.parameters,
k = k,
loci = loci,
ngen = fast.MCMC.ngen,
printfreq = 1e3,
samplefreq = fast.MCMC.ngen/1e3,
mixing.diagn.freq = mixing.diagn.freq,
savefreq = fast.MCMC.ngen/2,
prefix=paste(fast.run.dirs[i],"_",sep=""))
},error=function(e){
cat("fast run",i,"failed","\n") ;
cat(paste(e,"\n",sep="")) ;
file.rename(
paste("../",fast.run.dirs[i],sep=""),
paste("../","failed_",fast.run.dirs[i],sep="")) ;
save.image(file="spacemix.dump.RData")
})
setwd("..")
}
last.probs <- c(rep(-Inf,n.fast.reps))
for(i in 1:n.fast.reps){
tryCatch({
setwd(fast.run.dirs[i])
prob.list <- query.MCMC.output(list.files()[grep("output",list.files())],param.names="Prob")
last.probs[i] <- prob.list$Prob[length(which(prob.list$Prob!=0))]
setwd("..")
},error=function(e){cat("skipping",fast.run.dirs[i],"because it failed","\n")})
}
file.rename(fast.run.dirs[which.max(last.probs)],paste(fast.run.dirs[which.max(last.probs)],"_BestRun",sep=""))
setwd(list.files()[grep("BestRun",list.files())])
fast.run.initial.parameters <- query.MCMC.output(list.files()[grep("output",list.files())],last.param.names=c("a0","a1","a2","population.coordinates","nugget","admix.proportions"))
setwd("..")
dir.create(paste(prefix,"LongRun",sep="_"))
setwd(list.files()[grep("LongRun",list.files())])
} else {
fast.run.initial.parameters <- long.run.initial.parameters
}
MCMC(model.option = long.model.option,
data.type = data.type,
sample.frequencies = sample.frequencies,
mean.sample.sizes = mean.sample.sizes,
counts = counts,
sample.sizes = sample.sizes,
sample.covariance = sample.covariance,
target.spatial.prior.scale = target.spatial.prior.scale,
source.spatial.prior.scale = source.spatial.prior.scale,
spatial.prior.X.coordinates = spatial.prior.X.coordinates,
spatial.prior.Y.coordinates = spatial.prior.Y.coordinates,
initial.parameters = fast.run.initial.parameters,
round.earth = round.earth,
k = k,
loci = loci,
ngen = ngen,
printfreq = printfreq,
samplefreq = samplefreq,
mixing.diagn.freq = mixing.diagn.freq,
savefreq = savefreq,
prefix=prefix)
setwd("..")
return("analysis completed.")
}
#' Performs a Procrustes superimposition
#'
#' This function performs a full Procrustes superimposition of the
#' geogenetic locations estimated with SpaceMix onto a reference set
#' of observed geographic sampling coordinates. This superimposition
#' consists of a rotation, reflection, scaling, and translation of the
#' estimated locations so that they most closely match the reference
#' coordinates. This function can also be used to transform the
#' estimated sources of admixture along with the geogenetic locations,
#' so that both are in the same frame of reference.
#'
#' @param X A k x 2 matrix of the reference coordinates.
#' @param Y A k x 2 matrix of the coordinates to be transformed.
#' @param k The number of samples. This should be \code{nrow(X)}.
#' @param admix.source.locs The k x 2 matrix of estimated sources
#' of admixture. This may be left \code{NULL} if the geogenetic
#' locations are being transformed.
#' @param option This argument, which takes the values 1 or 2,
#' specifies whether the function returns the transformed
#' geogenetic locations (1) or transformed sources of admixture (2).
#'
#' @return This function returns a k x 2 matrix of transformed coordinates,
#' either those of the geogenetic locations (if option 1 is specified),
#' or those of the sources of admixture (if option 2 is specified).
#'
#' @examples
#'
#' # load example location data
#' data(spacemix.location.data)
#'
#' # perform Procrustes superimposition of
#' # geogenetic locations onto sampling coordinates
#' proc.geogen.coords <- spacemix.procrustes(X = spacemix.location.data$sample.coords,
#' Y = spacemix.location.data$geogen.coords,
#' k = nrow(spacemix.location.data$sample.coords),
#' admix.source.locs = NULL,
#' option = 1)
#' # perform Procrustes superimposition of
#' # admixture locations into same frame of
#' # reference as the superimposition of the
#' # geogenetic locations onto the sampling coordinates
#' proc.admix.coords <- spacemix.procrustes(X = spacemix.location.data$sample.coords,
#' Y = spacemix.location.data$geogen.coords,
#' k = nrow(spacemix.location.data$sample.coords),
#' admix.source.locs = spacemix.location.data$admix.coords,
#' option = 2)
#' par(mfrow=c(1,3))
#' plot(spacemix.location.data$sample.coords,
#' pch=20,xlab='',ylab='',main="Sample coordinates",
#' col=spacemix.location.data$pop.colors)
#' plot(spacemix.location.data$geogen.coords,
#' pch=20,xlab='',ylab='',main="Raw geogenetic and admixture coordinates",
#' col=spacemix.location.data$pop.colors)
#' points(spacemix.location.data$admix.coords,
#' col=spacemix.location.data$admix.colors,pch=8)
#' plot(proc.geogen.coords,pch=20,xlab='',ylab='',
#' main="Procrustes superimposition\n of geogenetic and admixture coordinates",
#' col=spacemix.location.data$pop.colors)
#' points(proc.admix.coords,col=spacemix.location.data$admix.colors,pch=8)
spacemix.procrustes <- function(X,Y,k,admix.source.locs = NULL,option){
if(is.null(option)){
stop("\nYou must specify an option of 1 or 2\n")
}
if(k != nrow(X) | k != nrow(Y) | nrow(X) != nrow(Y)){
stop("\nYou have mis-specified the dimensions of the matrices\n")
}
proc.obj <- shapes::procOPA(X,Y,scale=TRUE,reflect=TRUE)
rot.scal.locs <- proc.obj$s * Y %*% proc.obj$R
translation.matrix <- matrix(colMeans(X) - colMeans(rot.scal.locs),nrow=k,ncol=2,byrow=TRUE)
if(option==1){
proc.locs <- rot.scal.locs + translation.matrix
} else if(option==2){
if(is.null(admix.source.locs)){
stop("\nYou must specify admixture source locations for option 2.\n")
}
proc.locs <- proc.obj$s * admix.source.locs %*% proc.obj$R + translation.matrix
}
return(proc.locs)
}
get.procrustes.locations.posterior.list <- function(geographic.locations,population.coordinates.posterior){
if(any(is.null(population.coordinates.posterior))){
sampled.gen <- min(which(unlist(lapply(population.coordinates.posterior,is.null))))
} else {
sampled.gen <- length(population.coordinates.posterior)
}
geogen.coords.list <- vector(mode="list",length = sampled.gen-1)
admix.source.coords.list <- vector(mode="list",length = sampled.gen-1)
k <- nrow(geographic.locations)
for(i in 1:length(geogen.coords.list)){
geogen.coords.list[[i]] <- spacemix.procrustes(X = geographic.locations,
Y = population.coordinates.posterior[[i]][1:k,],
k = k,
option = 1)
admix.source.coords.list[[i]] <- spacemix.procrustes(X = geographic.locations,
Y = population.coordinates.posterior[[i]][1:k,],
k = k,
admix.source.locs = population.coordinates.posterior[[i]][(k+1):(2*k),],
option = 2)
}
return(list(geogen.coords.list = geogen.coords.list, admix.source.coords.list = admix.source.coords.list))
}
#' Fades a sample's color
#'
#' This function fades a sample's color in proportion to its admixture proportion.
#' A sample with a smaller admixture proportion is represented by a fainter color.
#' When visualizing SpaceMix's output, it can be helpful to visualize the amount
#' of admixture a sample is drawing by having samples that draw small amounts of
#' admixture show up more faintly in figures.
#'
#' @param pop.cols This is a vector of colors of length k in which each element
#' gives the color in which the corresponding sample should be plotted.
#' @param admix.proportions This is a vector of length k in which each element
#' is the estimated admixture proportion in the corresponding sample.
#'
#' @return This function returns a vector of faded colors, for which the
#' extent of fading is determined by the admixture proportion.
#'
#' @examples
#'
#' #generate example admixture proportions
#' admix.values <- seq(0.5,0,length.out=100)
#' # make plotting colors
#' admix.cols <- fade.admixture.source.points(rep("red",100),seq(1,0,length.out=100))
#' plot(admix.values,ylab="admix proportion",pch=19,col=admix.cols)
fade.admixture.source.points <- function(pop.cols,admix.proportions){
faded.colors <- numeric(length(pop.cols))
for(i in 1:length(pop.cols)){
faded.colors[i] <- adjustcolor(pop.cols[i],admix.proportions[i])
}
return(faded.colors)
}
#' Plots admixture arrows
#'
#' This function plots the admixture arrows, from the sources of admixture to their
#' targets, on the geogenetic map estimated as part of a SpaceMix analysis.
#' The width and opacity of these arrows can be proportional to their admixture
#' proportions, which provides an intuitive visualization of the amount of admixture
#' each sample is drawing.
#'
#' @param admix.source.coords This is a k x 2 matrix of coordinates in which
#' the two elements of the ith row give the x-coordinate and y-coordinate
#' of the source admixture for sample i.
#' @param geogen.coords This is a k x 2 matrix of coordinates in which
#' the two elements of the ith row give the x-coordinate and y-coordinate
#' of the geogenetic location of the ith sample.
#' @param admix.proportions This is a vector of length k in which each element
#' is the estimated admixture proportion in the corresponding sample.
#' @param colors This is a vector of colors of length k in which each element
#' gives the color in which the corresponding sample should be plotted.
#' @param length This value determines the length of the edges of the arrow head
#' (in inches). The default value is 0.1.
#'
#' @return This function plots the admixture arrows on an existing plotting window.
#' Its return value is invisible.
plot.admix.arrows <- function(admix.source.coords,geogen.coords,admix.proportions,colors=NULL,length=0.1){
if(is.null(colors)){
colors <- rep("black",nrow(admix.source.coords))
}
arrows(x0 = admix.source.coords[,1],
y0 = admix.source.coords[,2],
x1 = geogen.coords[,1],
y1 = geogen.coords[,2],
lwd = admix.proportions,
col = colors,
length = length)
return(invisible("arrows!"))
}
load_MCMC_output <- function(MCMC.output.file){
tmpenv <- environment()
tmp <- load(MCMC.output.file,envir=tmpenv)
mcmc.output <- lapply(tmp,get,envir=tmpenv)
names(mcmc.output) <- tmp
return(mcmc.output)
}
get.posterior.location.matrix.from.list <- function(posterior.list,population.index){
post.location.matrix <- matrix(unlist(
lapply(posterior.list,
FUN=function(elem){elem[population.index,]})),
nrow=length(posterior.list),ncol=2,byrow=TRUE)
return(post.location.matrix)
}
get.credible.ellipse <- function(posterior.points,quantile){
if(sum(colSums(abs(apply(posterior.points,2,diff)))) == 0){
ellipse_boundary <- posterior.points[1,,drop=FALSE]
} else {
fit <- MASS::cov.mve(posterior.points, quantile.used = nrow(posterior.points) * quantile)
points_in_ellipse <- posterior.points[fit$best,]
ellipse_boundary <- stats::predict(cluster::ellipsoidhull(points_in_ellipse))
}
return(ellipse_boundary)
}
plot.credible.ellipse <- function(ellipse_boundary,population.color,fading=0.3,lty=1){
if(nrow(ellipse_boundary) == 1){
points(ellipse_boundary,col=population.color,pch=19)
} else {
polygon(ellipse_boundary,col=adjustcolor(population.color,fading),border=1,lty=lty)
}
}
#' Post-processes SpaceMix output for plotting
#'
#' This function post-processes the output of a SpaceMix analysis to create
#' a list object that can be used or reference later for easy visualization
#' of SpaceMix results.
#'
#' @param MCMC.output.file This is the full path, given as a character argument
#' (i.e., in quotes), of the MCMC_output R object generated by a SpaceMix
#' analysis.
#' @param geographic.locations This is a k x 2 matrix in which the ith row
#' gives the geographic coordinates (i.e., longitude and latitude) of
#' the ith sample.
#' @param name.vector This is a character vector of length k in which each
#' element gives the name of the corresponding sample.
#' @param color.vector This is a vector of colors of length k in which each element
#' gives the color in which the corresponding sample should be plotted.
#' @param quantile This value determines the size of the credible interval
#' calculated for model parameters. A value of 0.95 denotes the 95\% credible
#' interval.
#' @param burnin This value determines how many sampled iterations of the MCMC to discard
#' as `burn-in.' A value of 200 means that the first 200 sampled iterations of the MCMC
#' will be discarded when calculating credible intervals.
#'
#' @return This function returns a list that can be used for plotting and visualization
#' of SpaceMix output. The components of this list are:
#' \itemize{
#' \item MCMC.output - This is a list of the output of the SpaceMix analysis,
#' containing all the elements of the output .Robj file.
#' \item geographic.locations - This is a k x 2 matrix in which the ith row
#' gives the geographic coordinates (i.e., longitude and latitude) of
#' the ith sample.
#' \item name.vector - This is a character vector of length k in which each
#' element gives the name of the corresponding sample.
#' \item color.vector - This is a vector of colors of length k in which each element
#' gives the color in which the corresponding sample should be plotted.
#' \item quantile - This value determines the size of the credible interval
#' calculated for model parameters.
#' \item best.iter - This is the index of the sampled MCMC iteration with the largest
#' posterior probability. We refer to parameter estimates in that iteration as
#' the maximum a posteriori (MAP) estimates.
#' \item admix.source.color.vector - This is a vector of faded colors (the same as given
#' in \code{color.vector}), for which the extent of fading is determined by the
#' admixture proportion. These colors, for which the opacity is proportional
#' to the estimated admixture proportion, are used in plotting the admixture
#' sources and admixture arrows.
#' \item k - This is the number of samples in the analysis.
#' \item MAPP.geogen.coords - This is the Procrustes-transformed MAP geogenetic location
#' coordinates.
#' \item MAPP.admix.source.coords - This is the Procrustes-transformed MAP admixture source
#' location coordinates.
#' \item procrustes.coord.posterior.lists - This is a list of the Procrustes-transformed
#' location parameter coordinates.
#' \itemize{
#' \item geogen.coords.list - A list of length N, where I is the number of sampled
#' MCMC iterations. The ith element of the list contains the Procrustes-
#' transformed geogenetic location coordinates in the ith sampled iteration
#' of the MCMC. As a whole, this list represents the posterior distribution
#' of geogenetic location parameters for all samples.
#' \item admix.source.coords.list - A list of length N, where I is the number of sampled
#' MCMC iterations. The ith element of the list contains the Procrustes-
#' transformed admixture source location coordinates in the ith sampled iteration
#' of the MCMC. As a whole, this list represents the posterior distribution
#' of admixture source location parameters for all samples.
#' }
#' \item pp.geogen.location.matrices - A list of length k in which the ith element is the Procrustes-
#' transformed posterior distribution of geogenetic location coordinates for the ith sample.
#' \item pp.admix.source.location.matrices - A list of length k in which the ith element is the Procrustes-
#' transformed posterior distribution of admixture source location coordinates for the ith sample.
#' \item pp.geogen.ellipses - A list of length k in which the ith element gives the boundaries of the
#' 95\% credible ellipse of the Procrustes-transformed posterior distribution of geogenetic
#' location coordinates of the ith sample.
#' \item pp.admix.source.ellipses - A list of length k in which the ith element gives the boundaries of the
#' 95\% credible ellipse of the Procrustes-transformed posterior distribution of admixture source
#' location coordinates of the ith sample.
#' }
make.spacemix.map.list <- function(MCMC.output.file,geographic.locations,name.vector,color.vector,quantile=0.95,burnin=0){
MCMC.output <- load_MCMC_output(MCMC.output.file)
x <- seq((burnin+1),length(MCMC.output$Prob),by=1)
best.iter <- which.max(MCMC.output$Prob[x])
if(max(MCMC.output$Prob) > max(MCMC.output$Prob[x])){
message("warning: the MCMC iteration with the\n
highest posterior probability is being\n
discarded as burn-in")
}
k <- MCMC.output$last.params$k
if(is.null(color.vector)){
color.vector <- rep(1,k)
}
admix.source.color.vector <- fade.admixture.source.points(color.vector,rowMeans(MCMC.output$admix.proportions[,x]))
MAPP.geogen.coords <- spacemix.procrustes(X = geographic.locations,
Y = MCMC.output$population.coordinates[[best.iter]][1:k,],
k = k,option = 1)
MAPP.admix.source.coords <- spacemix.procrustes(X = geographic.locations,
Y = MCMC.output$population.coordinates[[best.iter]][1:k,],
k = k,
admix.source.locs = MCMC.output$population.coordinates[[best.iter]][(k+1):(2*k),],
option=2)
procrustes.coord.posterior.lists <- get.procrustes.locations.posterior.list(geographic.locations = geographic.locations,
population.coordinates.posterior = MCMC.output$population.coordinates[x])
pp.geogen.location.matrices <- lapply(1:k,get.posterior.location.matrix.from.list,posterior.list=procrustes.coord.posterior.lists$geogen.coords.list)
pp.admix.source.location.matrices <- lapply(1:k,get.posterior.location.matrix.from.list,posterior.list=procrustes.coord.posterior.lists$admix.source.coords.list)
pp.geogen.ellipses <- lapply(pp.geogen.location.matrices,get.credible.ellipse,quantile)
pp.admix.source.ellipses <- lapply(pp.admix.source.location.matrices,get.credible.ellipse,quantile)
spacemix.map.list <- c(list(MCMC.output=MCMC.output),
list(geographic.locations=geographic.locations),
list(name.vector=name.vector),list(color.vector=color.vector),
list(quantile=quantile),list(best.iter = best.iter),
list(admix.source.color.vector = admix.source.color.vector),
list(k = k),list(MAPP.geogen.coords = MAPP.geogen.coords),
list(MAPP.admix.source.coords = MAPP.admix.source.coords),
list(procrustes.coord.posterior.lists = procrustes.coord.posterior.lists),
list(pp.geogen.location.matrices = pp.geogen.location.matrices),
list(pp.admix.source.location.matrices = pp.admix.source.location.matrices),
list(pp.geogen.ellipses = pp.geogen.ellipses),
list(pp.admix.source.ellipses = pp.admix.source.ellipses))
return(spacemix.map.list)
}
#' Plots the output of a SpaceMix analysis
#'
#' This function plots the output of a SpaceMix analysis. Users can specify
#' whether they wish to also visualize the names of the samples, the
#' credible ellipses of estimated sample geogenetic and admixture source
#' location coordinates, as well as whether they wish to visualize the
#' sources of admixture at all.
#'
#' @param spacemix.map.list This is the name of the list produced using the function
#' \code{\link{make.spacemix.map.list}}. This list should already be loaded into
#' R's working memory.
#' @param text This option (\code{TRUE} or \code{FALSE}) specifies whether sample
#' names should be printed on the plotted figure at the locations of the MAP
#' parameter estimates for each sample's geogenetic location and that of its
#' admixture source location.
#' @param ellipses This option (\code{TRUE} or \code{FALSE}) specifies whether
#' bivariate credible interval ellipses should be plotted for sample geogenetic
#' locations and admixture source locations.
#' @param source.option This option (\code{TRUE} or \code{FALSE}) specifies whether
#' the sources of admixture for each sample will be plotted, with accompanying
#' admixture arrows.
#' @param xlim This vector of length 2 gives the minimum and maximum x-value of the
#' plotting window. Default is NULL, in which case the \code{xlim} used will
#' be the minimum and maximum credible ellipse x-coordinates.
#' @param ylim This vector of length 2 gives the minimum and maximum y-value of the
#' plotting window. Default is NULL, in which case the \code{xlim} used will
#' be the minimum and maximum credible ellipse y-coordinates.
#'
#' @return This function plots the output of a SpaceMix analysis. If specified,
#' sample names are plotted at the MAP estimates of their geogenetic and admixture
#' source locations. Ellipses with solid margins denote credible intervals on
#' the posterior distributions of sample geogenetic locations, while those with dashed
#' margins denote credible intervals on the posterior distributions of sample admixture
#' source locations. The \code{source.option} specifies whether admixture source
#' locations are drawn, as well as whether admixture arrows are drawn. Admixture
#' arrows are drawn from a source of admixture to its target. The opacity of the
#' admixture arrows (and credible ellipses on admixture source locations) is proportional
#' to the amount of admixture drawn (i.e., samples with more admixture have darker arrows
#' and ellipses). The return value of this function is invisible.
#'
#' @examples
#'
#' #load example spacemix.map.list object
#' data(example.spacemix.map.list)
#'
#' #make SpaceMix output map
#' # without plotting admixture sources, and with sample names plotted
#' make.spacemix.map(example.spacemix.map.list,source=FALSE,text=TRUE)
#'
make.spacemix.map <- function(spacemix.map.list,text=FALSE,ellipses=TRUE,source.option=TRUE,xlim=NULL,ylim=NULL){
with(spacemix.map.list,{
plot(MAPP.geogen.coords,type='n',xlim=xlim,ylim=ylim,xlab="",ylab="")
if(ellipses){
lapply(1:k,FUN=function(i){plot.credible.ellipse(pp.geogen.ellipses[[i]],color.vector[i])})
}
if(text){
text(MAPP.geogen.coords,col=color.vector,font=2,labels=name.vector,cex=0.7)
}
if(source.option){
if(ellipses){
lapply(1:k,FUN=function(i){plot.credible.ellipse(pp.admix.source.ellipses[[i]],admix.source.color.vector[i],fading=1,lty=2)})
}
text(MAPP.admix.source.coords,col= admix.source.color.vector,font=3,labels=name.vector,cex=0.7)
plot.admix.arrows(MAPP.admix.source.coords, MAPP.geogen.coords,
admix.proportions=MCMC.output$admix.proportions[,best.iter],
colors=admix.source.color.vector,length=0.1)
}
box(lwd=2)
})
return(invisible("spacemix map!"))
}
#' Highlights specific samples in a geogenetic map
#'
#' This function highlights the geogenetic locations and admixture
#' source locations for a subset of the samples in a geogenetic map
#' generated from a SpaceMix analysis. Users can specify the names of the
#' samples to be highlighted, as well as whether they wish to visualize the
#' sources of admixture for those samples. This function should be called
#' after a a geogenetic map has already been plotted using
#' \code{\link{make.spacemix.map}}
#'
#' @param focal.pops This is a character vector specifying the names of the
#' samples to be highlighted on the existing SpaceMix map.
#' @param spacemix.map.list This is the name of the list produced using the function
#' \code{\link{make.spacemix.map.list}}. This list should already be loaded into
#' R's working memory.
#' @param ellipses This option (\code{TRUE} or \code{FALSE}) specifies whether
#' bivariate credible interval ellipses should be plotted for sample geogenetic
#' locations and admixture source locations.
#' @param source.option This option (\code{TRUE} or \code{FALSE}) specifies whether
#' the sources of admixture for each sample will be plotted, with accompanying
#' admixture arrows.
#'
#' @return This function highlights the geogenetic locations and admixture source locations
#' for a specified subset of samples. The return value of this function is invisible.
#'
#' @examples
#'
#' #load example spacemix.map.list object
#' data(example.spacemix.map.list)
#'
#' #make SpaceMix output map
#' # without plotting admixture sources, and without sample names plotted
#' make.spacemix.map(example.spacemix.map.list,source=FALSE,text=FALSE)
#'
#' #highlight two samples, without their admixture sources
#' query.spacemix.map(focal.pops=c("Sample_9","Sample_23"),
#' spacemix.map.list = example.spacemix.map.list,
#' ellipses=TRUE,source.option=FALSE)
query.spacemix.map <- function(focal.pops,spacemix.map.list,ellipses=TRUE,source.option=TRUE){
with(spacemix.map.list,{
# browser()
focal.indices <- match(focal.pops,name.vector)
if(ellipses){
for(i in 1:length(focal.indices)){
plot.credible.ellipse(pp.geogen.ellipses[[focal.indices[i]]],color.vector[focal.indices[i]],fading=1)
}
}
if(source.option){
if(ellipses){
for(i in 1:length(focal.indices)){
plot.credible.ellipse(pp.admix.source.ellipses[[focal.indices[i]]], admix.source.color.vector[focal.indices[i]],fading=1,lty=2)
}
}
text(MAPP.admix.source.coords[focal.indices,,drop=FALSE],col=1,font=3,labels=name.vector[focal.indices])
arrows( x0 = MAPP.admix.source.coords[focal.indices,1],
y0 = MAPP.admix.source.coords[focal.indices,2],
x1 = MAPP.geogen.coords[focal.indices,1],
y1 = MAPP.geogen.coords[focal.indices,2],
col= admix.source.color.vector[focal.indices],
lwd=1,
length=0.1)
}
text(MAPP.geogen.coords[focal.indices,,drop=FALSE],col=1,font=2,labels=name.vector[focal.indices],cex=1)
box(lwd=2)
})
return(invisible("highlighted samples!"))
}
#' Converts allele count and sample size data to a normalized
#' allele frequency covariance matrix
#'
#' This function generates a normalized covariance matrix from
#' allele count and sample size data. The output can be used
#' to run a SpaceMix analysis using the \code{run.spacemix.analysis}
#' function. Running a SpaceMix analysis on this normalized covariance
#' matrix (rather than on raw allele count and sample size data, or
#' on sample allele frequency data) is recommended when there are many
#' loci for which there are missing data, as its use results in pairwise,
#' rather than listwise, deletion of missing data.
#'
#' The normalization is used to standardize the variance in
#' allele frequencies across loci, and is performed by dividing
#' the allele frequencies at a locus by
#' \eqn{\sqrt{\bar{f}(1-\bar{f})}} where \eqn{\bar{f}}
#' is the average of the \eqn{K} sample allele frequencies,
#' weighted by mean population size. That is,
#' \deqn{\bar{f}_{\ell} = \frac{1}{\sum_K S_{k,\ell}}
#' \sum_K \hat{f}_{k,\ell} S_{k,\ell}}.
#'
#' @param counts A matrix of allele counts for which each row corresponds to a
#' sample and each column corresponds to a locus. The \eqn{ij}th entry gives the number
#' of times the counted allele at the \eqn{j}th locus is observed in the \eqn{i}th sample.
#' Please see the vignette for a discussion of what the allele count data matrix
#' should look like.
#
#' @param sample.sizes A matrix of sample sizes for which each row corresponds to a
#' sample and each column corresponds to a locus. The \eqn{ij}th entry gives the number
#' of chromosomes genotyped in the \eqn{i}th sample at the \eqn{j}th locus.
#'
#' @return This function returns a list with three components:
#' \itemize{
#' \item norm.sample.covariance - normalized sample allele frequency covariance matrix.
#' \item mean.sample.sizes - The vector of length \eqn{K} giving the mean sample size
#' across loci in each of the \eqn{K} samples.
#' \item loci - The number of loci in the dataset.
#' }
#'
#' @examples
#' # load example dataset
#' data(spacemix.example.dataset)
#'
#' # generate normalized sample allele frequency covariance
#' norm.samp.cov <- get.nrmlzd.sample.covariance(counts = spacemix.example.dataset$allele.counts,
#' sample.sizes = spacemix.example.dataset$sample.sizes)
#'
#' # visualize results
#' image(norm.samp.cov$norm.sample.covariance,
#' main="Normalized sample allele frequency covariance",
#' xaxt='n',yaxt='n',xlab="Population",ylab="Population")
#' axis(side=1,c(0:29)/29,labels=c(paste(1:30)))
#' axis(side=2,c(0:29)/29,labels=c(paste(1:30)),las=2)
get.nrmlzd.sample.covariance <- function(counts,sample.sizes){
sample.frequencies <- counts/sample.sizes
mean.sample.sizes <- apply(sample.sizes,1,function(x){sum(x)/length(which(x!=0))})
mean.sample.frequencies <- matrix(apply(sample.frequencies,2,
get.weighted.mean.frequency,
mean.sample.sizes= mean.sample.sizes),
nrow=length(mean.sample.sizes),ncol=ncol(sample.frequencies),byrow=TRUE)
normalized.sample.frequencies <- sample.frequencies/sqrt(mean.sample.frequencies*(1-mean.sample.frequencies))
sample.covariance <- cov(t(normalized.sample.frequencies),use="pairwise.complete.obs")
loci <- ncol(sample.frequencies)
return(list("norm.sample.covariance" = sample.covariance,
"mean.sample.sizes" = mean.sample.sizes,
"loci" = loci))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.