# Note that this function only works by printing to a pdf. To print to screen, comment out
# the first line and execute in parts, noting that dev.off() is only activated if an existing
# pdf is open. Moreover, some functions might throw an error due to poor mixing, hence are
# wrapped in try().
visualise_results <- function(results, # list; results saved in .RData file generated by mcmc_sampling
data_summary, # list; data .RData file
PDF, # logical; PDF = TRUE to plot PDF
child, # logical; child = TRUE if results are generated by mcmc_sampling_child
augment_missing_data, # logical; if true, missing entries are inferred (only if prevalence data or the log likelihood is set to zero (QC check), in which case, always TRUE)
Simulated) # logical; true if data are simulated
{
#======================================================================================================
# Pdf and plotting (comment out if plotting manually)
if(class(PDF) == 'character'){pdf(PDF)}else{stop('Need to plot by hand')}
#======================================================================================================
#======================================================================================================
# Install packages if not already
installed_packages <- rownames(installed.packages())
desired_packages <- c('MCMCpack',# For correlation plots
'car', # for conversion between an array and list
'plyr', # For text plot and confidence intervals
'gplots', # For moi and haplotype count plots
'lattice', # To generate mcmcAs
'vioplot', # For posterior frequency plots
'abind',
'grDevices')
uninstalled_desired_packages <- desired_packages[!desired_packages %in% installed_packages]
if(length(uninstalled_desired_packages) > 0){
install.packages(uninstalled_desired_packages)
}
#======================================================================================================
#======================================================================================================
# Load packages if not already
loaded_packages <- search()
unloaded_desired_packages <- desired_packages[!paste('package:', desired_packages, sep = '') %in% loaded_packages]
if(length(unloaded_desired_packages) > 0){
for(i in 1:length(unloaded_desired_packages)){
require(unloaded_desired_packages[i], character.only = TRUE)
}
}
#======================================================================================================
#======================================================================================================
# Extract meta data
haplotypes <- processed_data_list$comp_genotypes_character
no_haplotypes <- processed_data_list$no_haplotypes
no_markers <- processed_data_list$no_marker_loci
no_total <- processed_data_list$no_total
#======================================================================================================
#======================================================================================================
# Burnin and pdf thinning (for managable pdf size)
burnin <- 1:(0.5*mcmc_variable_list$no_traces_preburnin) # remove first half
plot_thin <- 10
plot_thin_preburnin <- seq(1, mcmc_variable_list$no_traces_preburnin, plot_thin) # To further thin traces that include burnin
plot_thin_postburnin <- seq(1, mcmc_variable_list$no_traces_preburnin-max(burnin), plot_thin) # To further thin traces that don't include burnin
plot_thin_preburnin_noburnin <- seq((max(burnin)+1), mcmc_variable_list$no_traces_preburnin, plot_thin)
#======================================================================================================
#========================================================================================================
# Generate posteriors of interest
if(mcmc_variable_list$no_mcmc_chains > 1){
alply_genotype_freq_store_chains_burnin <- alply(results$genotype_freq_store_chains[-burnin,,],3)
}else{
alply_genotype_freq_store_chains_burnin <- results$genotype_freq_store_chains[-burnin,,]
}
mcmc_frequency_chains <- mcmc.list(lapply(alply_genotype_freq_store_chains_burnin, # 3 for splitting by third dimension (nothing to do with no. of chains)
mcmc,
start = (max(burnin)+1)*mcmc_variable_list$thinning_interval,
end = mcmc_variable_list$no_traces_preburnin*mcmc_variable_list$thinning_interval,
thin = mcmc_variable_list$thinning_interval))
mcmc_As <- abind(alply(results$genotype_count_store_chains[-burnin,,,],4), along = 1) # Haplotype counts for all chains excluding burn in
mcmc_mois <- apply(mcmc_As,c(1,2),sum)
#========================================================================================================
#===========================================================================================
# Summary of run variables for text summary (might want to add some more)
raw_data_plus_latent <- data_summary$Data[,1:no_markers,drop=FALSE]
ind_partial_initial <- apply(raw_data_plus_latent,1,function(X){99%in%X})
if(child){
no_children_TempVar <- strsplit(rownames(raw_data_plus_latent), ':')
childID <- unique(matrix(unlist(no_children_TempVar), ncol = 2, byrow = TRUE)[,1])
no_children <- length(childID)
} else {
no_children <- no_total
}
Variables <- as.matrix(c(no_markers, no_children, no_total, sum(apply(raw_data_plus_latent,1,function(X){99%in%X})),
mcmc_variable_list$thinning_interval * mcmc_variable_list$no_traces_preburnin,
mcmc_variable_list$thinning_interval, max(burnin)*mcmc_variable_list$thinning_interval))
rownames(Variables) <- c('No. marker loci','No. children',
'No. blood samples','No. partial observations',
'No. mcmc iterations overall','Thinning interval','Burn in overall'); colnames(Variables)<-'Value'
Data_to_print <- as.matrix(table(apply(raw_data_plus_latent,1,paste,sep=' ',collapse=' '))) # Summary of data
colnames(Data_to_print)<-'No of observations'
EffSize_pi<-as.matrix(round(effectiveSize(mcmc_frequency_chains),0),ncol=1) # Effective size of the sample (after burnin)
colnames(EffSize_pi) <- 'Effective posterior sample size'
#===========================================================================================
#=====================================================================================================
# Extract the simulated samples of freq and moi (simulated haploytype counts not used in plots)
if(Simulated){
pi_simulated_sample <- data_summary$Sample_haplotype_frequencies[,haplotypes ]
if(augment_missing_data){
moi_simulated_sample = data_summary$Data[,'MOI']
} else {
moi_simulated_sample = data_summary$Data[!ind_partial_initial,'MOI']}
} else {
moi_simulated_sample <- rep(Inf, no_total)
}
#=====================================================================================================
#=====================================================================================================
# Values for plotting mois and genotype counts and chain cols
randomly_selected_bloodsamples <- sample(no_total, size = min(no_total, 16))
mini_title_names <- paste(dimnames(mcmc_As)[[2]],apply(raw_data_plus_latent, 1, paste, sep = ' ', collapse=' '), sep=', ')
chain_cols <- rainbow(mcmc_variable_list$no_mcmc_chains, alpha = 0.5)
#=====================================================================================================
#===========================================================================================
# Print data summary and effective size to the screen/file
par(mfcol=c(2,2), mar = c(5,4,1,1))
textplot(Variables)
textplot(as.matrix(Data_to_print))
textplot(EffSize_pi)
par(mfrow = c(1,1)) # reset
#===========================================================================================
#========================================================================================================
# Classic diagnostic trace plots (burnin removed hence start and end arguments passed to mcmc function via lapply - see xlab of trace plots)
plot(mcmc_frequency_chains, ask = FALSE, bty = 'n', col = chain_cols, smooth = FALSE)
try(gelman.plot(mcmc_frequency_chains, transform = FALSE, ask = FALSE, bty = 'n')) # Plot of gelman diagnostic
autocorr.plot(mcmc_frequency_chains,lag.max=50, ask = FALSE, bty = 'n')
#========================================================================================================
#========================================================================================================
# Plot true values against estimate (having excluded burnin)
par(mfrow = c(1,1))
if(Simulated){ # If simulated data
pi_med <- summary(mcmc_frequency_chains)$quantiles[,3]
CI_lower<-summary(mcmc_frequency_chains)$quantiles[,1]
CI_upper<-summary(mcmc_frequency_chains)$quantiles[,5]
options(warn = -1) # to supress warnings
plotCI(x = data_summary$Sample_haplotype_frequencies[,haplotypes],
y = pi_med[haplotypes],ui=CI_upper[haplotypes],li=CI_lower[haplotypes],
xlab='Sample haplotype frequency',ylab='Estimated haplotype frequency',
xlim=c(0,1),ylim=c(0,1), gap = 0,
barcol='grey',pch=19,pt.bg=par("bg"),cex = 0.3,
bty = 'n')
lines(seq(0,1,0.1),seq(0,1,0.1))
}
#========================================================================================================
#========================================================================================================
# Violin plot
# Generate samples from the dirichlet prior for overlaying when mcmc_variable_list$log_like_zero is true
priorsamples <- rdirichlet(1000, frequency_list$frequency_hyperparameter) # frequency_hyperparameter
colnames(priorsamples) <- haplotypes
plot(NULL, xlim=c(0.5,no_haplotypes + 0.5), ylim=c(0,1.05), xaxt = 'n', yaxt = 'n', xlab = 'Haplotypes', ylab = 'Frequency', cex.lab = 1.2)
for(chain in 1:mcmc_variable_list$no_mcmc_chains){
for(haplotype in haplotypes){
if(mcmc_variable_list$log_like_zero){
if(chain ==1){
vioplot(priorsamples[,haplotype], at = which(haplotype == haplotypes), col = 'gray', add = TRUE)
}
}
try(vioplot(results$genotype_freq_store_chains[-burnin, haplotype, chain], at = which(haplotype == haplotypes), col = chain_cols[chain], add = TRUE))
}
}
if(!mcmc_variable_list$log_like_zero & Simulated){
points(y = data_summary$Population_haplotype_frequencies[,haplotypes], x=1:no_haplotypes, pch = 19, col = 'black')
}
box(col = 'white', lwd = 6)
axis(side = 1, at = 1:no_haplotypes, labels = haplotypes, tick = FALSE, cex.axis = 1, las = 2, line = -1)
axis(side = 2, at = seq(0,1,0.5), labels = seq(0,1,0.5), cex.axis = 1.2)
if(!mcmc_variable_list$log_like_zero & Simulated){legend('topleft', legend = c('Frequency used to simulate the data'), pch = c(19), col = 'black', bty = 'n', cex = 1)}
legend('topright', legend = paste('Chain', 1:mcmc_variable_list$no_mcmc_chains),lty = 'solid', col = chain_cols[1:mcmc_variable_list$no_mcmc_chains], cex = 1, bty = 'n', lwd = 3)
#========================================================================================================
#========================================================================================================
if(!mcmc_variable_list$log_like_zero & Simulated){
# Violin relative to the simulated sammple frequency
plot(NULL, xlim=c(0.5,no_haplotypes+0.5), ylim=c(0,1.05), xaxt = 'n', yaxt = 'n', xlab = 'Haplotypes', ylab = 'Frequency', cex.lab = 1.2)
for(chain in 1:mcmc_variable_list$no_mcmc_chains){
for(haplotype in haplotypes){
try(vioplot(results$genotype_freq_store_chains[-burnin, haplotype, chain], at = which(haplotype == haplotypes), col = chain_cols[chain], add = TRUE))}}
points(y = data_summary$Sample_haplotype_frequencies[,haplotypes],
x = 1:no_haplotypes, pch = 19, col = 'black')
box(col = 'white', lwd = 6)
axis(side = 1, at = 1:no_haplotypes, labels = haplotypes, tick = FALSE, cex.axis = 1, las = 2, line = -1)
axis(side = 2, at = seq(0,1,0.5), labels = seq(0,1,0.5), cex.axis = 1.2)
if(Simulated){legend('topleft', legend = c('Simulated sample frequency'), pch = c(19), col = 'black', bty = 'n', cex = 1)}
legend('topright', legend = paste('Chain', 1:mcmc_variable_list$no_mcmc_chains),lty = 'solid', col = chain_cols[1:mcmc_variable_list$no_mcmc_chains], cex = 1, bty = 'n', lwd = 3)}
#========================================================================================================
#========================================================================================================
# Plot the likelihood overall (including burnin, although ylim based on post burnin)
# Calculate log_likelihood by taking product over malaria episodes
log_likelihood_store_chains_sum <- apply(results$log_likelihood_store_chains,c(1,3),sum)
plot(NULL, ylim = range(log_likelihood_store_chains_sum[-burnin,])-c(10, 0),
xaxt = 'n', yaxt = 'n', xlim = c(0, mcmc_variable_list$no_traces_preburnin), bty ='n', ylab = 'Log likelihood',
xlab = 'Trace', cex.lab = 1.2)
Seq <- seq(min(log_likelihood_store_chains_sum[-burnin,]),
max(log_likelihood_store_chains_sum[-burnin,]),
length.out = 3)
axis(side = 2, at = Seq, labels = round(Seq,0), cex.axis = 1.2)
axis(side = 1, at = c(0,mcmc_variable_list$no_traces_preburnin), labels = c(0,mcmc_variable_list$no_traces_preburnin*mcmc_variable_list$thinning_interval), cex.axis = 1.2)
for(chain in 1:mcmc_variable_list$no_mcmc_chains){ # Add the likelihood
lines(y = log_likelihood_store_chains_sum[plot_thin_preburnin, chain], x = plot_thin_preburnin, col = chain_cols[chain])
}
legend('bottom',
legend = paste('Chain', 1:mcmc_variable_list$no_mcmc_chains),
lty = 'solid', col = chain_cols, cex = 1, bty = 'n')
title(sub = sprintf('In addition to the thinning interval used to generate results, thinned by %s', plot_thin), cex.sub = 0.5)
#========================================================================================================
#========================================================================================================
# # Plot the posterior (including burnin, although ylim based on post burnin)
# plot(NULL, ylim = range(results$log_posterior_store_chains[-burnin,])-c(10, 0), xaxt = 'n', yaxt = 'n',
# xlim = c(0, mcmc_variable_list$no_traces_preburnin), bty ='n', ylab = 'Log posterior', xlab = 'Trace', cex.lab = 1.2)
#
# Seq <- seq(min(results$log_posterior_store_chains[-burnin,]),
# max(results$log_posterior_store_chains[-burnin,]),
# length.out = 3)
#
# axis(side = 2, at = Seq, labels = round(Seq,0),cex.axis = 1.2)
# axis(side = 1, at = c(0,mcmc_variable_list$no_traces_preburnin), labels = c(0, mcmc_variable_list$no_traces_preburnin*mcmc_variable_list$thinning_interval), cex.axis = 1.2)
#
# for(chain in 1:mcmc_variable_list$no_mcmc_chains){
# lines(y = results$log_posterior_store_chains[plot_thin_preburnin, chain],
# x = plot_thin_preburnin,
# col = chain_cols[chain])
# }
# legend('bottom', legend = paste('Chain', 1:mcmc_variable_list$no_mcmc_chains),lty = 'solid', col = chain_cols, cex = 1, bty = 'n')
#
# title(sub = sprintf('In addition to the thinning interval used to generate results, thinned by %s', plot_thin), cex.sub = 0.5)
#========================================================================================================
#========================================================================================================
# Traces (including burnin, although ylim based on post burnin)
plot(NULL, ylim = c(0,1.1), xlim = c(0, mcmc_variable_list$no_traces_preburnin), bty = 'n', ylab = 'Frequency', xlab = 'Trace',
cex.lab = 1.2, xaxt = 'n', yaxt = 'n')
axis(side = 1, at = c(0,mcmc_variable_list$no_traces_preburnin), labels = c(0,mcmc_variable_list$no_traces_preburnin*mcmc_variable_list$thinning_interval), cex.axis = 1.2)
axis(side = 2, at = seq(0,1,length.out = 3), cex.axis = 1.2)
for(chain in 1:mcmc_variable_list$no_mcmc_chains){
for(i in 1:no_haplotypes){
lines(y = as.mcmc(results$genotype_freq_store_chains[plot_thin_preburnin, i, chain]), x = plot_thin_preburnin, col = chain_cols[chain])
}}
if(Simulated & !mcmc_variable_list$log_like_zero){
abline(h = data_summary$Population_haplotype_frequencies)
legend('topleft', col = 'black', lty = c('solid'),
legend = c('Frequency used to simulate the data'), cex = 1, bty = 'n', lwd = 1.5)
}
legend('topright', col = chain_cols, lty = 'solid', legend = paste('Chain', 1:mcmc_variable_list$no_mcmc_chains), cex = 1, bty = 'n', lwd = 1.5)
title(sub = sprintf('In addition to the thinning interval used to generate results, thinned by %s', plot_thin), cex.sub = 0.5)
#========================================================================================================
#========================================================================================================
# Denisty plots
par(mfrow = c(min(4, no_haplotypes/2), 2))
for(i in 1:no_haplotypes){
plot(NULL, ylim = c(0, max(density(results$genotype_freq_store_chains[plot_thin_preburnin,i,])$y)+10),
cex.lab = 1.2, cex.axis = 1.2, xlim = range(results$genotype_freq_store_chains[,i,]), bty = 'n',
ylab = '', xlab = '')
title(xlab = sprintf('Haplotype %s frequency',haplotypes[i]),
ylab = 'Density', line = 2.5)
for(chain in 1:mcmc_variable_list$no_mcmc_chains){
lines(density(results$genotype_freq_store_chains[plot_thin_preburnin, i, chain]), col = chain_cols[chain])
}
legend('top', legend = paste('Chain', 1:mcmc_variable_list$no_mcmc_chains),lty = 'solid', col = chain_cols, cex = 1, bty = 'n')
title(sub = sprintf('In addition to the thinning interval used to generate results, thinned by %s', plot_thin), cex.sub = 0.5)
}
par(mfrow = c(1,1)) # reset
#========================================================================================================
#========================================================================================================
# Haplotype counts/mois acceptance rate per individual
if(child){
thinnin_seq <- plot_thin_preburnin_noburnin
} else {
thinnin_seq <- plot_thin_preburnin
}
plot(NULL, ylim = c(0,1), xlim = c(min(thinnin_seq),mcmc_variable_list$no_traces_preburnin), xlab = 'Trace', ylab = 'Acceptance rate',
bty = 'n', cex.lab = 1.2, xaxt = 'n')
axis(side = 1, at = c(min(thinnin_seq),mcmc_variable_list$no_traces_preburnin), labels = c(min(thinnin_seq),mcmc_variable_list$no_traces_preburnin)*mcmc_variable_list$thinning_interval)
abline(h = 0, lty = 'dotted')
for(individual in 1:no_total){
for(chain in 1:mcmc_variable_list$no_mcmc_chains){
lines(y = results$acceptance_store_chains[thinnin_seq, individual, chain], x = thinnin_seq, col = chain_cols[chain])
}
}
legend('topright', col = chain_cols, lty = 'solid', legend = paste('Chain', 1:mcmc_variable_list$no_mcmc_chains), cex = 1, bty = 'n', lwd = 1.5)
title(sub = sprintf('In addition to the thinning interval used to generate results, thinned by %s', plot_thin), cex.sub = 0.5)
#========================================================================================================
# #========================================================================================================
# # Correlation (large)
# for(chain in 1:mcmc_variable_list$no_mcmc_chains){
# post_bin <- results$genotype_freq_store_chains[-unique(c(plot_thin_preburnin, burnin)),,chain]
# try(scatterplotMatrix(post_bin[sample(nrow(post_bin), min(100,nrow(post_bin))),]))
# }
# title(sub = sprintf('In addition to the thinning interval used to generate results, thinned by %s', plot_thin), cex.sub = 0.5)
# #========================================================================================================
#=====================================================================================================
# plot the haplotype aggregate - thinned by 10 and discluding burn in
total_no_traces_postburnin <- dim(mcmc_As)[1]
if(total_no_traces_postburnin > 1000){
hap_thin <- 100
}else{
hap_thin <- 10
}
X<-array(NA, dim =c(total_no_traces_postburnin/hap_thin,
no_haplotypes,min(16,no_total)),
dimnames = list(NULL,haplotypes,mini_title_names[randomly_selected_bloodsamples])) # Define X as a rehaplotypes ed version of Trace_genotype_counts in which to store thinned data with burn in disgarded
for(i in 1:min(16, no_total)){ # Sample <=16 blood samples at random
X[,,i]<-mcmc_As[seq(1,total_no_traces_postburnin, hap_thin),randomly_selected_bloodsamples[i],]
} # Not sure if this is necessary anymore
print(levelplot(X, # levelplot for array
aspect='fill',
xlab='Iteration',
ylab='Haplotype',
par.strip.text = list(cex=0.4), # Make pid label smaller
col.regions = gray(moi_list$moi_max:0/moi_list$moi_max),
cuts = moi_list$moi_max, # No. of moi cuts
sub = sprintf('Posterior for %s random blood samples, additionally thinned by %s, post burnin', min(no_total,16), hap_thin),
cex = 0.1,
cex.title = 0.1,
colorkey=list(space='right', # Colour bar parameters
col = gray(moi_list$moi_max:0/moi_list$moi_max),
at = 0:(moi_max+1),
width=0.4,
height=1,
labels=list(labels = paste('Count: ',0:moi_list$moi_max,sep=''),
at = 0:moi_max+0.5,
cex = 0.5)),
scales=list(x=list(tick.number=3, # Axis parameters
label='',
at=1,
alternating=c(1,rep(0,1)), # Control labels
tck=0,# Length of tick marks is zero
rot=90,
cex=0.4),# Rotate Labels
y=list(tick.number=no_haplotypes,
alternating=c(1,rep(0,1)),
tck=0,
at=1:length(dimnames(X)[[2]]),
labels=dimnames(X)[[2]],
cex=0.3))))
#=================================================================================================================
#==================================================================================================================
# Posterior distribution over genotypes for 16 randomly selected individuals
# Remember, the simulated distribution is composed of one or more haplotypes if mixed, hence there is no 'true' haplotype;
# rather, a combination of true haplotypes.
haplotype_dataframe<-c()
for(i in randomly_selected_bloodsamples){
haplotype_dataframe <- rbind(haplotype_dataframe,
data.frame(hap_counts = colSums(mcmc_As[,i,haplotypes]),
hap_name = haplotypes,
id = as.factor(mini_title_names[i])))}
print(barchart(hap_counts ~ hap_name|id, data = haplotype_dataframe,
par.strip.text=list(cex=0.4), # Make id label smaller
stack=FALSE,
col='gray',
ylab='Haplotype count',
xlab='Haplotype',
sub = sprintf('Haplotype barchart, %s randomly selected blood samples, post burnin',min(no_total,16)),
cex.title=0.5,
scales=list(x=list(tck=0,# Length of tick marks is zero
cex=0.6,
rot=54),# Rotate Labels
y=list(tck=0,
alternating=c(1,rep(0,1))))))
#==================================================================================================================
#==================================================================================================================
# Plot the change in m for each individual - thinned by 10 and discluding burn in
print(levelplot(mcmc_mois[seq(1,dim(mcmc_mois)[1],plot_thin), randomly_selected_bloodsamples], # Thin for plotting
aspect = 'fill',
xlab='Traces (burnin removed, chains collapsed, then further thinned (by 10))',
ylab='Blood sample ID',
col.regions=gray(7:0/7),
cuts=7, # No. of moi cuts
sub=sprintf('MOI posterior, %s blood samples, post burnin, additionally thinned by 10',min(no_total,16)),
cex.title=0.2,
colorkey=list(space='right', # Colour bar parameters
tick.number=9,
width=0.4,
height=1,
labels=list(cex=0.5,
at = 1:moi_list$moi_max,
labels=paste('MOI:',1:moi_list$moi_max,sep=''))), # No MOI=0
scales=list(x=list(label='',
at=1,
tck=0,# Length of tick marks is zero
rot=0,
cex=0.6),# Rotate Labels
y=list(tck=0,
labels = mini_title_names[randomly_selected_bloodsamples],
cex=0.4))))
#==================================================================================================================
#==================================================================================================================
# Plot the distribution of m for 16 randomly selected bloodsamples
true_moi<-c()
if(mcmc_variable_list$log_like_zero){moi_simulated_sample[]<-Inf}
for(i in randomly_selected_bloodsamples){
X <- rep('Estimate',moi_list$moi_max)
if(moi_simulated_sample[i] <= moi_list$moi_max){X[moi_simulated_sample[i]]<-'Simulated'}
true_moi<-c(true_moi,X)
}
moi_counts<-apply(mcmc_mois[,randomly_selected_bloodsamples],2,tabulate,nbins=moi_list$moi_max) # Tabulate the MOIs per individual
MOI <- data.frame(moi_count = as.vector(moi_counts), # Create data frame
moi = as.factor(rep(1:moi_list$moi_max, min(16, no_total))),
pid = rep(mini_title_names[randomly_selected_bloodsamples],rep(moi_list$moi_max, min(16, no_total))),
true_moi=true_moi)
print(barchart(moi_count ~ moi | pid, data = MOI,
groups = true_moi,# Highlight true moi
auto.key = list(space = "right"),
par.strip.text = list(cex = 0.4), # Make pid label smaller
stack=TRUE,
ylab = 'MOI count',
xlab = 'MOI',
sub = sprintf('MOI barchart, %s blood samples, post burnin, chains combined', min(no_total,16)),
cex.title = 0.1,
scales=list(x=list(tck=0,# Length of tick marks is zero
rot=45,
cex=0.6),# Rotate Labels
y=list(tck=0,
cex=0.6,
alternating=c(1,rep(0,1))))))
#==================================================================================================================
if(child == TRUE){ # Extra plots if child hierachy is included
#========================================================================================================
# Population frequency acceptance rate per individual
results$acceptance_store_population_chains[results$acceptance_store_population_chains == 0] <- NA
par(mfrow = c(1,1))
plot(NULL, ylim = c(0,1), xlim = c(max(burnin)+1,mcmc_variable_list$no_traces_preburnin), xlab = 'Trace', ylab = 'Population frequency acceptance rate',
bty = 'n', cex.lab = 1.2, xaxt = 'n')
axis(side = 1, at = c((max(burnin)+1),mcmc_variable_list$no_traces_preburnin), labels = c((max(burnin)+1),mcmc_variable_list$no_traces_preburnin)*mcmc_variable_list$thinning_interval)
for(chain in 1:mcmc_variable_list$no_mcmc_chains){lines(y=results$acceptance_store_population_chains[plot_thin_preburnin_noburnin,chain],
x = plot_thin_preburnin_noburnin, col = chain_cols[chain])}
legend('topright', col = chain_cols, lty = 'solid', legend = paste('Chain', 1:mcmc_variable_list$no_mcmc_chains), cex = 1, bty = 'n', lwd = 1.5)
title(sub = sprintf('In addition to the thinning interval used to generate results, thinned by %s', plot_thin), cex.sub = 0.5)
#========================================================================================================
#========================================================================================================
# Sigma acceptance rate per individual
par(mfrow = c(1,1))
plot(NULL, ylim = c(0,1), xlim = c(max(burnin)+1,mcmc_variable_list$no_traces_preburnin), xlab = 'Trace', ylab = 'Dispersion parameter acceptance rate',
bty = 'n', cex.lab = 1.2, xaxt = 'n')
axis(side = 1, at = c((max(burnin)+1),mcmc_variable_list$no_traces_preburnin), labels = c((max(burnin)+1),mcmc_variable_list$no_traces_preburnin)*mcmc_variable_list$thinning_interval)
for(chain in 1:mcmc_variable_list$no_mcmc_chains){lines(y=results$acceptance_store_sigma_chains[plot_thin_preburnin_noburnin, chain],
x=plot_thin_preburnin_noburnin, col = chain_cols[chain], lty = 'solid')}
legend('topright', col = chain_cols, lty = 'solid', legend = paste('Chain', 1:mcmc_variable_list$no_mcmc_chains), cex = 1, bty = 'n', lwd = 1.5)
title(sub = sprintf('In addition to the thinning interval used to generate results, thinned by %s', plot_thin), cex.sub = 0.5)
#========================================================================================================
#========================================================================================================
# Classic mcmc plots of sigma (problems with start and end (try more iterations))
mcmc_sigma_temp0 <- alply(results$sigma_store_chains[-burnin,], 2)
mcmc_sigma_temp1 <- lapply(mcmc_sigma_temp0, FUN = mcmc, thin = mcmc_variable_list$thinning_interval, start = (max(burnin)*mcmc_variable_list$thinning_interval)+1, end = mcmc_variable_list$no_traces_preburnin*mcmc_variable_list$thinning_interval)
mcmc_sigma <- mcmc.list(mcmc_sigma_temp1)
Eff_sigma <- round(effectiveSize(mcmc_sigma),0)
plot(mcmc_sigma, bty = 'n', density = FALSE, col = chain_cols, ylab = expression(sigma), smooth = FALSE) # No burnin
legend('topright', col = chain_cols, lty = 'solid', legend = paste('Chain', 1:mcmc_variable_list$no_mcmc_chains), cex = 1, bty = 'n', lwd = 1.5)
density_sigma_posterior <- density(as.vector(results$sigma_store_chains[-burnin,]))
density_sigma_prior <- dgamma(density_sigma_posterior$x, shape = sigma_list$sigma_shape_hyperparameter, scale = sigma_list$sigma_scale_hyperparameter)
plot(density_sigma_posterior, bty = 'n', main = '', xlab = bquote(sigma ~ '( effective sample size' ~.(Eff_sigma)~ ')'), ylim = c(0,max(density_sigma_prior,density_sigma_posterior$y)), lwd = 2)
if(mcmc_variable_list$log_like_zero){lines(y = density_sigma_prior, x = density_sigma_posterior$x, col = 'green', lwd = 2)}
if(Simulated & !mcmc_variable_list$log_like_zero){abline(v = data_summary$sigma, lty = 'dashed', lwd = 2)}
if(Simulated & mcmc_variable_list$log_like_zero){legend('top', legend = c('Posterior distribution', 'Prior distribution'),
col = c(1,3), lty = c('solid', 'solid'), bty = 'n', lwd = 2)
}
if(Simulated & !mcmc_variable_list$log_like_zero){legend('top', legend = expression('Simulated'~ sigma, 'Posterior distribution'),
col = c(1,1), lty = c('dashed', 'solid'), bty = 'n', lwd = 2)
}
autocorr.plot(mcmc_sigma)
#========================================================================================================
#========================================================================================================
# Plots of frequency_child and sigma
Xlist0 <- alply(results$genotype_freq_child_store_chains[-burnin,,,],2) # Split by child (value is a list with one entry per child)
Xlist1 <- lapply(Xlist0, FUN = alply, 3) # For each child entry, split by chain (value is a list of lists, one entry per child, and, per chain, however many entries as chains)
Xlist2 <- lapply(Xlist1, FUN = lapply, mcmc, start = (max(burnin)*mcmc_variable_list$thinning_interval)+1, end = mcmc_variable_list$no_traces_preburnin*mcmc_variable_list$thinning_interval, thin = mcmc_variable_list$thinning_interval) # Turn each chain entry into an mcmc object
mcmc_frequency_child_chains <- lapply(Xlist2, FUN = mcmc.list) # For each child, create mcmc.list over chains
# Classic mcmc plots for a random child
random_child <- sample(no_children, 1)
plot(mcmc_frequency_child_chains[[random_child]], bty = 'n', col = chain_cols, smooth = FALSE)
# Classic mcmc summary for all children combined (an estimate of the frequency population based on the frequency of the children)
Xlist0 <- alply(results$genotype_freq_child_store_chains[-burnin,,,],2) # Create list of children
Xlist1 <- abind(Xlist0, along = 3) # Combine all children
dimnames(Xlist1)[[3]] <- NULL
Xlist2 <- alply(Xlist1, 3) # Chains
Xlist3 <- lapply(Xlist2, FUN = mcmc, thin = mcmc_variable_list$thinning_interval) # create mcmc objects
mcmc_population_child_chains <- mcmc.list(Xlist3)
summary(mcmc_population_child_chains) # All chains combined
# Classic mcmc plot for all children combined (thinned before plotting)
plotting_thinning_interval <- 100
thin_mcmc_population_child_chains <- seq(1, nrow(mcmc_population_child_chains[[1]]), by = plotting_thinning_interval)
Xlist0 <- lapply(mcmc_population_child_chains, FUN = function(x){x[thin_mcmc_population_child_chains,]}) # thin
Xlist1 <- lapply(Xlist0, mcmc, thin = mcmc_variable_list$thinning_interval*plotting_thinning_interval) # reclassify as mcmc object
mcmc_population_child_chains_thinned = mcmc.list(Xlist1) # re-classify as mcmc list
plot(mcmc_population_child_chains_thinned, smooth = FALSE, bty = 'n', density = FALSE, col = rainbow(length(mcmc_population_child_chains_thinned), alpha = 0.2))
title(sub = sprintf('In addition to the thinning interval used to generate results, thinned by %s', plotting_thinning_interval), cex.sub = 0.5)
plot(mcmc_population_child_chains_thinned, bty = 'n', trace = FALSE, xlim = c(0,1)) # All chains combined
title(sub = sprintf('In addition to the thinning interval used to generate results, thinned by %s', plotting_thinning_interval), cex.sub = 0.5)
par(mfrow = c(1,1))
#========================================================================================================
# Violin of frequency pop estimated from randomly selected children
# Samples from the dirichlet prior for overlaying when mcmc_variable_list$log_like_zero is true
randomly_selected_children <- sample(length(mcmc_population_child_chains_thinned),10)
Cols <- rainbow(length(randomly_selected_children), alpha = 0.5)
priorsamples <- rdirichlet(1000, frequency_list$frequency_hyperparameter) # frequency_hyperparameter
colnames(priorsamples) <- haplotypes
plot(NULL, xlim=c(0.5,no_haplotypes + 0.5), ylim=c(0,1.05), xaxt = 'n', yaxt = 'n', ylab = 'Frequency', cex.lab = 1.2, xlab = 'Haplotypes')
for(Child in 1:length(randomly_selected_children)){
for(haplotype in haplotypes){
if(mcmc_variable_list$log_like_zero){if(Child ==1){vioplot(priorsamples[,haplotype], at = which(haplotype == haplotypes), col = 'gray', add = TRUE)}}
try(vioplot(mcmc_population_child_chains_thinned[,haplotype][[Child]], at = which(haplotype == haplotypes), col = Cols[Child], add = TRUE))}}
if(!mcmc_variable_list$log_like_zero & Simulated){points(y = data_summary$Sample_haplotype_frequencies[,haplotypes ], x=1:no_haplotypes, pch = 19, col = 'black')}
box(col = 'white', lwd = 6)
axis(side = 1, at = 1:no_haplotypes, labels = haplotypes, tick = FALSE, cex.axis = 1, las = 2, line = -1)
axis(side = 2, at = seq(0,1,0.5), labels = seq(0,1,0.5), cex.axis = 1.2)
if(!mcmc_variable_list$log_like_zero){legend('topright', legend = c('Frequency used to simulate the data'), pch = c(19), col = 'black', bty = 'n', cex = 1)}
legend('topleft', legend = paste('Child', randomly_selected_children),lty = 'solid', col = Cols, cex = 1, bty = 'n', lwd = 3)
title(sub = sprintf('Thinned by an extra %s', plotting_thinning_interval))
#========================================================================================================
}
#==================================================================================================================
if(names(dev.cur()) == 'pdf'){dev.off()}
# if(child == TRUE){ # Extra plots if child is true (too big for pdf)
# # Classic mcmc plots for all children for a random chain and 3 random haplotypes (massive pdf size hence not printed to pdf)
# Xlist0 <- alply(results$genotype_freq_child_store_chains[-burnin,,,],4) # Split by chain (value is a list with one entry per chain)
# Xlist1 <- lapply(Xlist0, FUN = alply, 2) # For each chain, split by child (value is a list of list, one entry per chain, and, per chain, however many entries as children)
# Xlist2 <- lapply(Xlist1, FUN = lapply, mcmc, start = (0.5*mcmc_variable_list$no_traces_preburnin*mcmc_variable_list$thinning_interval)+1, end = mcmc_variable_list$no_traces_preburnin*mcmc_variable_list$thinning_interval, thin = mcmc_variable_list$thinning_interval) # Turn each chain entry into an mcmc object
# mcmc_frequency_child_chains <- lapply(Xlist2, FUN = mcmc.list) # For each child, create mcmc.list over chains
# random_chain <- sample(mcmc_variable_list$no_mcmc_chains, 1) # Pick a random chain
# random_haplotypes <- sample(no_haplotypes, 3)
# plot(mcmc_frequency_child_chains[[random_chain]][,random_haplotypes], bty = 'n') # plot
# }
#==================================================================================================================
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.