rbeast.clusterbranchlengths.150205<- function()
{
file <- '/Users/Oliver/Dropbox\ (Infectious Disease)/OR_Work/2015/2015_PANGEA_ReductionIncidence/150417_files/150129_HPTN071_scA-mFP25_DATEDTREE.newick'
phd <- read.tree(file)
tmp <- which( sapply(phd, Ntip)>1 )
phd <- lapply( tmp, function(i) phd[[i]] )
dfb <- do.call('rbind',lapply(seq_along(phd), function(i)
{
data.table(CLU=i, NTIP=Ntip(phd[[i]]), BRL=phd[[i]]$edge.length)
}))
dfb[, CLU_SIZE:='']
set(dfb, dfb[, which(NTIP<4)], 'CLU_SIZE','<4')
set(dfb, dfb[, which(NTIP>3 & NTIP<=10)], 'CLU_SIZE','4-10')
set(dfb, dfb[, which(NTIP>10)], 'CLU_SIZE','>10')
ggplot(dfb, aes(x=BRL, group=CLU, colour=factor(NTIP))) + stat_ecdf() + guides(colour=guide_legend(ncol=7)) +
theme(legend.position='bottom') + labs(x='branch length\n(years)', y='cumulative distribution', colour='number of tips\nper cluster') +
facet_grid(~CLU_SIZE)
file <- '/Users/Oliver/Dropbox\ (Infectious Disease)/OR_Work/2015/2015_PANGEA_ReductionIncidence/150417_files/150129_HPTN071_scA-mFP25_DATEDTREE_brld.pdf'
ggsave(file=file, h=8, w=8)
}
rbeast.mixing<- function()
{
require(coda)
indir <- file <- '/Users/Oliver/Dropbox (Infectious Disease)/OR_Work/2015/2015_PANGEA_ReductionIncidence/150417_files'
# read log file
infiles <- list.files(indir, pattern='log$')
# get options
infiles <- data.table(FILE=infiles)
infiles[, SUBSTM:= NA_character_]
set(infiles, infiles[, which(grepl('GTR',FILE))], 'SUBSTM', 'GTR')
set(infiles, infiles[, which(grepl('HKY',FILE))], 'SUBSTM', 'HKY')
infiles[, CODON:= 'No']
set(infiles, infiles[, which(grepl('CODON',FILE))], 'CODON', 'Yes')
infiles[, GENE:= NA_character_]
set(infiles, infiles[, which(grepl('pol',FILE))], 'GENE', 'pol')
infiles[, SEQ:= infiles[, substring(regmatches(FILE, regexpr('seq[0-9]+',FILE)),4)]]
set(infiles, NULL, 'SEQ', infiles[, as.numeric(SEQ)])
set(infiles, infiles[, which(grepl('cseq3',FILE))], 'SEQ', 1350)
infiles[, CLU_TYPE:= NA_character_]
set(infiles, infiles[, which(grepl('mseq',FILE))], 'CLU_TYPE', 'largest')
set(infiles, infiles[, which(grepl('clrnd',FILE))], 'CLU_TYPE', 'random')
set(infiles, infiles[, which(grepl('cseq',FILE))], 'CLU_TYPE', 'all')
set(infiles, infiles[, which(grepl('clsm',FILE))], 'CLU_TYPE', 'smallest')
dfl <- do.call('rbind',lapply(infiles[, unique(FILE)], function(f)
{
cat(paste('\nprocess file', f))
file <- paste(indir, '/', f, sep='')
z <- beast.read.log(file, select=c('state','likelihood','posterior','branchRates','gtr','hky','site','logPopSize'), verbose=0)
z <- melt(z, id.vars=c('state'))
z[, FILE:=f]
z
}))
dfm <- dfl[, {
z <- effectiveSize(matrix(data=value,ncol=1))
list(ESS=z)
}, by=c('FILE','variable')]
dfm <- merge(dfm, infiles, by='FILE')
# GTR SEQ 1000
dfsp <- subset(dfm, SUBSTM=='GTR' & SEQ==1000)
setkey(dfsp, ESS)
ggplot(dfsp, aes(x=variable, y=ESS, fill=FILE)) + geom_bar(stat='identity') +
coord_flip() +
scale_y_continuous(breaks=c(0,100,300,500,700), minor_breaks=NULL, expand=c(0,0)) +
labs(y='effective sample size\n(20e6 iterations, burn-in 10%)', x='') +
theme_bw() +
theme(legend.position='bottom',panel.grid.minor = element_line(colour='grey70', size=0.2), panel.grid.major = element_line(colour='grey70', size=0.3)) +
facet_grid(SUBSTM~SEQ) +
guides(fill=guide_legend(ncol=1))
file <- '/Users/Oliver/Dropbox (Infectious Disease)/OR_Work/2015/2015_PANGEA_ReductionIncidence/150417_analysis/150413_Mixing_fixedtree_pol_GTR_mseq1000.pdf'
ggsave(file=file, h=15, w=10)
# HKY SEQ 1000
dfsp <- subset(dfm, SUBSTM=='HKY' & SEQ==1000 & CLU_TYPE=='largest')
setkey(dfsp, ESS)
ggplot(dfsp, aes(x=variable, y=ESS, fill=FILE)) + geom_bar(stat='identity') +
coord_flip() +
scale_y_continuous(breaks=c(0,100,300,500,700), minor_breaks=NULL, expand=c(0,0)) +
labs(y='effective sample size\n(20e6 iterations, burn-in 10%)', x='') +
theme_bw() +
theme(legend.position='bottom',panel.grid.minor = element_line(colour='grey70', size=0.2), panel.grid.major = element_line(colour='grey70', size=0.3)) +
facet_grid(SUBSTM~SEQ) +
guides(fill=guide_legend(ncol=1))
file <- '/Users/Oliver/Dropbox (Infectious Disease)/OR_Work/2015/2015_PANGEA_ReductionIncidence/150417_analysis/150413_Mixing_fixedtree_pol_HKY_mseq1000.pdf'
ggsave(file=file, h=15, w=10)
}
rbeast.mixing.weight150419<- function()
{
require(rBEAST)
require(coda)
indir <- file <- '/Users/Oliver/Dropbox (Infectious Disease)/OR_Work/2015/2015_PANGEA_ReductionIncidence/150419_files'
# read log file
infiles <- list.files(indir, pattern='log|ops')
# get options
infiles <- data.table(FILE=infiles)
infiles[, TYPE:= 'log']
set(infiles, infiles[, which(grepl('ops',FILE))], 'TYPE', 'ops')
infiles[, SUBSTM:= NA_character_]
set(infiles, infiles[, which(grepl('GTR',FILE))], 'SUBSTM', 'GTR')
set(infiles, infiles[, which(grepl('HKY',FILE))], 'SUBSTM', 'HKY')
infiles[, CODON:= 'No']
set(infiles, infiles[, which(grepl('CODON',FILE))], 'CODON', 'Yes')
infiles[, GENE:= NA_character_]
set(infiles, infiles[, which(grepl('pol',FILE))], 'GENE', 'pol')
infiles[, SEQ:= infiles[, substring(regmatches(FILE, regexpr('seq[0-9]+',FILE)),4)]]
set(infiles, NULL, 'SEQ', infiles[, as.numeric(SEQ)])
set(infiles, infiles[, which(grepl('cseq3',FILE))], 'SEQ', 1350)
infiles[, CLU_TYPE:= NA_character_]
set(infiles, infiles[, which(grepl('mseq',FILE))], 'CLU_TYPE', 'largest')
set(infiles, infiles[, which(grepl('clrnd',FILE))], 'CLU_TYPE', 'random')
set(infiles, infiles[, which(grepl('cseq',FILE))], 'CLU_TYPE', 'all')
set(infiles, infiles[, which(grepl('clsm',FILE))], 'CLU_TYPE', 'smallest')
infiles[, WGHT_TYPE:= NA_character_]
set(infiles, infiles[, which(grepl('uweight',FILE))], 'WGHT_TYPE', 'same')
set(infiles, infiles[, which(grepl('uwnoSkgrdBlck',FILE))], 'WGHT_TYPE', 'same ex skygrid')
set(infiles, infiles[, which(grepl('uwnoSkgrdBlckBrRtCtgrs',FILE))], 'WGHT_TYPE', 'same ex skygrid and branch rates')
infiles[, WGHT:= infiles[, regmatches(FILE, regexpr('[0-9]+\\.log|[0-9]+\\.ops',FILE))]]
set(infiles, NULL, 'WGHT', infiles[, as.numeric(substr(WGHT, 1, nchar(WGHT)-4))])
# read logs
dfl <- do.call('rbind',lapply(subset(infiles, TYPE=='log')[, unique(FILE)], function(f)
{
cat(paste('\nprocess file', f))
file <- paste(indir, '/', f, sep='')
z <- beast.read.log(file, select=c('state','likelihood','posterior','ucld','branchRates','gtr','hky','site','logPopSize'), verbose=0)
z <- melt(z, id.vars=c('state'))
z[, FILE:=f]
z
}))
dfl <- subset(dfl, state>2e6)
# calculate effective size
dfm <- dfl[, {
z <- effectiveSize(matrix(data=value,ncol=1))
list(ESS=z)
}, by=c('FILE','variable')]
dfm <- merge(dfm, infiles, by='FILE')
setkey(dfm, WGHT_TYPE, WGHT)
dfm[, WGHT_LGND:= paste(WGHT_TYPE, WGHT, sep='\n')]
set(dfm, NULL, 'WGHT_LGND', dfm[, factor(WGHT_LGND, levels=unique(WGHT_LGND), labels=unique(WGHT_LGND))])
# read ops
dfo <- do.call('rbind',lapply(subset(infiles, TYPE=='ops')[, unique(FILE)], function(f)
{
cat(paste('\nprocess file', f))
file <- paste(indir, '/', f, sep='')
z <- beast.read.ops(file)
z[, FILE:=f]
z
}))
dfo <- merge(dfo, infiles, by='FILE')
setkey(dfo, WGHT_TYPE, WGHT)
dfo[, WGHT_LGND:= paste(WGHT_TYPE, WGHT, sep='\n')]
set(dfo, NULL, 'WGHT_LGND', dfo[, factor(WGHT_LGND, levels=unique(WGHT_LGND), labels=unique(WGHT_LGND))])
# generate plots
#nseq <- 1000
nseq <- 400
dfsp <- subset(dfm, SEQ==nseq)
# generate plots
#nseq <- 1000
nseq <- 400
dfsp <- subset(dfm, SEQ==nseq)
setkey(dfsp, ESS)
ggplot(dfsp, aes(x=variable, y=ESS, fill=FILE)) + geom_bar(stat='identity') +
coord_flip() +
scale_y_continuous(breaks=c(0,100,300,500,700), minor_breaks=NULL, expand=c(0,0)) +
labs(y='effective sample size\n(20e6 iterations, burn-in 10%)', x='') +
theme_bw() +
theme(legend.position='bottom',panel.grid.minor = element_line(colour='grey70', size=0.2), panel.grid.major = element_line(colour='grey70', size=0.3)) +
facet_grid(SEQ~WGHT_LGND) +
guides(fill=guide_legend(ncol=1))
file <- paste( '/Users/Oliver/Dropbox (Infectious Disease)/OR_Work/2015/2015_PANGEA_ReductionIncidence/150419_files/150419_Weights_fixedtree_pol_GTR_mseq',nseq,'_ESS.pdf', sep='' )
ggsave(file=file, h=0.25*dfsp[, length(unique(variable))], w=25)
dfsp <- subset(dfo, SEQ==nseq)
ggplot(dfsp, aes(x=Operator, y=Count, fill=FILE)) + geom_bar(stat='identity') +
coord_flip() +
scale_y_continuous(expand=c(0,0)) +
labs(y='Operator count', x='') +
theme_bw() +
theme(legend.position='bottom',panel.grid.minor = element_line(colour='grey70', size=0.2), panel.grid.major = element_line(colour='grey70', size=0.3)) +
facet_grid(SEQ~WGHT_LGND) +
guides(fill=guide_legend(ncol=1))
file <- paste('/Users/Oliver/Dropbox (Infectious Disease)/OR_Work/2015/2015_PANGEA_ReductionIncidence/150419_files/150419_Weights_fixedtree_pol_GTR_mseq',nseq,'_Count.pdf', sep='')
ggsave(file=file, h=ifelse(nseq==400,0.5,0.2)*dfsp[, length(unique(Operator))], w=25)
ggplot(dfsp, aes(x=Operator, y=PrAccept, fill=FILE)) + geom_bar(stat='identity') +
coord_flip() +
scale_y_continuous(expand=c(0,0)) +
labs(y='Operator Pr Accept', x='') +
theme_bw() +
theme(legend.position='bottom',panel.grid.minor = element_line(colour='grey70', size=0.2), panel.grid.major = element_line(colour='grey70', size=0.3)) +
facet_grid(SEQ~WGHT_LGND) +
guides(fill=guide_legend(ncol=1))
file <- paste('/Users/Oliver/Dropbox (Infectious Disease)/OR_Work/2015/2015_PANGEA_ReductionIncidence/150419_files/150419_Weights_fixedtree_pol_GTR_mseq',nseq,'_PrAccept.pdf', sep='')
ggsave(file=file, h=ifelse(nseq==400,0.5,0.2)*dfsp[, length(unique(Operator))], w=25)
}
rbeast.mixing.weight150426<- function()
{
require(rBEAST)
require(coda)
indir <- file <- '/Users/Oliver/Dropbox (Infectious Disease)/OR_Work/2015/2015_PANGEA_ReductionIncidence/150426_files'
# read log file
infiles <- list.files(indir, pattern='log|ops')
# get options
infiles <- data.table(FILE=infiles)
infiles[, TYPE:= 'log']
set(infiles, infiles[, which(grepl('ops',FILE))], 'TYPE', 'ops')
infiles[, SUBSTM:= NA_character_]
set(infiles, infiles[, which(grepl('GTR',FILE))], 'SUBSTM', 'GTR')
set(infiles, infiles[, which(grepl('HKY',FILE))], 'SUBSTM', 'HKY')
infiles[, CODON:= 'No']
set(infiles, infiles[, which(grepl('CODON',FILE))], 'CODON', 'Yes')
infiles[, GENE:= NA_character_]
set(infiles, infiles[, which(grepl('pol',FILE))], 'GENE', 'pol')
infiles[, SEQ:= infiles[, substring(regmatches(FILE, regexpr('seq[0-9]+',FILE)),4)]]
set(infiles, NULL, 'SEQ', infiles[, as.numeric(SEQ)])
set(infiles, infiles[, which(grepl('cseq3',FILE))], 'SEQ', 1350)
infiles[, CLU_TYPE:= NA_character_]
set(infiles, infiles[, which(grepl('mseq',FILE))], 'CLU_TYPE', 'largest')
set(infiles, infiles[, which(grepl('clrnd',FILE))], 'CLU_TYPE', 'random')
set(infiles, infiles[, which(grepl('cseq',FILE))], 'CLU_TYPE', 'all')
set(infiles, infiles[, which(grepl('clsm',FILE))], 'CLU_TYPE', 'smallest')
infiles[, WGHT_TYPE:= NA_character_]
set(infiles, infiles[, which(grepl('w4Intrnl[0-9]+',FILE))], 'WGHT_TYPE', 'w4Intrnl')
set(infiles, infiles[, which(grepl('w4BrUCLDUpDown',FILE))], 'WGHT_TYPE', 'w4UCLDUpDown')
infiles[, WGHT:= infiles[, regmatches(FILE, regexpr('[0-9]+\\.log|[0-9]+\\.ops',FILE))]]
set(infiles, NULL, 'WGHT', infiles[, as.numeric(substr(WGHT, 1, nchar(WGHT)-4))])
# read logs
dfl <- do.call('rbind',lapply(subset(infiles, TYPE=='log')[, unique(FILE)], function(f)
{
cat(paste('\nprocess file', f))
file <- paste(indir, '/', f, sep='')
z <- beast.read.log(file, select=c('state','likelihood','posterior','ucld','branchRates','gtr','hky','site','logPopSize','rootHeight'), verbose=0)
z <- melt(z, id.vars=c('state'))
z[, FILE:=f]
z
}))
dfl <- subset(dfl, state>2e6)
# calculate effective size
dfm <- dfl[, {
z <- effectiveSize(matrix(data=value,ncol=1))
list(ESS=z)
}, by=c('FILE','variable')]
dfm <- merge(dfm, infiles, by='FILE')
setkey(dfm, WGHT_TYPE, WGHT)
dfm[, WGHT_LGND:= paste(WGHT_TYPE, WGHT, sep='\n')]
set(dfm, NULL, 'WGHT_LGND', dfm[, factor(WGHT_LGND, levels=unique(WGHT_LGND), labels=unique(WGHT_LGND))])
# read ops
dfo <- do.call('rbind',lapply(subset(infiles, TYPE=='ops')[, unique(FILE)], function(f)
{
cat(paste('\nprocess file', f))
file <- paste(indir, '/', f, sep='')
z <- beast.read.ops(file)
z[, FILE:=f]
z
}))
dfo <- merge(dfo, infiles, by='FILE')
setkey(dfo, WGHT_TYPE, WGHT)
dfo[, WGHT_LGND:= paste(WGHT_TYPE, WGHT, sep='\n')]
set(dfo, NULL, 'WGHT_LGND', dfo[, factor(WGHT_LGND, levels=unique(WGHT_LGND), labels=unique(WGHT_LGND))])
# generate plots
#nseq <- 1000
nseq <- 400
dfsp <- subset(dfm, SEQ==nseq)
setkey(dfsp, ESS)
ggplot(dfsp, aes(x=variable, y=ESS, fill=FILE)) + geom_bar(stat='identity') +
coord_flip() +
scale_y_continuous(breaks=c(0,100,300,500,700), minor_breaks=NULL, expand=c(0,0)) +
labs(y='effective sample size\n(20e6 iterations, burn-in 10%)', x='') +
theme_bw() +
theme(legend.position='bottom',panel.grid.minor = element_line(colour='grey70', size=0.2), panel.grid.major = element_line(colour='grey70', size=0.3)) +
facet_grid(SEQ~WGHT_LGND) +
guides(fill=guide_legend(ncol=1))
file <- paste( '/Users/Oliver/Dropbox (Infectious Disease)/OR_Work/2015/2015_PANGEA_ReductionIncidence/150426_files/150426_Weights_fixedtopo_pol_GTR_mseq',nseq,'_ESS.pdf', sep='' )
ggsave(file=file, h=0.2*dfsp[, length(unique(variable))], w=25)
dfsp <- subset(dfo, SEQ==nseq)
set(dfsp, NULL, 'Operator', dfsp[, substr(Operator,1,60)])
ggplot(dfsp, aes(x=Operator, y=Count, fill=FILE)) + geom_bar(stat='identity') +
coord_flip() +
scale_y_continuous(expand=c(0,0)) +
labs(y='Operator count', x='') +
theme_bw() +
theme(legend.position='bottom',panel.grid.minor = element_line(colour='grey70', size=0.2), panel.grid.major = element_line(colour='grey70', size=0.3)) +
facet_grid(SEQ~WGHT_LGND) +
guides(fill=guide_legend(ncol=1))
file <- paste('/Users/Oliver/Dropbox (Infectious Disease)/OR_Work/2015/2015_PANGEA_ReductionIncidence/150426_files/150426_Weights_fixedtopo_pol_GTR_mseq',nseq,'_Count.pdf', sep='')
ggsave(file=file, h=0.2*dfsp[, length(unique(Operator))], w=25)
ggplot(dfsp, aes(x=Operator, y=PrAccept, fill=FILE)) + geom_bar(stat='identity') +
coord_flip() +
scale_y_continuous(expand=c(0,0)) +
labs(y='Operator Pr Accept', x='') +
theme_bw() +
theme(legend.position='bottom',panel.grid.minor = element_line(colour='grey70', size=0.2), panel.grid.major = element_line(colour='grey70', size=0.3)) +
facet_grid(SEQ~WGHT_LGND) +
guides(fill=guide_legend(ncol=1))
file <- paste('/Users/Oliver/Dropbox (Infectious Disease)/OR_Work/2015/2015_PANGEA_ReductionIncidence/150426_files/150426_Weights_fixedtopo_pol_GTR_mseq',nseq,'_PrAccept.pdf', sep='')
ggsave(file=file, h=0.2*dfsp[, length(unique(Operator))], w=25)
}
rbeast.mixing.weight150424<- function()
{
require(rBEAST)
require(coda)
indir <- file <- '/Users/Oliver/Dropbox (Infectious Disease)/OR_Work/2015/2015_PANGEA_ReductionIncidence/150424_files'
# read log file
infiles <- list.files(indir, pattern='log|ops')
# get options
infiles <- data.table(FILE=infiles)
infiles[, TYPE:= 'log']
set(infiles, infiles[, which(grepl('ops',FILE))], 'TYPE', 'ops')
infiles[, SUBSTM:= NA_character_]
set(infiles, infiles[, which(grepl('GTR',FILE))], 'SUBSTM', 'GTR')
set(infiles, infiles[, which(grepl('HKY',FILE))], 'SUBSTM', 'HKY')
infiles[, CODON:= 'No']
set(infiles, infiles[, which(grepl('CODON',FILE))], 'CODON', 'Yes')
infiles[, GENE:= NA_character_]
set(infiles, infiles[, which(grepl('pol',FILE))], 'GENE', 'pol')
infiles[, SEQ:= infiles[, substring(regmatches(FILE, regexpr('seq[0-9]+',FILE)),4)]]
set(infiles, NULL, 'SEQ', infiles[, as.numeric(SEQ)])
set(infiles, infiles[, which(grepl('cseq3',FILE))], 'SEQ', 1350)
infiles[, CLU_TYPE:= NA_character_]
set(infiles, infiles[, which(grepl('mseq',FILE))], 'CLU_TYPE', 'largest')
set(infiles, infiles[, which(grepl('clrnd',FILE))], 'CLU_TYPE', 'random')
set(infiles, infiles[, which(grepl('cseq',FILE))], 'CLU_TYPE', 'all')
set(infiles, infiles[, which(grepl('clsm',FILE))], 'CLU_TYPE', 'smallest')
infiles[, WGHT_TYPE:= NA_character_]
set(infiles, infiles[, which(grepl('w4BR[0-9]+',FILE))], 'WGHT_TYPE', 'w4BR')
set(infiles, infiles[, which(grepl('w4BR2xIntrnl',FILE))], 'WGHT_TYPE', 'w42xIntrnl')
infiles[, WGHT:= infiles[, regmatches(FILE, regexpr('[0-9]+\\.log|[0-9]+\\.ops',FILE))]]
set(infiles, NULL, 'WGHT', infiles[, as.numeric(substr(WGHT, 1, nchar(WGHT)-4))])
# read logs
dfl <- do.call('rbind',lapply(subset(infiles, TYPE=='log')[, unique(FILE)], function(f)
{
cat(paste('\nprocess file', f))
file <- paste(indir, '/', f, sep='')
z <- beast.read.log(file, select=c('state','likelihood','posterior','ucld','branchRates','gtr','hky','site','logPopSize','rootHeight'), verbose=0)
z <- melt(z, id.vars=c('state'))
z[, FILE:=f]
z
}))
dfl <- subset(dfl, state>2e6)
# calculate effective size
dfm <- dfl[, {
z <- effectiveSize(matrix(data=value,ncol=1))
list(ESS=z)
}, by=c('FILE','variable')]
dfm <- merge(dfm, infiles, by='FILE')
setkey(dfm, WGHT_TYPE, WGHT)
dfm[, WGHT_LGND:= paste(WGHT_TYPE, WGHT, sep='\n')]
set(dfm, NULL, 'WGHT_LGND', dfm[, factor(WGHT_LGND, levels=unique(WGHT_LGND), labels=unique(WGHT_LGND))])
# read ops
dfo <- do.call('rbind',lapply(subset(infiles, TYPE=='ops')[, unique(FILE)], function(f)
{
cat(paste('\nprocess file', f))
file <- paste(indir, '/', f, sep='')
z <- beast.read.ops(file)
z[, FILE:=f]
z
}))
dfo <- merge(dfo, infiles, by='FILE')
setkey(dfo, WGHT_TYPE, WGHT)
dfo[, WGHT_LGND:= paste(WGHT_TYPE, WGHT, sep='\n')]
set(dfo, NULL, 'WGHT_LGND', dfo[, factor(WGHT_LGND, levels=unique(WGHT_LGND), labels=unique(WGHT_LGND))])
# generate plots
#nseq <- 1000
nseq <- 400
dfsp <- subset(dfm, SEQ==nseq)
setkey(dfsp, ESS)
ggplot(dfsp, aes(x=variable, y=ESS, fill=FILE)) + geom_bar(stat='identity') +
coord_flip() +
scale_y_continuous(breaks=c(0,100,300,500,700), minor_breaks=NULL, expand=c(0,0)) +
labs(y='effective sample size\n(20e6 iterations, burn-in 10%)', x='') +
theme_bw() +
theme(legend.position='bottom',panel.grid.minor = element_line(colour='grey70', size=0.2), panel.grid.major = element_line(colour='grey70', size=0.3)) +
facet_grid(SEQ~WGHT_LGND) +
guides(fill=guide_legend(ncol=1))
file <- paste( '/Users/Oliver/Dropbox (Infectious Disease)/OR_Work/2015/2015_PANGEA_ReductionIncidence/150424_files/150424_Weights_fixedtopo_pol_GTR_mseq',nseq,'_ESS.pdf', sep='' )
ggsave(file=file, h=0.2*dfsp[, length(unique(variable))], w=25)
dfsp <- subset(dfo, SEQ==nseq)
set(dfsp, NULL, 'Operator', dfsp[, substr(Operator,1,60)])
ggplot(dfsp, aes(x=Operator, y=Count, fill=FILE)) + geom_bar(stat='identity') +
coord_flip() +
scale_y_continuous(expand=c(0,0)) +
labs(y='Operator count', x='') +
theme_bw() +
theme(legend.position='bottom',panel.grid.minor = element_line(colour='grey70', size=0.2), panel.grid.major = element_line(colour='grey70', size=0.3)) +
facet_grid(SEQ~WGHT_LGND) +
guides(fill=guide_legend(ncol=1))
file <- paste('/Users/Oliver/Dropbox (Infectious Disease)/OR_Work/2015/2015_PANGEA_ReductionIncidence/150424_files/150424_Weights_fixedtopo_pol_GTR_mseq',nseq,'_Count.pdf', sep='')
ggsave(file=file, h=0.2*dfsp[, length(unique(Operator))], w=25)
ggplot(dfsp, aes(x=Operator, y=PrAccept, fill=FILE)) + geom_bar(stat='identity') +
coord_flip() +
scale_y_continuous(expand=c(0,0)) +
labs(y='Operator Pr Accept', x='') +
theme_bw() +
theme(legend.position='bottom',panel.grid.minor = element_line(colour='grey70', size=0.2), panel.grid.major = element_line(colour='grey70', size=0.3)) +
facet_grid(SEQ~WGHT_LGND) +
guides(fill=guide_legend(ncol=1))
file <- paste('/Users/Oliver/Dropbox (Infectious Disease)/OR_Work/2015/2015_PANGEA_ReductionIncidence/150424_files/150424_Weights_fixedtopo_pol_GTR_mseq',nseq,'_PrAccept.pdf', sep='')
ggsave(file=file, h=0.2*dfsp[, length(unique(Operator))], w=25)
}
rbeast.mixing.weight150421<- function()
{
require(rBEAST)
require(coda)
indir <- file <- '/Users/Oliver/Dropbox (Infectious Disease)/OR_Work/2015/2015_PANGEA_ReductionIncidence/150421_files'
# read log file
infiles <- list.files(indir, pattern='log|ops')
# get options
infiles <- data.table(FILE=infiles)
infiles[, TYPE:= 'log']
set(infiles, infiles[, which(grepl('ops',FILE))], 'TYPE', 'ops')
infiles[, SUBSTM:= NA_character_]
set(infiles, infiles[, which(grepl('GTR',FILE))], 'SUBSTM', 'GTR')
set(infiles, infiles[, which(grepl('HKY',FILE))], 'SUBSTM', 'HKY')
infiles[, CODON:= 'No']
set(infiles, infiles[, which(grepl('CODON',FILE))], 'CODON', 'Yes')
infiles[, GENE:= NA_character_]
set(infiles, infiles[, which(grepl('pol',FILE))], 'GENE', 'pol')
infiles[, SEQ:= infiles[, substring(regmatches(FILE, regexpr('seq[0-9]+',FILE)),4)]]
set(infiles, NULL, 'SEQ', infiles[, as.numeric(SEQ)])
set(infiles, infiles[, which(grepl('cseq3',FILE))], 'SEQ', 1350)
infiles[, CLU_TYPE:= NA_character_]
set(infiles, infiles[, which(grepl('mseq',FILE))], 'CLU_TYPE', 'largest')
set(infiles, infiles[, which(grepl('clrnd',FILE))], 'CLU_TYPE', 'random')
set(infiles, infiles[, which(grepl('cseq',FILE))], 'CLU_TYPE', 'all')
set(infiles, infiles[, which(grepl('clsm',FILE))], 'CLU_TYPE', 'smallest')
infiles[, WGHT_TYPE:= NA_character_]
set(infiles, infiles[, which(grepl('w4UCLD[0-9]+',FILE))], 'WGHT_TYPE', 'w4UCLD')
set(infiles, infiles[, which(grepl('w4UCLDUpDown',FILE))], 'WGHT_TYPE', 'w4UCLDUpDown')
infiles[, WGHT:= infiles[, regmatches(FILE, regexpr('[0-9]+\\.log|[0-9]+\\.ops',FILE))]]
set(infiles, NULL, 'WGHT', infiles[, as.numeric(substr(WGHT, 1, nchar(WGHT)-4))])
# read logs
dfl <- do.call('rbind',lapply(subset(infiles, TYPE=='log')[, unique(FILE)], function(f)
{
cat(paste('\nprocess file', f))
file <- paste(indir, '/', f, sep='')
z <- beast.read.log(file, select=c('state','likelihood','posterior','ucld','branchRates','gtr','hky','site','logPopSize'), verbose=0)
z <- melt(z, id.vars=c('state'))
z[, FILE:=f]
z
}))
dfl <- subset(dfl, state>2e6)
# calculate effective size
dfm <- dfl[, {
z <- effectiveSize(matrix(data=value,ncol=1))
list(ESS=z)
}, by=c('FILE','variable')]
dfm <- merge(dfm, infiles, by='FILE')
dfm[, WGHT_LGND:= paste(WGHT_TYPE, WGHT, sep='\n')]
# read ops
dfo <- do.call('rbind',lapply(subset(infiles, TYPE=='ops')[, unique(FILE)], function(f)
{
cat(paste('\nprocess file', f))
file <- paste(indir, '/', f, sep='')
z <- beast.read.ops(file)
z[, FILE:=f]
z
}))
dfo <- merge(dfo, infiles, by='FILE')
dfo[, WGHT_LGND:= paste(WGHT_TYPE, WGHT, sep='\n')]
# generate plots
#nseq <- 1000
nseq <- 400
dfsp <- subset(dfm, SEQ==nseq)
setkey(dfsp, ESS)
ggplot(dfsp, aes(x=variable, y=ESS, fill=FILE)) + geom_bar(stat='identity') +
coord_flip() +
scale_y_continuous(breaks=c(0,100,300,500,700), minor_breaks=NULL, expand=c(0,0)) +
labs(y='effective sample size\n(20e6 iterations, burn-in 10%)', x='') +
theme_bw() +
theme(legend.position='bottom',panel.grid.minor = element_line(colour='grey70', size=0.2), panel.grid.major = element_line(colour='grey70', size=0.3)) +
facet_grid(SEQ~WGHT_LGND) +
guides(fill=guide_legend(ncol=1))
file <- paste( '/Users/Oliver/Dropbox (Infectious Disease)/OR_Work/2015/2015_PANGEA_ReductionIncidence/150421_files/150421_Weights_fixedtree_pol_GTR_mseq',nseq,'_ESS.pdf', sep='' )
ggsave(file=file, h=20, w=30)
dfsp <- subset(dfo, SEQ==nseq)
ggplot(dfsp, aes(x=Operator, y=Count, fill=FILE)) + geom_bar(stat='identity') +
coord_flip() +
scale_y_continuous(expand=c(0,0)) +
labs(y='Operator count', x='') +
theme_bw() +
theme(legend.position='bottom',panel.grid.minor = element_line(colour='grey70', size=0.2), panel.grid.major = element_line(colour='grey70', size=0.3)) +
facet_grid(SEQ~WGHT_LGND) +
guides(fill=guide_legend(ncol=1))
file <- paste('/Users/Oliver/Dropbox (Infectious Disease)/OR_Work/2015/2015_PANGEA_ReductionIncidence/150421_files/150421_Weights_fixedtree_pol_GTR_mseq',nseq,'_Count.pdf', sep='')
ggsave(file=file, h=20, w=30)
ggplot(dfsp, aes(x=Operator, y=PrAccept, fill=FILE)) + geom_bar(stat='identity') +
coord_flip() +
scale_y_continuous(expand=c(0,0)) +
labs(y='Operator Pr Accept', x='') +
theme_bw() +
theme(legend.position='bottom',panel.grid.minor = element_line(colour='grey70', size=0.2), panel.grid.major = element_line(colour='grey70', size=0.3)) +
facet_grid(SEQ~WGHT_LGND) +
guides(fill=guide_legend(ncol=1))
file <- paste('/Users/Oliver/Dropbox (Infectious Disease)/OR_Work/2015/2015_PANGEA_ReductionIncidence/150421_files/150421_Weights_fixedtree_pol_GTR_mseq',nseq,'_PrAccept.pdf', sep='')
ggsave(file=file, h=20, w=30)
}
rbeast.mixing.for.commonparameters<- function()
{
require(coda)
indir <- '/Users/Oliver/duke/2014_Gates/methods_comparison_pipeline/150414'
# read log file
infiles <- list.files(indir, pattern='log$')
# get options
infiles <- data.table(FILE=infiles)
infiles[, SUBSTM:= NA_character_]
set(infiles, infiles[, which(grepl('GTR',FILE))], 'SUBSTM', 'GTR')
set(infiles, infiles[, which(grepl('HKY',FILE))], 'SUBSTM', 'HKY')
infiles[, CODON:= 'No']
set(infiles, infiles[, which(grepl('CODON',FILE))], 'CODON', 'Yes')
infiles[, GENE:= NA_character_]
set(infiles, infiles[, which(grepl('pol',FILE))], 'GENE', 'pol')
infiles[, SEQ:= infiles[, substring(regmatches(FILE, regexpr('seq[0-9]+',FILE)),4)]]
set(infiles, NULL, 'SEQ', infiles[, as.numeric(SEQ)])
set(infiles, infiles[, which(grepl('cseq3',FILE))], 'SEQ', 1350)
infiles[, CLU_TYPE:= NA_character_]
set(infiles, infiles[, which(grepl('mseq',FILE))], 'CLU_TYPE', 'largest')
set(infiles, infiles[, which(grepl('clrnd',FILE))], 'CLU_TYPE', 'random')
set(infiles, infiles[, which(grepl('cseq',FILE))], 'CLU_TYPE', 'all')
set(infiles, infiles[, which(grepl('clsm',FILE))], 'CLU_TYPE', 'smallest')
dfl <- infiles[, {
cat(paste('\nprocess file', FILE))
file <- paste(indir, '/', FILE, sep='')
beast.read.log(file, select=c('state','likelihood','posterior','branchRates'), verbose=0)
}, by='FILE']
dfl <- subset(dfl, state>2e6)
dfm <- do.call('rbind',lapply( dfl[, unique(FILE)], function(f)
{
tmp <- subset(dfl, FILE==f)
z <- effectiveSize(subset(tmp, select=3:ncol(tmp)))
data.table(FILE=f, VAR=names(z), ESS=z)
}))
dfm <- merge(dfm, infiles, by='FILE')
# GTR model vs HKY model
dfsp <- subset(dfm, CLU_TYPE=='largest')
setkey(dfsp, SEQ)
ggplot(dfsp, aes(x=VAR, y=ESS, fill=FILE)) + geom_bar(stat='identity') +
coord_flip() +
#scale_y_log10() +
scale_y_continuous(breaks=c(0,100,300,500,700), minor_breaks=NULL, expand=c(0,0)) +
labs(y='effective sample size\n(20e6 iterations, burn-in 10%)', x='') +
theme_bw() +
theme(legend.position='bottom',panel.grid.minor = element_line(colour='grey70', size=0.2), panel.grid.major = element_line(colour='grey70', size=0.3)) +
facet_grid(SUBSTM~SEQ) +
guides(fill=guide_legend(ncol=1))
file <- '/Users/Oliver/Dropbox (Infectious Disease)/OR_Work/2015/2015_PANGEA_ReductionIncidence/150413_Mixing_fixedtree_pol_BY_seq_substm.pdf'
ggsave(file=file, h=6, w=12)
}
rbeast.skygrid<- function()
{
indir <- '/Users/Oliver/duke/2014_Gates/methods_comparison_pipeline/150414'
# read log file
infiles <- list.files(indir, pattern='log$')
# get options
infiles <- data.table(FILE=infiles)
infiles[, SUBSTM:= NA_character_]
set(infiles, infiles[, which(grepl('GTR',FILE))], 'SUBSTM', 'GTR')
set(infiles, infiles[, which(grepl('HKY',FILE))], 'SUBSTM', 'HKY')
infiles[, CODON:= 'No']
set(infiles, infiles[, which(grepl('CODON',FILE))], 'CODON', 'Yes')
infiles[, GENE:= NA_character_]
set(infiles, infiles[, which(grepl('pol',FILE))], 'GENE', 'pol')
infiles[, SEQ:= infiles[, substring(regmatches(FILE, regexpr('seq[0-9]+',FILE)),4)]]
set(infiles, NULL, 'SEQ', infiles[, as.numeric(SEQ)])
set(infiles, infiles[, which(grepl('cseq3',FILE))], 'SEQ', 1350)
infiles[, CLU_TYPE:= NA_character_]
set(infiles, infiles[, which(grepl('mseq',FILE))], 'CLU_TYPE', 'largest')
set(infiles, infiles[, which(grepl('clrnd',FILE))], 'CLU_TYPE', 'random')
set(infiles, infiles[, which(grepl('cseq',FILE))], 'CLU_TYPE', 'all')
set(infiles, infiles[, which(grepl('clsm',FILE))], 'CLU_TYPE', 'smallest')
dfl <- infiles[, {
cat(paste('\nprocess file', FILE))
file <- paste(indir, '/', FILE, sep='')
beast.read.log(file, select=c('state','skygrid'), verbose=0)
}, by='FILE']
dfl <- melt(dfl, id.vars=c('FILE','state','skygrid.cutOff','skygrid.numGridPoints','skygrid.precision'))
setnames(dfl, 'value', 'LPS')
setnames(dfl, 'variable', 'X')
set(dfl, NULL, 'X', dfl[,gsub('skygrid.logPopSize','',X)])
set(dfl, NULL, 'X', dfl[,as.numeric(X)])
dfl <- subset(dfl, FILE!="150129_HPTN071_scA-mFP25_TEST_pol_grid-cseq3.log")
max.samplingtime <- 2020
#dfs <- subset(dfl, FILE=='150129_HPTN071_scA-mFP25_TEST_pol_CODON-GTR-grid-cseq3.log' & state>2e6)
dfs <- subset(dfl, state>2e6)
dfs <- dfs[, list( skygrid.cutOff=skygrid.cutOff[1], skygrid.numGridPoints=skygrid.numGridPoints[1],
LPSm= median(exp(LPS)), LPSlq= quantile(exp(LPS), p=0.025), LPSuq= quantile(exp(LPS), p=0.975),
YR=max.samplingtime-(X-1)/(skygrid.numGridPoints[1])*skygrid.cutOff[1]), by=c('FILE','X')]
dfs <- merge(dfs, infiles, by='FILE')
# GTR model change # sequences used
dfsp <- subset(dfs, SUBSTM=='GTR')
setkey(dfsp, SEQ, YR)
ggplot(dfsp, aes(x=YR, fill=FILE)) + geom_ribbon(aes(ymin=LPSlq, ymax=LPSuq), alpha=0.5, colour=NA) + geom_line(aes(y=LPSm)) +
scale_y_log10() +
scale_x_continuous(expand=c(0,0)) +
labs(x='', y='effective population size\n(Skygrid)') +
theme_bw() +
theme(legend.position='bottom',panel.grid.minor = element_line(colour='grey70', size=0.2), panel.grid.major = element_line(colour='grey70', size=0.3)) +
facet_grid(~SEQ) +
guides(fill=guide_legend(ncol=1))
file <- '/Users/Oliver/Dropbox (Infectious Disease)/OR_Work/2015/2015_PANGEA_ReductionIncidence/150413_Skygrid_CODONGTR_fixedtree_pol_BY_seq.pdf'
ggsave(file=file, h=6, w=10)
# GTR model vs HKY model
dfsp <- subset(dfs, CLU_TYPE=='largest')
setkey(dfsp, SEQ, YR)
ggplot(dfsp, aes(x=YR, fill=FILE)) + geom_ribbon(aes(ymin=LPSlq, ymax=LPSuq), alpha=0.5, colour=NA) + geom_line(aes(y=LPSm)) +
scale_y_log10() +
scale_x_continuous(expand=c(0,0)) +
labs(x='', y='effective population size\n(Skygrid)') +
theme_bw() +
theme(legend.position='bottom',panel.grid.minor = element_line(colour='grey70', size=0.2), panel.grid.major = element_line(colour='grey70', size=0.3)) +
facet_grid(SUBSTM~SEQ) +
guides(fill=guide_legend(ncol=1))
file <- '/Users/Oliver/Dropbox (Infectious Disease)/OR_Work/2015/2015_PANGEA_ReductionIncidence/150413_Skygrid_fixedtree_pol_BY_seq_substm.pdf'
ggsave(file=file, h=10, w=10)
# HKY model clustering type
dfsp <- subset(dfs, SUBSTM=='HKY' & SEQ>500 & SEQ<1300)
setkey(dfsp, SEQ, YR)
ggplot(dfsp, aes(x=YR, fill=FILE)) + geom_ribbon(aes(ymin=LPSlq, ymax=LPSuq), alpha=0.5, colour=NA) + geom_line(aes(y=LPSm)) +
scale_y_log10() +
scale_x_continuous(expand=c(0,0)) +
labs(x='', y='effective population size\n(Skygrid)') +
theme_bw() +
theme(legend.position='bottom',panel.grid.minor = element_line(colour='grey70', size=0.2), panel.grid.major = element_line(colour='grey70', size=0.3)) +
facet_grid(CLU_TYPE~SEQ) +
guides(fill=guide_legend(ncol=1))
file <- '/Users/Oliver/Dropbox (Infectious Disease)/OR_Work/2015/2015_PANGEA_ReductionIncidence/150413_Skygrid_HKY_fixedtree_pol_BY_clutype.pdf'
ggsave(file=file, h=10, w=7)
}
rbeast.skygrid.hky.weight.allexceptlogpopsize.150428<- function()
{
require(rBEAST)
require(data.table)
require(ape)
require(XML)
tree.id.labelsep <- '|'
tree.id.label.idx.ctime <- 4
select <- 'grid-mseq1000'
indir <- '/Users/Oliver/Dropbox\ (Infectious Disease)/OR_Work/2015/2015_PANGEA_ReductionIncidence/150419_files'
#indir <- '/Users/Oliver/git/HPTN071sim/tmp140914/140716_RUN001_INTERNAL'
outdir <- '/Users/Oliver/Dropbox\ (Infectious Disease)/OR_Work/2015/2015_PANGEA_ReductionIncidence/150428_files'
infiles <- list.files(indir, '.*INTERNAL.R$', full.names=FALSE)
#stopifnot(length(infiles)==1)
# run: no autooptimize on UCLN, use different scalefactors instead
selects <- paste('grid-mseq',c(400, 1000), sep='')
for(w in c(0.6, 0.7, 0.8, 0.9, 0.95))
{
for(select in selects)
{
for(i in seq_along(infiles))
{
infile <- infiles[i]
# load simulated data
file <- paste(indir, '/', infile, sep='')
file.name <- paste(outdir, gsub('_SIMULATED_INTERNAL.R',paste('_TEST_pol_HKY_fixedtree_',select,"_s4UCLN",w*100,'.xml',sep=''),infile), sep='/')
cat(paste('\nLoading file', file))
load(file) #expect "df.epi" "df.trms" "df.inds" "df.sample" "df.seq"
set( df.seq, NULL, 'IDCLU', df.seq[, as.integer(IDCLU)] )
setnames(df.seq, c("LABEL", "IDCLU", "IDPOP"), c("TAXON_NAME", "CLU_ID", "TAXON_ID"))
df.seq[, SAMPLINGTIME:=df.seq[, as.numeric(sapply(strsplit(TAXON_NAME,'|',fixed=1),'[[',4))]]
seq.select <- beast.choose.seq.by.clusters(df.seq, select, verbose=1)
#
tmp <- list.files(indir, '_DATEDTREE.newick$', full.names=FALSE)
tmp <- tmp[ grepl(substr(infile, 1, regexpr('_SIMULATED',infile)), tmp) ]
phd <- read.tree(paste(indir, tmp, sep='/'))
tmp2 <- data.table(TAXON_ID= sapply(phd, function(x) x$tip.label[1]), IDX=seq_along(phd))
set( tmp2, NULL, 'TAXON_ID', tmp2[, as.integer(substring(sapply(strsplit(TAXON_ID, tree.id.labelsep, fixed=TRUE),'[[',1),7)) ] )
tmp2 <- merge(subset(seq.select, select=c(TAXON_ID, CLU_ID)), tmp2, by='TAXON_ID')
phd <- lapply(tmp2[,IDX], function(i) phd[[i]] )
names(phd) <- tmp2[, CLU_ID]
#
# create BEAST XML POL
#
seq.select.pol <- subset(seq.select, select=c("CLU_ID", "TAXON_ID", "TAXON_NAME", "SAMPLINGTIME", "POL" ))
setnames(seq.select.pol, 'POL', 'SEQ')
bxml <- beastxml.multilocus.hky.fixed.tree( file.name, seq.select.pol, phd, verbose=1 )
#
# adjust parameter weights: all 1 but higher for UCLD
#
bxmlo <- getNodeSet(bxml, "//operators")[[1]]
tmp <- unlist(lapply( c("hky","site","skygrid.precision","skygrid"), function(x)
{
getNodeSet(bxmlo, paste('*[descendant::*[contains(@idref,"',x,'")]]',sep=''))
}))
for(x in tmp)
xmlAttrs(x)["weight"] <- 1
tmp <- unlist(lapply( c("branchRates.categories_CLU"), function(x)
{
getNodeSet(bxmlo, paste('*[descendant::*[contains(@idref,"',x,'")]]',sep=''))
}))
for(x in tmp)
xmlAttrs(x)["weight"] <- 2
tmp <- unlist(lapply( c("ucld"), function(x)
{
getNodeSet(bxmlo, paste('*[descendant::*[contains(@idref,"',x,'")]]',sep=''))
}))
for(x in tmp)
{
xmlAttrs(x)["weight"] <- 1
xmlAttrs(x)["scaleFactor"] <- w
xmlAttrs(x)["autoOptimize"] <- 'false'
}
cat(paste("\nwrite xml file to",file.name))
saveXML(bxml, file=file.name)
}
}
}
# run use LN operator
}
rbeast.skygrid.hky.weight.allexceptlogpopsize.150424<- function()
{
require(rBEAST)
tree.id.labelsep <- '|'
tree.id.label.idx.ctime <- 4
select <- 'grid-mseq400'
indir <- '/Users/Oliver/Dropbox\ (Infectious Disease)/OR_Work/2015/2015_PANGEA_ReductionIncidence/150419_files'
#indir <- '/Users/Oliver/git/HPTN071sim/tmp140914/140716_RUN001_INTERNAL'
outdir <- '/Users/Oliver/Dropbox\ (Infectious Disease)/OR_Work/2015/2015_PANGEA_ReductionIncidence/150424_files'
infiles <- list.files(indir, '.*INTERNAL.R$', full.names=FALSE)
#stopifnot(length(infiles)==1)
# run: baseline
selects <- paste('grid-mseq',c(400, 1000), sep='')
for(w in c(1, 5, 10))
{
for(select in selects)
{
for(i in seq_along(infiles))
{
infile <- infiles[i]
# load simulated data
file <- paste(indir, '/', infile, sep='')
file.name <- paste(outdir, gsub('_SIMULATED_INTERNAL.R',paste('_TEST_pol_HKY_fixedtopo_',select,"_w4BR",w,'.xml',sep=''),infile), sep='/')
cat(paste('\nLoading file', file))
load(file) #expect "df.epi" "df.trms" "df.inds" "df.sample" "df.seq"
set( df.seq, NULL, 'IDCLU', df.seq[, as.integer(IDCLU)] )
setnames(df.seq, c("LABEL", "IDCLU", "IDPOP"), c("TAXON_NAME", "CLU_ID", "TAXON_ID"))
df.seq[, SAMPLINGTIME:=df.seq[, as.numeric(sapply(strsplit(TAXON_NAME,'|',fixed=1),'[[',4))]]
seq.select <- beast.choose.seq.by.clusters(df.seq, select, verbose=1)
#
tmp <- list.files(indir, '_DATEDTREE.newick$', full.names=FALSE)
tmp <- tmp[ grepl(substr(infile, 1, regexpr('_SIMULATED',infile)), tmp) ]
phd <- read.tree(paste(indir, tmp, sep='/'))
tmp2 <- data.table(TAXON_ID= sapply(phd, function(x) x$tip.label[1]), IDX=seq_along(phd))
set( tmp2, NULL, 'TAXON_ID', tmp2[, as.integer(substring(sapply(strsplit(TAXON_ID, tree.id.labelsep, fixed=TRUE),'[[',1),7)) ] )
tmp2 <- merge(subset(seq.select, select=c(TAXON_ID, CLU_ID)), tmp2, by='TAXON_ID')
phd <- lapply(tmp2[,IDX], function(i) phd[[i]] )
names(phd) <- tmp2[, CLU_ID]
#
# create BEAST XML POL
#
seq.select.pol <- subset(seq.select, select=c("CLU_ID", "TAXON_ID", "TAXON_NAME", "SAMPLINGTIME", "POL" ))
setnames(seq.select.pol, 'POL', 'SEQ')
bxml <- beastxml.multilocus.hky.fixed.tree.fixed.topology( file.name, seq.select.pol, phd, verbose=1 )
#
# adjust parameter weights: all node height higher weight
#
bxmlo <- getNodeSet(bxml, "//operators")[[1]]
tmp <- unlist(lapply( c("treeModel"), function(x)
{
getNodeSet(bxmlo, paste('*[descendant::*[contains(@idref,"',x,'")]]',sep=''))
}))
for(x in tmp)
xmlAttrs(x)["weight"] <- w
cat(paste("\nwrite xml file to",file.name))
saveXML(bxml, file=file.name)
}
}
}
# run: twice the weight on internal node heights
selects <- paste('grid-mseq',c(400, 1000), sep='')
for(w in c(1, 5, 10))
{
for(select in selects)
{
for(i in seq_along(infiles))
{
infile <- infiles[i]
# load simulated data
file <- paste(indir, '/', infile, sep='')
file.name <- paste(outdir, gsub('_SIMULATED_INTERNAL.R',paste('_TEST_pol_HKY_fixedtopo_',select,"_w4BR2xIntrnl",w,'.xml',sep=''),infile), sep='/')
cat(paste('\nLoading file', file))
load(file) #expect "df.epi" "df.trms" "df.inds" "df.sample" "df.seq"
set( df.seq, NULL, 'IDCLU', df.seq[, as.integer(IDCLU)] )
setnames(df.seq, c("LABEL", "IDCLU", "IDPOP"), c("TAXON_NAME", "CLU_ID", "TAXON_ID"))
df.seq[, SAMPLINGTIME:=df.seq[, as.numeric(sapply(strsplit(TAXON_NAME,'|',fixed=1),'[[',4))]]
seq.select <- beast.choose.seq.by.clusters(df.seq, select, verbose=1)
#
tmp <- list.files(indir, '_DATEDTREE.newick$', full.names=FALSE)
tmp <- tmp[ grepl(substr(infile, 1, regexpr('_SIMULATED',infile)), tmp) ]
phd <- read.tree(paste(indir, tmp, sep='/'))
tmp2 <- data.table(TAXON_ID= sapply(phd, function(x) x$tip.label[1]), IDX=seq_along(phd))
set( tmp2, NULL, 'TAXON_ID', tmp2[, as.integer(substring(sapply(strsplit(TAXON_ID, tree.id.labelsep, fixed=TRUE),'[[',1),7)) ] )
tmp2 <- merge(subset(seq.select, select=c(TAXON_ID, CLU_ID)), tmp2, by='TAXON_ID')
phd <- lapply(tmp2[,IDX], function(i) phd[[i]] )
names(phd) <- tmp2[, CLU_ID]
#
# create BEAST XML POL
#
seq.select.pol <- subset(seq.select, select=c("CLU_ID", "TAXON_ID", "TAXON_NAME", "SAMPLINGTIME", "POL" ))
setnames(seq.select.pol, 'POL', 'SEQ')
bxml <- beastxml.multilocus.hky.fixed.tree.fixed.topology( file.name, seq.select.pol, phd, verbose=1 )
# all node height higher weight
bxmlo <- getNodeSet(bxml, "//operators")[[1]]
tmp <- unlist(lapply( c("treeModel"), function(x)
{
getNodeSet(bxmlo, paste('*[descendant::*[contains(@idref,"',x,'")]]',sep=''))
}))
for(x in tmp)
xmlAttrs(x)["weight"] <- w
# all inner node heights twice weight
tmp <- unlist(lapply( c("treeModel.internalNodeHeights"), function(x)
{
getNodeSet(bxmlo, paste('*[descendant::*[contains(@idref,"',x,'")]]',sep=''))
}))
for(x in tmp)
xmlAttrs(x)["weight"] <- 2*w
cat(paste("\nwrite xml file to",file.name))
saveXML(bxml, file=file.name)
}
}
}
# run: with UpDown operator on UCLN
for(w in c(1, 5, 10))
{
for(select in selects)
{
for(i in seq_along(infiles))
{
infile <- infiles[i]
# load simulated data
file <- paste(indir, '/', infile, sep='')
file.name <- paste(outdir, gsub('_SIMULATED_INTERNAL.R',paste('_TEST_pol_HKY_fixedtopo_',select,"_w4BrUCLDUpDown",w,'.xml',sep=''),infile), sep='/')
cat(paste('\nLoading file', file))
load(file) #expect "df.epi" "df.trms" "df.inds" "df.sample" "df.seq"
set( df.seq, NULL, 'IDCLU', df.seq[, as.integer(IDCLU)] )
setnames(df.seq, c("LABEL", "IDCLU", "IDPOP"), c("TAXON_NAME", "CLU_ID", "TAXON_ID"))
df.seq[, SAMPLINGTIME:=df.seq[, as.numeric(sapply(strsplit(TAXON_NAME,'|',fixed=1),'[[',4))]]
seq.select <- beast.choose.seq.by.clusters(df.seq, select, verbose=1)
#
tmp <- list.files(indir, '_DATEDTREE.newick$', full.names=FALSE)
tmp <- tmp[ grepl(substr(infile, 1, regexpr('_SIMULATED',infile)), tmp) ]
phd <- read.tree(paste(indir, tmp, sep='/'))
tmp2 <- data.table(TAXON_ID= sapply(phd, function(x) x$tip.label[1]), IDX=seq_along(phd))
set( tmp2, NULL, 'TAXON_ID', tmp2[, as.integer(substring(sapply(strsplit(TAXON_ID, tree.id.labelsep, fixed=TRUE),'[[',1),7)) ] )
tmp2 <- merge(subset(seq.select, select=c(TAXON_ID, CLU_ID)), tmp2, by='TAXON_ID')
phd <- lapply(tmp2[,IDX], function(i) phd[[i]] )
names(phd) <- tmp2[, CLU_ID]
#
# create BEAST XML POL
#
seq.select.pol <- subset(seq.select, select=c("CLU_ID", "TAXON_ID", "TAXON_NAME", "SAMPLINGTIME", "POL" ))
setnames(seq.select.pol, 'POL', 'SEQ')
bxml <- beastxml.multilocus.hky.fixed.tree.fixed.topology( file.name, seq.select.pol, phd, verbose=1 )
#
# adjust parameter weights: all node height higher weight
#
bxmlo <- getNodeSet(bxml, "//operators")[[1]]
tmp <- unlist(lapply( c("ucld"), function(x)
{
getNodeSet(bxmlo, paste('scaleOperator[descendant::*[contains(@idref,"',x,'")]]',sep=''))
}))
for(x in tmp)
xmlAttrs(x)["weight"] <- 0.001
beast.add.upDownOperator(bxml, "ucld.mean", "ucld.stdev", scaleFactor=0.75, weight=5)
tmp <- unlist(lapply( c("treeModel"), function(x)
{
getNodeSet(bxmlo, paste('*[descendant::*[contains(@idref,"',x,'")]]',sep=''))
}))
for(x in tmp)
xmlAttrs(x)["weight"] <- w
cat(paste("\nwrite xml file to",file.name))
saveXML(bxml, file=file.name)
}
}
}
# run: with higher internal node weights
for(w in c(3, 4, 5, 10, 20))
{
for(select in selects)
{
for(i in seq_along(infiles))
{
infile <- infiles[i]
# load simulated data
file <- paste(indir, '/', infile, sep='')
file.name <- paste(outdir, gsub('_SIMULATED_INTERNAL.R',paste('_TEST_pol_HKY_fixedtopo_',select,"_w4Intrnl",w,'.xml',sep=''),infile), sep='/')
cat(paste('\nLoading file', file))
load(file) #expect "df.epi" "df.trms" "df.inds" "df.sample" "df.seq"
set( df.seq, NULL, 'IDCLU', df.seq[, as.integer(IDCLU)] )
setnames(df.seq, c("LABEL", "IDCLU", "IDPOP"), c("TAXON_NAME", "CLU_ID", "TAXON_ID"))
df.seq[, SAMPLINGTIME:=df.seq[, as.numeric(sapply(strsplit(TAXON_NAME,'|',fixed=1),'[[',4))]]
seq.select <- beast.choose.seq.by.clusters(df.seq, select, verbose=1)
#
tmp <- list.files(indir, '_DATEDTREE.newick$', full.names=FALSE)
tmp <- tmp[ grepl(substr(infile, 1, regexpr('_SIMULATED',infile)), tmp) ]
phd <- read.tree(paste(indir, tmp, sep='/'))
tmp2 <- data.table(TAXON_ID= sapply(phd, function(x) x$tip.label[1]), IDX=seq_along(phd))
set( tmp2, NULL, 'TAXON_ID', tmp2[, as.integer(substring(sapply(strsplit(TAXON_ID, tree.id.labelsep, fixed=TRUE),'[[',1),7)) ] )
tmp2 <- merge(subset(seq.select, select=c(TAXON_ID, CLU_ID)), tmp2, by='TAXON_ID')
phd <- lapply(tmp2[,IDX], function(i) phd[[i]] )
names(phd) <- tmp2[, CLU_ID]
#
# create BEAST XML POL
#
seq.select.pol <- subset(seq.select, select=c("CLU_ID", "TAXON_ID", "TAXON_NAME", "SAMPLINGTIME", "POL" ))
setnames(seq.select.pol, 'POL', 'SEQ')
bxml <- beastxml.multilocus.hky.fixed.tree.fixed.topology( file.name, seq.select.pol, phd, verbose=1 )
#
# adjust parameter weights: all node height higher weight
#
bxmlo <- getNodeSet(bxml, "//operators")[[1]]
tmp <- unlist(lapply( c("treeModel"), function(x)
{
getNodeSet(bxmlo, paste('*[descendant::*[contains(@idref,"',x,'")]]',sep=''))
}))
for(x in tmp)
xmlAttrs(x)["weight"] <- 1
# all inner node heights higher weight
tmp <- unlist(lapply( c("treeModel.internalNodeHeights"), function(x)
{
getNodeSet(bxmlo, paste('*[descendant::*[contains(@idref,"',x,'")]]',sep=''))
}))
for(x in tmp)
xmlAttrs(x)["weight"] <- w
cat(paste("\nwrite xml file to",file.name))
saveXML(bxml, file=file.name)
}
}
}
}
rbeast.skygrid.hky.weight.allexceptlogpopsize.150421<- function()
{
require(rBEAST)
require(data.table)
require(ape)
require(XML)
tree.id.labelsep <- '|'
tree.id.label.idx.ctime <- 4
#select <- 'grid-mseq500'
#select <- 'grid-cseq3'
select <- 'grid-mseq1000'
indir <- '/Users/Oliver/Dropbox\ (Infectious Disease)/OR_Work/2015/2015_PANGEA_ReductionIncidence/150419_files'
#indir <- '/Users/Oliver/git/HPTN071sim/tmp140914/140716_RUN001_INTERNAL'
outdir <- '/Users/Oliver/Dropbox\ (Infectious Disease)/OR_Work/2015/2015_PANGEA_ReductionIncidence/150421_files'
infiles <- list.files(indir, '.*INTERNAL.R$', full.names=FALSE)
#stopifnot(length(infiles)==1)
# run: no UpDown operator
selects <- paste('grid-mseq',c(400, 1000), sep='')
for(w in c(5, 10, 20, 50))
{
for(select in selects)
{
for(i in seq_along(infiles))
{
infile <- infiles[i]
# load simulated data
file <- paste(indir, '/', infile, sep='')
file.name <- paste(outdir, gsub('_SIMULATED_INTERNAL.R',paste('_TEST_pol_HKY_fixedtree_',select,"_w4UCLD",w,'.xml',sep=''),infile), sep='/')
cat(paste('\nLoading file', file))
load(file) #expect "df.epi" "df.trms" "df.inds" "df.sample" "df.seq"
set( df.seq, NULL, 'IDCLU', df.seq[, as.integer(IDCLU)] )
setnames(df.seq, c("LABEL", "IDCLU", "IDPOP"), c("TAXON_NAME", "CLU_ID", "TAXON_ID"))
df.seq[, SAMPLINGTIME:=df.seq[, as.numeric(sapply(strsplit(TAXON_NAME,'|',fixed=1),'[[',4))]]
seq.select <- beast.choose.seq.by.clusters(df.seq, select, verbose=1)
#
tmp <- list.files(indir, '_DATEDTREE.newick$', full.names=FALSE)
tmp <- tmp[ grepl(substr(infile, 1, regexpr('_SIMULATED',infile)), tmp) ]
phd <- read.tree(paste(indir, tmp, sep='/'))
tmp2 <- data.table(TAXON_ID= sapply(phd, function(x) x$tip.label[1]), IDX=seq_along(phd))
set( tmp2, NULL, 'TAXON_ID', tmp2[, as.integer(substring(sapply(strsplit(TAXON_ID, tree.id.labelsep, fixed=TRUE),'[[',1),7)) ] )
tmp2 <- merge(subset(seq.select, select=c(TAXON_ID, CLU_ID)), tmp2, by='TAXON_ID')
phd <- lapply(tmp2[,IDX], function(i) phd[[i]] )
names(phd) <- tmp2[, CLU_ID]
#
# create BEAST XML POL
#
seq.select.pol <- subset(seq.select, select=c("CLU_ID", "TAXON_ID", "TAXON_NAME", "SAMPLINGTIME", "POL" ))
setnames(seq.select.pol, 'POL', 'SEQ')
bxml <- beastxml.multilocus.hky.fixed.tree( file.name, seq.select.pol, phd, verbose=1 )
#
# adjust parameter weights: all 1 but higher for UCLD
#
bxmlo <- getNodeSet(bxml, "//operators")[[1]]
tmp <- unlist(lapply( c("hky","site","skygrid.precision","skygrid","branchRates.categories_CLU"), function(x)
{
getNodeSet(bxmlo, paste('*[descendant::*[contains(@idref,"',x,'")]]',sep=''))
}))
for(x in tmp)
xmlAttrs(x)["weight"] <- 1
tmp <- unlist(lapply( c("ucld"), function(x)
{
getNodeSet(bxmlo, paste('*[descendant::*[contains(@idref,"',x,'")]]',sep=''))
}))
for(x in tmp)
xmlAttrs(x)["weight"] <- w
cat(paste("\nwrite xml file to",file.name))
saveXML(bxml, file=file.name)
}
}
}
# run: with UpDown operator
selects <- paste('grid-mseq',c(400, 1000), sep='')
for(w in c(5, 10, 20, 50))
{
for(select in selects)
{
for(i in seq_along(infiles))
{
infile <- infiles[i]
# load simulated data
file <- paste(indir, '/', infile, sep='')
file.name <- paste(outdir, gsub('_SIMULATED_INTERNAL.R',paste('_TEST_pol_HKY_fixedtree_',select,"_w4UCLDUpDown",w,'.xml',sep=''),infile), sep='/')
cat(paste('\nLoading file', file))
load(file) #expect "df.epi" "df.trms" "df.inds" "df.sample" "df.seq"
set( df.seq, NULL, 'IDCLU', df.seq[, as.integer(IDCLU)] )
setnames(df.seq, c("LABEL", "IDCLU", "IDPOP"), c("TAXON_NAME", "CLU_ID", "TAXON_ID"))
df.seq[, SAMPLINGTIME:=df.seq[, as.numeric(sapply(strsplit(TAXON_NAME,'|',fixed=1),'[[',4))]]
seq.select <- beast.choose.seq.by.clusters(df.seq, select, verbose=1)
#
tmp <- list.files(indir, '_DATEDTREE.newick$', full.names=FALSE)
tmp <- tmp[ grepl(substr(infile, 1, regexpr('_SIMULATED',infile)), tmp) ]
phd <- read.tree(paste(indir, tmp, sep='/'))
tmp2 <- data.table(TAXON_ID= sapply(phd, function(x) x$tip.label[1]), IDX=seq_along(phd))
set( tmp2, NULL, 'TAXON_ID', tmp2[, as.integer(substring(sapply(strsplit(TAXON_ID, tree.id.labelsep, fixed=TRUE),'[[',1),7)) ] )
tmp2 <- merge(subset(seq.select, select=c(TAXON_ID, CLU_ID)), tmp2, by='TAXON_ID')
phd <- lapply(tmp2[,IDX], function(i) phd[[i]] )
names(phd) <- tmp2[, CLU_ID]
#
# create BEAST XML POL
#
seq.select.pol <- subset(seq.select, select=c("CLU_ID", "TAXON_ID", "TAXON_NAME", "SAMPLINGTIME", "POL" ))
setnames(seq.select.pol, 'POL', 'SEQ')
bxml <- beastxml.multilocus.hky.fixed.tree( file.name, seq.select.pol, phd, verbose=1 )
#
# adjust parameter weights: all 1 but higher for UCLD and add UpDown operator
#
beast.add.upDownOperator(bxml, "ucld.mean", "ucld.stdev", scaleFactor=0.75, weight=1)
bxmlo <- getNodeSet(bxml, "//operators")[[1]]
#
tmp <- unlist(lapply( c("hky","site","skygrid.precision","skygrid","branchRates.categories_CLU"), function(x)
{
getNodeSet(bxmlo, paste('*[descendant::*[contains(@idref,"',x,'")]]',sep=''))
}))
for(x in tmp)
xmlAttrs(x)["weight"] <- 1
tmp <- unlist(lapply( c("ucld"), function(x)
{
getNodeSet(bxmlo, paste('*[descendant::*[contains(@idref,"',x,'")]]',sep=''))
}))
for(x in tmp)
xmlAttrs(x)["weight"] <- w
cat(paste("\nwrite xml file to",file.name))
saveXML(bxml, file=file.name)
}
}
}
}
rbeast.skygrid.hky.weight.allexceptlogpopsize.150419<- function()
{
require(rBEAST)
require(data.table)
require(ape)
require(XML)
tree.id.labelsep <- '|'
tree.id.label.idx.ctime <- 4
#select <- 'grid-mseq500'
#select <- 'grid-cseq3'
select <- 'grid-mseq1000'
indir <- '/Users/Oliver/Dropbox\ (Infectious Disease)/OR_Work/2015/2015_PANGEA_ReductionIncidence/150419_files'
#indir <- '/Users/Oliver/git/HPTN071sim/tmp140914/140716_RUN001_INTERNAL'
outdir <- indir
infiles <- list.files(indir, '.*INTERNAL.R$', full.names=FALSE)
#stopifnot(length(infiles)==1)
#
# run
#
selects <- paste('grid-mseq',c(400, 1000), sep='')
for(w in c(1, 5))
{
for(select in selects)
{
for(i in seq_along(infiles))
{
infile <- infiles[i]
# load simulated data
file <- paste(indir, '/', infile, sep='')
file.name <- paste(indir, gsub('_SIMULATED_INTERNAL.R',paste('_TEST_pol_HKY_fixedtree_',select,"_uweight",w,'.xml',sep=''),infile), sep='/')
cat(paste('\nLoading file', file))
load(file) #expect "df.epi" "df.trms" "df.inds" "df.sample" "df.seq"
set( df.seq, NULL, 'IDCLU', df.seq[, as.integer(IDCLU)] )
setnames(df.seq, c("LABEL", "IDCLU", "IDPOP"), c("TAXON_NAME", "CLU_ID", "TAXON_ID"))
df.seq[, SAMPLINGTIME:=df.seq[, as.numeric(sapply(strsplit(TAXON_NAME,'|',fixed=1),'[[',4))]]
seq.select <- beast.choose.seq.by.clusters(df.seq, select, verbose=1)
#
tmp <- list.files(indir, '_DATEDTREE.newick$', full.names=FALSE)
tmp <- tmp[ grepl(substr(infile, 1, regexpr('_SIMULATED',infile)), tmp) ]
phd <- read.tree(paste(indir, tmp, sep='/'))
tmp2 <- data.table(TAXON_ID= sapply(phd, function(x) x$tip.label[1]), IDX=seq_along(phd))
set( tmp2, NULL, 'TAXON_ID', tmp2[, as.integer(substring(sapply(strsplit(TAXON_ID, tree.id.labelsep, fixed=TRUE),'[[',1),7)) ] )
tmp2 <- merge(subset(seq.select, select=c(TAXON_ID, CLU_ID)), tmp2, by='TAXON_ID')
phd <- lapply(tmp2[,IDX], function(i) phd[[i]] )
names(phd) <- tmp2[, CLU_ID]
#
# create BEAST XML POL
#
seq.select.pol <- subset(seq.select, select=c("CLU_ID", "TAXON_ID", "TAXON_NAME", "SAMPLINGTIME", "POL" ))
setnames(seq.select.pol, 'POL', 'SEQ')
bxml <- beastxml.multilocus.hky.fixed.tree( file.name, seq.select.pol, phd, verbose=1 )
#
# adjust parameter weights: all the same
#
bxmlo <- getNodeSet(bxml, "//operators")[[1]]
tmp <- unlist(lapply( c("hky","site","ucld","skygrid.precision","skygrid","branchRates.categories_CLU"), function(x)
{
getNodeSet(bxmlo, paste('*[descendant::*[contains(@idref,"',x,'")]]',sep=''))
}))
for(x in tmp)
xmlAttrs(x)["weight"] <- w
cat(paste("\nwrite xml file to",file.name))
saveXML(bxml, file=file.name)
}
}
}
# weights all the same except skygrid block updater
for(w in c(1, 5, 20, 50))
{
for(select in selects)
{
for(i in seq_along(infiles))
{
infile <- infiles[i]
# load simulated data
file <- paste(indir, '/', infile, sep='')
file.name <- paste(indir, gsub('_SIMULATED_INTERNAL.R',paste('_TEST_pol_HKY_fixedtree_',select,"_uwnoSkgrdBlck",w,'.xml',sep=''),infile), sep='/')
cat(paste('\nLoading file', file))
load(file) #expect "df.epi" "df.trms" "df.inds" "df.sample" "df.seq"
set( df.seq, NULL, 'IDCLU', df.seq[, as.integer(IDCLU)] )
setnames(df.seq, c("LABEL", "IDCLU", "IDPOP"), c("TAXON_NAME", "CLU_ID", "TAXON_ID"))
df.seq[, SAMPLINGTIME:=df.seq[, as.numeric(sapply(strsplit(TAXON_NAME,'|',fixed=1),'[[',4))]]
seq.select <- beast.choose.seq.by.clusters(df.seq, select, verbose=1)
#
tmp <- list.files(indir, '_DATEDTREE.newick$', full.names=FALSE)
tmp <- tmp[ grepl(substr(infile, 1, regexpr('_SIMULATED',infile)), tmp) ]
phd <- read.tree(paste(indir, tmp, sep='/'))
tmp2 <- data.table(TAXON_ID= sapply(phd, function(x) x$tip.label[1]), IDX=seq_along(phd))
set( tmp2, NULL, 'TAXON_ID', tmp2[, as.integer(substring(sapply(strsplit(TAXON_ID, tree.id.labelsep, fixed=TRUE),'[[',1),7)) ] )
tmp2 <- merge(subset(seq.select, select=c(TAXON_ID, CLU_ID)), tmp2, by='TAXON_ID')
phd <- lapply(tmp2[,IDX], function(i) phd[[i]] )
names(phd) <- tmp2[, CLU_ID]
#
# create BEAST XML POL
#
seq.select.pol <- subset(seq.select, select=c("CLU_ID", "TAXON_ID", "TAXON_NAME", "SAMPLINGTIME", "POL" ))
setnames(seq.select.pol, 'POL', 'SEQ')
bxml <- beastxml.multilocus.hky.fixed.tree( file.name, seq.select.pol, phd, verbose=1 )
#
# adjust parameter weights: all the same
#
bxmlo <- getNodeSet(bxml, "//operators")[[1]]
tmp <- unlist(lapply( c("hky","site","ucld","skygrid.precision","branchRates.categories_CLU"), function(x)
{
getNodeSet(bxmlo, paste('*[descendant::*[contains(@idref,"',x,'")]]',sep=''))
}))
for(x in tmp)
xmlAttrs(x)["weight"] <- w
cat(paste("\nwrite xml file to",file.name))
saveXML(bxml, file=file.name)
}
}
}
# weights all the same except skygrid block updater and branch rate categories
for(w in c(1, 5, 20, 50))
{
for(select in selects)
{
for(i in seq_along(infiles))
{
infile <- infiles[i]
# load simulated data
file <- paste(indir, '/', infile, sep='')
file.name <- paste(indir, gsub('_SIMULATED_INTERNAL.R',paste('_TEST_pol_HKY_fixedtree_',select,"_uwnoSkgrdBlckBrRtCtgrs",w,'.xml',sep=''),infile), sep='/')
cat(paste('\nLoading file', file))
load(file) #expect "df.epi" "df.trms" "df.inds" "df.sample" "df.seq"
set( df.seq, NULL, 'IDCLU', df.seq[, as.integer(IDCLU)] )
setnames(df.seq, c("LABEL", "IDCLU", "IDPOP"), c("TAXON_NAME", "CLU_ID", "TAXON_ID"))
df.seq[, SAMPLINGTIME:=df.seq[, as.numeric(sapply(strsplit(TAXON_NAME,'|',fixed=1),'[[',4))]]
seq.select <- beast.choose.seq.by.clusters(df.seq, select, verbose=1)
#
tmp <- list.files(indir, '_DATEDTREE.newick$', full.names=FALSE)
tmp <- tmp[ grepl(substr(infile, 1, regexpr('_SIMULATED',infile)), tmp) ]
phd <- read.tree(paste(indir, tmp, sep='/'))
tmp2 <- data.table(TAXON_ID= sapply(phd, function(x) x$tip.label[1]), IDX=seq_along(phd))
set( tmp2, NULL, 'TAXON_ID', tmp2[, as.integer(substring(sapply(strsplit(TAXON_ID, tree.id.labelsep, fixed=TRUE),'[[',1),7)) ] )
tmp2 <- merge(subset(seq.select, select=c(TAXON_ID, CLU_ID)), tmp2, by='TAXON_ID')
phd <- lapply(tmp2[,IDX], function(i) phd[[i]] )
names(phd) <- tmp2[, CLU_ID]
#
# create BEAST XML POL
#
seq.select.pol <- subset(seq.select, select=c("CLU_ID", "TAXON_ID", "TAXON_NAME", "SAMPLINGTIME", "POL" ))
setnames(seq.select.pol, 'POL', 'SEQ')
bxml <- beastxml.multilocus.hky.fixed.tree( file.name, seq.select.pol, phd, verbose=1 )
#
# adjust parameter weights: all the same
#
bxmlo <- getNodeSet(bxml, "//operators")[[1]]
tmp <- unlist(lapply( c("hky","site","ucld","skygrid.precision"), function(x)
{
getNodeSet(bxmlo, paste('*[descendant::*[contains(@idref,"',x,'")]]',sep=''))
}))
for(x in tmp)
xmlAttrs(x)["weight"] <- w
cat(paste("\nwrite xml file to",file.name))
saveXML(bxml, file=file.name)
}
}
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.