Description Usage Author(s) Examples
See Examples.
1 | beastxml.multilocus.codon.gtr(file.name, seq.select, phd, verbose = 1)
|
Oliver Ratmann
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 | require(rBEAST)
tree.label.sep <- '|'
tree.label.idx.idpop <- 1
tree.label.idx.ctime <- 4
#select <- 'grid-cseq3' #generates large file to illustrate rBEAST functionality
select <- 'grid-mseq400' #generates relatively small file for testing
infile.seq <- system.file(package="rBEAST", 'extra','sim_150414a_seq.R')
infile.starttrees <- system.file(package="rBEAST", 'extra','sim_150414a_starttrees.newick')
outdir <- getwd()
#
# load data
#
file <- infile.seq
cat(paste('\nLoading file', file))
load(file)
# expect data.table "df.seq" with columns
# "TAXON_ID" taxon ID
# "TAXON_NAME" taxon name
# "SAMPLINGTIME" taxon sampling time
# "GAG" gag sequence as character string
# "POL" pol sequence as character string
# "ENV" env sequence as character string
# "CLU_ID" phylogenetic cluster ID of sequence, NA if not in any cluster
# 'size' option: eg how many sequences to select
thresh.NSEQ <- as.numeric(substring(regmatches(select, regexpr('seq[0-9]+',select)), 4))
stopifnot(!is.na(thresh.NSEQ))
# 'how to select' options
# pick phylogenetic clusters randomly
if(grepl('clrndseq',select))
{
seq.select <- df.seq[, list(CLU_N=length(TAXON_NAME)), by='CLU_ID']
setkey(seq.select, CLU_N, CLU_ID)
seq.select <- subset(unique(seq.select), CLU_N>2)
seq.select[, DUMMY:= sample(nrow(seq.select), nrow(seq.select))]
setkey(seq.select, DUMMY)
seq.select[, CLU_CN:= seq.select[, cumsum(CLU_N)]]
seq.select <- seq.select[seq_len( seq.select[, which(CLU_CN>=thresh.NSEQ)[1]] ), ]
seq.select <- merge( df.seq, subset(seq.select, select=CLU_ID), by='CLU_ID' )
}
# or pick sequences from largest phylogenetic clusters
if(grepl('mseq',select))
{
seq.select <- df.seq[, list(CLU_N=-length(TAXON_NAME)), by='CLU_ID']
setkey(seq.select, CLU_N, CLU_ID)
seq.select <- subset(unique(seq.select), CLU_N< -2)
seq.select[, CLU_CN:= seq.select[, cumsum(-CLU_N)]]
seq.select <- seq.select[seq_len( seq.select[, which(CLU_CN>=thresh.NSEQ)[1]] ), ]
seq.select <- merge( df.seq, subset(seq.select, select=CLU_ID), by='CLU_ID' )
}
# or pick sequences from smallest phylogenetic clusters
if(grepl('clsmseq',select))
{
seq.select <- df.seq[, list(CLU_N=length(TAXON_NAME)), by='CLU_ID']
setkey(seq.select, CLU_N, CLU_ID)
seq.select <- subset(unique(seq.select), CLU_N>2)
seq.select[, CLU_CN:= seq.select[, cumsum(CLU_N)]]
seq.select <- seq.select[seq_len( seq.select[, which(CLU_CN>=thresh.NSEQ)[1]] ), ]
seq.select <- merge( df.seq, subset(seq.select, select=CLU_ID), by='CLU_ID' )
}
# or pick sequences of all phylogenetic clusters of size >= x
if(grepl('cseq',select))
{
seq.select <- df.seq[, list(CLU_N=length(TAXON_NAME)), by='CLU_ID']
setkey(seq.select, CLU_N, CLU_ID)
seq.select <- subset(unique(seq.select), CLU_N>=thresh.NSEQ)
seq.select <- merge( df.seq, subset(seq.select, select=CLU_ID), by='CLU_ID' )
}
cat(paste('\nFound clusters, n=', seq.select[, length(unique(CLU_ID))]))
cat(paste('\nFound sequences, n=', seq.select[, length(unique(TAXON_ID))]))
#
# read starting trees for each cluster phylogeny
#
phd <- read.tree(infile.starttrees)
tmp2 <- data.table(TAXON_ID= sapply(phd, function(x) x$tip.label[1]), IDX=seq_along(phd))
set( tmp2, NULL, 'TAXON_ID', tmp2[, as.integer(substring(sapply(strsplit(TAXON_ID, tree.label.sep, fixed=TRUE),'[[',tree.label.idx.idpop),7)) ] )
tmp2 <- merge(subset(seq.select, select=c(TAXON_ID, CLU_ID)), tmp2, by='TAXON_ID')
phd <- lapply(tmp2[,IDX], function(i) phd[[i]] )
names(phd) <- tmp2[, CLU_ID]
# plot starting trees
## Not run:
phd.plot <- eval(parse(text=paste('phd[[',seq_along(phd),']]', sep='',collapse='+')))
phd.plot <- ladderize(phd.plot)
pdf(file=paste(outdir,gsub('.newick',paste('_',select,'.pdf',sep=''),basename(infile.starttrees)),sep='/'), w=10, h=Ntip(phd.plot)*0.1)
plot(phd.plot, show.tip=TRUE, cex=0.5)
dev.off()
## End(Not run)
#
# create XML file for POL
#
seq.select.pol <- subset(seq.select, select=c("CLU_ID", "TAXON_ID", "TAXON_NAME", "POL" ))
setnames(seq.select.pol, 'POL', 'SEQ')
file.name <- paste(outdir, gsub('_seq.R',paste('_HKY_fixedtree_',select,'.xml',sep=''),basename(infile.seq)), sep='/')
bxml <- beastxml.multilocus.codon.gtr( file.name, seq.select.pol, phd, verbose=1 )
# write to file
## Not run:
cat(paste("\nwrite xml file to",file.name))
saveXML(bxml, file=file.name)
## End(Not run)
|
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.