The haircut pipeline involves two steps. First, descriptive statistics are calculated for each contig and the consensus sequence of that associated references. Second, these descriptive statistics are used to calculate a call probability for 10 base pair long contig chunks. The call probability is modeled as a function of the descriptive statistics. The underlying statistical model is pre-computed and supplied with the R package. Based on the call probabilites, 10bp chunks of each contig are called (yes=1, no=0). If cut/raw contigs correspond to each other, only one of both is returned.
1 2 | cmd.haircut.pipeline(in.raw, in.cut = NA, out = NA, batch.n = NA,
batch.id = NA, CNF.contig.idx = 1)
|
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 | #
# run from command line
# this produces a command line string that can be run in UNIX alikes
# only one raw IVA infile specified
#
## Not run:
in.raw <- '/Users/Oliver/Dropbox (Infectious Disease)/OR_Work/2015/2015_PANGEA_haircut/contigs_151026_unaligned_raw/78268.assembly_contigs_hit_ref_hiv.fasta'
cat(cmd.haircut.pipeline(in.raw))
## End(Not run)
#
# cut versions of IVA infile are also available and out file is also specified
#
## Not run:
in.raw <- '/Users/Oliver/Dropbox (Infectious Disease)/OR_Work/2015/2015_PANGEA_haircut/contigs_151026_unaligned_raw/78270.assembly_contigs_hit_ref_hiv.fasta'
in.cut <- '/Users/Oliver/Dropbox (Infectious Disease)/OR_Work/2015/2015_PANGEA_haircut/contigs_151026_unaligned_cut/78270.assembly_contigs_hit_ref_hiv_cut.fasta'
out <- '/Users/Oliver/Dropbox (Infectious Disease)/OR_Work/2015/2015_PANGEA_haircut/78270_nohair.fasta'
cat(cmd.haircut.pipeline(in.raw, in.cut, out))
## End(Not run)
#
# run from command line
# this produces a command line string that can be run in UNIX alikes
# directories with raw IVA files and cut versions specified
#
## Not run:
indir.cut <- paste(DATA, 'contigs_150408_unaligned_cut', sep='/' )
indir.raw <- paste(DATA, 'contigs_150408_unaligned_raw', sep='/' )
outdir <- paste(DATA, 'contigs_150408_model150816a', sep='/' )
cat(cmd.haircut.pipeline(indir.raw, indir.cut, outdir, batch.n=200, batch.id=1))
## End(Not run)
#
# create multiple runs on HPC using the command line version
#
## Not run:
#DATA <- SET THIS DIRECTORY
indir.cut <- paste(DATA, 'contigs_150408_unaligned_cut', sep='/' )
indir.raw <- paste(DATA, 'contigs_150408_unaligned_raw', sep='/' )
outdir <- paste(DATA, 'contigs_150408_model150816a', sep='/' )
batch.n <- 200
tmp <- data.table(FILE=list.files(indir.raw, pattern='fasta$', recursive=T))
tmp[, BATCH:= ceiling(seq_len(nrow(tmp))/batch.n)]
tmp <- tmp[, max(BATCH)]
for(batch.id in seq.int(1,tmp))
{
cmd <- cmd.haircut.pipeline(indir.raw, indir.cut, outdir, batch.n=batch.n, batch.id=batch.id)
cmd <- cmd.hpcwrapper(cmd, hpc.nproc= 1, hpc.q='pqeelab', hpc.walltime=4, hpc.mem="5000mb")
cat(cmd)
#cmd.hpccaller(paste(DATA,"tmp",sep='/'), paste("hrct",paste(strsplit(date(),split=' ')[[1]],collapse='_',sep=''),sep='.'), cmd)
}
## End(Not run)
|
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.