haircutwrap.get.call.for.PNG_ID: Call 10bp chunks of cut/raw contigs with the same PANGEA_ID

Description Usage Examples

View source: R/haircut.fun.R

Description

Call 10bp chunks of cut/raw contigs with the same PANGEA_ID

Usage

haircutwrap.get.call.for.PNG_ID(indir.st, indir.al, outdir, ctrmc, predict.fun,
  par, ctrain = NULL, batch.n = NA, batch.id = NA)

Examples

#
#	interactive use from within an R session
#
## Not run: 

#	DATA must hold the path of the data directory before running this example
#DATA	<- SET THIS DIRECTORY
#	load the model object; its 'coef' and 'predict' elements are passed on below
fit <- haircut.get.fitted.model.150816a()
ctrmc <- fit$coef
predict.fun <- fit$predict
#	get contigs that were used for training
outfile <- paste(DATA, "contigs_150408_trainingset_subsets.R", sep = "/")
ctrain <- haircut.get.training.contigs(NULL, outfile, NULL)
#	recode CUT ('cut' -> 'Y', 'raw' -> 'N') in place, then rename the column to BLASTnCUT
set(
	ctrain,
	i = NULL,
	j = "CUT",
	value = factor(ctrain[["CUT"]], levels = c("cut", "raw"), labels = c("Y", "N"))
)
setnames(ctrain, "CUT", "BLASTnCUT")
#	get covariates for all contigs
indir.st <- paste(DATA, "contigs_150408_wref_cutstat", sep = "/")
indir.al <- paste(DATA, "contigs_150408_wref", sep = "/")
outdir <- paste(DATA, "contigs_150408_model150816a", sep = "/")
#	named parameter vector handed to the wrapper as 'par'
par <- c(
	FRQx.quantile = NA,
	FRQx.thr = NA,
	CNS_FRQ.window = 200,
	CNS_AGR.window = 200,
	GPS.window = 200,
	PRCALL.thrmax = 0.8,
	PRCALL.thrstd = 10,
	PRCALL.cutprdcthair = 150,
	PRCALL.cutprdctcntg = 50,
	PRCALL.cutrawgrace = 100,
	PRCALL.rmintrnlgpsblw = 100,
	PRCALL.rmintrnlgpsend = 9700
)
haircutwrap.get.call.for.PNG_ID(
	indir.st = indir.st,
	indir.al = indir.al,
	outdir = outdir,
	ctrmc = ctrmc,
	predict.fun = predict.fun,
	par = par,
	ctrain = ctrain
)

## End(Not run)
#
#	run from the command line:
#	build a command line string that can be run in UNIX alikes, and print it
#
## Not run: 

#	DATA must hold the path of the data directory before running this example
#DATA	<- SET THIS DIRECTORY
indir.st <- paste(DATA, "contigs_150408_wref_cutstat", sep = "/")
indir.al <- paste(DATA, "contigs_150408_wref", sep = "/")
outdir <- paste(DATA, "contigs_150408_model150816a", sep = "/")
#	assemble the command string from the three directories and write it to the console
cmd <- cmd.haircut.call(indir.st, indir.al, outdir)
cat(cmd)

## End(Not run)
#
#	create multiple runs on HPC using the command line version
#
## Not run: 
	
#	DATA must hold the path of the data directory before running this example
#DATA		<- SET THIS DIRECTORY
indir.st	<- paste(DATA,'contigs_150408_wref_cutstat',sep='/')
indir.al	<- paste(DATA,'contigs_150408_wref',sep='/')
outdir		<- paste(DATA,'contigs_150408_model150816a',sep='/')
trainfile	<- paste(DATA,'contigs_150408_trainingset_subsets.R',sep='/')
#	input files are grouped into batches of at most batch.n files
batch.n		<- 200

#	count the *.R input files below indir.st and derive the number of batches;
#	ceiling(n/batch.n) equals the largest batch index of the per-file grouping
infiles		<- list.files(indir.st, pattern='\\.R$', recursive=TRUE)
n.batches	<- ceiling(length(infiles)/batch.n)
#	seq_len() yields an empty loop when no input files are found,
#	instead of failing on a sequence from 1 to -Inf
for(batch.id in seq_len(n.batches))
{
	#	command line string for this batch, wrapped for the HPC queue
	cmd			<- cmd.haircut.call(indir.st, indir.al, outdir, trainfile=trainfile, batch.n=batch.n, batch.id=batch.id, prog=PR.HAIRCUT.CALL )
	cmd			<- cmd.hpcwrapper(cmd, hpc.nproc= 1, hpc.q='pqeelab', hpc.walltime=4, hpc.mem="5000mb")
	cat(cmd)
	#	job name is 'hrct.' followed by the current date string with blanks replaced by '_'
	#	NOTE(review): date() has one-second resolution, so batches submitted within the same
	#	second receive the same name -- confirm that cmd.hpccaller tolerates this
	cmd.hpccaller(paste(DATA,"tmp",sep='/'), paste("hrct",paste(strsplit(date(),split=' ')[[1]],collapse='_',sep=''),sep='.'), cmd)
}
quit("no")

## End(Not run)

olli0601/PANGEAhaircut documentation built on May 24, 2019, 12:52 p.m.