anomDetectLOH<-function(intenData, genoData, scan.ids, chrom.ids, snp.ids,
known.anoms,
smooth=50,min.width=5,nperm=10000,alpha=.001,
run.size=50,inter.size=4,
homodel.min.num=10,homodel.thresh=10,
small.num=20,small.thresh=2.25, medium.num=50,medium.thresh=2,
long.num=100,long.thresh=1.5, small.na.thresh=2.5,
length.factor=5,merge.fac=.85,min.lrr.num=20,verbose=TRUE){
##scan.ids: samples to consider
##chrom.ids: chromosomes to consider, usually 1:23
##snp.ids: intid's for eligible snps
##known.anoms: data.frame of known anoms (usually from BAF);
# must have "scanID","chrom","left","right" where left and right are snp indices
##run.size: number of markers to declare a 'homozygous' run (here homozygous includes missing)
##inter.size: number of consecutive heterozygous markers allowed to interrupt a run
### detection of anomalies based on a chromsome-wide and local mad.fac thresholds
# where mad.fac is (segment median-nonanomalous median)/nonanom mad
##homodel.min.num: minimum number of markers to detect extreme diff in lrr (for homo deletion)
##homodel.thresh: threshold for 'mad.fac' to detect extreme diff in lrr
##small.num: minimum number of markers to detect anomaly (other than extreme)
##small.thresh:threshold for number of markers between small.num and medium.num
##medium.num: number of markers threshold for 'medium'
##medium.thresh: threshold for 'mad.fac' when number of markers is between medium num and long num
##long.num: number of markers threshold for 'long'
##long.thresh:threshold for 'mad.fac' when number of markers is bigger than long.num
##small.na.thresh:
# chrom mad.fac threshold when between small.num and medium.num and no local mad.fac
##length.factor:
# local mad.fac based on interval that is length.factor*(no. of markers in segment) on either side
##merge.fac: threshold used to merge original segmentation segments
##min.lrr.num: if any 'nonanomalous' interval less than min.lrr.num,
# ignore this piece in finding overall nonanomalous unless is only piece left
#### checks ####
# check that intenData has LRR
if (!hasLogRRatio(intenData)) stop("LogRRatio not found in intenData")
# check that dimensions of intenData and genoData are equal
intenSnpID <- getSnpID(intenData)
genoSnpID <- getSnpID(genoData)
if (!all(intenSnpID == genoSnpID)) stop("snp dimensions of intenData and genoData differ")
intenScanID <- getScanID(intenData)
genoScanID <- getScanID(genoData)
if (!all(intenScanID == genoScanID)) stop("scan dimensions of intenData and genoData differ")
# check that sex is present in annotation
if (hasSex(intenData)) {
sex <- getSex(intenData)
} else if (hasSex(genoData)) {
sex <- getSex(genoData)
} else stop("sex not found in intenData or genoData")
intid <- intenSnpID
if (!all(is.element(snp.ids,intid))) stop("snp.ids has values not present in intenData")
chrom <- getChromosome(intenData)
if (!all(is.element(chrom.ids, chrom))) stop("chrom.ids has values not present in intenData (all values in chrom.ids should be integers)")
sid <- intenScanID
male <- sid[!is.na(sex) & sex == "M"]
if(!is.element(class(known.anoms),"data.frame") | !all(is.element(c("scanID","chromosome","left.index","right.index"),names(known.anoms)))){
stop("known.anoms input needs to be data.frame with variables including scanID, chromosome, left.index, right.index") }
####
# internal functions require these names, so convert from package standard
names(known.anoms)[names(known.anoms) == "chromosome"] <- "chrom"
names(known.anoms)[names(known.anoms) == "left.index"] <- "left"
names(known.anoms)[names(known.anoms) == "right.index"] <- "right"
LOH.raw<-NULL;LOH.base.info<-NULL; LOH.filtered<-NULL
LOH.segments<-NULL; LOH.merge<-NULL;LOH.raw.adjusted<-NULL
## compute parameter needed for DNAcopy
max.ones<-floor(nperm*alpha)+1
sbdry<-getbdry(.05,nperm,max.ones)
sel<-is.element(intid,snp.ids)
orindex<-which(sel) #indices of eligible snp's
orchr<-chrom[sel]
for(snum in scan.ids){
sindex <- which(is.element(sid, snum))
if(length(sindex)==0) stop(paste("Sample ",snum, " does not exist",sep=""))
GENO <- getGenotype(genoData, snp=c(1,-1), scan=c(sindex,1))
olrr<-getLogRRatio(intenData, snp=c(1,-1), scan=c(sindex,1))[sel]
ogeno<-GENO[sel]
ws<-!is.na(olrr)
olrr<-olrr[ws]
oindex<-orindex[ws]
ogeno<-ogeno[ws]
chr<-orchr[ws]
##homoz and missing have value 0
whomo<-is.element(ogeno,c(0,2)) | is.na(ogeno)
ogeno[whomo]<-0
####### DNAcopy Segmentation for given sample ########
temp.CNA<-CNA(as.vector(olrr),chr,oindex,data.type="logratio",sampleid=snum)
temp.smooth<-smooth.CNA(temp.CNA,smooth.region=smooth,outlier.SD.scale=4) #smooth.region,outlier.SD.scale=default
temp.segment<-segment(temp.smooth,alpha=alpha,sbdry=sbdry,p.method="h",min.width=min.width,nperm=nperm,undo.splits="sdundo",undo.SD=1,verbose=as.integer(verbose))
segments<-temp.segment$out
if(dim(segments)[1]<1) next
segments$ID<-rep(snum,length(segments$ID))
#$loc.start and $loc.end are indices of snp
names(segments)<-c("scanID","chrom","left","right","num.mark","seg.mean")
for(ch in chrom.ids){
if(ch==XchromCode(intenData) & is.element(snum,male)) next
wc<-chr==ch
geno<-ogeno[wc]
index<-oindex[wc]
lrr<-olrr[wc]
chrr<-chr[wc]
ansch<-known.anoms[known.anoms$scanID==snum & known.anoms$chrom==ch,]
segs<-segments[segments$chrom==ch,]
###### find homozygous runs and base info for current sample/chrom
out<-.LOHfind(snum,ch,geno,index,lrr,chrr,segs,ansch,
run.size,inter.size,min.lrr.num )
base.snch<-out$base.info
base.snch$sex<-sex[sindex]
RUNS.snch<-out$RUNS
segs.snch<-out$segments
LOH.base.info<-rbinddt(LOH.base.info,base.snch)
LOH.segments<-rbinddt(LOH.segments,segs.snch)
LOH.raw<-rbinddt(LOH.raw,RUNS.snch)
#### BEGIN filtering process #############
#### Special Cases ###########
if(base.snch$num.runs==0) next #no anomalies
if(is.na(base.snch$chrom.nonanom.mad) & is.element(base.snch$num.runs,c(1,2)) & dim(ansch)[1]==0) {
# happens if runs take up most of chromosome with no other known anoms, i.e. probably whole chrom LOH
seg.median<-NULL;seg.mean<-NULL;nm<-NULL
for(k in 1:dim(RUNS.snch)[1]){
subind<-index>=RUNS.snch$left[k] & index<=RUNS.snch$right[k]
nm<-c(nm,sum(subind))
sublrr<-lrr[subind]
seg.med<-median(sublrr,na.rm=TRUE)
seg.median<-c(seg.median,seg.med)
seg.mean<-c(seg.mean,mean(sublrr,na.rm=TRUE))
}#end of k loop
RUNS.snch$num.mark<-nm
RUNS.snch$seg.median<-seg.median
RUNS.snch$seg.mean<-seg.mean
RUNS.snch$sd.fac<-NA
RUNS.snch$mad.fac<-NA
RUNS.snch$local<-NA
RUNS.snch$num.segs<-base.snch$num.segs
RUNS.snch$chrom.nonanom.mad<-NA
RUNS.snch$chrom.nonanom.median<-NA
RUNS.snch$chrom.nonanom.mean<-NA
RUNS.snch$chrom.nonanom.sd<-NA
RUNS.snch$sex<-sex[sindex]
LOH.raw.adjusted<-rbinddt(LOH.raw.adjusted,RUNS.snch)
LOH.filtered<-rbinddt(LOH.filtered,RUNS.snch)
next
}
if(base.snch$num.segs==1) next
#if no segmentation and previous situation not occur, there are no anomalies
if(is.na(base.snch$chrom.nonanom.mad)) next
#no base left to compare with;already singled out whole chrom possibiltiy; too many pieces: no anoms
##### end Special cases###
### Get info needed for filtering process
## find indices for 'nonanom' snps: not in known anoms nor in any found runs (which are potential anoms)
baf.del<-NULL
if(dim(ansch)[1]!=0){
for(j in 1:dim(ansch)[1]) {
int<-index[index>=ansch$left[j]& index<=ansch$right[j]]
baf.del<-union(baf.del,int)
}
} #index values
runs.del<-NULL #delete all found runs or anoms to determine "nonanom" base
for(i in 1:dim(RUNS.snch)[1]){
int<-index[index>=RUNS.snch$left[i] & index<=RUNS.snch$right[i]]
runs.del<-union(runs.del,int)
}
possible.anom.index<-union(baf.del,runs.del)
#by previous checks, will have at least some runs and runs+known anoms will not take up whole chrom
nonanom.index<-setdiff(index,possible.anom.index)
#### final FILTERING #########
outt<-.LOHselectAnoms(snum,ch,segs.snch,RUNS.snch,base.snch,index,nonanom.index,lrr,
homodel.min.num,homodel.thresh,small.num,small.thresh, medium.num,medium.thresh,
long.num,long.thresh, small.na.thresh,length.factor,merge.fac,min.lrr.num)
raw.adj<-outt$raw.adjusted
if(!is.null(raw.adj)) raw.adj$sex<-sex[sindex]
filtered<-outt$filtered
if(!is.null(filtered)) filtered$sex<-sex[sindex]
LOH.raw.adjusted<-rbinddt(LOH.raw.adjusted,raw.adj)
LOH.filtered<-rbinddt(LOH.filtered, filtered)
if(outt$merge.flag){
tmp<-data.frame(snum,ch);names(tmp)<-c("scanID","chrom")
LOH.merge<-rbinddt(LOH.merge,tmp)
}
} #end chrom loop
} #end sample loop
colm<-c("scanID","chrom","left","right","num.mark","seg.median","seg.mean",
"mad.fac","sd.fac","local","num.segs","chrom.nonanom.mad","chrom.nonanom.median","chrom.nonanom.mean","chrom.nonanom.sd","sex")
if(!is.null(LOH.raw.adjusted)){
LOH.raw.adjusted<-LOH.raw.adjusted[,colm]
}
if(!is.null(LOH.filtered)){
LOH.filtered<-LOH.filtered[order(LOH.filtered$scanID,LOH.filtered$chrom,LOH.filtered$left),]
LOH.filtered<-LOH.filtered[,colm]
}
outr<-list(LOH.raw,LOH.raw.adjusted,LOH.filtered,LOH.base.info,LOH.segments,LOH.merge)
names(outr)<-c("raw","raw.adjusted","filtered","base.info","segments","merge")
# convert back to package standard names
for (i in 1:length(outr)) {
if ("chrom" %in% names(outr[[i]]))
names(outr[[i]])[names(outr[[i]]) == "chrom"] <- "chromosome"
if ("left" %in% names(outr[[i]]))
names(outr[[i]])[names(outr[[i]]) == "left"] <- "left.index"
if ("right" %in% names(outr[[i]]))
names(outr[[i]])[names(outr[[i]]) == "right"] <- "right.index"
}
# convert index to base position
for (i in 1:length(outr)) {
if (!is.null(outr[[i]]) & ("left.index" %in% names(outr[[i]]))) {
outr[[i]]$left.base <- getPosition(intenData, index=outr[[i]]$left.index)
outr[[i]]$right.base <- getPosition(intenData, index=outr[[i]]$right.index)
}
}
return(outr)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.