Nothing
runExPANdS<-function(SNV, CBS, maxS=0.7, max_PM=6, min_CF=0.1, p=NA, ploidy=2,
nc=1, plotF=2, snvF=NULL, maxN=8000, region=NA, verbose=T){
MINSNVS=15
if (!exists("SNV") || !exists("CBS")){
print("Input-parameters SNV and/or CBS missing. Please add the paths to tabdelimited files containing the SNVs and copy numbers.");
return();
}
dirF=getwd();
##Read point mutation and copy number input
tmp=.readSNVandCBS(SNV,CBS,max_PM=max_PM, min_CF=min_CF, snvF=snvF, verbose=verbose);
dm=tmp$dm; copyNumber=tmp$copyNumber; snvF=tmp$snvF;
ii=which(dm[,"CN_Estimate"]<0.5);
if (length(ii)>0){
print(paste(length(ii), " SNV(s) excluded due to homozygous deletions within that region."));
dm=dm[-ii,];
}
if(is.null(dm) || is.null(dim(dm)) || nrow(dm)<MINSNVS){
print("Not enough mutations provided. Minimum",MINSNVS,"SNVs required to attempt a run.")
return(list("finalSPs"=NULL,"dm"=NULL,"densities"=NULL,"sp_cbs"=NULL));
}
##Regions of interest
idx_R=.indexOfSNVsWithin(region, dm, maxN, verbose=verbose)
if (is.na(p)){
p=0.1/log(length(idx_R)/7);
}
#############################
###Input parsing ends here###
cfd=computeCellFrequencyDistributions(dm = dm, max_PM = max_PM, p = p,
ploidy=ploidy, min_CF=min_CF, nc=nc, v=verbose);
toUseIdx=which(apply(is.finite(cfd$densities),1,all) );
toUseIdx=intersect(toUseIdx,idx_R);
SPs=clusterCellFrequencies(densities = cfd$densities[toUseIdx,], p = p, min_CF=min_CF,verbose=verbose);
if(is.null(SPs) || size(SPs,1)==0 || all(is.na(SPs)) || sum(SPs[,"score"]<=maxS)==0 ){
print("No SPs found.")
result=list("finalSPs"=NULL,"dm"=cfd$dm,"densities"=cfd$densities);
return(result);
}
finalSPs=SPs[SPs[,"score"]<=maxS,,drop=F];
ia=order(finalSPs[,"Mean Weighted"]);
finalSPs=matrix(finalSPs[ia,,drop=F],nrow=nrow(finalSPs), ncol=ncol(finalSPs), dimnames=list(1:nrow(finalSPs),colnames(finalSPs)));
if(size(finalSPs,1)==0){
print(paste("No SPs found below score:",maxS))
result=list("finalSPs"=finalSPs,"dm"=dm,"densities"=cfd$densities);
return(result);
}
###########################
###Assigning SNVs to SPs###
aM = assignMutations( dm = cfd$dm, finalSPs = finalSPs, max_PM=max_PM, ploidy=ploidy,verbose=verbose);
dm=aM$dm; SPs=aM$finalSPs;
# ########################################################################
# ##Remove SPs with no single SNV/CNV cooccurence and reassign mutations##
# iF=which(SPs[,'snv_cnv_Co']==0);
# while(length(iF)>0 && nrow(SPs)>2){ ##Remove only if enough SPs are left
# iF=iF[which.max(SPs[iF,'score'])]
# .notifyUser(paste("Excluding subpopulation",SPs[iF,'Mean Weighted'],"with 0 SNV/CNV cooccurence incidence."),verbose=verbose)
# SPs=SPs[-iF,,drop=F]
# aM = assignMutations( dm = cfd$dm, finalSPs = SPs, max_PM=max_PM, ploidy=ploidy,verbose=verbose);
# dm=aM$dm; SPs=aM$finalSPs;
# iF=which(SPs[,'snv_cnv_Co']==0);
# }
###########################
##Choose between doublets##
continueprune=TRUE;
while (!is.null(dim(SPs)) && nrow(SPs)>1 && continueprune){
x=sort(SPs[,'Mean Weighted'],index.return=TRUE); SPs=SPs[x$ix,];
toRm=c();
for (sp in 1:nrow(SPs)){
ii=sp; ##sp_i * x != sp_j for all x in 2:6 and all SP pairs (i,j)
x=2; ##Check for doublets only
maxDev=SPs[sp,'precision']* 2/3 ;
i_=which(abs(SPs[sp,'Mean Weighted']*x-SPs[,'Mean Weighted'])<maxDev);
ii=union(ii,i_);
if( length(ii)>1 ){
ii=ii[which(SPs[ii,'nMutations']<0.6*max(SPs[ii,'nMutations'],na.rm=T))]; ##Keep only SP of max kurtosis
toRm=c(toRm,ii )
}
}
toRm=unique(toRm)
if (!is.null(toRm) && length(toRm)>0){
iReassign=which(dm[,'SP'] %in% SPs[toRm,'Mean Weighted'] | dm[,'SP_cnv'] %in% SPs[toRm,'Mean Weighted'] );
print(paste('Reassigning SNVs after pruning',length(toRm),'doublet subpopulation(s).'))
.notifyUser("Pruned subpopulation(s):",verbose = verbose)
.notifyUser(SPs[toRm,'Mean Weighted'],verbose = verbose)
SPs=SPs[-toRm,, drop=FALSE];
aM = assignMutations(dm = dm[iReassign,,drop=FALSE], finalSPs = SPs,max_PM=max_PM, ploidy=ploidy,verbose=verbose);
dm[iReassign,]=aM$dm;
#Recount mutations after reassignment (absence of pruned SP_cnv may cause reassignment of mutations among remaining SPs):
SPs[,"nMutations"]=0;
tmp=count(dm[,"SP"]); ia=match(tmp$x,SPs[,"Mean Weighted"]);
SPs[ia,"nMutations"]=tmp$freq
finalSPs=SPs;
}else{
continueprune=FALSE;
finalSPs=SPs;
}
}
#if (plotF>2){
# if(!require(rgl)){
# message("Plot supressed: Package rgl required for 3D plot of subpopulation clusters. Load this package before using this option.")
# }else{
# ##plot probability distributions
# cols=c("red","yellow","green","pink","magenta","cyan","lightblue","blue");
# kk=ceil(nrow(finalSPs)/2); par(mfcol=c(2,kk));
# for (i in 2:nrow(finalSPs)){
# idx=which(dm[toUseIdx,"SP"]==finalSPs[i,"Mean Weighted"]);
# open3d();
# persp3d(as.numeric(1:length(idx)),as.numeric(cfd$freq),t(cfd$densities[idx,]),col=cols[mod(i,length(cols))+1],aspect=c(1, 1, 0.5), add=FALSE,xlab="Mutation", ylab="cell-frequency", zlab="Probability");
# title3d(paste("SP_", round(finalSPs[i,"Mean Weighted"], digits=2),"\noo",sep=""));
# play3d(spin3d(axis=c(0,0,1), rpm=10), duration=5)
# }
# }
#}
##phylogeny
finalSPs=.addColumn(finalSPs,'Ancestor',NA);
finalSPs=.addColumn(finalSPs,'ClosestDescendant',NA);
aQ=try(assignQuantityToSP(cbs = copyNumber, dm = dm,v=verbose),silent=FALSE);
tr=NULL;
if(class(aQ)=="try-error"){
print("Error encountered while assigning subpopulation specific copy number to genomic segments. Phylogeny will not be inferred.")
}else {
.writeExpandsOutput(X=aQ, dirF,snvF,suffix=".sps.cbs", message="Subpopulation specific copy numbers")
output=paste(dirF, .Platform$file.sep, snvF, sep=""); # output=paste(dirF, .Platform$file.sep, gsub("\\.","_",snvF), sep="");
tr=try(buildPhylo(sp_cbs = aQ,outF = output,dm=dm,verbose=verbose),silent=FALSE);
if(class(tr)!="try-error" ){
if(class(tr$dm)!="try-error" && !is.na(tr$dm)){
dm=tr$dm;
##Assign ancestor and closest descendant
phySPs=colnames(tr$spRelations)
for (sp in rownames(tr$spRelations)){
desc=as.numeric(gsub('SP_','',c(sp,phySPs[tr$spRelations[sp,]==1])))
if(length(desc)>1){
iAnc=which.min(abs(finalSPs[,'Mean Weighted']-desc[1]))
desc=desc[which.min(abs(desc[2:length(desc)]-desc[1]))+1]
iDesc=which.min(abs(finalSPs[,'Mean Weighted']-desc))
finalSPs[iDesc,'Ancestor']=finalSPs[iAnc,'Mean Weighted']
finalSPs[iAnc,'ClosestDescendant']=finalSPs[iDesc,'Mean Weighted']
}
}
}else{
print("Error encountered while reconstructing phylogeny")
}
tr=tr$tree;
}
}
####################
###Write output#####
.writeExpandsOutput(X=dm, dirF,snvF,suffix=".sps", message="Subpopulation specific point mutations")
.writeExpandsOutput(X=finalSPs, dirF,snvF,suffix=".spstats", message="Summary file of detected subpopulations")
if (plotF>0){
tmp=try(plotSPs(dm = dm[toUseIdx,],sampleID = snvF),silent=FALSE);
}
if (plotF>1 && !is.null(tr) && class(tr)!="try-error"){
try(plot(tr),silent=FALSE);
}
result=list("finalSPs"=finalSPs,"dm"=dm,"densities"=cfd$densities,"sp_cbs"=aQ,"tree"=tr);
return(result);
}
.indexOfSNVsWithin<-function(region, dm, maxN, verbose){
if (is.character(region) && file.exists(region)){
roi=read.table(region,sep="\t",header=TRUE,check.names = F,stringsAsFactors = F);
roi <- roi[ as.character(roi[,"chr"]) %in% as.character(seq(100)), ,drop=F];
if(nrow(roi)==0){
warning(paste(region,"does not contain genomic regions on known chromosomes. Regions will be ignored. Correct format does not include \'chr\'-prefix."))
}
roi=data.matrix(roi);
}else if (is.matrix(region)){
roi=region;
}
idx_R=c(1:size(dm,1));
if (size(dm,1)>maxN){
if (is.na(region)){
print(paste("Input contains more than ",maxN," SNVs and parameter <region> not set. Using default regional boundary (SureSelectExome_hg19)"));
}
if (!is.numeric(roi)) {
print("Region matrix has to be numeric.");
return( list("finalSPs"=NULL,"dm"=NULL,"densities"=NULL,"sp_cbs"=NULL) );
}
print(paste("Gathering SNVs within regions of interest ..."))
##Find SNVs within regions of interest
idx_R=c();
so=sort(roi[,'start'],index.return=T);
roi=roi[so$ix,];
for(i in 1:nrow(dm)){
ii=which(roi[,'chr']==dm[i,'chr'])
ix=which.min(abs(roi[ii,'start']-dm[i,'startpos']))
ii=ii[c(max(0,(ix - 2)):min(length(ii),(ix + 2)))];
if(i %% 100 ==0){
.notifyUser(paste(i," out of ",nrow(dm), " SNVs tested"),verbose=verbose)
}
for (j in ii){
if (roi[j,'start']<=dm[i,'startpos'] && roi[j,'end']>=dm[i,'startpos']){
idx_R=cbind(idx_R,i);
break;
}
}
}
print(paste('Found ',length(idx_R), ' SNVs within regions of interest',sep=""))
idx_R=sample(idx_R,min(maxN,length(idx_R)),replace=FALSE);
print(paste("Keeping ",length(idx_R), ' of these SNVs (randomly selected).',sep=""))
}else{
if (!is.na(region)){
print(paste("Input contains less than ",maxN," SNVs. Parameter <region> ignored."));
}
}
return(idx_R)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.