R/TAPS.r

Defines functions chromFix TAPS_click returnSkippedValues copyNumberClicker clicker clickWrap drawImage getEstimates TAPS_estimates TAPS_region karyotype_chromsCN karyotype_chroms OverviewPlot compare_regionSet TAPS_compare sum_regionSet TAPS_freq addGenes pileup_copies pileup_pos vectorMatch getGenes pvals pileup_dif pileup karyotype_check setCNs findCNs is.autosome neighbour is.odd is.even myMeans weightedMean weightedMedian getOverlap probesInRegions mySorter thisSubdir getSubdirs vplayout subplot chrom_ucsc deChrom_ucsc save.txt load.txt nrows equals makeRegions chrom_n readSegments readAlf readLog2 segment_DNAcopy segMatch TAPS_call TAPS_plot

Documented in TAPS_call TAPS_click TAPS_compare TAPS_compare TAPS_compare TAPS_compare TAPS_estimates TAPS_freq TAPS_plot TAPS_region

## load('~/patchwork/pkg/TAPS/R/sysdata.rda')
## load('shortRegions.Rdata')
## load('allRegions.Rdata')

## TAPS source code reimplemented 2013 Markus (Rasmussen) Mayrhofer, Sebastian DiLorenzo

#########    ###    ########   ######     ########  ##        #######  ######## 
#   ##      ## ##   ##     ## ##    ##    ##     ## ##       ##     ##    ##    
#   ##     ##   ##  ##     ## ##          ##     ## ##       ##     ##    ##    
#   ##    ##     ## ########   ######     ########  ##       ##     ##    ##    
#   ##    ######### ##              ##    ##        ##       ##     ##    ##    
#   ##    ##     ## ##        ##    ##    ##        ##       ##     ##    ##    
#   ##    ##     ## ##         ######     ##        ########  #######     ##  

TAPS_plot <- function(#samples='all',
                     directory=NULL,autoEstimate=FALSE,
                      bin=250,cores=1,matched=FALSE,allelePeaks=FALSE) {
    # cores=1
    #Automatically check, and if needed install, packages stats and fields
    
    #Load stats. It should be in all, at least semi-new, R distributions so we dont need to install.package it or
    #pre-install it
    #library(stats)
    suppressPackageStartupMessages(library(stats))
    suppressPackageStartupMessages(library(DNAcopy))
    suppressPackageStartupMessages(library(fields))
    suppressPackageStartupMessages(library(xlsx))    
    suppressPackageStartupMessages(library(foreach))
    try(suppressPackageStartupMessages(library(doMC)),silent=T)
    try(suppressPackageStartupMessages(registerDoMC(cores=cores)),silent=T)


    #list.of.packages <- c("stats", "fields")
    #new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
    #if(length(new.packages)) install.packages(new.packages)
    
    if (is.null(directory))
    {
        print("Using working directory")
        directory = "."
        #cat("You have not assigned a directory containing one or more folders of samples for TAPS_plot to execute. \n")
        #cat("Example: \"/user/mysamples/\" or, to run it in your current working directory, \".\" \n")
        #directory = readline("Please supply such a directory now: ")
    }
    
    ## This function takes a directory as input, then builds short-segment TAPS scatter plots for each sample (subdirectory) in the directory.
    setwd(directory)
    subs <- getSubdirs()
    subs=subs[subs!='frequencies' & subs!='frequencies_comp']
    subsToSampleData=NULL
    if (is.null(subs)) {                                         ## check samples = subdirectories or a single sample = current directory
        subs=thisSubdir()
        setwd('..')
        subsToSampleData <- getSubdirs()
    }
    
   # create SampleData file if there is none.   
    if (length(grep('SampleData.csv',dir()))==0) {
        if(!is.null(subsToSampleData))
            {
            sampleData <- data.frame(Sample=subsToSampleData,cn1= NA, cn2=NA, cn3=NA, loh=NA, MAPD=NA, MHOF=NA)
            }
        else
            {
            sampleData <- data.frame(Sample=subs,cn1= NA, cn2=NA, cn3=NA, loh=NA, MAPD=NA, MHOF=NA)
            }
        save.txt(sampleData,'SampleData.csv')
    } else {
        sampleData=load.txt('SampleData.csv')
    }

    #if (samples[1]=='all') samples=rep(T,length(subs))
    #if (is.logical(samples)) samples=which(samples)
    #subs=subs[samples]
    root <- getwd()
    print(paste('root: ',root,sep=''))
    
    # for (i in 1:length(subs)) {
    #junk only stores the list from foreach.
    sampleData$MHOF <- sampleData$MAPD <- NA
    junk <- foreach (i = 1:length(subs)) %dopar% {
        setwd(paste(root,'/',subs[i],sep=''))
        name <- subs[i]
        #if (length(grep('sampleData.csv',dir()))==0) save.txt(data.frame(Sample=name,cn2='',delta='',loh='',completed.analysis='no'),file='sampleData.csv')
        # cat(' ..loading', subs[i])
        # print(paste(i,': ',subs[i],': Opening',sep=''))
        print(paste(i,'/',length(subs),': ',subs[i],' Loading',sep=''))
        
        if(length(grep("*.cychp",dir()))==1)				##cychp sample
        {
            ##read CYCHP file from ChAS with log-ratio info##
            ################################################
            suppressPackageStartupMessages(library(affxparser))
            
            name=list.files(".",pattern="*.cychp")
            temp=readCcg(name)
            tempLog2=temp$dataGroups$ProbeSets$dataSets$CopyNumber$table[,1:4]
            #Value=NULL
            Start=tempLog2$Position
            End=tempLog2$Position
            Value=tempLog2$Log2Ratio
            tempChr=tempLog2$Chromosome
            nrows=dim(tempLog2)[1]
            Chromosome=rep(NA,nrows)
            for (i in 1:nrows) {
                Chromosome[i]=paste('chr', tempChr[i], sep="")
            }
            
            Log2=as.data.frame(cbind(Chromosome, Start, End, Value))
            
            colnames(Log2)=c("Chromosome","Start","End","Value")
            
            Log2$Chromosome = as.character(Log2$Chromosome)
            Log2$Chromosome[Log2$Chromosome=="chr24"] = "chrX"
            Log2$Chromosome[Log2$Chromosome=="chr25"] = "chrY"
            Log2$Chromosome = as.factor(Log2$Chromosome)
            
            Log2$Start = as.integer(as.character(Log2$Start))
            Log2$End = as.integer(as.character(Log2$End))
            Log2$Value = as.numeric(as.character(Log2$Value))
            
            ##read txt file from ChAS with snp info##
            #########################################
            
            name=c(list.files(".",pattern="*.cy750K.txt"),list.files(".",pattern="*.cyhd.txt"))
            tempalf=read.table(file=name, header=FALSE, sep="\t", skip=12)
            SignalA=tempalf$V4
            SignalB=tempalf$V5
            tempChr=tempalf$V8
            Start=tempalf$V9
            End=tempalf$V9
            Chromosome=Value=rep(NA,nrows)
            nrows=dim(tempalf)[1]
            for (i in 1:nrows) {
                Value[i]=SignalB[i]/(SignalB[i]+SignalA[i])
                Chromosome[i]=paste('chr', tempChr[i], sep="")
            }
            
            alf=as.data.frame(cbind(Chromosome, Start, End, Value))
            
            colnames(alf)=c("Chromosome","Start","End","Value")
            
            alf$Start = as.integer(as.character(alf$Start))
            alf$End = as.integer(as.character(alf$End))
            alf$Value = as.numeric(as.character(alf$Value))
        } else if (file.exists('rawcopy.Rdata')) {
            load('rawcopy.Rdata')
            Log2=probes.txt[,2:5]
            alf=snps.txt[,2:5]
        } else {
            Log2 <- readLog2()                                     ## Log-R
            alf <- readAlf()                                         ## Allele Frequency
        }
        
        #browser()
        
        Log2=Log2[!is.nan(Log2$Value),]
        Log2=Log2[!is.na(Log2$Value),]
        Log2 <- Log2[which(Log2$Value != -Inf & Log2$Value != +Inf ),]
        Log2=chromFix(Log2)
        alf=alf[!is.nan(alf$Value),]
        alf=alf[!is.na(alf$Value),]
        alf$Value[alf$Value<0]=0; alf$Value[alf$Value>1]=1
        alf <- alf[which(alf$Value != -Inf & alf$Value != +Inf ),]
        alf=chromFix(alf)
        
        segments <- readSegments()                                ## segments if available (CBS recommended)
 
        # cat(' ..processing')
        if (is.null(segments)) {                                 ## segmentation using DNA-copy if needed (must then be installed)
            segments <- segment_DNAcopy(Log2)
            save.txt(segments,'_segments.txt') 
        }

        #Remove NA values that some samples give.
        segments <- segments[!is.nan(segments$Value),]
        segments <- segments[!is.na(segments$Value),]
        
        segments$Value <- segments$Value-median(Log2$Value,na.rm=T)     ## Median-centering
        Log2$Value <- Log2$Value-median(Log2$Value,na.rm=T)             ## Median-centering
        
        #browser()
        allRegions=NULL; #if ('allRegions.Rdata' %in% dir()) load('allRegions.Rdata')
        if (is.null(allRegions)) allRegions <- makeRegions(Log2, alf, segments,matched=matched,min=30,allelePeaks=allelePeaks)            ## Calculates necessary data for segments (all functions are in this file)
        save(allRegions,file='allRegions.Rdata')
        regs=NULL;# if ('shortRegions.Rdata' %in% dir()) load('shortRegions.Rdata')
        #browser()
        if (is.null(regs)) {
            regs <- regsFromSegs(Log2,alf,segments,bin=bin,min=30,matched=matched,allelePeaks=allelePeaks)    ## Calculates the same data for shortened segments
            save(regs,file='shortRegions.Rdata')
        }
        
        ## Sample QC 
        # print(paste(i,': ',segments,', ',' sample QC',sep=''))
        # sampleData$MAPD[i] <- MAPD <- round(median(abs(diff(Log2$Value[Log2$Chromosome=='chr1'][order(Log2$Start[Log2$Chromosome=='chr1'])]))),2)
        # sampleData$MHOF[i] <- MHOF <- round(100*median(0.5+abs(0.5-alf$Value[alf$Chromosome!='chrX'])),1)
        # print(round(median(abs(diff(Log2$Value[Log2$Chromosome=='chr1'][order(Log2$Start[Log2$Chromosome=='chr1'])]))),2))
        # print(round(100*median(0.5+abs(0.5-alf$Value[alf$Chromosome!='chrX'])),1))

        sampleData$MAPD[i] <- round(median(abs(diff(Log2$Value[Log2$Chromosome=='chr1'][order(Log2$Start[Log2$Chromosome=='chr1'])]))),2)
        sampleData$MHOF[i] <- round(100*median(0.5+abs(0.5-alf$Value[alf$Chromosome!='chrX'])),1)

            #round(100*median(0.5+regs$hom[regs$scores<0.5],na.rm=T),1)        
        #MAID=round(median(abs(diff(regs$scores[!is.na(regs$scores)]))),3)
        
        #Save for TAPS_region()
        # print(paste(i,': taps_region()',sep=''))
        save(regs,Log2,alf,segments,file="TAPS_plot_output.Rdata")
        
        #save.txt(sampleData,file='../sampleData.csv')
        
        #Clear warnings generated previously so hopefully I can see what is actually causing the program to fail.
        #assign("last.warning", NULL, envir = baseenv())
        
        #Test if hg18 or hg19 should be used. length of (hg18 chr19) > (hg19 chr19)
        hgtest=regs[regs$chr=="chr19",]
        if(hgtest$end[length(hgtest$chr)] > 60000000) {
            hg18=T
        } else {
            hg18=F
        }

        # cat('..plotting.\n')
        OverviewPlot(regs$chr,regs$start,regs$end,regs$logs,regs$scores,hg18=hg18,
                     as.character(Log2$Chromosome),Log2$Start,Log2$Value,as.character(alf$Chromosome),alf$Start,alf$Value,
                     name=name,MAPD=sampleData$MAPD[i],MHOF=sampleData$MHOF[i])                
        
        ## Chromosome-wise plots for manual analysis
        regions=allRegions$regions
        
        karyotype_chroms(regs$chr,regs$start,regs$end,regs$logs,regs$scores,hg18=hg18,
                         as.character(Log2$Chromosome),Log2$Start,Log2$Value,as.character(alf$Chromosome),alf$Start,
                         alf$Value,name=name,MAPD=sampleData$MAPD[i],MHOF=sampleData$MHOF[i])
        
        ## Finally add estimates if needed:
        e=NULL
        if (is.logical(autoEstimate)) {
            if (length(autoEstimate)>1) {
                if (autoEstimate[i]) e=getEstimates(regs$logs,regs$scores)
            } else if (autoEstimate[1]) e=getEstimates(regs$logs,regs$scores)
        } else if (is.numeric(autoEstimate)) { 
            if (i %in% autoEstimate) e=getEstimates(regs$logs,regs$scores)
        } else if (is.character(autoEstimate)) if (name %in% autoEstimate)
            e=getEstimates(regs$logs,regs$scores)
        if (!is.null(e)) {
            sampleData$cn1=e[1]
            sampleData$cn2=e[2]
            sampleData$cn3=e[3]
            sampleData$loh=e[4]
        }
        
        # cat('..done\n')
        # print(paste(i,': ',subs[i],': OK',sep=''))
        print(paste(i,'/',length(subs),': ',subs[i],' OK',sep=''))
        setwd(root)
        1
    }
    save.txt(sampleData,'SampleData.csv')
}
###

#########    ###    ########   ######      ######     ###    ##       ##       
#   ##      ## ##   ##     ## ##    ##    ##    ##   ## ##   ##       ##       
#   ##     ##   ##  ##     ## ##          ##        ##   ##  ##       ##       
#   ##    ##     ## ########   ######     ##       ##     ## ##       ##       
#   ##    ######### ##              ##    ##       ######### ##       ##       
#   ##    ##     ## ##        ##    ##    ##    ## ##     ## ##       ##       
#   ##    ##     ## ##         ######      ######  ##     ## ######## ######## 

###
TAPS_call <- function(samples='all',directory=getwd(),cores=1) {
    # cores=1
    minseg=1
    maxCn=12
    #suppressPackageStartupMessages(library(xlsx))    
    suppressPackageStartupMessages(library(foreach))
    try(suppressPackageStartupMessages(library(doMC)),silent=T)
    try(suppressPackageStartupMessages(registerDoMC(cores=cores)),silent=T)
    
    
    ## TAPS_call outputs the total and minor allele copy numbers of all segments as a text file, and as images for visual confirmation.
    ## sampleInfo_TAPS.txt must be present in each sample folder. If TAPS_plot could not make a good guess of the Log-R of copy number 2 
    ## and the Log-R difference to a deletion, you must interpret the scatter plots and edit sampleInfo_TAPS.txt.
    if (is.null(directory))
    {
        print("No directory supplied, using working directory.")
        directory = "."
        #cat("You have not assigned a directory containing one or more folders of samples for TAPS_call to execute. \n")
        #cat("Example: \"/user/mysamples/\" or, to run it in your current working directory, \".\" \n")
        #directory = readline("Please supply such a directory now: ")
    }
    
    setwd(directory)
    #subs <- getSubdirs()
    
    if (length(grep('SampleData.csv',dir()))==0)
    {
        if (length(grep('SampleData.xlsx',dir()))==1)
        {
            try(library(xlsx))
            sampleData=read.xlsx('SampleData.xlsx',1)
            save.txt(sampleData,'SampleData.csv')
        }
        sampleData=load.txt('SampleData.csv')
    }
    
    if (length(grep('SampleData.csv$',dir()))==1)
    {
        sampleData=load.txt('SampleData.csv')
    } else
    {
        sampleData <- load.txt('../SampleData.csv')
    }
    subs=as.character(sampleData$Sample)
    
    if (is.null(subs)) {
        subs=thisSubdir()
        subs=subs[subs!='frequencies' & subs!='frequencies_comp']
        setwd('..')
    }
    
    if (samples[1]=='all') samples=rep(T,length(subs))
    if (is.logical(samples)) samples=which(samples)
    subs=subs[samples]
    sampleData <- sampleData[samples,]

    # for (i in 1:length(subs)) {
    #junk only stores the list from foreach.
    junk <- foreach(i=1:length(subs)) %dopar% {
        setwd(subs[i])
        name <- subs[i]
        sampleInfo <- sampleData[i,c('cn1','cn2','cn3','loh')]
        if (nrow(sampleInfo)==1) if (sum(is.na(sampleInfo))<4) {
            
                # cat(' ..loading', subs[i])
            print(paste(i,'/',length(subs),': ',subs[i],' Loading',sep=''))
            if (file.exists('rawcopy.Rdata')) {
                load('rawcopy.Rdata')
                Log2=probes.txt[,2:5]
                alf=snps.txt[,2:5]
            } else {
                Log2 <- readLog2()
                alf <- readAlf(localDir)
            }
            segments <- readSegments()
            
            #Some samples contain NA values, we simply remove these.
            Log2=Log2[!is.nan(Log2$Value),]
            Log2=chromFix(Log2)
            
            Log2=Log2[!is.na(Log2$Value),]
            
            alf=alf[!is.nan(alf$Value),]
            alf=alf[!is.na(alf$Value),]
            alf=chromFix(alf)
            
            
            segments <- segments[!is.nan(segments$Value),]
            segments <- segments[!is.na(segments$Value),]    
            segments=chromFix(segments)
            
            
            segments$Value <- segments$Value-mean(Log2$Value) 
            Log2$Value <- Log2$Value-mean(Log2$Value)
            
            # cat(' ..processing.\n')
            
            load('allRegions.Rdata')                            ## These were prepared in TAPS_plot
            load('shortRegions.Rdata')
            #allRegions <- makeRegions(Log2, alf, segments)
            
            ## estimates the Log-R and Allelic Imbalance Ration of all variants up to maxCn
            t <- findCNs(Log2,alf,allRegions,regs,name,maxCn=maxCn,ceiling=1,sampleInfo=sampleInfo) 
            save(t,file='t.Rdata')
            
            u <- setCNs(allRegions,t$int,t$ai,t$model,maxCn)            ## Assigns copy number variant for all segments
            allRegions$regions <- u$regions
            ## adjacent segments with idendical copy number are NOT... merged (except over centromere) and all are saved to a text file
            save.txt(u$regions,file=paste(name,'_segmentCN.txt',sep='')) 
            regions=allRegions$regions

            #save(u$model,file="model.Rdata")
            write.table(t(as.data.frame(u$model)),file='model.txt',row.names=T)

            save(t,regions,file="regions_t.Rdata")


            #Test if hg18 or hg19 should be used. length of (hg18 chr19) > (hg19 chr19)
            hgtest=regions[regions$Chromosome=="chr19",]
            if(hgtest$End[length(hgtest$Chromosome)] > 60000000)
            {
                hg18=T
            } else {
                hg18=F
            }

            #save parameters as strings
            parameters=paste("Parameters given: cn2:",sampleInfo$cn2," delta:",sampleInfo$delta," loh:",sampleInfo$loh)
            
            #karyotype_check(regions$Chromosome,regions$Start,regions$End,regions$log2,regions$imba,regions$Cn,regions$mCn,t,name=name)
            
            karyotype_chromsCN(regions$Chromosome,regions$Start,regions$End,regions$log2,
                               regions$imba,regions$Cn,regions$mCn,hg18=hg18,
                               as.character(Log2$Chromosome),Log2$Start,Log2$Value,as.character(alf$Chromosome),
                               alf$Start,alf$Value,t,name=name,xlim=c(-1,1),ylim=c(0,1),parameters=parameters)
            
            # cat('..done\n')
            print(paste(i,'/',length(subs),': ',name,' ', 'OK',sep=''))
        } else print(paste(i,'/',length(subs),': ',name,' ', 'Skipped',sep=''))
        
        setwd('..')
    }
    #save.txt(sampleData,file='sampleData.csv')
    
}
###
segMatch <- function(segments.txt,probes.txt) {
    ## Returns a data frame with probes.startIx probes.endIx
    #verry fast
    colnames(segments.txt)[colnames(segments.txt)=='chr']='Chromosome'
    colnames(segments.txt)[colnames(segments.txt)=='start']='Start'
    colnames(segments.txt)[colnames(segments.txt)=='end']='End'
    
    segStarts=data.frame(n.seg=1:nrow(segments.txt),n.pos=NA,chr=chrom_n(segments.txt$Chromosome),pos=segments.txt$Start,type='start')
    segEnds=data.frame(n.seg=1:nrow(segments.txt),n.pos=NA,chr=chrom_n(segments.txt$Chromosome),pos=segments.txt$End,type='end')
    lpos=data.frame(n.seg=NA,n.pos=1:nrow(probes.txt),chr=chrom_n(probes.txt$Chromosome),pos=(probes.txt$Start+probes.txt$End)/2,type='logr')
    dummy1=data.frame(n.seg=NA,n.pos=NA,chr=-Inf,pos=-Inf,type='logr')
    dummy2=data.frame(n.seg=NA,n.pos=NA,chr=Inf,pos=Inf,type='logr')
    
    table=rbind(segStarts,segEnds,lpos,dummy1,dummy2)
    table=table[order(table$chr,table$pos),]
    
    start=(table$type=='start')
    end=(table$type=='end')
    #l=(table$type=='logr')
    
    # omitting segment ends, let segment start "n.pos" be that of the next marker
    table$n.pos[!end][start[!end]] <- table$n.pos[!end][which(start[!end])+1]
    
    # omitting segment starts, let segment end "n.pos" be that of the previous marker
    table$n.pos[!start][end[!start]] <- table$n.pos[!start][which(end[!start])-1]
    
    # re-create a data frame with startIx and endIx (in probes.txt) per segment
    indices=data.frame(startIx=table$n.pos[start],endIx=table$n.pos[end])
    
    na=is.na(indices$startIx+indices$endIx)
    na[!na] = indices$startIx[!na]>indices$endIx[!na]
    indices$startIx[na] <- indices$endIx[na] <- NA
    
    return(indices)
}
regsFromSegs <- function (Log2,alf, segments, bin=150,min=1,matched=F,allelePeaks=FALSE) {
    ## This function builds short segments and calcualtes their average Log-R and Allelic Imbalance Ratio.
    rownames(Log2)=1:nrows(Log2)
    rownames(alf)=1:nrows(alf)
    regs=NULL #list('chr'=NULL,'start'=NULL,'end'=NULL,'logs'=NULL,'scores'=NULL,'probes'=NULL,'snps'=NULL)
                                #,'key1'=rep(NA,nrow(Log2)),'key2'=rep(NA,nrow(alf)))
    n=nrow(segments)
    s_check=NULL
    
    pMatch=segMatch(segments,Log2)
    sMatch=segMatch(segments,alf)
    
    for (c in 1:n) { ## for every segment
        #tlog=Log2[Log2$Chromosome==segments$Chromosome[c],] ## Log-R on this chromosome
        #talf=alf[alf$Chromosome==segments$Chromosome[c],] ## Allele Freq on this chromosome
        #tlog=tlog[(tlog$Start>=segments$Start[c])&(tlog$Start<segments$End[c]),] ## Log-R on this segment
        #talf=talf[(talf$Start>=segments$Start[c])&(talf$Start<segments$End[c]),] ## Allele Freq on this segment
        
        tsnps=sMatch[c,2]-sMatch[c,1]+1; if (is.na(tsnps)) tsnps=0   #nrow(talf)    ## number of snps and probes in this segment
        tprobes=pMatch[c,2]-pMatch[c,1]+1; if (is.na(tsnps)) tsnps=0   #nrow(tlog)
        tnregs=max(trunc(tsnps/bin),1) ## Further split into how many (a minimum of 1) subsegments
        tcuts=segments$Start[c] ## The first start pos
        tlength=segments$End[c]-segments$Start[c]    ## Length of this whole segment
        for (i in 1:tnregs) tcuts = c(tcuts, tcuts[1]+round(i/tnregs*tlength)) ## Break the segment at these positions
        
        tab=data.frame(chr=segments$Chromosome[c],start=tcuts[-length(tcuts)],end=tcuts[-1],logs=NA,scores=NA,probes=0,snps=0)
        if (is.null(regs)) regs=tab else regs=rbind(regs,tab)
    }
    
    pMatch=segMatch(regs,Log2)
    sMatch=segMatch(regs,alf)
        
    for (i in 1:nrow(regs)) {    ## build the subsegments
        #regs$chr=c(regs$chr,as.character(segments$Chromosome[c]))    ## Chromosome
        #s_=tcuts[r]                                                     ## Start
        #e_=tcuts[r+1]                                                 ## End
        #thisalf=talf[(talf$Start>=s_)&(talf$Start<=e_),]             ## get the Log-R values
        #thislog=tlog[(tlog$Start>=s_)&(tlog$Start<=e_),]             ## and the allele frequency
        #regs$key1[as.integer(rownames(thislog))]=length(regs$chr)    ## store their positions for fast access during plotting
        #regs$key2[as.integer(rownames(thisalf))]=length(regs$chr)    ## --"--
        if (!is.na(pMatch[i,1])) { 
            regs$logs[i]=median(Log2$Value[pMatch[i,1]:pMatch[i,2]],na.rm=T)  ## store average log ratio of this segment
            regs$probes[i]=pMatch[i,2]-pMatch[i,1]+1                   ## store number of probes
        }
        if (!is.na(sMatch[i,1])) { 
            temp = NA
            try(temp <- allelicImbalance(alf$Value[sMatch[i,1]:sMatch[i,2]],min,matched,allelePeaks),silent=T)
            regs$scores[i]=temp
            regs$snps[i]=sMatch[i,2]-sMatch[i,1]+1                       
        }
    }

    #regs=as.data.frame(regs)
    regs=regs[!is.na(regs$logs),]  ### MODDAT MARKUS MAJ 2013
    return (regs)
}
allelicImbalance <- function (data,min=30,matched=F,allelePeaks=F) {
    #browser()
    if(matched == T ) {
         return(2*median(abs(data-.5),na.rm=T))
    }
    if(allelePeaks == F) {
        if (length(data)>min) {                                    ## Time to calculate Allelic Imbalance Ratio (if enough SNPs)
            t1=sort( abs(data-0.5) )                            ## distance from middle (het) in the allele freq pattern, ascending
            if (length(unique(t1))>3) {                                ## do not attempt clustering with too few snps
                xx=NULL
                try(xx <- kmeans(t1, 2),silent=T)                            ## Attempt k-means (Hartigan-Wong: has proven very stable)
                if (!is.null(xx)) if (min(xx$size) > 0.05*max(xx$size)) {    ## On some occations data quality is poor, requiring 5%+ heterozygous SNPs avoids most such cases.
                    xx=xx$centers
                } else xx=median(t1,na.rm = T)
            } else xx=median(t1,na.rm = T)      
        } else xx=NA
        #try (if (is.na(xx)) xx=0:1, silent=T)
        #try (if (length(xx)==0) xx=0:1, silent=T)
        #regs$scores=c(regs$scores, min(xx)/max(xx) )                ## Allelic Imbalance Ratio = inner / outer cluster.
        #regs$het=c(regs$het, min(xx))                                ## $het and $hom are no longer in use.
        #regs$hom=c(regs$hom, max(xx))
        return( min(xx)/max(xx) )
    }
    #The new shiet
    if(allelePeaks == T) {
        return(1-max(kmeans(data,2)$centers))
    }
}
###
segment_DNAcopy <- function(Log2) {
    ## If segmentation is required, DNAcopy is a good choice. Must be installed. 
    #library(DNAcopy)
    # cat('..Using DNAcopy to create segments:\n')
    segs=NULL
    chroms=c(as.character(1:22),'X','Y')
    chroms=paste('chr',chroms,sep='')
    for (c in 1:24) { # segment chromosome
        tlog=Log2[Log2$Chromosome==chroms[c],]    ## Log-R of this chromosome
        if (nrow(tlog)>0) {                        ## (ChrY may be absent)
            cnaObject=segment(smooth.CNA(CNA(tlog$Value, rep(c,nrow(tlog)), tlog$Start, data.type='logratio',sampleid=paste('chr',c))), undo.splits='sdundo', undo.SD=1,verbose=0) 
            segs=rbind(segs,cnaObject$output)        ## Add result to data frame
        }
    }
    colnames(segs)=c('x','Chromosome','Start','End','Markers','Value')
    segs$Chromosome=chrom_ucsc(segs$Chromosome)    ## Adjust chromosome names to standard
    return(segs[,2:6]) 
}
###
readLog2 <- function() {
    ## This function reads Log-ratio from the file "probes.txt" which must be present in the current directory.
    # Log2=NULL
    # try( Log2 <- read.csv(file='probes.txt',header=T,sep='\t'), silent=T)
    # if (is.null(Log2)) {
    #     try( Log2 <- read.csv(file='_probes.txt',header=T,sep='\t'), silent=T)
    #     if (!is.null(Log2)) cat(' ..found _probes.txt')
    # }

    if(file.exists('probes.txt')) {
        Log2 <- read.csv(file='probes.txt',header=T,sep='\t')
    } else if(file.exists('_probes.txt')) {
        Log2 <- read.csv(file='_probes.txt',header=T,sep='\t')
    } else if(file.exists('raw.txt')) {
        Log2 <- read.csv(file='raw.txt',header=T,sep='\t')
        colnames(Log2)[4:5] <- c('Start','Value')
        Log2$Chromosome <- paste('chr',Log2$Chromosome,sep='')
        Log2$Chromosome[Log2$Chromosome == 'chr24'] <- 'chrX'
        Log2$Chromosome[Log2$Chromosome == 'chr25'] <- 'chrY'
    } else {
        print('No probes.txt found!')
    }

    Log2$Chromosome <- as.character(Log2$Chromosome)
    #c("Chromosome", "Start", "End", "Value", "Array")


    ## This code was used if Log-R must be read from .CNCHP file (Affymetrix Genotyping Console or APT). 
    ## NOT currently supported downstream as .CNCHP is lacks allele-specific information for Affy 250k/500k
    # if (is.null(Log2)) {   
    # dir=dir()
    # ix=grep('cnchp',dir,ignore.case=TRUE)
    # if (length(ix)==1)
    # cat(' ..found',dir[ix])
    # library(affxparser)
    # temp=readCcg(dir[ix])
    # temp=temp$dataGroups$MultiData$dataSets$CopyNumber$table
    # temp$Chromosome[temp$Chromosome==24]=23 # strange numbers in cnchp
    # temp$Chromosome[temp$Chromosome==25]=24
    # Log2 = data.frame('Chromosome'=chrom_ucsc(temp$Chromosome), 'Start'=temp$Position, 'End'=temp$Position, 'Value'=temp$Log2Ratio)
    # save.txt(Log2, file='_probes.txt')
    # cat(' ..wrote _probes.txt')
    # adif=NULL
    # try(
    # adif <- temp$Allele,
    # silent=T
    # )      
    # if (!is.null(adif)) {
    # ix= !is.nan(adif)
    # alf=data.frame('Chromosome'=chrom_ucsc(temp$Chromosome[ix]), 'Start'=temp$Position[ix], 'End'=temp$Position[ix], 'Value'=0.5+adif[ix])
    # save.txt(alf, file='_snps.txt')
    # cat(' ..wrote _snps.txt')
    # }
    # }
    return (Log2) 
}
###
readAlf <- function(localDir=NULL) {
    ## This funciton reads allele frequency [B/(A+B)] from the file 'snps.txt', which must be present in the current directory.
    # alf=NULL
    # try( alf <- read.csv(file='snps.txt',header=T,sep='\t'), silent=T)
    # if (is.null(alf)) {
    #     try( alf <- read.csv(file='_snps.txt',header=T,sep='\t'), silent=T)
    #     if (!is.null(alf)) cat(' ..found _snps.txt')
    # }

    if(file.exists('snps.txt')) {
        alf <- read.csv(file='snps.txt',header=T,sep='\t')
    } else if(file.exists('_snps.txt')) {
        alf <- read.csv(file='_snps.txt',header=T,sep='\t')
    } else if(file.exists('raw_snps.txt')) {
        alf <- read.csv(file='raw_snps.txt',header=T,sep='\t')
        colnames(alf)[c(4,6)] <- c('Start','Value')
        alf$Chromosome <- paste('chr',alf$Chromosome,sep='')
        alf$Chromosome[alf$Chromosome == 'chr24'] <- 'chrX'
        alf$Chromosome[alf$Chromosome == 'chr25'] <- 'chrY'
    } else {
        print('No snps.txt found!')
    }
    #c("Chromosome", "Start", "End", "Value", "Array")

    return (alf) 
}
###
readSegments <- function() {
    ## This function reads segment information in 'segments.txt' which must be present in the current folder.
    ## The author recommends SNP-rank segmentation (NEXUS) or another CBS such as that in DNACopy.
    ## Using a HMM is not recommended unless you have a homogenous, diploid sample. (And then there is more user-friendly software anyway.)
    segments=NULL
    if(file.exists('segments.txt')) {
        segments <- read.csv(file='segments.txt',header=T,sep='\t')
    }
    # try( segments <- read.csv(file='segments.txt',header=T,sep='\t'),silent=T)
    # if (!is.null(segments)) cat(' ..found segments.txt')
    if (is.null(segments)) { 
        # try( segments <- read.csv(file='_segments.txt',header=T,sep='\t'),silent=T)
        # if (!is.null(segments)) cat(' ..found _segments.txt')
    }
    if(!is.null(segments)) {
        segments$Chromosome <- as.character(segments$Chromosome)
    }
    return (segments)
}
###
chrom_n <- function(data) {
    out=rep(Inf,length(data))
    for (c in c(1:22)) {
        out[data==paste('chr',c, sep='')]=c
    }
    out[data=='chrX']=23
    out[data=='chrY']=24
    out[data=='chrM']=25
    return(out)
}
makeRegions <- function(Log2, alf, segments,dataType='Nexus',min=30,matched=FALSE,allelePeaks=F) {
    ## makeRegions is similar to "regsfromsegs" except regions are not subdivided before calculation of mean Log-R and Allelic Imbalance Ratio.
    #browser()
    regions=segments
    regions$Chromosome=as.character(segments$Chromosome)            ## Chromosome
    regions$lengthMB=round((regions$End-regions$Start)/1000000,3)    ## length in megabases
    regions$probes=0                                                ## # of probes
    regions$snps=0                                                    ## # of bi-allelic probes (SNPs)
    #regions$log2=round(regions$Value,4)
    regions$imba=NA                                                 ## Allelic Imbalance Ratio
    regionIx=NULL                                                   ## Not currently used
    regionIx$Log2 <- list()
    regionIx$alf <- list()
    
    pMatch=segMatch(regions,Log2)
    na=is.na(pMatch[,1])
    regions=regions[!na,]
    pMatch=pMatch[!na,]
    sMatch=segMatch(regions,alf)
    
    for (i in 1:nrows(regions)) {
        #log2temp=which(equals(Log2$Chromosome,regions$Chromosome[i])) ## index of Log-R (current chrom)
        #alftemp=which(equals(alf$Chromosome,regions$Chromosome[i]))    ## index of Allele frequency (current chrom)
        
        #log2temp=log2temp [Log2$Start[log2temp]>=regions$Start[i] & Log2$Start[log2temp]<regions$End[i]]    ## index of Log-R (current segment)
        #alftemp=alftemp [alf$Start[alftemp]>=regions$Start[i] & alf$Start[alftemp]<regions$End[i]]        ## index of Allele frequency (current segment)
        
        log2temp=Log2$Value[pMatch[i,1]:pMatch[i,2]]    ## index of Log-R (current segment)
        alftemp=numeric(0)
        if (!is.na(sMatch[i,1])) alftemp=alf$Value[sMatch[i,1]:sMatch[i,2]]        ## index of Allele frequency (current segment)
        
        regions$probes[i]=length(log2temp)
        regions$snps[i]=length(alftemp)
        #regionIx$Log2[[i]]=log2temp            ## indexes of Log-R and Allele Frequency are saved for future use (plot color coding)
        #regionIx$alf[[i]]=alftemp
        #log2temp=Log2$Value[log2temp]
        regions$log2[i]=median(log2temp, na.rm=T)
        #alftemp=alf$Value[alftemp]
        temp = NA
        try(temp <- allelicImbalance(alftemp,min,matched=matched,allelePeaks=allelePeaks),silent=T)
        regions$imba[i]  <- temp

        # if (length(alftemp)>3) {                ## Prepare to calculate Allelic Imbalance Ratio
        #     if (dataType=="Nexus") t1=sort( abs(alftemp-0.5) ) ## distance from middle (het), ascending
        #     if (dataType=="CNCHP") t1=sort( abs(alftemp)-0 ) ## This is not currently in use, was intended for analysis of SNP6 .CNCHP data.
        #     if (length(unique(t1))>5) { ## Avoid calculating Allelic Imbalance Ratio unless there are several different values to cluster
        #         xx=NA
        #         xx=try(kmeans(t1, 2), silent=T)            ## This part is nearly identical to that of 'regsfromsegs()'
        #         try( if (min(xx$size) > 0.05*max(xx$size)) {
        #             xx=xx$centers
        #         } else xx=NA, silent=T)    
        #     } else xx=NA
        #     try(regions$imba[i] <- round( min(xx)/max(xx) ,2), silent=T)
        # }
        # if(matched == T | is.na(regions$imba[i])) {
        #     regions$imba[i] <- 2*median(abs(alftemp-.5),na.rm=T)
        # }
    }
    return(list('regions'=regions,'regionIx'=regionIx))
}
###
equals <- function(a,b) {
    ## a helper function in case factors are compared
    a <- as.character(a)
    b <- as.character(b)
    return (a==b)
}
###
nrows <- function(data) dim(data)[1] ## don't ask me about this one.
###
load.txt <- function(file) { ## just to be rid of "sep"
    read.csv(file,sep='\t') 
}
###
save.txt <- function(data,file='temp', row.names=F, col.names=T, append=F) {   ## simplified for convenience
    write.table(data,file=file,sep='\t',quote=F,row.names=row.names, col.names=col.names, append=append)
}
###
deChrom_ucsc <- function(data) {
    ##  chroms in 1:24 and chr1:chrY format conversion
    if (length(data)==0) return (data)
    keep_index=NULL
    CHR=rep(0,length(data))
    existing=unique(data)
    for (i in 1:length(existing))   {
        temp=strsplit(as.character(existing[i]),'_')
        temp=strsplit(temp[[1]],'chr')[[1]][2]
        if (temp=='X') temp='23'
        if (temp=='_X') temp='23'
        if (temp=='Y') temp='24'
        CHR[data==existing[i]]=as.integer(temp)
    }
    return (CHR)
}
###
chrom_ucsc <- function(data) {
    ## same as above, backwards
    n=length(data)
    out=rep("",n)
    for (i in 1:n) {
        temp=as.character(data[i])
        if (temp=='23') temp='X'
        if (temp=='24') temp='Y'
        out[i]=paste('chr',temp, sep='')
    }
    return(out)
}
###
subplot <- function(x, y) viewport(layout.pos.col=x, layout.pos.row=y) ## for subplots
vplayout <- function(x, y) {        
    grid.newpage()
    pushViewport(viewport(layout=grid.layout(y,x)))
}
###
getSubdirs <- function() {
    ## Reach for the subdirectories of current directory.
    dir=dir()
    here=getwd()
    n=length(dir)
    subdir=F
    dir_ix=rep(FALSE,n)
    for (i in 1:n) {
        try(setwd(dir[i]), silent=T)
        if (getwd()!=here) { # in another directory
            setwd('..')
            subdir=T
            dir_ix[i]=T
        }
    }
    if (subdir) return (dir[dir_ix])
    return (NULL)
}
###
thisSubdir <- function() { 
    ## get the name of this subdirectory
    here=getwd()
    here=strsplit(here,'/')[[1]]
    here=here[length(here)]
    return(here)
}
###
mySorter <- function(data) {
    ## sort data on $Chromosome and $Start
    temp=data[order(data$Start),]
    t=deChrom_ucsc(temp$Chromosome)
    temp=temp[!is.na(t),]
    t=t[!is.na(t)]
    temp=temp[order(t),]
    return (temp)
}
###
probesInRegions <- function(regions,probes) {
    ## This function is not in use.
    rchr=as.character(regions$Chromosome)
    pchr=as.character(probes$Chr)
    nprobes=0
    
    for (i in 1:length(rchr)) {
        t=strsplit(rchr[i],'chr')[[1]][2]
        here=probes[(t==pchr),] #finding probe subset on this chr
        matching=(here$Position>=regions$Start[i]) & (here$Position<=regions$End[i])
        nprobes[i]=sum(matching)
    }
    return(nprobes)
}
###
getOverlap <- function(s1,e1,s2,e2) { ## This is not currently used anywhere - it is destined for group studies and pileups.
    return(max((min(e1,e2)-max(s1,s2)),0))
}
###
weightedMedian <- function(data,weights) {
    try( if (length(weights)==0) return (NULL), silent=T)
    try( return (median(rep(data,weights),na.rm=T)), silent=T)
    return (NULL)
}
weightedMean <- function(data,weights) {
    if (length(weights)==0) return (NULL)
    return (mean(rep(data,weights),na.rm=T))
}
###
myMeans <- function(data,k) { ## Not currently used
    best <- 0
    clus <- list()
    for (i in 1:10) {
        clus[[1]] <- kmeans(data,k)
        var <- var(clus[[1]]$centers)
        if (var>best) {
            ix <- i
            best <- var
        }
    }
    return (clus[[ix]])
}
is.even <- function(data) {
    return (trunc(data/2)*2 == data)
}
is.odd <- function(data) {
    return (trunc(data/2)*2 != data)
}
neighbour <- function(data) {
    n <- length(data)
    dif <- data[2:n]-data[1:(n-1)]
    return (mean(dif))
}
is.autosome <- function(vector) {
    temp <- deChrom_ucsc(vector)
    return (temp<23)
}

## 
###
findCNs <- function(Log2,alf,allRegions,regs,name=thisSubdir(),maxCn=10,ceiling=1,sampleInfo=NULL) {
    ## This function takes an estimate of the Log-R of copy numbers 1, 2 and 3. At least two of these should be entered.
    ## Then, the Log-R and Allelic Imbalance Ratio of all possible copy number variants up to maxCn are estimated from
    ## the Log-R and Allelic Imbalance Ratio of all the segments. 
    if (is.null(sampleInfo)) cat ('there was no estimation available for',name)
    
    cns=1:maxCn; est=sampleInfo[1:3]; est[est==' ']=NA; est=as.numeric(as.character(est))
    m <- lm(2^est ~ cns[1:3])$coefficients # can handle one NA in est. This model is for "ratio as a function of copy number".
    est[is.na(est)] = log2(m[1]+cns[which(is.na(est))]*m[2]) # simple linear regression to fill the missing
    
    tix=NULL     #temporary index
    probes=NULL  #number of probes at each copy number, for weighting
    int=NULL     ## contains Log-R estimate of each (total) copy number
    ai=NULL         ## contains Allelic Imbalance Ratio estimate of each copy number variant.
    regions <- allRegions$regions
    regions <- regions[(is.autosome(regions$Chromosome)&regions$lengthMB>1)&(!is.na(regions$imba)),] ## will use these regions
    
    ## likely cn2 regions sit near the estimate.
    expectedAt <- est[2]
    tix$cn2 <- abs(regions$log2 - expectedAt) < diff(est)[2]/3    ## index of likely cn2 regions
    temp <- regions[tix$cn2,]                                ## cn2 regions
    med <- weightedMedian(temp$log2,temp$probes)            ## improved value of Log-R at cn2 (returns NULL if theres nothing there)
    probes[2] <- sum(temp$probes)
    int[2] <- ifelse(!is.null(med),med,expectedAt)            ## saved to int.
    
    ## likely cn1 regions sit near estimate                                         
    expectedAt <- est[1]                                     ## cn1 is expected here
    tix$cn1 <- abs(regions$log2 - expectedAt) < diff(est)[1]/3        ## index of likely cn1 regions
    temp <- regions[tix$cn1,]
    med <- weightedMedian(temp$log2,temp$probes)
    probes[1] <- ifelse(is.null(med), 50, sum(temp$probes))
    int[1] <- ifelse(!is.null(med),med,expectedAt)
    
    ## likely cn3 regions sit near estimate
    expectedAt <- est[3]
    tix$cn3 <- abs(regions$log2 - expectedAt) < diff(est)[2]/3
    temp <- regions[tix$cn3,]
    med <- weightedMedian(temp$log2,temp$probes)
    probes[3] <- ifelse(is.null(med), 50, sum(temp$probes))
    int[3] <- ifelse(!is.null(med),med,expectedAt)
    
    ## cn4 follows at ...
    m <- lm(2^int[1:3] ~ cns[1:3], weights=probes[1:3])$coefficients ## use regression to estimate.
    expectedAt <- log2(m[1]+4*m[2])
    tix$cn4 <- abs(regions$log2 - expectedAt) < mean(diff(int))/3
    temp <- regions[tix$cn4,]
    med <- weightedMedian(temp$log2,temp$probes)
    probes[4] <- ifelse(is.null(med), 50, sum(temp$probes))
    int[4] <- ifelse(!is.null(med),med,expectedAt)
    
    ## generalized for higher cns
    for (cn in 5:maxCn) {
        thisCn <- paste('cn',cn,sep='')
        prevCn <- paste('cn',cn-1,sep='')
        pprevCn <- paste('cn',cn-2,sep='')
        m <- lm(2^int[1:(cn-1)] ~ cns[1:(cn-1)], weights=probes[1:(cn-1)])$coefficients
        expectedAt <- log2(m[1]+cn*m[2])
        tix[[thisCn]] <- abs(regions$log2 - expectedAt) < mean(diff(int))/5
        temp <- regions[tix[thisCn][[1]],]
        med <- weightedMedian(temp$log2,temp$probes)
        probes[cn] <- ifelse(is.null(med), 0, sum(temp$probes))
        int[cn] <- ifelse(!is.null(med),med,expectedAt)
    }
    
    ## likely cn0 regions sit below cn1 - delta:
    expectedAt <- log2(m[1]+0*m[2])
    tix$cn0 <- abs(regions$log2 - expectedAt) < 0.5*(int[2]-int[1])
    temp <- regions[tix$cn0,]
    med <- weightedMedian(temp$log2,temp$probes)
    int0 <- ifelse(!is.null(med),med,expectedAt) ## "int0"
    
    
    ## Estimate tumor dna content from intensity-cn relationship and average ploidy --UNRELIABLE
    md <- lm(2^int ~ cns, weights=probes)$coefficients
    m=NULL; m$intercept=md[1][[1]]; m$k=md[2][[1]]
    probes[is.na(probes)]=0
    m$meanCn <- mean(rep(cns, probes), na.rm=T)
    m$theoretical_delta=1/m$meanCn
    #m$real_delta=0.57*m$theoretical_delta ## The 0.57 is empirical from cancer cell lines    
    #m$dnafrac=m$k/m$real_delta
    #m$cellfrac=1/(1+m$meanCn/2*(1/m$dnafrac-1))    
    

    
    loh_exp <- as.numeric(sampleInfo[4])
    ## at cn2, find the variant clusters (normal and CNNLOH)
    ix <- (abs(regions$log2 - int[2]) < 0.2*(int[3]-int[2]) ) # taking only closely-matching segments
    data <- regions[ix,]
    data <- data[!is.na(data$imba),] # ...with a calculated allelic imbalance.
    ## 2m1
    expectedAt=0.1
    ix <- abs(data$imba-.1) < abs(data$imba-loh_exp)
    med <- weightedMedian(data$imba[ix],data$snps[ix]) ## Average allelic imbalance weighted on snp count
    ai$cn2m1 <- ifelse (!is.null(med),med,expectedAt)
    ## loh
    expectedAt=loh_exp
    ix <- !ix
    med <- weightedMedian(data$imba[ix],data$snps[ix]) 
    ai$cn2m0 <- ifelse (!is.null(med),med,expectedAt)
    
    
    ## for cn1 (and 0)
    ix <- (abs(regions$log2 - int[1]) < 0.2*(int[2]-int[1]) )
    data <- regions[ix,]
    data <- data[!is.na(data$imba),]
    expectedAt <- (ai$cn2m0+ai$cn2m1)*3/5     ## Decent estimate.
    med <- weightedMedian(data$imba,data$snps) ## Average allelic imbalance weighted on snp count
    ai$cn1m0 <- ifelse (!is.null(med),med,expectedAt)    ## Will be NA if there was no CNNLOH
    ai$cn0m0 <- NA #unimportant
    
    ## for cn3:
    ix <- (abs(regions$log2 - int[3]) < 0.2*(int[4]-int[3]) )
    data <- regions[ix,]
    data <- data[!is.na(data$imba),]
    
    if (!is.na(ai$cn2m0)) { # CNNLOH is not missing
        range <- ai$cn2m0-ai$cn2m1 #the distance between 2normal and CNNLOH
        # get the 3(1) regions: 
        expectedAt <- ai$cn2m1+range/3 # this is an approx if cn3m1 is absent.
        
        ix <- (data$imba<ai$cn2m0) & (data$imba>ai$cn2m1) # take regions with less AI than cn2m0 but more than cn2m1
        med <- weightedMedian(data$imba[ix],data$snps[ix]) # average allelic imbalance weighted on snp count
        ai$cn3m1 <- ifelse (!is.null(med),med,expectedAt)
        
        # now for cn3m0
        expectedAt <- ai$cn3m1 + range*0.9 # approx of cn3m0
        
        ix <- (abs(data$imba-expectedAt) / abs(data$imba-ai$cn3m1)) < 0.5  # take those much closer to exp than to cn3m1
        med <- weightedMedian(data$imba[ix],data$snps[ix]) 
        try (if (med<ai$cn2m0) med <- NULL, silent=T)    ## in case of heterogeneities or strange signals, we disallow this effect.
        ai$cn3m0 <- ifelse (!is.null(med),med,expectedAt)
    } else { ## (if CNNLOH was missing, use a different strategy.)
        data <- data[data$imba>ai$cn2m1,] # avoid 2(0) / 4(0) heterogeneity
        data <- data[data$imba<0.85,] # avoid highly-hom (clus-errors)
        #centers <- sort(kmeans(rep(data$imba,data$snps),2)$centers)## bad idea if either is missing! (common on low-mut samples)
        ix <- data[data$imba<0.4,]
        med <- weightedMedian(data$imba[ix],data$snps[ix])
        ai$cn3m1 <- ifelse (!is.null(med),med,ai$cn2m1+0.1)
        ix <- data[data$imba>0.4,]
        med <- weightedMedian(data$imba[ix],data$snps[ix])
        ai$cn3m0 <- ifelse (!is.null(med),med,ai$cn2m1+0.35)
        ai$cn2m0 <- ai$cn2m1+(ai$cn3m0-ai$cn3m1) # the CNNLOH estimate from CN3
        #print(cat('Warning: cn2m0 missing, estimated cn3m1,cn3m0 centers at',ai$cn3m1,ai$cn3m0,'\n'))
    }
    
    if (is.na(ai$cn1m0)) ai$cn1m0 <- (ai$cn3m1+ai$cn2m0)/2 ## If deletions were missing, place an estimate from cn3 
    
    ## now for cn4
    ix <- abs(regions$log2 - int[4]) < 0.2*(int[4]-int[3]) 
    data <- regions[ix,]
    data <- data[!is.na(data$imba),]
    
    # get the 4(2) regions: they are at AI about cn2m1
    expectedAt <- ai$cn2m1 # this is a good approx 
    
    ix <- data$imba<ai$cn3m1 # let all below cn3m1 in
    med <- weightedMedian(data$imba[ix],data$snps[ix]) 
    ai$cn4m2 <- ifelse (!is.null(med),med,expectedAt)
    if (ai$cn4m2 > ai$cn2m1) ai$cn4m2 <- ai$cn2m1 # don't let it sneak off.'
    
    data <- data[!ix,] # coutinue with the remaining
    # now 4(1) has less ai than 3(0) -> sits at about cn3m1+(cn3m1-cn2m1).
    expectedAt <- ai$cn3m1+(ai$cn3m1-ai$cn2m1)
    ix <- abs(data$imba-expectedAt)<abs(data$imba-ai$cn3m0) # take those closer to exp than to 3(0)
    med <- weightedMedian(data$imba[ix],data$snps[ix]) 
    ai$cn4m1 <- ifelse (!is.null(med),med,expectedAt)
    
    # now 4(0) is at about 3(0) + [0.9 - 3(0)] * [3(0)-2(0)] / [0.9-2(0)] 
    expectedAt <- ai$cn3m0 + (ceiling-ai$cn3m0)*(ai$cn3m0-ai$cn2m0)/(ceiling-ai$cn2m0)
    ix <- abs(data$imba-expectedAt) < 0.2*(expectedAt-ai$cn4m2) # we take those very close to exp
    med <- weightedMedian(data$imba[ix],data$snps[ix])
    try (if (med<ai$cn3m0) med <- NULL, silent=T)    ## in case of heterogeneities or strange signals, we disallow this effect.
    ai$cn4m0 <- ifelse (!is.null(med),med,expectedAt)
    
    ## generalization for higher copy numbers. it gets complicated.
    for (cn in 5:maxCn) {
        thisCn <- paste('cn',cn,sep='')
        prevCn <- paste('cn',cn-1,sep='')
        pprevCn <- paste('cn',cn-2,sep='')
        
        ix <- (abs(regions$log2 - int[cn]) < 0.2*(int[cn]-int[cn-1]) )
        data <- regions[ix,]
        data <- data[!is.na(data$imba),]
        data <- data[data$lengthMB>3,] # long regions for safety
        
        ## try to find variants, starting with LOH
        # LOH such as 5(0)
        mi <- 0
        thisVariant=paste(thisCn,'m',0,sep='')
        c4m0 <- ai[paste(prevCn,'m',mi,sep='')][[1]] # relative naming for clarity
        c3m0 <- ai[paste(pprevCn,'m',mi,sep='')][[1]]
        
        expectedAt <- ceiling-((ceiling-c4m0)*(ceiling-max(c4m0,c3m0))/(ceiling-min(c3m0,c4m0)))
        #ai[thisVariant] <- expectedAt
        # we then take those close to exp (within delta[3(0),4(0)])
        ix <- abs(data$imba-expectedAt) < abs(c4m0 - c3m0) 
        med <- weightedMedian(data$imba[ix],data$snps[ix]) 
        try (if (med<ai$cn4m0) med <- NULL, silent=T)    ## in case of heterogeneities or strange signals, we disallow this effect.
        ai[thisVariant] <- ifelse (!is.null(med),med,expectedAt)
        
        ## then from balanced to less balanced
        minorVariants=trunc(cn/2):1 
        first <- T
        for (mi in minorVariants) {
            thisVariant=paste(thisCn,'m',mi,sep='')
            if (mi==cn/2) {
                # We have balanced variant, so rather easy.
                expectedAt <- ai[paste(pprevCn,'m',mi-1,sep='')][[1]] # this is a good approx, balanced at cn-2
                ix <- data$imba<ai[paste(prevCn,'m',mi-1,sep='')][[1]] # let all below (cn-1, mcn-1) in
                med <- weightedMedian(data$imba[ix],data$snps[ix]) 
                ai[thisVariant] <- ifelse (!is.null(med),med,expectedAt)
                if (ai[thisVariant] > ai[paste(pprevCn,'m',mi-1,sep='')][[1]]) ai[thisVariant] <- ai[paste(pprevCn,'m',mi-1,sep='')][[1]] # don't let it sneak off.'
                first <- F
            } else if (first) { 
                # its not balanced but its the most balanced of the unbalanced. something like 5(2)
                expectedAt <- 0.5*( ai[paste(prevCn,'m',mi,sep='')][[1]] + ai[paste(pprevCn,'m',mi-1,sep='')][[1]] ) # that means between 4(2) and 3(1)
                ix <- abs(data$imba-expectedAt) < ( ai[paste(prevCn,'m',mi,sep='')][[1]] - ai[paste(pprevCn,'m',mi-1,sep='')][[1]] ) /3 # let all "between" 4(2) and 3(1) in
                med <- weightedMedian(data$imba[ix],data$snps[ix]) 
                ai[thisVariant] <- ifelse (!is.null(med),med,expectedAt)
                first <- F
            } else {
                # not the most balanced unbalanced variant, for example 5(1):
                expectedAt <- ai[paste(thisCn,'m',minorVariants[1],sep='')][[1]] + 
                    (minorVariants[1]-mi) * (ai[paste(thisCn,'m',0,sep='')][[1]] - 
                                                 ai[paste(thisCn,'m',minorVariants[1],sep='')][[1]]) / trunc(cn/2) # 5(2) + (which)* 5(0)-5(2) /(n)
                ix <- abs(data$imba-expectedAt) < ( ai[paste(prevCn,'m',mi,sep='')][[1]] - ai[paste(pprevCn,'m',mi-1,sep='')][[1]] ) /3 # let all "between" 4(1) and 3(0) in
                med <- weightedMedian(data$imba[ix],data$snps[ix]) 
                ai[thisVariant] <- ifelse (!is.null(med),med,expectedAt)
            } 
        } # done with minor variants
    } # done with copy numbers
    int=as.list(c(int0,int)); names(int)=paste('cn',0:maxCn,sep='')
    return(list('int'=int,'ai'=ai,'model'=m))
}



###
setCNs <- function(allRegions,int,ai,model,maxCn=12) {
    ##  Assign total and minor copy numbers to all segments.
    regions <- allRegions$regions[,-4]    ## This time, work on all segments available.
    
    Cn <- NULL            ## Total copy number
    mCn <- NULL            ## Minor allele copy number
    Cnx <- NULL        ## Variant label. ('cnXmY')
    
    intDist <- NULL        ## distance to certain Log-R
    imbaDist <- NULL    ## distance to certain allelic imbalance
    
    ## give each segment the best matching CN and mCN
    for (i in 1:nrow(regions)) {
        # set total copy number
        distance <- Inf
        for (cn in 0:maxCn) {
            t_int <- int[[paste('cn',cn,sep='')]]   ## get Log-R of particular cn from 'int'
            t_dis <- abs(regions$log2[i]-t_int)            ## distance to that particular cn
            try (if (t_dis < distance) {                        ## nearest so far, save.
                distance <- t_dis -> intDist[i]
                Cn[i] <- cn
            }, silent=T)
        }
    }        

    ### Calculate model based Cns
    Cnx=Cn; for (cn in 0:maxCn) {
        ix <- Cn==cn
        ix[is.na(ix)] <- F
        Cnx[ix]=(cn + (2^regions$log2[ix]-2^int[[paste('cn',cn,sep='')]])/model$k)
    }; Cnx[Cnx<0]=0
    Cn=round(Cnx)
    
    ## Set minor CN
    for (i in (1:nrow(regions))[!is.na(Cn)]) {
        # set minor CN
        distance <- Inf
        if (Cn[i]<=1) {
            mCn[i] <- 0
        } else if (Cn[i]<=maxCn & !is.na(regions$imba[i])) for (m in 0:trunc(Cn[i]/2)) {
            t_ai <- ai[paste('cn',Cn[i],'m',m,sep='')][[1]]
            t_dis <- abs(regions$imba[i]-t_ai)
            #cat('Line',i,'Cn', Cn[i],'m:',m,'Comparing',t_dis,'with',t_int,'.......')
            try( if (t_dis < distance) {
                distance <- t_dis -> imbaDist[i]
                mCn[i] <- m
            }, silent=T)
        } else mCn[i] <- NA
        #fullCN[i] <- paste('cn',Cn[i],'m',mCn[i],sep='') # Full description
    }
    ### Do not calculate mCn for high copy numbers
    mCn[Cn>maxCn] <- NA
    
    
    ## Special treatment for the X chromosome if male sample: work in progress
    #temp=regions[regions$Chromosome=='chrX' & regions$lengthMB>5,]
    
    
    regions$Cn <- Cn
    regions$mCn <- mCn
    regions$Cn_ <- round(Cnx,2)
    #regions$fullCN <- fullCN

    ## Merge consecutive identical variants
    data <- regions[,c(-7,-8,-11)]
    fullCN=paste(data$Cn,data$mCn,sep='m')
    row <- 1
    while (row < nrow(data)) { # while not on the last row
        #print(row)
        if (((data$Chromosome[row] == data$Chromosome[row+1]) & (fullCN[row] == fullCN[row+1])) & ( (data$Start[row+1]-data$End[row])<5000 ) ) { 
            ## segments are adjacent with same copy number and not separated by 5kb+ (centromere)
            data$End[row] <- data$End[row+1]
            data$probes[row] <- data$probes[row]+data$probes[row+1]
            data$snps[row] <- data$snps[row]+data$snps[row+1]
            data$lengthMB[row] <- data$lengthMB[row]+data$lengthMB[row+1]
            data <- data[-(row+1),]  # Merges and deletes row
        } else {
            row <- row+1 # or leaves it
        }
    }
    
    ### beräkna tumorcellshalt!!
    # Mha modellparameter
    ## empirical delta-R for ±one copy in diploid sample: 0.63 for Cyto/SNP6 and 0.48 for Illu
    model$k2=model$k*model$meanCn
    model$tumorDNA_fromLogR=model$k2/0.63
    model$tumorCell_fromLogR=1/ (1+model$meanCn/2* (1/model$tumorDNA_fromLogR -1))
    
    # Mha SNPs
    cn_near=c(floor(model$meanCn),ceiling(model$meanCn))
    ai_near=c(ai[[paste('cn',cn_near[1],'m0',sep='')]],ai[[paste('cn',cn_near[2],'m0',sep='')]])
    base_ai = ai_near[1] + (model$meanCn-trunc(model$meanCn))*diff(ai_near)
    
    #---The reference data (H1395):
    ref_ai=c(.8,.5,.33,.2,.1)
    ref_TumorCell=c(1,.7,.5,.3,0)
    ref_meanCn=2.5
    ref_tumorDNA=ref_TumorCell*ref_meanCn/(ref_TumorCell*ref_meanCn + (1-ref_TumorCell)*2)
    m2=lm(ref_tumorDNA ~ poly(ref_ai, 2, raw=TRUE))
    #---
    
    model$tumorDNA_fromAlf <- m2[[1]][[1]] + base_ai*m2[[1]][[2]] + base_ai^2*m2[[1]][[3]]
    model$tumorCell_fromAlf=1/ (1+model$meanCn/2* (1/model$tumorDNA_fromAlf -1))
    
    return (list('regions'=regions,'merged'=data,'model'=model))
}


karyotype_check <- function(chr,start,end,int,ai,Cn,mCn,t,name='') { #xlim=c(-1.02,1.02),ylim=0:1) {
    ## TAPS scatter plot of a full sample, used for visual quality control. 
    
    png(paste(name,'.karyotype_check.png',sep=''),width=1300,height=1300)
    
    #ideogram=getIdeogram()
    colors_p <- colorRampPalette(c("#6600FF","#9900CC"),space="rgb")
    colors_q <- colorRampPalette(c("#CC0099","#CC0000"),space="rgb")
    
    aix=!(is.na(mCn)|is.na(Cn))
    chr=chr[aix]
    start=start[aix]
    end=end[aix]
    int=int[aix]
    ai=ai[aix]
    Cn=Cn[aix]
    mCn=mCn[aix]
    pos <- (start+end)/2
    length=end-start
    labels=paste(Cn,mCn,sep='m')
    
    size=rep(1,length(chr))
    size[length>2000000]=2
    size[length>5000000]=3
    size[length>10000000]=4
    
    
    
    variants <- unique(labels)                ## The variants present in this data
    variants_data <- NULL    
    for (v in variants) {                            ## Extracts the Log-R and Allelic Imbalance Ratio of variants
        t_cn <- paste('cn',strsplit(v,'m')[[1]][1],sep='')
        t_int <- as.numeric(t$int[t_cn][[1]])
        t_ai <- as.numeric(t$ai[paste('cn',v,sep='')][[1]])
        variants_data <- rbind(variants_data,c(t_int,t_ai))
    }
    
    
    col <- rep('#B0B0B070',length(chr))
    
    plot(int,ai,
         pch=16,
         cex=c(size),
         cex.lab=2,
         mar=c(0.1,0.1,0.1,0.1),
         main = "",
         xlab = "",
         ylab = "",
         col = col,
         xlim=c(-1,2),ylim=c(0,1))
    
    text(variants_data[,1],variants_data[,2],
         labels=variants,
         col='black',
         cex=2
    )
    
    mtext("Allelic imbalance",side=2,line=2,cex=2)
    mtext("Average log-ratio",side=1,line=2.3,cex=2)
    mtext(name,side=3,line=1.5,cex=2)


    dev.off()
    
}

# getIdeogram <- function() {
#     c=1:24
#     chr=as.character(c)
#     chr[23:24]=c('X','Y')
#     chr=paste('chr',chr,sep='')
#     length=c(249250621, 243199373, 198022430, 191154276, 180915260, 171115067, 159138663, 
#              146364022, 141213431, 135534747, 135006516, 133851895, 115169878, 107349540, 
#              102531392,  90354753,  81195210,  78077248,  59128983,  63025520,  48129895,  
#              51304566, 155270560,  59373566)
#     start=c(121500000,  90500000,  87900000,  48200000,  46100000,  58700000,  58000000,  
#             43100000,  47300000,  38000000,  51600000,  33300000,  16300000,  16100000,  
#             15800000,  34600000,  22200000,  15400000,  24400000,  25600000,  10900000,  
#             12200000,  58100000,  11600000)
#     mid=c(1.25e+08, 9.33e+07, 9.10e+07, 5.04e+07, 4.84e+07, 6.10e+07, 5.99e+07, 4.56e+07, 
#           4.90e+07, 4.02e+07, 5.37e+07, 3.58e+07, 1.79e+07, 1.76e+07, 1.90e+07, 3.66e+07, 
#           2.40e+07, 1.72e+07, 2.65e+07, 2.75e+07, 1.32e+07, 1.47e+07, 6.06e+07, 1.25e+07)
#     end=c(128900000,  96800000,  93900000,  52700000,  50700000,  63300000,  61700000,  
#           48100000,  50700000,  42300000,  55700000,  38200000,  19500000,  19100000,  
#           20700000,  38600000,  25800000,  19000000,  28600000,  29400000,  14300000,  
#           17900000,  63000000,  13400000)
#     return(data.frame(c,chr,length,start,mid,end))
# }





## Pileups
pileup <- function(regions) { #, nsamples) {
    if (nrow(regions)==0) return(data.frame(Chromosome=integer(0),Start=integer(0),End=integer(0),Count=integer(0)))
    chrs <- unique(as.character(regions$Chromosome))
    if (length(chrs)>1) { # if more than one chrom, run them separately.
        pile=NULL
        for (i in 1:length(chrs)) pile <- rbind(pile,pileup(regions[as.character(regions$Chromosome)==chrs[i],]))
        #pile$Percent <- round(100*pile$Count/nsamples) # this is done once.
        return(pile[order(pile$Chromosome),])
    } else {
        c=as.character(regions$Chromosome[1])
        starts=data.frame(Chromosome=c,Start=regions$Start,End=regions$Start,type='start')
        ends=data.frame(Chromosome=c,Start=regions$End,End=regions$End,type='end')
        pile <- rbind(starts,ends)
        pile <- pile[order(pile$Start),]
        pile$Count <- 1
        
        for (i in 2:nrow(pile)) {
            pile$Count[i] <- pile$Count[i-1] + ifelse(pile$type[i]=='start',1,-1)
            pile$End[i-1] <- pile$Start[i]
        }
        pile <- pile[(pile$End > pile$Start) & (pile$Count > 0),]
        return(pile)
    }
}

pileup_dif <- function(regions1,regions2) {
    chrs <- unique(c(as.character(regions1$Chr),as.character(regions2$Chr)))
    if (length(chrs)==0) return(data.frame(Chromosome=integer(0),Start=integer(0),End=integer(0),Count1=integer(0),Count2=integer(0)))
    if (length(chrs)>1) { # if more than one chrom, run them separately.
        pile=NULL
        for (i in 1:length(chrs)) pile <- rbind(pile,pileup_dif(regions1[as.character(regions1$Chr)==chrs[i],], regions2[as.character(regions2$Chr)==chrs[i],]))
        #pile$Percent <- round(100*pile$Count1/n1 - 100*pile$Count2/n2) # this is done once.
        #pile <- pile[pile$Percent != 0,]
        return(pile[order(pile$Chr),])
    } else {
        c <- chrs
        starts1 <- starts2 <- ends1 <- ends2 <- NULL
        if (nrow(regions1)>0) {
            starts1=data.frame(Chromosome=c,Start=regions1$Start,End=regions1$Start,type='start',grp=1)
            ends1=data.frame(Chromosome=c,Start=regions1$End,End=regions1$End,type='end',grp=1)
        }
        if (nrow(regions2)>0) {
            starts2=data.frame(Chromosome=c,Start=regions2$Start,End=regions2$Start,type='start',grp=2)
            ends2=data.frame(Chromosome=c,Start=regions2$End,End=regions2$End,type='end',grp=2)
        }
        pile <- rbind(starts1,ends1,starts2,ends2)
        pile <- pile[order(pile$Start),]
        pile$Count1 <- ifelse(pile$grp[1]==1,1,0)
        pile$Count2 <- ifelse(pile$grp[1]==2,1,0)
        
        for (i in 2:nrow(pile)) {
            if (pile$type[i]=='start') {
                pile$Count1[i] <- pile$Count1[i-1] + ifelse(pile$grp[i]==1,1,0)
                pile$Count2[i] <- pile$Count2[i-1] + ifelse(pile$grp[i]==2,1,0)
            } else {
                pile$Count1[i] <- pile$Count1[i-1] - ifelse(pile$grp[i]==1,1,0)
                pile$Count2[i] <- pile$Count2[i-1] - ifelse(pile$grp[i]==2,1,0)
            }
            pile$End[i-1] <- pile$Start[i]
        }
        
        pile <- pile[(pile$End > pile$Start),]
        return(pile)
    }
}


pvals <- function(regions,tot1,tot2) {
    if (nrow(regions)==0) return(data.frame(Chromosome=integer(0),Start=integer(0),End=integer(0),Count1=integer(0),Count2=integer(0),
                                            percent1=integer(0),precent2=integer(0),precent=integer(0),p.value=integer(0),
                                            odds.ratio=integer(0),conf_low=integer(0),conf_high=integer(0)))
    p <- rep(1,nrow(regions))
    or <- rep(1,nrow(regions))
    conf_low <- rep(NA,nrow(regions))
    conf_high <- rep(NA,nrow(regions))
    for (i in 1:nrow(regions)) {
        temp <- matrix(c(regions$Count1[i],regions$Count2[i],tot1-regions$Count1[i],tot2-regions$Count2[i]),
                       nrow=2,
                       dimnames=list(c('rec','ej'),c('have','nohave')))
        temp <- fisher.test(temp)
        p[i] <- temp$p.value
        or[i] <- temp$estimate
        conf_low[i] <- temp$conf.int[1]
        conf_high[i] <- temp$conf.int[2]
    }
    regions$percent1 <- round(100*regions$Count1/tot1)
    regions$percent2 <- round(100*regions$Count2/tot2)
    regions$percent <- regions$percent1-regions$percent2
    regions$p.value <- p 
    regions$odds.ratio <- or
    regions$conf_low <- conf_low
    regions$conf_high <- conf_high
    return (regions)#[order(regions$p.value,decreasing=F),])
}

getGenes <- function(regs,genes) {
    n <- nrow(regs)
    res <- rep('NA',n)
    for (i in 1:n) {
        ix <- genes$Chromosome==regs$Chromosome[i]
        ix2 <- (genes$End>regs$Start[i])&(genes$Start<regs$End[i])
        ix <- ix & ix2
        temp <- genes$Name[ix]
        if (length(temp)==0) temp <- '-'
        temp <- paste(unique(temp),collapse=',')
        res[i] <- temp
    }
    return(res)
}

vectorMatch <- function(v1,v2) {
    v1 <- as.character(v1); v2 <- as.character(v2)
    n1 <- length(v1); n2 <- length(v2) ## vector lengths
    
    ix1 <- order(v1); ix2 <- order(v2) ## how to sort them
    ori1 <- (1:n1)[ix1]; ori2 <- (1:n2)[ix2] ## how to restore the sorted
    
    v1 <- v1[ix1]; v2 <- v2[ix2] ## now sort
    mix <- rep(NA,n1) ## for each in sorted1, which is it in original 2?
    i1=1; i2=1
    while((i1 <= n1) & (i2 <= n2)) {
        if (v1[i1]>v2[i2]) {
            i2 <- i2+1
        } else if (v1[i1]<v2[i2]) {
            i1 <- i1+1
        } else { ## we have a match. 
            mix[i1] <- ori2[i2]       ## mix[i1] must point to the original pos of i2, in vector 2.
            i1 <- i1+1
            i2 <- i2+1
        }
    }
    return (mix[order(ori1)])
}

pileup_pos <- function(chr,pos,names,chroms, binsize=5000000) {
    data1 <- data.frame(chr=as.character(chr),pos=as.integer(pos),name=names)
    data <- unique(data1[,1:2])
    
    allbins <- NULL
    #binsize <- 5000000 ###warning
    for (i in 1:24) {
        chr <- as.character(chroms$Chr[i])
        pos <- data$pos[data$chr==chr]
        breaks <- c(0,seq(0,chroms$length[i],binsize)+binsize)
        histo <- hist(pos,breaks=breaks,plot=F)
        start <- breaks[-length(histo$breaks)]
        end <- breaks[-1]
        mid=histo$mid
        count <- histo$count
        
        # now add number of samples that make up the count (UNFINISHED)
        for (j in 1:length(start)) { # for every segment
            ix <- pos>=start[j] & pos<=end[j] # which positions are in
            samples[j] <- length(unique(names[ix]))
        }
        
        allbins <- rbind(allbins,data.frame(chr,start,mid,end,count))
    }
    e <- 0 -> s
    for (i in 1:24) s[allbins$chr==chroms$Chr[i]] <- e[allbins$chr==chroms$Chr[i]] <- chroms$before[i]
    allbins$s <- s+allbins$start
    allbins$e <- e+allbins$end
    return(allbins)
}

pileup_copies <- function(regions, chroms=NULL) {
    chrs <- unique(as.character(regions$Chromosome))
    if (length(chrs)>1) { # if more than one chrom, run them separately.
        pile=NULL
        for (i in 1:length(chrs)) pile <- rbind(pile,pileup_copies(regions[as.character(regions$Chromosome)==chrs[i],],chroms))
        return(pile[order(pile$Chromosome),])
    } else {
        c=as.character(regions$Chromosome[1])
        starts=data.frame(Chromosome=c,Start=regions$Start,End=regions$Start,type='start', Cn=regions$Cn, mCn=regions$mCn)
        ends=data.frame(Chromosome=c,Start=regions$End,End=regions$End,type='end', Cn=regions$Cn, mCn=regions$mCn)
        pile <- rbind(starts,ends)
        pile <- pile[order(pile$Start),]
        cn <- c('cn0','cn1','cn2','cn3','cn4','cn5')
        try(pile$Cn[pile$Cn>5] <- 5, silent=T)
        pile$mCn[is.na(pile$mCn)] <- -1
        pile$cn0hom <- 0
        pile$cn1hom <- 0
        pile$cn2 <- 0   
        pile$cn3 <- 0
        pile$cn4 <- 0   
        pile$cn5 <- 0
        #pile$cn6 <- 0   
        pile$cn2hom <- 0   
        pile$cn3hom <- 0
        pile$cn4hom <- 0   
        pile$cn5hom <- 0
        #pile$cn6hom <- 0   
        
        n <- nrow(pile)
        thisCn <- paste('cn',pile$Cn[1],sep='')
        if (pile$mCn[1]==0) thisCn <- paste(thisCn,'hom',sep='')
        pile[[thisCn]][1:n] <- 1
        #pile$loh[1:n] <- ifelse(pile$mCn[1]==0,1,0)
        
        for (i in 2:n) {
            thisCn <- paste('cn',pile$Cn[i],sep='')
            if (pile$mCn[i]==0) thisCn <- paste(thisCn,'hom',sep='')
            pile[[thisCn]][i:n] <- pile[[thisCn]][i-1] + ifelse(pile$type[i]=='start',1,-1)            
            #if (pile$mCn[i]==0) pile$loh[i:n] <- pile$loh[i-1] + ifelse(pile$type[i]=='start',1,-1)
            pile$End[i-1] <- pile$Start[i]
        }
        pile <- pile[(pile$End > pile$Start),]
        if (!is.null(chroms)) {
            e <- 0 -> s
            for (i in 1:24) s[pile$Chromosome==as.character(chroms$Chr[i])] <- e[pile$Chromosome==as.character(chroms$Chr[i])] <- chroms$before[i]
            pile$s <- s+pile$Start
            pile$e <- e+pile$End
        }
        return(pile)
    }
}

addGenes <- function(data,genes) {
    if (nrow(data)==0) return(data)
    data$genes=''
    for (i in 1:nrow(data)) {
        ix=which(genes$chrom==as.character(data$Chromosome[i]) & genes$gtxStart<data$End[i] & genes$gtxEnd>data$Start[i])
        if (length(ix)>0) data$genes[i]=paste(sort(unique(genes$name2[ix])),collapse=', ')
    }
    return(data)
}

### Markus 2013.
### Function for summarizing alteration frquencies in X samples
TAPS_freq <- function(samples='all', outdir='frequencies', hg19=T) {
    #suppressPackageStartupMessages(library(xlsx))    
    #browser()
    sampleData <- load.txt('SampleData.csv')
    olddir <- getwd()
    if (!is.na(outdir)) {
        try(dir.create(outdir), silent=T)
        setwd(outdir)
    }
    
    subs=as.character(sampleData$Sample)
    
    if (is.null(subs)) {
        subs=thisSubdir()
        setwd('..')
    }
    
    if (samples[1]=='all') samples=rep(T,length(subs))
    if (is.logical(samples)) samples=which(samples)
    subs=subs[samples]
    nSamples=length(subs)
    
    ## Load and parse all samples
    samples <- meanCns <- NULL
    
    for (i in 1:nSamples) try( {
        #cat(subs[i],'\n')
        table <- load.txt(paste(olddir,'/',subs[i],'/',subs[i],'_segmentCN.txt',sep=''))
        table$n <- i
        table$name <- as.character(subs[i])    
        ix <- as.numeric(deChrom_ucsc(table$Chromosome)) <= 22
        table=table[!is.na(ix),]
        ix <- as.numeric(deChrom_ucsc(table$Chromosome)) <= 22
        table$meanCn <- meanCns[i] <- round(weightedMean(table$Cn_[ix], table$lengthMB[ix]),2)    
        samples <- rbind(samples,table) 
    }, silent=T)

    samples=samples[!is.na(samples$Cn),]
    regs=samples[samples$Chromosome!='chrY',]
    
    if (hg19) genes <- knownGene else genes <- knownGene_hg18
    genes$name2=genes$gAlias
    genes$chrom=genes$chr
    
    
    n=length(unique(regs$name))
    
    gaincolor='#007602'
    losscolor='#FF0000'
    lohcolor='#0015FF'
    relhighcolor='#00CE03'
    lowcolor='#FF1FE1'
    lowlohcolor='#0084FF'
    highgaincolor='#003300'
    homlosscolor='#660066'
    focalgaincolor='#333300'
    focallosscolor='#9933FF'
    
    ## Gain:
    sum_regionSet(chroms, chromData, genes,regs[regs$Cn>2,],n,comparison='Absolute Gain (>2 copies)',color=gaincolor)
    ## Loss:
    sum_regionSet(chroms, chromData, genes,regs[regs$Cn<2,],n,comparison='Absolute Loss (<2 copies)',color=losscolor)
    ## LOH
    aix=!is.na(regs$mCn)
    sum_regionSet(chroms, chromData, genes,regs[aix,][regs[aix,]$mCn==0,],n,comparison='LOH (minor=0)',color=lohcolor) 
    ## Relative Gain (more than 25% above the average)
    sum_regionSet(chroms, chromData, genes,regs[regs$Cn>regs$meanCn*1.25,],n,comparison='Relative Gain (>1.25*average)',color=relhighcolor)
    ## Relative Loss (up to 3/4 of average)
    sum_regionSet(chroms, chromData, genes,regs[regs$Cn<0.75*regs$meanCn,],n,comparison='Relative Loss (<0.75*average)',color=lowcolor)
    ## Low and LOH
    sum_regionSet(chroms, chromData, genes,regs[aix,][regs[aix,]$mCn==0 & regs[aix,]$Cn<0.75*regs$meanCn[aix],],n,comparison='Low&LOH',color=lowlohcolor)
    ## High Gain (more than 3 times the average)
    sum_regionSet(chroms, chromData, genes,regs[regs$Cn>regs$meanCn*3,],n,comparison='High Gain (>3*average)',color=highgaincolor)
    ## Homozygous Loss
    sum_regionSet(chroms, chromData, genes,regs[regs$Cn==0,],n,comparison='Homozygous Loss (0 copies)',color=homlosscolor)
    ## Focal Gain
    sum_regionSet(chroms, chromData, genes,regs[regs$Cn>regs$meanCn*2 & regs$lengthMB<1,],n,comparison='Focal Gain (<1Mb,>2*average)',color=focalgaincolor)
    ## Focal Loss
    sum_regionSet(chroms, chromData, genes,regs[regs$Cn<regs$meanCn*.67 & regs$lengthMB<1,],n,comparison='Focal Loss (<1Mb,<0.67*average)',color=focallosscolor)
    setwd(olddir)
}

### Generalized comparing function used by the above function 
sum_regionSet <- function(chroms, chromData, genes, 
                          regs, n, 
                          comparison='', 
                          color='#000000') {    
    
    p_cutoff <- 0.05
    freq_cutoff <- 20
    
    # Generate frequency plots            
    pile <- pileup(regs); pile$Percent <- 100*pile$Count / n
    save.txt(addGenes(pile,genes),file=paste(comparison,'frequencies','txt',sep='.'))
    
    ### Total frequency plot    
    #quartz(file=paste(comparison,'freq_dif',name1,name2,'png',sep='.'),width=15,height=4.5,dpi=300,type='png')
    png(filename=paste(comparison,'png',sep='.'),width=3000,height=800)
    ylim <- c( -15, 110)
    plot(1,1,type='n',
         bty='n', ann=T, mar=c(0,0,0,0), oma=c(0,0,0,0),
         main=paste(comparison,'frequency'), cex.main=2,
         xlab=NA,#'',#Genomic position (??????=significance)',
         ylab=NA,#'Alteration frequency difference (% units)', cex.lab=2,
         xlim = c(0,sum(as.numeric(chroms$length))),
         ylim = ylim,
         yaxt="n",
         xaxt="n"
         )
    #mtext(text="Allelic imbalance",side=2,line=4,las=3,cex=2)
    #mtext(text="Coverage, all segments",side=1,line=4,cex=2,las=1)
    
    if (nrow(pile)>0) {
        data <- pile
        data$e <- 0 -> data$s
        for (i in 1:23) data$s[data$Chromosome==as.character(chroms$Chr[i])] <- chroms$before[i] -> data$e[data$Chromosome==as.character(chroms$Chr[i])]
        data$s <- data$s+data$Start
        data$e <- data$e+data$End
        rect( 
            xleft=data$s, xright=data$e,
            ybottom=0,ytop=data$Percent,
            col=color,   
            border=NA,
        )
    }
    ## The chrom labels
    text(
        x=chroms$before+chroms$length/2,
        y= -15,
        cex=2,
        label=substr(chroms$chr,4,6)
    )
    
    ## The chromosomes
    rect(xleft=chromData$genomeStart,xright=chromData$genomeEnd,
         ybottom= -5-chromData$thickness,ytop= -5+chromData$thickness,
         col=paste(chromData$col,'80',sep=''),
         lty=0
    )
    
    ## The vertical lines
    segments(
        x0=c(chroms$before,sum(as.numeric(chroms$length))),x1=c(chroms$before,sum(as.numeric(chroms$length))),
        y0= -15,y1=100,                
        col='#000000',
        lwd=1
    )
    
    # Horizontal lines
    segments(
        x0=0,x1=sum(as.numeric(chroms$length)),
        y0=seq(0,80,10),y1=seq(0,80,10),                
        col='#00000070',
        lwd=1,lty=3
    )
    
    ## Y axis ticks
    axis(2,at=seq(0,100,20), 
         labels=abs(seq(0,100,20)), pos=0,
         cex.axis=2,tck=-0.01, las=2)
    axis(4,at=seq(0,100,20), 
         labels=abs(seq(0,100,20)), pos=sum(as.numeric(chroms$length)),
         cex.axis=2,tck=-0.01, las=2)
    dev.off()  
}

########### Group comparisons
TAPS_compare <- function(grp1, grp2, name1='1', name2='2', outdir='frequencies_comp', hg19=T) {

    #suppressPackageStartupMessages(library(xlsx))    

    sampleData=load.txt('SampleData.csv')
    
    subs=as.character(sampleData$Sample)
    
#     if (is.null(subs)) {
#         subs=thisSubdir()
#         subs=subs[subs!='frequencies' & subs!='frequencies_comp']
#         setwd('..')
#     }
    
    olddir <- getwd()
    if (!is.na(outdir)) {
        try(dir.create(outdir), silent=T)
        setwd(outdir)
    }
    

    
    if (is.logical(grp1)) grp1=which(grp1)
    if (is.logical(grp2)) grp2=which(grp2)
    if (is.numeric(grp1)) grp1=subs[grp1]
    if (is.numeric(grp2)) grp2=subs[grp2]
    grps=unique(c(grp1,grp2))
    
    ## Load and parse all samples
    samples <- meanCns <- NULL
    nSamples=length(grps)
    
    for (i in 1:nSamples) {
        table <- load.txt(paste(olddir,'/',grps[i],'/',grps[i],'_segmentCN.txt',sep=''))
        table$n <- i
        table$name <- as.character(grps[i])    
        ix <- as.numeric(deChrom_ucsc(table$Chromosome)) <= 22
        table$meanCn <- meanCns[i] <- round(weightedMean(table$Cn_[ix], table$lengthMB[ix]),2)    
        samples <- rbind(samples,table) 
    }
    
    samples=samples[!is.na(samples$Cn),]
    samples=samples[samples$Chromosome!='chrY',]
    
    if (hg19) genes <- knownGene else genes <- knownGene_hg18
    genes$name2=genes$gAlias
    genes$chrom=genes$chr
    
    regs1=samples[samples$name %in% grp1,]
    regs2=samples[samples$name %in% grp2,]
    n1=length(grp1)
    n2=length(grp2)
    
    gaincolor='#007602'
    losscolor='#FF0000'
    lohcolor='#0015FF'
    relhighcolor='#00CE03'
    lowcolor='#FF1FE1'
    lowlohcolor='#0084FF'
    highgaincolor='#003300'
    homlosscolor='#660066'
    focalgaincolor='#333300'
    focallosscolor='#9933FF'
    
    ## Gain:
    compare_regionSet(chroms, chromData, genes,regs1[regs1$Cn>2,],regs2[regs2$Cn>2,],n1,n2,comparison='Absolute Gain (>2 copies)',name1,name2,color=gaincolor)
    ## Loss:
    compare_regionSet(chroms, chromData, genes,regs1[regs1$Cn<2,],regs2[regs2$Cn<2,],n1,n2,comparison='Absolute Loss (<2 copies)',name1,name2,color=losscolor)
    ## LOH
    aix1=!is.na(regs1$mCn); aix2=!is.na(regs2$mCn)
    compare_regionSet(chroms, chromData, genes,regs1[aix1,][regs1[aix1,]$mCn==0,],regs2[aix2,][regs2[aix2,]$mCn==0,],n1,n2,comparison='LOH (minor=0)',name1,name2,color=lohcolor) 
    ## Relative Gain (more than 25% above the average)
    compare_regionSet(chroms, chromData, genes,regs1[regs1$Cn>regs1$meanCn*1.25,],regs2[regs2$Cn>regs2$meanCn*1.25,],n1,n2,comparison='Relative Gain (>1.25*average)',name1,name2,color=relhighcolor)
    ## Relative Loss (up to 3/4 of average)
    compare_regionSet(chroms, chromData, genes,regs1[regs1$Cn<0.75*regs1$meanCn,],regs2[regs2$Cn<0.75*regs2$meanCn,],n1,n2,comparison='Relative Loss (<0.75*average)',name1,name2,color=lowcolor)
    ## Low and LOH
    compare_regionSet(chroms, chromData, genes,regs1[aix1,][regs1[aix1,]$mCn==0 & regs1[aix1,]$Cn<0.75*regs1$meanCn[aix1],],regs2[aix2,][regs2[aix2,]$mCn==0 & regs2[aix2,]$Cn<0.75*regs2$meanCn[aix2],],n1,n2,comparison='Low&LOH',name1,name2,color=lowlohcolor)
    ## High Gain (more than 3 copies above the average)
    compare_regionSet(chroms, chromData, genes,regs1[regs1$Cn>regs1$meanCn*3,],regs2[regs2$Cn>regs2$meanCn*3,],n1,n2,comparison='High Gain (>3*average)',name1,name2,color=highgaincolor)
    ## Homozygous Loss
    compare_regionSet(chroms, chromData, genes,regs1[regs1$Cn==0,],regs2[regs2$Cn==0,],n1,n2,comparison='Homozygous Loss (0 copies)',name1,name2,color=homlosscolor)
    ## Focal Gain
    compare_regionSet(chroms, chromData, genes,regs1[regs1$Cn>regs1$meanCn*2 & regs1$lengthMB<1,],regs2[regs2$Cn>regs2$meanCn*2 & regs2$lengthMB<1,],n1,n2,comparison='Focal Gain (<1Mb,>2*average)',name1,name2,color=focalgaincolor)
    ## Focal Loss
    compare_regionSet(chroms, chromData, genes,regs1[regs1$Cn<regs1$meanCn*.67 & regs1$lengthMB<1,],regs2[regs2$Cn<regs2$meanCn*.67 & regs2$lengthMB<1,],n1,n2,comparison='Focal Loss (<1Mb,<0.67*average)',name1,name2,color=focallosscolor)
    setwd(olddir)
}

### Generalized comparing function: 
compare_regionSet <- function(chroms, chromData, genes, 
                              regs1, regs2, 
                              n1, n2, 
                              comparison='', 
                              name1='1', name2='2', 
                              color='#000000') {    
    p_cutoff <- 0.05
    freq_cutoff <- 0
    
    difcolor <- color
    color <- colorRampPalette(c("#FFFFFF",color),space="rgb")(4)[2]
    
    # Generate frequency plots            
    pile1 <- pileup(regs1); pile1$Percent <- 100*pile1$Count / n1
    pile2 <- pileup(regs2); pile2$Percent <- 100*pile2$Count / n2
    dif <- pvals(pileup_dif(regs1,regs2),n1,n2)
    
    save.txt(addGenes(dif,genes),file=paste(comparison,name1,name2,'txt',sep='.'))
    save.txt(addGenes(dif[dif$p.value<p_cutoff & abs(dif$percent)>freq_cutoff,],genes),file=paste('significant',comparison,name1,name2,'txt',sep='.'))
    
    
    ### Total and Differential frequency plot    
    #quartz(file=paste(comparison,'freq_dif',name1,name2,'png',sep='.'),width=15,height=4.5,dpi=300,type='png')
    png(filename=paste(comparison,'allFreqs',name1,name2,'png',sep='.'),width=3000,height=800)
    ylim <- c(-100, 110)
    plot(1,1,type='n',
         bty='n', ann=T, mar=c(0,0,0,0), oma=c(0,0,0,0),
         main=paste(comparison,' frequency difference, ', name1, ' (', n1, ') vs ', name2, ' (', n2, ')', sep=''), cex.main=2,
         xlab=NA,#'',#Genomic position (??????=significance)',
         ylab=NA,#'Alteration frequency difference (% units)', cex.lab=2,
         xlim = c(0,sum(as.numeric(chroms$length))),
         ylim = ylim,
         yaxt="n",
         xaxt="n"
         )
    #mtext(text="Allelic imbalance",side=2,line=4,las=3,cex=2)
    #mtext(text="Coverage, all segments",side=1,line=4,cex=2,las=1)
    mhcolor=colorRampPalette(c("#FFFFFF",color),space="rgb")(5)[3]
    
    if (nrow(pile1)>0) {
        ## Group 1
        data <- pile1
        data$e <- 0 -> data$s
        for (i in 1:23) data$s[data$Chromosome==as.character(chroms$Chr[i])] <- chroms$before[i] -> data$e[data$Chromosome==as.character(chroms$Chr[i])]
        data$s <- data$s+data$Start
        data$e <- data$e+data$End
        rect( 
            xleft=data$s, xright=data$e,
            ybottom=0,ytop=data$Percent,
            col=color,   
            border=NA,
        )
    }
    if (nrow(pile2)>0) {
        ## Group 2
        data <- pile2
        data$e <- 0 -> data$s
        for (i in 1:23) data$s[data$Chromosome==as.character(chroms$Chr[i])] <- chroms$before[i] -> data$e[data$Chromosome==as.character(chroms$Chr[i])]
        data$s <- data$s+data$Start
        data$e <- data$e+data$End
        rect( 
            xleft=data$s, xright=data$e,
            ybottom=0,ytop= -data$Percent,
            col=color,   
            border=NA,
        )
    }
    if (nrow(dif)>0) {
        ### The difference
        data <- dif
        data$e <- 0 -> data$s
        for (i in 1:23) data$s[data$Chromosome==as.character(chroms$Chr[i])] <- chroms$before[i] -> data$e[data$Chromosome==as.character(chroms$Chr[i])]
        data$s <- data$s+data$Start
        data$e <- data$e+data$End
        data <- data[order(data$percent,decreasing=T),]
        
        #difcolor <- colorRampPalette(c("#FFFFFF",color),space="rgb")(4)[2]
        
        rect( 
            xleft=data$s, xright=data$e,
            ybottom=0,ytop=data$percent,
            #ybottom=data$conf_low,ytop=data$conf_high,
            col=difcolor,   
            border=NA
        )
        
        ## The significant
        data=data[data$p.value<p_cutoff & abs(data$percent)>freq_cutoff,]
        if (nrow(data)>0) {
            rect( 
                xleft=data$s, xright=data$e,
                ybottom= -100,ytop=-95,
                col='#000000',   
                border=NA
            )
        }
        ## The OR
        #for (i in 1:23) {
        #    temp=data[data$Chr==as.character(chroms$chr[i]),]
        #    if (nrow(temp)>5) {
        #        temp$odds.ratio[temp$odds.ratio<1 & temp$odds.ratio>0]=1/temp$odds.ratio[temp$odds.ratio<1 & temp$odds.ratio>0]
        #        temp=temp[order(temp$odds.ratio,decreasing=T),][1,]
        #        text(x=chroms$before[i]+mean(temp$Start,temp$End), y=-105, cex=1.3, label=round(temp$odds.ratio,1))                
        #    }
        #}
    }
    axis(2,at=c(-97),#,-105), 
         labels=c('sign.'),#, 'BestOR'), 
         pos=0,
         cex.axis=1.5,tck=0,las=2)
    
    ## The chrom labels
    text(
        x=chroms$before+chroms$length/2,
        y= 105,
        cex=2,
        label=substr(chroms$chr,4,6)
    )
    
    ## The chromosomes
    rect(xleft=chromData$genomeStart,xright=chromData$genomeEnd,
         ybottom=95-chromData$thickness,ytop=95+chromData$thickness,
         col=paste(chromData$col,'80',sep=''),
         lty=0
    )
    
    ## The vertical lines
    segments(
        x0=c(chroms$before,sum(chroms$length)),x1=c(chroms$before,sum(chroms$length)),
        y0=-100,y1=100,                
        col='#000000',
        lwd=1
    )
    
    # Horizontal lines
    segments(
        x0=0,x1=sum(chroms$length),
        y0=seq(-80,80,10),y1=seq(-80,80,10),                
        col='#00000070',
        lwd=1,lty=3
    )
    segments(
        x0=0,x1=sum(chroms$length),
        y0=0,y1=0,                
        col='#00000090',
        lwd=1,lty=1
    )
    
    ## Y axis ticks
    axis(2,at=seq(-80,80,20), 
         labels=abs(seq(-80,80,20)), pos=0,
         cex.axis=2,tck=-0.01, las=2)
    axis(4,at=seq(-80,80,20), 
         labels=abs(seq(-80,80,20)), pos=sum(chroms$length),
         cex.axis=2,tck=-0.01, las=2)
    
    ## Group names
    mtext(c(paste(name1,'(%)'), '(dif)', paste(name2,'(%)')),2,at=c(65,0,-65),cex=2, col=c(color,mhcolor,color))
    
    dev.off()      
}



#------------------------------------------------------------
# Author: Sebastian DiLorenzo , 2013
#------------------------------------------------------------

#------------------------------------------------------------
# Added TAPS plot functionality
#------------------------------------------------------------

# #######  ##     ## ######## ########  ##     ## #### ######## ##      ## ########  ##        #######  ######## 
###     ## ##     ## ##       ##     ## ##     ##  ##  ##       ##  ##  ## ##     ## ##       ##     ##    ##    
###     ## ##     ## ##       ##     ## ##     ##  ##  ##       ##  ##  ## ##     ## ##       ##     ##    ##    
###     ## ##     ## ######   ########  ##     ##  ##  ######   ##  ##  ## ########  ##       ##     ##    ##    
###     ##  ##   ##  ##       ##   ##    ##   ##   ##  ##       ##  ##  ## ##        ##       ##     ##    ##    
###     ##   ## ##   ##       ##    ##    ## ##    ##  ##       ##  ##  ## ##        ##       ##     ##    ##    
# #######     ###    ######## ##     ##    ###    #### ########  ###  ###  ##        ########  #######     ##                                                            

OverviewPlot <- function(chr,start,end,int,ai,hg18,mchr,mpos,mval,schr,spos,sval,name='',xlim=c(-1,1.5),ylim=c(0,1),MAPD,MHOF)
{
    if(hg18==T)
        {
        chroms=chroms_hg18
        chromData=chromData_hg18
        chroms$length=as.numeric(chroms$length)
        }

    #ideogram=getIdeogram()
    colors_p <- colorRampPalette(c("#6600FF","#9900CC"),space="rgb")
    colors_q <- colorRampPalette(c("#CC0099","#CC0000"),space="rgb")
    
    ai[is.na(ai)]=0
    
    aix=ai!=0
    chr=chr[aix]
    start=start[aix]
    end=end[aix]
    int=int[aix]
    ai=ai[aix]
    pos <- (start+end)/2
    length=end-start
    
    size=rep(0.2,length(chr)); size[int<xlim[1]]=0; size[int>xlim[2]]=0
    pch=rep(20,length(chr)); pch[chr=='chrX']=4
    #size[length>2000000]=0.4
    #size[length>5000000]=0.6
    #size[length>10000000]=0.8
    
    #Define the name,dimensions and resolution of the plot.
    jpeg(paste(name,'_overview.jpg',sep=''),width=11.7,height=8.3,units="in",res=300)
    
    #split the plot into desired formation
    split.screen(figs=c(2,1)) #two rows, one column
    #This details the margins (left,right,bottom,top) of all the screens. 
    #See split.screen section of http://seananderson.ca/courses/11-multipanel/multipanel.pdf
    #for the most complete explanation.
    split.screen(as.matrix(data.frame(left=c(rep(seq(from=0.05,to=7*(0.9/8)+0.05,by=(0.9/8)),3)),
                                      right=c(rep(seq(from=(0.9/8)+0.05,to=8*(0.9/8)+0.05,by=(0.9/8)),3)),
                                      bottom=c(rep(2*(0.75/3)+0.15,8),rep((0.75/3)+0.15,8),rep(0.15,8)),
                                      top=c(rep(0.9,8),rep(2*(0.75/3)+0.15,8),rep((0.75/3)+0.15,8)))),1) #screen 1 becomes screen 3 - 26
    
    split.screen(as.matrix(data.frame(left=c(rep(0.015,3)),
                                      right=c(rep(0.987,3)),
                                      bottom=c(0.15,0.5,0.55),
                                      top=c(0.5,0.55,1))),2) #screen 2 become screen 27-29
    
    #Screen configuration overview
    #------------------------------------------------------------
    #|   3  |   4    |   5   |  6   |   7  |  8   |  9   |  10  |
    #------------------------------------------------------------
    #|  11  |   12   |   13  |  14  |   15 |  16  |  17  |  18  |
    #------------------------------------------------------------
    #|  19  |   20   |   21  |  22  |   23 |  24  |  25  |  26  |
    #------------------------------------------------------------
    #|                            29                            |
    #------------------------------------------------------------
    #|                            28                            |
    #------------------------------------------------------------
    #|                            27                            |
    #------------------------------------------------------------
    
    
    #------------------------------------------------------------
    #Top part of the plot
    #------------------------------------------------------------
    
    #nchr=24; if (sum(chr=='chrY')==0) nchr=23
    #Loop over and plot the 24 chromosomes
    for (c in 1:24) 
    {
        #Pick a chromosome
        this <- chroms[chroms$c==c,]
        #Extract that chromosomes information
        ix <- chr==as.character(this$chr)
        x <- chr=='chrX'
        #Add color depending on the length of this chromosome
        col <- rep('#B0B0B030',length(chr))       
        col[ix & (pos < this$mid)] <- paste(colors_p(sum(ix & (pos < this$mid))), '70', sep='')  
        col[ix & (pos > this$mid)] <- paste(colors_q(sum(ix & (pos > this$mid))), '70', sep='') 
        
        #Enter different screens to add whole genome plots
        screen(c+2)
        
        #Set marginals, outer marginals and mgp(which is for xlab,ylab,ticks and axis)
        par(mar = c(0, 0, 0, 0))
        par(oma = c(0,0,0,0))
        par(mgp =c(1,0.5,0))
        par(xpd=NA)
        
        #Plot whole genome with chromosome in question colored
        plot(c(int[!(ix|x)],int[ix]),c(ai[!(ix|x)],ai[ix]),
             pch=c(pch[!(ix|x)],pch[ix]),
             cex=c(size[!(ix|x)],size[ix]),
             main = "",
             xlab = '',
             ylab = "",
             col = c(col[!(ix|x)],col[ix]),
             xlim=xlim,ylim=c(0,1),
             axes=F)
        
        #grid lines
        segments(
            x0=c(-0.5,0,0.5),x1=c(-0.5,0,0.5),
            y0=-0.05,y1=1.035,                
            col='#D3D3D360',
            lwd=1)
        
        #Add boxes around the plot to distinguish the chromosomes further
        box()
        
        #Add chromosome number inside plot
        if(c == 23)
        {
            mtext("X", side = 3, line = -1, adj = 0.8, cex = 1)
        } else if(c == 24)
        {
            mtext("Y", side = 3, line = -1, adj = 0.8, cex = 1)  
        } else
        {
            mtext(c, side = 3, line = -1, adj = 0.8, cex = 1)
        }
        
        #If we are one of the chromosomes plotted to the left edge or bottom, add axis
        if(c %in% c(1,9,17)) axis(side=2,cex.axis=0.6,tck=-0.05,at=seq(from=0,to=0.8,by=0.2),las=1)
        if(c %in% c(seq(from=17,to=24,by=1))) axis(side=1,cex.axis=0.6,at=seq(from=-0.5,to=1,by=0.5))
        if(c %in% c(8,16,24)) axis(side=4,cex.axis=0.6,tck=-0.05,at=seq(from=0,to=0.8,by=0.2),las=1)
        
        #Add titles with sample name (name of folder containing sample) and date
        if(c==4)
        {
            mtext(name,side=3)
        }
        if(c==8)
        {
            mtext(paste('MAPD: ',MAPD,', MHOF: ',MHOF,'\n',format(Sys.time(),"%Y-%m-%d %H:%M")),side=3,cex=0.6,adj=0.95)
        }
        if(c==9)
        {
            mtext("Allelic imbalance",side=2,line=1.5)
        }
        if(c==20)
        {
            mtext("Average log-ratio",side=1,line=1.5)
        }
    }
    
    #------------------------------------------------------------
    #Middle - Signal
    #------------------------------------------------------------
    
    #Go to screen 29 for plotting
    screen(29)
    
    #Set marginals, outer marginals and mgp(which is for xlab,ylab,ticks and axis)
    par(mar = c(0, 0, 0, 0))
    par(oma = c(0,0,0,0))
    par(mgp =c(1,0.5,0))
    par(lend=1)
    
    #Remove chrY,M,XY
    mval[mchr=='chrXY'|mchr=='chrM']=NA    
    # mval[mchr=='chrY'|mchr=='chrXY'|mchr=='chrM']=NA    
    
    #Calculate previous distance of whole genome so the next chromsome is added
    #at the correct coordinate
    pre=rep(NA,24)
    for (c in 1:24)
    {
        this <- chroms[chroms$c==c,]
        if(nrow(this) == 0) next
        ix <- chr==as.character(this$chr)
        mix <- mchr==as.character(this$chr) #& mval>=(-2)
        six <- schr==as.character(this$chr)
        
        #Predefine ymin ymax and sequence between them
        ymin=floor(2*min(int,na.rm=T))/2
        if(ymin > -1)
        {
            ymin = -1
        }
        ymax=ceiling(2*max(int,na.rm=T)+1)/2
        if(ymax < 1)
        {
            ymax = 1
        }
        seqminmax=seq(-2,2,by=0.5)
        
        if(c>1)
        {
            prelength=prelength+chroms$length[chroms$c==(c-1)]
        } else
        {
            prelength=0
        }
        
        #position used for signal.
        mpos[mix] = mpos[mix] + prelength
        #Start and end used for colored segment placement (Adding whole preceding genome to each position)
        start[ix] = start[ix] + prelength
        end[ix] = end[ix] + prelength
        #pre used to position chromosome markers. Seen bottom of plot.
        pre[c] = prelength+(this$length/2)
        #spos used later for allelic imbalance plot. Same principle as mpos.
        spos[six] = spos[six] + prelength
    }
    
    #Only plot mval higher than -2
    #ix=mval>=(-2)
    #mpos = mpos[ix]
    #mval = mval[ix]
    
    #Remove every third value to make plotting more sparse in this overview. (saves space and time)
    ix = seq(3,to=length(mpos),by=3)
    mpos = mpos[-ix]
    mval = mval [-ix]
    
    #Plot signal over complete genome
    plot(mpos,mval,
         pch=20,
         cex=0.25,
         main='',
         xlab = "",
         ylab = "",
         col = '#00000003',
         xaxt="n",
         axes=F,
         ylim = c(-2,2),
         xlim = c(0,max(mpos))
    )
    
    #Add axis to the left & right of signal
    axis(side=2,tck=-0.025,at=seqminmax,cex.axis=0.6,pos=0,las=1)
    axis(side=4,tck=-0.025,at=seqminmax,cex.axis=0.6,pos=sum(chroms$length),las=1)
    mtext("Log-ratio",side=2,line=0)
    
    #Add grey segments
    segments(
        y0=c(-0.5,0,0.5),y1=c(-0.5,0,0.5),
        x0=0,x1=max(mpos),                
        col='#D3D3D360',
        lwd=1)
    
    #Add colored segment information for each chromosome, red to blue gradient.
    for(c in 1:24)
    {
        this <- chroms[chroms$c==c,]
        ix <- chr==as.character(this$chr)
        col <- rep('#B0B0B030',length(chr))       
        col[ix & (pos < this$mid)] <- paste(colors_p(sum(ix & (pos < this$mid))), '70', sep='')  
        col[ix & (pos > this$mid)] <- paste(colors_q(sum(ix & (pos > this$mid))), '70', sep='')
        col=rep('#000000',sum(ix))
        col[pos[ix] < this$mid] <- colors_p(sum(pos[ix] < this$mid))
        col[pos[ix] > this$mid] <- colors_q(sum(pos[ix] > this$mid))
        
        #Add segments
        segments(x0=start[ix],x1=end[ix],
                 y0=int[ix],y1=int[ix],                
                 col=col,
                 lwd=2)
    }
    
    #Add a bar between chromosomes to distinguish them
    segments(
        x0=c(chroms$before,sum(chroms$length)),x1=c(chroms$before,sum(chroms$length)),
        y0=-100,y1=100,                
        col='#000000',
        lwd=1)
    #Top and bottom bar
    #segments(
    #    x0=c(0,0),x1=c(max(mpos),max(mpos)),
    #    y0=c(ymin,ymax),y1=c(ymin,ymax),
    #    col='#000000',
    #    lwd=1)
    
    #------------------------------------------------------------
    #Cytobands
    #------------------------------------------------------------
    
    #Go to screen 28 for plotting
    screen(28)
    
    #Set marginals, outer marginals and mgp(which is for xlab,ylab,ticks and axis)
    par(mar = c(0, 0, 0, 0))
    par(oma = c(0,0,0,0))
    par(mgp =c(1,0.5,0))
    
    #Create a empty plot with the same ylim and xlim as the plot directly above it (signal) and below (AI)
    plot(0,0,xlab="",ylab="",main="",type="n",axes=F,xaxt="n",ylim=c(0,0.3),xlim=c(0,max(mpos)))
    
    
    mtext(text='',side=2,las=1,line=-1.2)
    
    #Add cytoband information as differently colored rectangles
    rect(xleft=chromData$genomeStart,xright=chromData$genomeEnd,
         ybottom=0.15-chromData$thickness*0.03,ytop=0.15+chromData$thickness*0.03,
         col=paste(chromData$col,'80',sep=''),
         lty=0)
    
    #Add a bar between chromosomes to distinguish them
    segments(
        x0=c(chroms$before[-1],sum(chroms$length)),x1=c(chroms$before[-1],sum(chroms$length)),
        y0=-100,y1=100,                
        col='#000000',
        lwd=1)
    
    #------------------------------------------------------------
    #Bottom - AI
    #------------------------------------------------------------ 
    
    #Go to screen 27
    screen(27)
    
    #Set marginals, outer marginals and mgp(which is for xlab,ylab,ticks and axis)
    par(mar = c(0, 0, 0, 0))
    par(oma = c(0,0,0,0))
    par(mgp =c(1,0.5,0))
    
    #Remove M,XY
    sval[mchr=='chrXY'|mchr=='chrM']=NA    
    
    #Index to avoid spos/sval values that are all the way at 0 or 1.
    ix = !(sval %in% c(0,1))
    
    #Plot allelic imbalance over whole genome
    plot(spos[ix],sval[ix],
         pch=20,
         cex=0.35,
         cex.axis=1,
         main='',
         xlab = "",
         ylab = "",
         col = '#00000003',
         xaxt="n",
         axes=F,
         ylim = c(0,1),
         xlim = c(0,max(mpos))
    )
    
    #Add axis to the left,right and below of AI. The below axis is the chromosome numbers 1-24.
    axis(side=2,tck=-0.04,at=seq(from=0,to=1,by=0.2),cex.axis=0.6,pos=0,las=1)
    axis(side=1,at=pre,pos=0,labels=c(seq(from=1,to=22),"X",'Y'),cex.axis=0.55,lty=0)#,tck=0,col.ticks='#00000000')
    axis(side=4,tck=-0.04,at=seq(from=0,to=1,by=0.2),cex.axis=0.6,pos=sum(chroms$length),las=1) #
    mtext("Allele frequency",side=2,line=0)
    mtext("Chromosomes",side=1,line=1.5,adj=0.4)
    
    #Add a bar between chromosomes to distinguish them
    segments(
        x0=c(chroms$before,sum(chroms$length)),x1=c(chroms$before,sum(chroms$length)),
        y0=0,y1=100,                
        col='#000000',
        lwd=1)
    
    
    #Close all the opened split.screens and release the figure
    close.screen(all.screens=T)
    dev.off()
    
    file.copy(paste(name,'_overview.jpg',sep=''),paste("../",name,'_overview.jpg',sep=''),overwrite=T)
    
}


###    ##    ###    ########  ##    ##  #######  ######## ##    ## ########  ######## 
###   ##    ## ##   ##     ##  ##  ##  ##     ##    ##     ##  ##  ##     ## ##       
###  ##    ##   ##  ##     ##   ####   ##     ##    ##      ####   ##     ## ##       
######    ##     ## ########     ##    ##     ##    ##       ##    ########  ######   
###  ##   ######### ##   ##      ##    ##     ##    ##       ##    ##        ##       
###   ##  ##     ## ##    ##     ##    ##     ##    ##       ##    ##        ##       
###    ## ##     ## ##     ##    ##     #######     ##       ##    ##        ######## 


# ######  ##     ## ########   #######  ##     ##  ######  
###    ## ##     ## ##     ## ##     ## ###   ### ##    ## 
###       ##     ## ##     ## ##     ## #### #### ##       
###       ######### ########  ##     ## ## ### ##  ######  
###       ##     ## ##   ##   ##     ## ##     ##       ## 
###    ## ##     ## ##    ##  ##     ## ##     ## ##    ## 
# ######  ##     ## ##     ##  #######  ##     ##  ######  

#Function for generating individual plots for TAPS_call. Gives whole genome, log-ratio, cytoband and allele frequency.
karyotype_chroms <- function(chr,start,end,int,ai,hg18,mchr,mpos,mval,schr,spos,sval,name='',xlim=c(-2,2),ylim=0:1,MAPD,MHOF)  
{   
    if(hg18==T)
    {
    chroms=chroms_hg18
    chromData=chromData_hg18
    }
    #Get ideogram  
    #ideogram=getIdeogram()
    
    #Set color gradient for p and q arm of chromosome
    colors_p <- colorRampPalette(c("#6600FF","#9900CC"),space="rgb")
    colors_q <- colorRampPalette(c("#CC0099","#CC0000"),space="rgb")
    
    #filter the data (remove NA) and set some standard parameters
    #ai[is.na(ai)]=0
    #aix=ai!=0
    #chr=chr[aix]
    #start=start[aix]
    #end=end[aix]
    #int=int[aix]
    #ai=ai[aix]
    pos <- (start+end)/2
    length=end-start
    
    #Set sizes for circles in whole genome plot
    size=rep(0.5,length(chr))#; size[chr=='chrX']=0.2
    #size[length>2000000]=2
    #size[length>5000000]=3
    #size[length>10000000]=4
    
    #Loop over each chromosome. (Not chromosomeY as of writing)     
    for (c in 1:24) 
    {
        
        #Select the chromosome
        this <- chroms[chroms$c==c,]
        #Skip current chromosome if it doesn't exist.
        if(!any(as.character(this$chr) == chr)) next()
        if(nrow(this) == 0) next
        
        #Initialize jpeg
        jpeg(paste(name,'_karyotype.',this$chr,'.jpg',sep=''),width=11.7,height=8.3,units="in",res=300)
        
        #split plot into desired formation
        split.screen(as.matrix(data.frame(left=c(rep(0.05,4)),
                                          right=c(rep(1,4)),
                                          bottom=c(0.05,0.20,0.25,0.50),
                                          top=c(0.20,0.25,0.40,0.95)))) #screen 1,2,3
        
        #Screen configuration overview
        #------------------------------------------------------------
        #|                                                          |
        #|                                                          |
        #|                                                          |
        #|                                                          |
        #|                            4                             |
        #|                                                          |
        #|                                                          |
        #|                                                          |
        #|                                                          |
        #------------------------------------------------------------
        #|                            3                             |
        #------------------------------------------------------------
        #|                            2                             |
        #------------------------------------------------------------
        #|                            1                             |
        #------------------------------------------------------------ 
        
        #------------------------------------------------------------
        #Top part of the plot
        #------------------------------------------------------------
        
        #index of the selected chromsome to the chr object
        ix <- chr==as.character(this$chr)
        
        notix1 = chr=="chrX"
        notix = !ix + notix1
        
        #Create an index of colors relating to the positions and lengths on this chromosome
        col <- rep('#46464620',length(chr))       
        col[ix & (pos < this$mid)] <- paste(colors_p(sum(ix & (pos < this$mid))), '70', sep='')  
        col[ix & (pos > this$mid)] <- paste(colors_q(sum(ix & (pos > this$mid))), '70', sep='')
        
        #Go to screen 4
        screen(4)

        #Set marginals, outer marginals and mgp(which is for xlab,ylab,ticks and axis)
        par(mar = c(0, 0, 0, 0))
        par(oma = c(0,0,0,0))
        par(mgp =c(0.5,0.25,0))
        
        if(c!=23)
        {
            #Whole genome overview plot
            #Note that we are plotting chrY in the background, despite the fact that it is not an "active" chromosome.
            plot(c(int[notix],int[ix]),c(ai[notix],ai[ix]),
                 pch=16,
                 cex=c(size[notix],size[ix]),
                 main = "",
                 xlab = "",
                 ylab = "",
                 axes=F, #Remove axis
                 col = c(col[notix],col[ix]),
                 xlim = xlim,
                 ylim = ylim)
        } else
        {
            plot(int[notix],ai[notix],
                 pch=16,
                 cex=size[notix],
                 main = "",
                 xlab = "",
                 ylab = "",
                 axes=F, #Remove axis
                 col = col[notix],
                 xlim = xlim,
                 ylim = ylim)
        }
        
        xix=chr=="chrX"
        if(c!=23)
        {
            points(int[xix],ai[xix],pch=4,cex=0.5,col='#46464620')
        } else
        {
            points(int[xix],ai[xix],pch=4,cex=0.5,col=col[ix]) 
        }
        
        #Insert Y and X axis
        axis(side=2,cex.axis=0.6,tck=0.925,col.ticks='#80808040',pos=xlim[1],at=seq(from=0,to=1,by=0.1),las=1)
        axis(side=1,cex.axis=0.6,tck=0.925,col.ticks='#80808040',pos=0,at=seq(from=xlim[1],to=xlim[2],by=0.1))
        
        #Titles,date/time and axis labels
        mtext(text="Average log-ratio",side=1,line=1,cex=1)
        mtext(text="Allelic imbalance",side=2,line=0,cex=1)
        mtext(paste(name,"\nChromsome ",c,sep=""),side=3)
        mtext(paste('MAPD: ',MAPD,', MHOF: ',MHOF,'\n',format(Sys.time(),"%Y-%m-%d %H:%M")),side=3,cex=0.6,adj=0.95)
        
        #------------------------------------------------------------
        #Middle - Signal
        #------------------------------------------------------------
        
        #Go to screen 3
        screen(3)
        
        #Set marginals, outer marginals and mgp(which is for xlab,ylab,ticks and axis)
        par(mar = c(0, 0, 0, 0))
        par(oma = c(0,0,0,0))
        par(mgp =c(0.5,0.25,0))
        par(lend=1)
        
        #Select the correct chromosome and remove stuff lower than -1 
        mix <- mchr==as.character(this$chr) #& mval>(-1)
        
        #Predefine ymin ymax and sequence between them
        ymin=floor(2*min(int,na.rm=T))/2
        if(ymin > -1)
        {
            ymin = -1
        }
        ymax=ceiling(2*max(int,na.rm=T)+1)/2
        if(ymax < 1)
        {
            ymax = 1
        }
        #ymin=floor(min(int[ix]))-0.5
        #if(ymin > -1)
        #{
        #    ymin = -1
        #}
        #ymax=ceiling(max(int[ix]))+0.5
        #if(ymax < 1)
        #{
        #    ymax = 1
        #}
        seqminmax=seq(-2,2,by=0.5)
        
        #Create an index of colors relating to the positions and lengths on this chromosome
        col=rep('#000000',sum(ix))
        col[pos[ix] < this$mid] <- colors_p(sum(pos[ix] < this$mid))
        col[pos[ix] > this$mid] <- colors_q(sum(pos[ix] > this$mid))
        
        #Plot log-ratio over position
        plot(mpos[mix],mval[mix],
             pch=20,
             cex=0.5,
             main = "",
             xlab = "",
             ylab = "",
             axes=F,
             col = '#00000005',
             xlim = c(0,this$length),
             ylim = c(-2,2))
        
        #Add colored segments based on the log-ratio data
        segments(x0=start[ix],x1=end[ix],
                 y0=int[ix],y1=int[ix],                
                 col=col,
                 lwd=2)
        
        #Add X and Y axis. The (side=4 and =1) axis is just a black line showing where the data ends
        # 2:left
        axis(side=2,tck=0.926,col.ticks='#80808040',at=seqminmax, #seq(from=-1,to=1.5,by=0.5),
             #labels=c("-1","-.5","0",".5","1","1.5"),
             cex.axis=0.6,pos=0,las=1)
        # 4: right
        axis(side=4,tck=0,pos=this$length,at=seqminmax,
             #labels=c("-1","-.5","0",".5","1","1.5"),
             cex.axis=0.6,las=1)
        # 1: below
        # axis(side=1,tck=1.041,col='#80808040',at=seq(5e6,this$length,by=5e6),labels=FALSE,cex.axis=0.6,pos=ymin)
        segments(x0=seq(5e6,this$length,by=5e6),x1=seq(5e6,this$length,by=5e6),y0=-2,y1=2,col='#80808040')
        #axis(side=1,tck=0,pos=ymin,
        #     cex.axis=0.6,las=1)
        # 3: top
        #axis(side=3,tck=0,pos=ymax,
        #     cex.axis=0.6,las=1)
        
        #Add Y axis label
        mtext("Log-ratio",side=2,line=-0.8)
        
        #------------------------------------------------------------
        #Cytobands
        #------------------------------------------------------------
        
        #Go to screen 2
        screen(2)
        
        #Set marginals, outer marginals and mgp(which is for xlab,ylab,ticks and axis)
        par(mar = c(0, 0, 0, 0))
        par(oma = c(0,0,0,0))
        par(mgp =c(0.5,0.25,0))
        
        #Create a empty plot with the same ylim and xlim as the plot directly above it (signal) and below (AI)
        plot(0,0,xlab="",ylab="",main="",type="n",axes=F,xaxt="n",ylim=c(0,0.3),xlim=c(0,max(mpos[mix])))
        
        #Add Y axis label
        mtext(text=this$chr,side=2,las=1,line=-1.2)
        
        #index of the chromData for this chromosome
        dix = chromData$chr == as.character(this$chr)
        
        #Add cytoband information as differently colored rectangles
        rect(xleft=chromData$chromStart[dix],xright=chromData$chromEnd[dix],
             #ybottom=0.15-chromData$thickness[dix]*0.03,ytop=0.15+chromData$thickness[dix]*0.03,
             ybottom=0,ytop=0.3,
             col=paste(chromData$col[dix],'99',sep=''),
             lty=0)
        
        #Att text annotation to cytobands
        for(j in 1:length(chromData$name[dix]))
        {
            #If the cytoband region is larger than 5 Mb, it can fit some text
            if(T) #(chromData$chromEnd[dix][j] - chromData$chromStart[dix][j]) > 4*10^6)
            {
                # Text is added at x position (all of previous chromosome) + (middle of region)
                # The "srt" flips the text 90 degrees
                text(x=(chromData$chromStart[dix][j]+((chromData$chromEnd[dix][j]-chromData$chromStart[dix][j])/2)),
                     y=0.15,labels=chromData$name[dix][j], srt=90,cex = 0.3,xpd=T)
            }
        }
        
        #------------------------------------------------------------
        #Bottom - AI
        #------------------------------------------------------------ 
        
        #Go to screen 1
        screen(1)
        
        #Set marginals, outer marginals and mgp(which is for xlab,ylab,ticks and axis)
        par(mar = c(0, 0, 0, 0))
        par(oma = c(0,0,0,0))
        par(mgp =c(0.5,0.25,0))
        
        #Index of correct chromsome and allele frequency not in either 0 or 1
        six <- schr==as.character(this$chr) & !(sval %in% c(0,1))
        
        #plot allele frequency over position
        plot(spos[six],sval[six],
             pch=20,
             cex=0.5,
             main = "",
             xlab = "",
             ylab = "",
             #yaxt="n",
             axes=F,
             col = '#00000010',
             xlim = c(0,this$length),
             ylim = c(0,1))
        
        #Add X and Y axis as well as a line to the rightmost of the data (side=4 and =1)
        axis(side=2,tck=0.926,col.ticks='#80808080',at=seq(from=0,to=1,by=0.2),cex.axis=0.6,pos=0,las=1)
        axis(side=4,tck=0,at=seq(from=0,to=1,by=0.2),cex.axis=0.6,pos=this$length,las=1)
        axis(side=1,tck=0.925,col='#80808000',col.ticks='#80808040',at=seq(5e6,this$length,by=5e6),
             labels=paste(seq(5,floor(this$length/1e6),5),'',sep=''),cex.axis=0.6,pos=0)
        #axis(side=1,tck=0.926,col='#80808040',at=seq(5e6,this$length,by=5e6),
        #     labels=FALSE,cex.axis=0.6,pos=ymin)
#         axis(side=1,tck=0,pos=0,at=c(0,max(mpos)),
#              cex.axis=0.6,las=1)
#         axis(side=3,tck=0,pos=1,at=c(0,max(mpos)),
#              cex.axis=0.6,las=1)
#         
        #Add X and Y label
        mtext("Allele frequency",side=2,line=-0.8)
        mtext("Position (Mb)",side=1,line=1)
        
        #Close all the opened split.screens and release the figure
        close.screen(all.screens=T)
        dev.off()
    }
}

###    ##    ###    ########  ##    ##  #######  ######## ##    ## ########  ######## 
###   ##    ## ##   ##     ##  ##  ##  ##     ##    ##     ##  ##  ##     ## ##       
###  ##    ##   ##  ##     ##   ####   ##     ##    ##      ####   ##     ## ##       
######    ##     ## ########     ##    ##     ##    ##       ##    ########  ######   
###  ##   ######### ##   ##      ##    ##     ##    ##       ##    ##        ##       
###   ##  ##     ## ##    ##     ##    ##     ##    ##       ##    ##        ##       
###    ## ##     ## ##     ##    ##     #######     ##       ##    ##        ######## 


# ######  ##     ## ########   #######  ##     ##  ######      ######  ##    ## 
###    ## ##     ## ##     ## ##     ## ###   ### ##    ##    ##    ## ###   ## 
###       ##     ## ##     ## ##     ## #### #### ##          ##       ####  ## 
###       ######### ########  ##     ## ## ### ##  ######     ##       ## ## ## 
###       ##     ## ##   ##   ##     ## ##     ##       ##    ##       ##  #### 
###    ## ##     ## ##    ##  ##     ## ##     ## ##    ##    ##    ## ##   ### 
# ######  ##     ## ##     ##  #######  ##     ##  ######      ######  ##    ## 

karyotype_chromsCN <- function(chr,start,end,int,ai,Cn,mCn,hg18,mchr,mpos,mval,schr,spos,sval,t,name='',xlim=c(-1.02,1.82),ylim=0:1, maxCn=8,parameters)  
{   
    if(hg18==T) {
        chroms=chroms_hg18
        chromData=chromData_hg18
    }
    #Get ideogram
    #ideogram=getIdeogram()
    
    #Set color gradient for p and q arm of chromosome
    colors_p <- colorRampPalette(c("#6600FF","#9900CC"),space="rgb")
    colors_q <- colorRampPalette(c("#CC0099","#CC0000"),space="rgb")
    
    #filter the data (remove NA) and set some standard parameters
    ai[is.na(ai)]=0
    aix=ai!=0
    chr=chr[aix]
    start=start[aix]
    end=end[aix]
    int=int[aix]
    ai=ai[aix]
    Cn=Cn[aix]
    mCn=mCn[aix]
    pos <- (start+end)/2
    length=end-start
    labels=paste(Cn[!is.na(mCn)],mCn[!is.na(mCn)],sep='m')
    
    #Set sizes for circles in whole genome plot
    size=rep(1,length(chr))
    size[length>2000000]=2
    size[length>5000000]=3
    size[length>10000000]=4
    
    # The variants present in this data
    variants <- unique(labels)
    variants_data <- NULL
    # Extracts the Log-R and Allelic Imbalance Ratio of variants    
    for (v in variants) 
    {
        t_cn <- paste('cn',strsplit(v,'m')[[1]][1],sep='')
        t_int <- as.numeric(t$int[t_cn][[1]])
        t_ai <- as.numeric(t$ai[paste('cn',v,sep='')][[1]])
        variants_data <- rbind(variants_data,c(t_int,t_ai))
    }
    
    #Loop over each chromosome. (Not chromosomeY as of writing)     
    for (c in 1:23) 
    {
        #Select the chromosome
        this <- chroms[chroms$c==c,]
        
        #Initialize jpeg
        jpeg(paste(name,'_karyotypeCN.',this$chr,'.jpg',sep=''),width=11.7,height=8.3,units="in",res=300)
        
        #split plot into desired formation
        split.screen(as.matrix(data.frame(left=c(0.05,rep(0.47,4)),
                                          right=c(0.45,rep(1,4)),
                                          bottom=c(0.25,0.63,0.42,0.37,0.17),
                                          top=c(0.75,0.83,0.62,0.42,0.37)))) #screen 1-5
        
        
        #Screen configuration overview
        #-------------------------------------------------------------
        #-------------------------------------------------------------
        #                             |                              |
        #                             |              2               |
        #                             |                              |
        #                             |------------------------------| 
        #                             |                              |
        #                             |              3               |
        #              1              |                              |
        #                             |------------------------------| 
        #                             |              4               |
        #                             |------------------------------|
        #                             |                              |
        #                             |              5               |
        #                             |                              |
        #-------------------------------------------------------------
        
        
        #------------------------------------------------------------
        #Left side of the plot (Whole genome)
        #------------------------------------------------------------
        #index of the selected chromsome to the chr object
        ix <- chr==as.character(this$chr)
        
        notix1 = chr=="chrX"
        notix = !ix + notix1
        
        #Create an index of colors relating to the positions and lengths on this chromosome
        col <- rep('#46464620',length(chr))       
        col[ix & (pos < this$mid)] <- paste(colors_p(sum(ix & (pos < this$mid))), '70', sep='')  
        col[ix & (pos > this$mid)] <- paste(colors_q(sum(ix & (pos > this$mid))), '70', sep='')
        
        #Go to screen 1
        screen(1)
        
        #Set marginals, outer marginals and mgp(which is for xlab,ylab,ticks and axis)
        par(mar = c(0, 0, 0, 0))
        par(oma = c(0,0,0,0))
        par(mgp =c(0.5,0.25,0))
        
        if(c!=23) {
            #Whole genome overview plot
            #Note that we are plotting chrY in the background, despite the fact that it is not an "active" chromosome.
            plot(c(int[notix],int[ix]),c(ai[notix],ai[ix]),
                 pch=16,
                 cex=c(size[notix],size[ix]),
                 main = "",
                 xlab = "",
                 ylab = "",
                 axes=F, #Remove axis
                 col = c(col[notix],col[ix]),
                 xlim = c(-1,1.5),
                 ylim = ylim)
        } else {
            plot(int[notix],ai[notix],
                 pch=16,
                 cex=size[notix],
                 main = "",
                 xlab = "",
                 ylab = "",
                 axes=F, #Remove axis
                 col = col[notix],
                 xlim = c(-1,1.5),
                 ylim = ylim)
        }
        
        xix=chr=="chrX"
        if(c!=23) {
            points(int[xix],ai[xix],pch=4,col='#46464680')
        } else {
            points(int[xix],ai[xix],pch=4,col=col[ix]) 
        }
        
        #Insert Y and X axis
        axis(side=2,cex.axis=0.6,tck=0.963,col.ticks='#808080',at=seq(from=0,to=1,by=0.2),las=1)
        axis(side=1,cex.axis=0.6,tck=0.963,col.ticks='#808080',at=seq(from=-1,to=1.5,by=0.1))
        
        #Add cn and mcn labels
        text(variants_data[,1],variants_data[,2],
             labels=variants,
             col='black',
             cex=0.75)
        
        #Titles and axis labels
        mtext(text="Average log-ratio",side=1,line=1,cex=1)
        mtext(text="Allelic imbalance",side=2,line=1,cex=1)
        
        #Check if name of sample is too long to be graphically pleasing and cut it if that is the case
        if(nchar(name)>12)
        {
            mtext(paste("Sample: ",substring(name,1,12),"\n Chromsome ",c,sep=""),side=3)
        }
        else
        {
            mtext(paste("Sample: ",name,"\n Chromsome ",c,sep=""),side=3)
        }
        
        #------------------------------------------------------------
        #Right side, Top (Total and minor copy numbers)
        #------------------------------------------------------------
        
        #Go to screen 2
        screen(2)
        
        #Set marginals, outer marginals and mgp(which is for xlab,ylab,ticks and axis)
        par(mar = c(0, 0, 0, 0))
        par(oma = c(0,0,0,0))
        par(mgp =c(0.5,0.25,0))
        par(lend=1)
        
        #Set colors for total and minor copynumber
        col=rep('#000000',sum(ix))
        col[pos[ix] < this$mid] <- colors_p(sum(pos[ix] < this$mid))
        col[pos[ix] > this$mid] <- colors_q(sum(pos[ix] > this$mid))
        
        #Empty plot for total and minor copynumbers
        plot(1,1,type='n',
             xlab='',
             ylab='',
             axes=F,
             xlim = c(0,this$length),
             ylim = c(-0.1,8.1))
        

        #Add minor copynumber segments
        segments(x0=start[ix],x1=end[ix],
                 y0=mCn[ix],y1=mCn[ix],                
                 col='#BBBBBB',
                 lwd=5)
        
        #Add total copynumber segments
        segments(x0=start[ix],x1=end[ix],
                 y0=Cn[ix],y1=Cn[ix],                
                 col=col,
                 lwd=5)

        
        #Add Y axis
        axis(side=2,tck=0.926,col.ticks='#808080',at=seq(0,8,by=1),cex.axis=0.6,pos=0,las=1)
        axis(side=4,labels=F,tck=0,pos=this$length)
        
        #Add Y label and date/time title
        mtext("Total and minor copynumber",side=2,line=0.3)
        mtext(format(Sys.time(),"%Y-%m-%d %H:%M"),side=3,cex=0.6,adj=0.95)
        
        #------------------------------------------------------------
        #Right side, Middle Top - Signal
        #------------------------------------------------------------
        #Go to screen 3
        screen(3)
        
        #Set marginals, outer marginals and mgp(which is for xlab,ylab,ticks and axis)
        par(mar = c(0, 0, 0, 0))
        par(oma = c(0,0,0,0))
        par(mgp =c(0.5,0.25,0))
        par(lend=1)
        
        #Select the correct chromosome and remove stuff lower than -1 
        mix <- mchr==as.character(this$chr) #& mval>(-1)
        
        #Predefine ymin ymax and sequence between them
        ymin=floor(min(int[ix]))-0.5
        if(ymin > -1)
        {
            ymin = -1
        }
        ymax=ceiling(max(int[ix]))+0.5
        if(ymax < 1)
        {
            ymax = 1
        }
        seqminmax=seq(ymin,ymax,by=0.5)
        
        #Create an index of colors relating to the positions and lengths on this chromosome
        col=rep('#000000',sum(ix))
        col[pos[ix] < this$mid] <- colors_p(sum(pos[ix] < this$mid))
        col[pos[ix] > this$mid] <- colors_q(sum(pos[ix] > this$mid))
        
        #Plot log-ratio over position
        plot(mpos[mix],mval[mix],
             pch=20,
             cex=0.5,
             main = "",
             xlab = "",
             ylab = "",
             axes=F,
             col = '#00000005',
             xlim = c(0,this$length),
             ylim = c(ymin,ymax))
        
        #Add colored segments based on the log-ratio data
        segments(x0=start[ix],x1=end[ix],
                 y0=int[ix],y1=int[ix],                
                 col=col,
                 lwd=4)
        
        #Add X and Y axis. The (side=4) axis is just a black line showing where the data ends
        axis(side=2,tck=0.926,col.ticks='#808080',at=seqminmax, #seq(from=-1,to=1.5,by=0.5),
             #labels=c("-1","-.5","0",".5","1","1.5"),
             cex.axis=0.6,pos=0,las=1)
        axis(side=4,labels=F,tck=0,pos=this$length)
        axis(side=1,tck=0.926,col.ticks='#808080',at=seq(5e6,this$length,by=5e6),
             labels=FALSE,cex.axis=0.6,pos=ymin)
        
        #Add Y axis label
        mtext("log-ratio",side=2,line=0.3)
        
        #------------------------------------------------------------
        #Right side, Middle Bottom - Cytobands
        #------------------------------------------------------------
        
        #Go to screen 4
        screen(4)
        
        #Set marginals, outer marginals and mgp(which is for xlab,ylab,ticks and axis)
        par(mar = c(0, 0, 0, 0))
        par(oma = c(0,0,0,0))
        par(mgp =c(0.5,0.25,0))
        
        #Create a empty plot with the same ylim and xlim as the plot directly above it (signal) and below (AI)
        plot(0,0,xlab="",ylab="",main="",type="n",axes=F,xaxt="n",ylim=c(0,0.3),xlim=c(0,max(mpos[mix])))
        
        #Add Y axis label
        mtext(text="Cytoband",side=2,las=1,line=-1,cex=0.8)
        
        #index of the chromData for this chromosome
        dix = chromData$chr == as.character(this$chr)
        
        #Add cytoband information as differently colored rectangles
        rect(xleft=chromData$chromStart[dix],xright=chromData$chromEnd[dix],
             #ybottom=0.15-chromData$thickness[dix]*0.03,ytop=0.15+chromData$thickness[dix]*0.03,
             ybottom=0,ytop=0.3,
             col=paste(chromData$col[dix],'99',sep=''),
             lty=0)
        
        #Att text annotation to cytobands
        for(j in 1:length(chromData$name[dix]))
        {
            #If the cytoband region is larger than 5 Mb, it can fit some text
            if((chromData$chromEnd[dix][j] - chromData$chromStart[dix][j]) > 4*10^6)
            {
                # Text is added at x position (all of previous chromosome) + (middle of region)
                # The "srt" flips the text 90 degrees
                text(x=(chromData$chromStart[dix][j]+((chromData$chromEnd[dix][j]-chromData$chromStart[dix][j])/2)),
                     y=0.15,labels=chromData$name[dix][j], srt=90,cex = 0.5,xpd=T)
            }
        }
        
        
        #------------------------------------------------------------
        #Right side, Bottom - AI
        #------------------------------------------------------------ 
        
        #Go to screen 5
        screen(5)
        
        #Set marginals, outer marginals and mgp(which is for xlab,ylab,ticks and axis)
        par(mar = c(0, 0, 0, 0))
        par(oma = c(0,0,0,0))
        par(mgp =c(0.5,0.25,0))
        
        #Index of correct chromsome and allele frequency not in either 0 or 1
        six <- schr==as.character(this$chr) & !(sval %in% c(0,1))
        
        #plot allele frequency over position
        plot(spos[six],sval[six],
             pch=20,
             cex=0.5,
             main = "",
             xlab = "",
             ylab = "",
             #yaxt="n",
             axes=F,
             col = '#00000010',
             xlim = c(0,this$length),
             ylim = c(0,1))
        
        #Add X and Y axis as well as a line to the rightmost of the data (side=4)
        axis(side=2,tck=0.926,col.ticks='#808080',at=seq(from=0,to=1,by=0.2),cex.axis=0.6,pos=0,las=1)
        axis(side=4,labels=F,tck=0,pos=this$length)
        axis(side=1,tck=0.925,col.ticks='#808080',at=seq(5e6,this$length,by=5e6),
             labels=paste(seq(5,floor(this$length/1e6),5),'',sep=''),cex.axis=0.6,pos=0)
        
        #Add X and Y label
        mtext("Allele frequency",side=2,line=0.3)
        mtext("Position (Mb)",side=1,line=1)

        #text describing the parameters used to generate the plot
        mtext(text=parameters,side=1,line=6,cex=0.7,adj=0.95)
        
        #Close all the opened split.screens and release the figure
        close.screen(all.screens=T)
        dev.off()
    }
}

#########    ###    ########   ######     ########  ########  ######   ####  #######  ##    ## 
#   ##      ## ##   ##     ## ##    ##    ##     ## ##       ##    ##   ##  ##     ## ###   ## 
#   ##     ##   ##  ##     ## ##          ##     ## ##       ##         ##  ##     ## ####  ## 
#   ##    ##     ## ########   ######     ########  ######   ##   ####  ##  ##     ## ## ## ## 
#   ##    ######### ##              ##    ##   ##   ##       ##    ##   ##  ##     ## ##  #### 
#   ##    ##     ## ##        ##    ##    ##    ##  ##       ##    ##   ##  ##     ## ##   ### 
#   ##    ##     ## ##         ######     ##     ## ########  ######   ####  #######  ##    ## 

#------------------------------------------------------------------------------------------------------
#------------------------------------------------------------------------------------------------------
#TAPS_region() ; a function for zooming in on a specific region and showing the regions genes.
#------------------------------------------------------------------------------------------------------
#------------------------------------------------------------------------------------------------------


TAPS_region <- function(directory=NULL,chr,region,hg18=F)
{
    if (is.null(directory))
    {
        cat("No directory supplied, using working directory.")
        directory = "."
        #cat("You have not assigned a directory containing one or more folders of samples for TAPS_plot to execute. \n")
        #cat("Example: \"/user/mysamples/\" or, to run it in your current working directory, \".\" \n")
        #directory = readline("Please supply such a directory now: ")
    }
    
    #Set working directory and make that directorys name the samplename. If it is too long, shorten it.
    setwd(directory)
    subs <- getSubdirs()
    if (is.null(subs)) 
    {                                         ## check samples = subdirectories or a single sample = current directory
        subs=thisSubdir()
        setwd('..')
    }
    
    #store chr value in nchr because i made the newbie mistake of having a second variable also
    #named chr.
    nchr = chr  
    
    #Loop over all subdirectories, should they exist.
    for (i in 1:length(subs)) try( 
{
    setwd(subs[i])
    name <- subs[i]
    cat(' ..plotting', subs[i],"\n")
    
    if(nchar(name)>12)
    {
        name = substring(name,1,12)
    }
    
    #From TAPS_plot():
    #save(regs,Log2,alf,segments,file="TAPS_plot_output.Rdata")  
    
    #Check if TAPS_plot_output.Rdata exists. It must for the plot function to run.
    #In this file are the objects needed to plot.
    if(file.exists("TAPS_plot_output.Rdata")==F)
    {
        cat("TAPS_region could not find a TAPS_plot_output.Rdata file in the sample directory. \n
       You must run TAPS_plot before TAPS_region. \n")
    }
    else
    {
        load("TAPS_plot_output.Rdata")
    }
    
    #If nchr has been given as character
    if(is.character(nchr) == T)
    {
        #If chr is longer than 2 characters ("chr1" but not "12")
        #take out the number and convert it to numeric
        if(nchar(nchr)>2)
        {
            c = strsplit(nchr,"chr")
            c = as.numeric(c[[1]][2])
        }
        #chr has been given as character but without "chr" infront. Convert to numeric.  
        else
        {
            c = as.numeric(nchr) 
        }
    }
    #else chr has been given as a numeric and can be used directly.
    else
    {
        c = nchr
    }
    
    #Rename parameters of TAPS_plot_output so we can re-use other plots as template.
    chr =regs$chr
    start = regs$start  
    end = regs$end
    int = regs$logs
    ai = regs$scores
    #ideogram=NULL
    mchr = as.character(Log2$Chromosome)
    mpos =  Log2$Start
    mval =Log2$Value
    schr = as.character(alf$Chromosome) 
    spos = alf$Start
    sval =alf$Value
    xlim=c(-1,2)
    ylim=c(0,1)
    
    #check if hg18 is true
    #then load hg18 knownGene list
    if(hg18==T)
    {
        kg = knownGene_hg18
        chroms = chroms_hg18
        chromData = chromData_hg18
    }
    else
    {
        kg = knownGene
    }
    
    
    #
    #Here is where pretty normal plotting procedure starts
    #
    
    #Get ideogram  
    #ideogram=getIdeogram()
    
    #Set color gradient for p and q arm of chromosome
    colors_p <- colorRampPalette(c("#6600FF","#9900CC"),space="rgb")
    colors_q <- colorRampPalette(c("#CC0099","#CC0000"),space="rgb")
    
    #filter the data (remove NA) and set some standard parameters
    ai[is.na(ai)]=0<