R/KC.R

Defines functions .getRegions .findfdrcutoff .varr .snr .makePermutations .comparativeKcPerms .comparativeKcSiggenes getSigRegionsCompKC compareSpmCollection idPoints plotScaleSpace .getSigRegions .getPeaks getSigSegments .samplePointMatrixOld .samplePointMatrix .permutedSpm .findCutoffByFdr .findPeaks .findPeaks_old findSigLevelFdr findSigLevelTrad calcSpmCollection calcSpm .convertCGHbase .add2spmc .spm2spmc .mirrorData .checkMirrorLocs

Documented in calcSpm calcSpmCollection compareSpmCollection findSigLevelFdr findSigLevelTrad getSigRegionsCompKC getSigSegments idPoints plotScaleSpace

#input functions

.checkMirrorLocs <- function(mirrorLocs, data){
	chroms <- unique(data@probeAnnotation@chromosome)
	nrChroms <- length(chroms)
	mirrored <- matrix(rep(0,(3*length(mirrorLocs))),ncol=3,nrow=length(mirrorLocs))
	chromNames <- attr(mirrorLocs, 'chromNames')
	
	for (i in chroms){
		chromIndex  <- which(chromNames == i)
		if(length(chromIndex ) == 0){
			chromIndex  = as.numeric(i)
			warning(paste('Could not find chromosome annotated as ',i,' (in dataset) in mirror locations object, using chromosome #', i, ' (annotated as ', chromNames[i], ' in mirror locations object) as an alternative'))
		}
		for(j in 1:length(mirrorLocs[[chromIndex ]])){
			max.maploc <- max(data@probeAnnotation@maploc[data@probeAnnotation@chromosome == i])
			if(mirrorLocs[[chromIndex ]][j] == 0){
				#print(paste('Mirroring at start of chromosome',i))
				mirrored[chromIndex ,1] <- mirrored[chromIndex ,1] + 1
			}
			if(mirrorLocs[[chromIndex ]][j] < max.maploc & mirrorLocs[[chromIndex]][j] > 0){
				#print(paste('Mirroring around position',mirrorLocs[[chromIndex]][j],'on chromosome',i))
				mirrored[chromIndex ,2] <- mirrored[chromIndex ,2] + 1
				
				#fix mirrorLocs ..
				if(j == length(mirrorLocs[[chromIndex ]])){
					warning(paste('Probe annotation AFTER chromosome ', i, ' end, adjusting mirror locations ..', sep=""), immediate.=TRUE)
					mirrorLocs[[chromIndex ]][j] <- max.maploc + 1
				}
			}
			if(mirrorLocs[[chromIndex ]][j] > max.maploc){
				#print(paste('Mirroring at end of chromosome',i))
				mirrored[chromIndex ,3] <- mirrored[chromIndex ,3] + 1
			}
		
		}
	}
	
	if(sum(mirrored[,3]) < nrChroms){
		print('Warning, not all chromosomes are mirrored at end!')
	}
	else{
		print('Mirror locations looking fine')
	}
	
	return(mirrorLocs)
}

.mirrorData <- function(data, mirrorLocs, mirrorLength=1500000){
  
  if(!is(data, "KcghDataSum")){stop("Need KcghDataSum object as input")}

	cc <- new("KcghDataSum")
	data <- new("KcghDataMirror", mirrorLocs, data)
	data <- sort(data)
		
	dataChromosomes <- unique(data@probeAnnotation@chromosome)
	chromNames <- attr(mirrorLocs, 'chromNames')
	
	for (i in dataChromosomes){
		#data should be ordered!
		
		current.data.index <- which(data@probeAnnotation@chromosome == i)
		cc@probeAnnotation <- data@probeAnnotation[current.data.index]

		cc@pos <- data@pos[current.data.index]
		cc@neg <- data@neg[current.data.index]
		
		first.probe.index <- current.data.index[1]
		new.first.probe.name <- paste(data@probeAnnotation@name[first.probe.index], 'first', i)
		data@probeAnnotation@name[first.probe.index] <- new.first.probe.name
		
		last.probe.index <- current.data.index[length(current.data.index)]
		new.last.probe.name <- paste(data@probeAnnotation@name[last.probe.index], 'last', i)
		data@probeAnnotation@name[last.probe.index] <- new.last.probe.name
		
		
		chromIndex  <- which(chromNames == i)
		if(length(chromIndex ) == 0){
			chromIndex  = as.numeric(i)
			warning(paste('Could not find chromosome',i,'in mirror locations object, using chromosome #', i, 'as an alternative'))
			data@probeAnnotation@chromosome[data@probeAnnotation@chromosome == i] <-  chromNames[chromIndex]
		}
		
		for(j in 1:length(mirrorLocs[[chromIndex ]])){
		
			#print(paste('At:', i, j))
			cm <- mirrorLocs[[chromIndex ]][j]
			
			#reset mirrorLength
			mirrorLength.current <- mirrorLength
			
			if(sum(cc@probeAnnotation@maploc < cm)){
				last.probe.before.index <- max(which(cc@probeAnnotation@maploc < cm))
				last.probe.before <- cc@probeAnnotation@maploc[last.probe.before.index]
			}
			else{
				#print('No probes before')
				last.probe.before.index <- 0
				last.probe.before <- 0
			}
			
			if(last.probe.before.index == length(cc@pos)){
				#print('No probes after')
				first.probe.after <- last.probe.before
			}
			else{
				first.probe.after <- cc@probeAnnotation@maploc[last.probe.before.index + 1]
				
				#if mirror probes around centromere would influence probes on other side of centromere, adjust mirrorLength
				if((cm > 0) & ((last.probe.before + mirrorLength.current) > first.probe.after)){
					mirrorLength.current <- first.probe.after - last.probe.before
					#print(paste("Yup, at: ", i , ',', j))
					#print(paste('Changed to', mirrorLength.current))
				}
			}
			
			#print(paste('before:', last.probe.before, 'after:', first.probe.after))
			
			#first calculate position of to be mirrored probes
			left.mirror.probes <- which((cc@probeAnnotation@maploc < last.probe.before) & (cc@probeAnnotation@maploc > (last.probe.before - mirrorLength.current)))
			right.mirror.probes <- which((cc@probeAnnotation@maploc > first.probe.after) & (cc@probeAnnotation@maploc < (first.probe.after + mirrorLength.current)))

			#now add maplocs to data maploc

			#adjust maploc positions
			maplocs.left <- last.probe.before + (last.probe.before - cc@probeAnnotation@maploc[left.mirror.probes])
			maplocs.right <- first.probe.after - (cc@probeAnnotation@maploc[right.mirror.probes] - first.probe.after)
			maplocs <- c(maplocs.left, maplocs.right)
			
			data@probeAnnotation@chromosome <- c(data@probeAnnotation@chromosome, rep(chromNames[chromIndex], length(maplocs)))
			data@probeAnnotation@maploc <- c(data@probeAnnotation@maploc, as.integer(maplocs))
			
			virtualNames <- vector()
			if(length(left.mirror.probes) > 0){
				virtualNames <- c(virtualNames, paste(cc@probeAnnotation@name[left.mirror.probes], 'virtual left',i))
			}
			if(length(right.mirror.probes) > 0){
				virtualNames <- c(virtualNames, paste(cc@probeAnnotation@name[right.mirror.probes], 'virtual right',i))
			}

			data@probeAnnotation@name <- c(data@probeAnnotation@name, virtualNames)
			
			data@pos <- c(data@pos, cc@pos[left.mirror.probes], cc@pos[right.mirror.probes])
			data@neg <- c(data@neg, cc@neg[left.mirror.probes], cc@neg[right.mirror.probes])
		}
	}

	data <- sort(data)
	data@mirrorLocs <- mirrorLocs

	return(data)
}

.spm2spmc <- function(spm){
  if(!is(spm, "samplePointMatrix")){stop("Need samplePointMatrix object as input")}
    
	sampleDensity <- spm@sampleDensity
	mirrorLocs <- spm@mirrorLocs

	chromosomes <- names(spm@data)
	chromNames <- attr(mirrorLocs, 'chromNames')
	chromosomesOrdered <- chromNames[chromNames %in% chromosomes]
	chromosomesOrdered <- c(chromosomesOrdered, chromosomes[!(chromosomes %in% chromNames)])
	chromosomes <- chromosomesOrdered

	spmCollection <- new("spmCollection")
	spmCollection@mirrorLocs <- mirrorLocs
	spmCollection@sigma <- spm@sigma
	spmCollection@sampleDensity <- spm@sampleDensity
	spmCollection@data <- matrix(ncol=1, nrow=0)
	tmpAnnotation <- vector(mode="list")
	
	for(i in chromosomes){	
		#browser()	
		spmCollection@data <- rbind(spmCollection@data, matrix(as.numeric(spm[[i]]$pos) + as.numeric(spm[[i]]$neg)))
		
		nrSamplePoints <- length(spm[[i]]$pos)
		samplePointPositions <- seq(1, (nrSamplePoints*sampleDensity), by=sampleDensity)
		tmpAnnotation$chrom <- c(tmpAnnotation$chrom, rep(i, nrSamplePoints))
		tmpAnnotation$maploc <- c(tmpAnnotation$maploc, as.numeric(samplePointPositions))
	}

	spmCollection@annotation <- new("probeAnnotation", tmpAnnotation$chrom, tmpAnnotation$maploc)
	
	
	return(spmCollection)
}

.add2spmc <- function(spmc, spm){
	chromosomes <- unique(spmc@annotation@chromosome)
	tempData <- matrix(ncol=1, nrow=0)
	
	for(i in chromosomes){		
		tempData <- rbind(tempData, matrix(as.numeric(spm[[i]]$pos) + as.numeric(spm[[i]]$neg)))
	}
	
	spmc@data <- cbind(spmc@data, tempData)
	
	return(spmc)
}

.convertCGHbase <-function(cghBase){
	#convert cghRaw to internal data representation
	#get midpositions
	maploc <- apply(cbind(CGHbase::bpstart(cghBase), CGHbase::bpend(cghBase)),1,function(x)return(round(mean(x))))
	tData <- data.frame(chrom=CGHbase::chromosomes(cghBase), maploc=maploc, data=CGHbase::copynumber(cghBase))
	row.names(tData) <- Biobase::featureNames(cghBase) 
	KCGHdata <- new("KcghData", tData)
	return(KCGHdata)	
}

#wrapper functions
calcSpm <- function(data, mirrorLocs, sigma=1000000, sampleDensity=50000, maxmem=1000, verbose=T, old=F){

	#do checks
	#is it CGHbase data?
	if(is(data, "cghRaw")){
		data <- .convertCGHbase(data)
	}

	if(!is(data, "KcghData")){
		data <- new("KcghData", data)
	}
	
	mirrorLocs <- KCsmart:::.checkMirrorLocs(mirrorLocs, data)
	mirrorLength <- sigma * 4
	
	print("Splitting data ..")
	data <- new("KcghDataSplit", data)
	
	print("Summing data ..")
	data <- new("KcghDataSum", data)
	
	print("Mirroring data ..")
	data <- KCsmart:::.mirrorData(data, mirrorLocs, mirrorLength)

	print("Calculating sample point matrix ..")
	# spm <- KCsmart:::.samplePointMatrix(data, sampleDensity=sampleDensity, sigma=sigma, maxmem=maxmem, verbose=verbose)
  
  if (!old) {
    spm <- KCsmart:::.samplePointMatrix(data, sampleDensity=sampleDensity, sigma=sigma, maxmem=maxmem, verbose=verbose)
  }
  else {
    spm <- KCsmart:::.samplePointMatrixOld(data, sampleDensity=sampleDensity, sigma=sigma, maxmem=maxmem, verbose=verbose)
  }

	rm(data)

	print('Done')
	return(spm)
}

calcSpmCollection <- function(data, mirrorLocs, cl=NULL, data2=NULL, sigma=1000000, sampleDensity=50000, maxmem=1000, verbose=F, doChecks=T, old=F){

  # Variables
  if(is(data, "cghRaw")){
    data <- KCsmart:::.convertCGHbase(data)
  }

  if(!is(data, "KcghData")){
    data <- new("KcghData", data)
  }
  nrSamples <- ncol(data@data)
  mirrorLength <- sigma * 4
  
  data <- new("KcghDataSplit", data)

  
	if(doChecks){
	
		if(is.null(data2) & is.null(cl)){
			stop('Please provide a default class vector or a second data set')
		}
	
		#checks for 2nd data set
		if(!is.null(data2)){
			if(is(data2, "cghRaw")){
				data2 <- KCsmart:::.convertCGHbase(data2)
			}
	
			if(!is(data2, "KcghData")){
				data2 <- new("KcghData", data2)
			}
			if(!all.equal(data@probeAnnotation, data2@probeAnnotation)) {stop("The data has different annotations, unable to continue")}
		}
    
    if(!is.null(cl)){
      if(sum(cl == 0 | cl == 1) != nrSamples) {stop('Invalid class vector given')}
	}
	
		
		mirrorLocs <- KCsmart:::.checkMirrorLocs(mirrorLocs, data)
	}
	
	originalData <- data

	for(i in 1:nrSamples){
		cat(paste("Processing sample", i, "/", nrSamples, "\r"))
		#single sample construction
		
		#copy probeAnnotations
		singleSample <- originalData
		singleSample@pos <- originalData@pos[,i, drop=F]
		singleSample@neg <- originalData@neg[,i, drop=F]
		singleSample <- new("KcghDataSum", singleSample)  
		
		data <- singleSample
		
		#cat(paste("Mirroring data ..","\r"))
		data <- KCsmart:::.mirrorData(data, mirrorLocs, mirrorLength)
	
		#cat(paste("Calculating sample point matrix ..","\r"))
		# spm <- KCsmart:::.samplePointMatrix(data, sampleDensity=sampleDensity, sigma=sigma, maxmem=maxmem, verbose=verbose)
    if (!old) {
      spm <- KCsmart:::.samplePointMatrix(data, sampleDensity=sampleDensity, sigma=sigma, maxmem=maxmem, verbose=verbose)
    }
    else {
      spm <- KCsmart:::.samplePointMatrixOld(data, sampleDensity=sampleDensity, sigma=sigma, maxmem=maxmem, verbose=verbose)
    } 
    
    
		rm(data)
		
		#cat(paste("Converting to spmc ..","\r"))
		if(!exists("spmc")){
			spmc <- KCsmart:::.spm2spmc(spm)
		}
		else{
			spmc <- KCsmart:::.add2spmc(spmc, spm)
		}
	}
	
	cat("\n")
	
	if(!is.null(data2)){
		spmc2 <- calcSpmCollection(data2, spmc@mirrorLocs, cl=cl, data2=NULL, sigma=sigma, sampleDensity=sampleDensity, maxmem=maxmem, verbose=verbose, doChecks=F)
		nrSamplesFirstClass <- ncol(spmc@data)
		spmc@data <- cbind(spmc@data, spmc2@data)
		rm(spmc2)
		spmc@cl <- c(rep(0, nrSamplesFirstClass), rep(1, (ncol(spmc@data) - nrSamplesFirstClass)))
	}
	
  if(!is.null(cl)){
    spmc@cl <- cl
  }
	
  return(spmc)
}

#sig level
findSigLevelTrad <- function(data, observedSpm, n=1, p=0.05, maxmem=1000){
	#method 1 (as published)
	if(!is(data, "KcghData")){data <- new("KcghData", data)}

	print(paste('Calculating alpha = ',p,'significance cut-off'))
	
	sampleDensity <- observedSpm@sampleDensity
	sigma <- observedSpm@sigma
	
	peaks <- vector(mode="list")
	
	spmUnlisted <- unlist(observedSpm)
		
	posPeaks <- KCsmart:::.findPeaks(spmUnlisted$pos)
	negPeaks <- KCsmart:::.findPeaks(spmUnlisted$neg, mode='neg')
	
	print(paste('Found ', length(posPeaks), ' pos peaks and ', length(negPeaks), ' neg peaks in observed sample point matrix'))
	
	p.corrected.pos <- p / length(posPeaks)
	p.corrected.neg <- p / length(negPeaks)
	
	mirrorLength <- sigma * 4
	mirrorLocs <- observedSpm@mirrorLocs
	
	
	print(paste('Calculating Mirror Positions'))
  dataSplit <- new("KcghDataSplit", data)
  dataSum <- new("KcghDataSum", dataSplit)
  dataMirrored <- KCsmart:::.mirrorData(dataSum, mirrorLocs, mirrorLength)
    
	print('Starting permutations ..')
	
	for(i in 1:n){
		cat(paste("\rAt iteration", i,'of',n))
		
		# spmPermuted <- KCsmart:::.permutedSpm(dataSplit, mirrorLocs, mirrorLength, sampleDensity=sampleDensity, sigma=sigma, maxmem=maxmem)
    spmPermuted <- KCsmart:::.permutedSpm(data, dataMirrored, sampleDensity=sampleDensity, sigma=sigma, maxmem=maxmem)
		spmPermuted <- unlist(spmPermuted)

		peaks$pos <- c(peaks$pos, KCsmart:::.findPeaks(spmPermuted$pos))
		peaks$neg <- c(peaks$neg, KCsmart:::.findPeaks(spmPermuted$neg, mode='neg'))
	}
	
	
	cutoff.pos <- min(sort(peaks$pos, decreasing=TRUE)[1:(p.corrected.pos*length(peaks$pos))])
	cutoff.neg <- max(sort(peaks$neg)[1:(p.corrected.neg*length(peaks$neg))])

	cat("\n")	
	return(list(pos=cutoff.pos, neg=cutoff.neg))
}

findSigLevelFdr <- function(data, observedSpm, n=1, fdrTarget=0.05, maxmem=1000){
		#FDR approach
		
		if(!is(data, "KcghData")){data <- new("KcghData", data)}
		print(paste('Calculating fdr = ',fdrTarget,'significance cut-off'))
		
		sampleDensity <- observedSpm@sampleDensity
		sigma <- observedSpm@sigma
		mirrorLength <- sigma * 4
		mirrorLocs <- observedSpm@mirrorLocs
		
		totalSpmPermuted <- vector(mode="list")
		observedSpmUnlisted <- unlist(observedSpm)
		dataSplit <- new("KcghDataSplit", data)
		dataSum <- new("KcghDataSum", dataSplit)
    dataMirrored <- KCsmart:::.mirrorData(dataSum, mirrorLocs, mirrorLength)
    
		for(i in 1:n){
			cat(paste("\rAt iteration", i,'of',n))
		
			spmPermuted <-  KCsmart:::.permutedSpm(data, dataMirrored, sampleDensity=sampleDensity, sigma=sigma, maxmem=maxmem)
			spmPermuted <- unlist(spmPermuted)
			totalSpmPermuted$pos <- c(totalSpmPermuted$pos, spmPermuted$pos)
			totalSpmPermuted$neg <- c(totalSpmPermuted$neg, spmPermuted$neg)
		}
		
		finalCutoffPos <- KCsmart:::.findCutoffByFdr(observedSpmUnlisted$pos, totalSpmPermuted$pos, fdr=fdrTarget)
		finalCutoffNeg <- -KCsmart:::.findCutoffByFdr(-observedSpmUnlisted$neg, -totalSpmPermuted$neg, fdr=fdrTarget)

		cat("\n")	
		return(list(pos=finalCutoffPos,neg=finalCutoffNeg))
}

.findPeaks_old <- function(data, mode='pos'){
	data <- data[!is.na(data)]
	dataOriginal <- data
	
	data <- c(data[-1], 0) - data
	data <- apply(as.matrix(data), 1,function(x){if(x>0){return(1)};if(x<0){return(-1)}else{return(0)}})
	nonFlats <- which(data!=0)
	data <- data[nonFlats]
	data <- data - c(data[-1], 0)
	if(mode=='pos'){
		peaksIndices <- which(data==2) + 1
	}
	else{
		peaksIndices <- which(data==-2) + 1
	}
	
	peaks <- dataOriginal[nonFlats][peaksIndices]

	return(peaks)
}

.findPeaks <- function(data, mode='pos'){
    dir <- ifelse(mode=="pos", -2, 2)
    data <- data[!is.na(data)]
    dataOriginal <- data[-1]
   
    data <- diff.default(data)
    data[data > 0] <- 1
    data[data < 0] <- -1
    nonFlats <- data != 0

    data2 <- diff.default(data[nonFlats])
   
    dataOriginal[nonFlats][data2 == dir]
}

.findCutoffByFdr <- function(observed, permuted, fdr=0.05, precision=0.01){
	#right-tailed!!
	t.fdr <- 0
	tCutoff <- max(observed, na.rm=TRUE)
	tCutoffOld <- 0
	maxTFdr <- fdr + (precision*fdr)
	minTFdr <-  fdr - (precision*fdr)
	
	multiplier <- length(permuted) / length(observed)
	
	#WARNING: this is 1-tailed (right)!
	
	while(!((t.fdr > minTFdr) & (t.fdr < maxTFdr))){
		if(length(tCutoffOld) > 1){
			tCutoffOldComp <- tCutoffOld[(length(tCutoffOld) - 1)]
		}
		else{
			tCutoffOldComp <- 0
		}
		
		if(t.fdr > fdr){
			tCutoff <- tCutoff + 0.5*abs(tCutoffOldComp - tCutoff)
		}
		else{
			tCutoff <- tCutoff - 0.5*abs(tCutoffOldComp - tCutoff)
		}
		
		tCutoffOld <- c(tCutoffOld, tCutoff)
		
		fp <- sum(permuted >= tCutoff, na.rm=TRUE)
		called <- sum(observed >= tCutoff, na.rm=TRUE)
		
		if(called == 0){print('Uhoh, 0 called!');return()}
		
		t.fdr <-  (fp/multiplier)  / called
		
		if(length(tCutoffOld) > 50){print('Uhoh, maximum number of iterations reached!'); return()}
	}

	return(tCutoff)
}

.permutedSpm <- function(data, dataMirrored, sampleDensity=50000, sigma=1000000, maxmem=1000, old=F){
	#takes sample data and returns sample point matrix of permuted probe label data
	if(!is(data, "KcghData")){stop("Need KcghData as input")}
	
	#permute data
  print('Permuting')
  
  for (i in 1:ncol(data@data)) {
    data@data[,i] <- sample(data@data[,i])
  }
  
  data <- new('KcghDataSplit', data)
  dataSum <- new("KcghDataSum", data)
  
  virtIndex <- grep('virtual', dataMirrored@probeAnnotation@name)
  realProbes <- dataMirrored@probeAnnotation[-virtIndex]
  permMirrors <- new('KcghDataSum')
  permMirrors@probeAnnotation <- dataMirrored@probeAnnotation[virtIndex]
  permMirrors@pos <- sample(dataSum@pos, size=length(virtIndex))
  permMirrors@neg <- sample(dataSum@neg, size=length(virtIndex))
  permMirrors@nrSamples <- dataMirrored@nrSamples
  
  print('Combining')
  combData <- new('KcghDataSum')
  combData@probeAnnotation@chromosome <- 
    c(permMirrors@probeAnnotation@chromosome, realProbes@chromosome)
  combData@probeAnnotation@maploc <- 
    c(permMirrors@probeAnnotation@maploc, realProbes@maploc)
  combData@probeAnnotation@name <- 
    c(permMirrors@probeAnnotation@name, realProbes@name)
  combData@pos <- c(permMirrors@pos, dataSum@pos)
  combData@neg <- c(permMirrors@neg, dataSum@neg)
  combData@nrSamples <- dataMirrored@nrSamples

  ordering <- order(combData@probeAnnotation@chromosome, combData@probeAnnotation@maploc)
  combData@probeAnnotation@chromosome <- combData@probeAnnotation@chromosome[ordering]
  combData@probeAnnotation@maploc <- combData@probeAnnotation@maploc[ordering]
  combData@probeAnnotation@name <- combData@probeAnnotation@name[ordering]
  combData@pos <- combData@pos[ordering]
  combData@neg <- combData@neg[ordering]
  
  combData <- new("KcghDataMirror", dataMirrored@mirrorLocs, combData)
  
  print('Returning')
	#get sample point matrix
	# spmPermuted <- KCsmart:::.samplePointMatrix(dataMirrored, sampleDensity, sigma, maxmem=maxmem, verbose=FALSE)
  if (!old) {
    spmPermuted <- .samplePointMatrix(combData, sampleDensity, sigma, maxmem=maxmem, verbose=FALSE)
  }
  else {
    spmPermuted <- KCsmart:::.samplePointMatrixOld(combData, sampleDensity, sigma, maxmem=maxmem, verbose=FALSE)
  } 
	
	return(spmPermuted)
}

.samplePointMatrix <- function(data, sampleDensity=50000,sigma=1000000, mirrorLocs=data@mirrorLocs, maxmem=1000, verbose=TRUE){
  
  #library(KernSmooth)
        
  if(!is(data, "KcghDataSum")){stop("Need KcghDataSum object as input")}
	
	cl <- unlist(lapply(mirrorLocs, max))
	attr(cl, 'chromNames') <- attr(mirrorLocs, 'chromNames')
	
	spm <- new("samplePointMatrix")
	sigma.4 <- sigma * 4
	nrsamples <- data@nrSamples
	dataChromosomes <- unique(data@probeAnnotation@chromosome)
	chromNames <- attr(cl, 'chromNames')
	
	for(i in dataChromosomes){
		if(verbose){cat(paste("\nProcessing chromosome", i,"\n"))}
		
		chromIndex  <- which(chromNames == i)
		if(length(chromIndex ) == 0){
			chromIndex  = as.numeric(i)
			warning(paste('Could not find chromosome',i,'in mirror locations object'))
		}
		
		all.cprobes <- which(data@probeAnnotation@chromosome == i)
		
    real.first.probe <- grep('first', data@probeAnnotation@name[all.cprobes])
		real.last.probe <- grep('last', data@probeAnnotation@name[all.cprobes])
    
    chromSamplePoints <- length(1:(cl[chromIndex]/sampleDensity))
    xval <- data@probeAnnotation@maploc[all.cprobes]
    
    # The kernel regression is performed here. The KernSmooth packages function
    # locpoly is used. This is a local approximation method.
    # Therefore we use the bkde, a kernel density estimation, on the probe spacing
    # we set areas with very low probe coverage to NA, as the local approximation
    # can cause artifacts to occur in the final result, if the local probe density
    # aproaches 0. The location of the NA's in the final spm is one of the big
    # differences in with the old function and may be the cause for differences
    # in results when using the new version of KCsmart
    #
    # The other difference is that due to the approximation using a Fourier Transform
    # the exact x-coordinates of the sampling grid might be different.
    
    tempPos <- locpoly(x=xval,
      y=data@pos[all.cprobes], 
      gridsize=chromSamplePoints, 
      bandwidth=sigma, range.x=c(0, cl[chromIndex]))

    tempNeg <- locpoly(x=xval,
      y=data@neg[all.cprobes], 
      gridsize=chromSamplePoints,
      bandwidth=sigma, range.x=c(0, cl[chromIndex]))
    
    tempNorm <- bkde(x=as.numeric(xval), kernel = "normal", bandwidth=as.numeric(sigma),
      gridsize = chromSamplePoints, range.x=c(0, cl[chromIndex]))
    # Scale tempNorm between 1 and 0 
    tempNorm$y <- tempNorm$y/max(tempNorm$y)
    
    # Any probe density .05 from the 0 line too sparse, so set to NA
    # So this is basically the nastiest hard coded piece in the code. It can result in some data
    # being set to all NA is there is some flukey massively high density probe region, which
    # results in all the other areas to be less than 5% as dense as the max dense region.
    # for now, I have no good ideas how to set this threshold any better -- but we need to filter
    # out low density areas to prevent weird artifact causing non-existant peaks in the data.
    tempNeg$y[tempNorm$y < .05] <- NA
    tempPos$y[tempNorm$y < .05] <- NA
    
    # Set virtual probes to NA
    
    virtProbes <- (tempPos$x < data@probeAnnotation@maploc[all.cprobes][real.first.probe]) | 
      (tempPos$x > data@probeAnnotation@maploc[all.cprobes][real.last.probe])
    tempPos$y[virtProbes] <- NA
    tempNeg$y[virtProbes] <- NA
    
    spm[[i]] <- list(pos = tempPos$y/nrsamples, neg = tempNeg$y/nrsamples)
    attr(spm[[i]], 'chromosome') <- chromIndex 
 
	}
	
	total <- 0
	maxy <- 0
	miny <- 0

	for(i in 1:length(spm@data)){
		total <- total + length(spm[[i]]$pos)
		if(max(spm[[i]]$pos, na.rm=TRUE) > maxy){
			maxy <- max(spm[[i]]$pos, na.rm=TRUE)
		}
		if(min(spm[[i]]$neg, na.rm=TRUE) < miny){
			miny <- min(spm[[i]]$neg, na.rm=TRUE)
		}
	}
	
	spm@totalLength <- as.integer(total)
	spm@maxy <- maxy
	spm@miny <- miny
	spm@sampleDensity <- as.integer(sampleDensity)
	spm@sigma <- as.integer(sigma)
	spm@mirrorLocs <- data@mirrorLocs

	#attach probeAnnotation to spm, less the virtual probes
	virtualProbes <- grep("virtual", data@probeAnnotation@name)
	spm@probeAnnotation <- data@probeAnnotation[-virtualProbes]
	
	if(verbose){cat("\n\n")}
	
	return(spm)
}

.samplePointMatrixOld <- function(data, sampleDensity=50000,sigma=1000000, mirrorLocs=data@mirrorLocs, maxmem=1000, verbose=TRUE){
        if(!is(data, "KcghDataSum")){stop("Need KcghDataSum object as input")}
	
	cl <- unlist(lapply(mirrorLocs, max))
	attr(cl, 'chromNames') <- attr(mirrorLocs, 'chromNames')
	
	spm <- new("samplePointMatrix")
	sigma.4 <- sigma * 4
	nrsamples <- data@nrSamples
	dataChromosomes <- unique(data@probeAnnotation@chromosome)
	chromNames <- attr(cl, 'chromNames')
	
	#prepare scaffold matrix -> maximal matrix
	sample.points <- seq.int(0,max(cl), by=sampleDensity)
	max.nrprobes <- max(aggregate(data@probeAnnotation@chromosome, by=list(chromosome=data@probeAnnotation@chromosome), length)[,2])
	
	#the maximum number of probes that are allowed to be processed simultaneously, this directly affects memory usage
	#use lower values for less memory usage
	max.nrprobes <- maxmem
	
	sp <- rep.int(sample.points, max.nrprobes)
	scaffold.matrix <- matrix(sp, nrow=max.nrprobes, byrow=TRUE)

	for(i in dataChromosomes){
		if(verbose){cat(paste("\nProcessing chromosome", i,"\n"))}
		
		chromIndex  <- which(chromNames == i)
		if(length(chromIndex ) == 0){
			chromIndex  = as.numeric(i)
			warning(paste('Could not find chromosome',i,'in mirror locations object'))
		}
		
		all.cprobes <- which(data@probeAnnotation@chromosome == i)
		
		#set max chunk size
		chunk.size <- max.nrprobes
		chunks <- c(seq(0,length(all.cprobes), by=chunk.size), length(all.cprobes))
		
		#find virtual probes
		real.first.probe <- grep('first', data@probeAnnotation@name[all.cprobes])
		real.last.probe <- grep('last', data@probeAnnotation@name[all.cprobes])
		virtual.probes <- grep('virtual', data@probeAnnotation@name[all.cprobes])
		virtual.probes <- virtual.probes[((virtual.probes > real.first.probe) & (virtual.probes < real.last.probe))]
		
		c.scaffold.matrix <- scaffold.matrix[1, 1:(cl[chromIndex ]/sampleDensity)]

		#identify virtual probes
		virtual <- c.scaffold.matrix < data@probeAnnotation@maploc[all.cprobes][real.first.probe]
		virtual <- virtual | (c.scaffold.matrix > (data@probeAnnotation@maploc[all.cprobes][real.last.probe]))
		if(length(virtual.probes > 0)){
			virtual <- virtual | ((c.scaffold.matrix > data@probeAnnotation@maploc[all.cprobes][(min(virtual.probes) - 1)]) & (c.scaffold.matrix < (data@probeAnnotation@maploc[all.cprobes][(max(virtual.probes) + 1)])))
		}
		
		stored.cspm <- vector(mode="list")
		stored.w.regression <- vector(length=(cl[chromIndex ]/sampleDensity))
		
		for(current.chunk in 1:(length(chunks) - 1)){
		
			if(verbose){cat(paste("\rProcessing chunk", current.chunk, 'of', (length(chunks) - 1), '(',(chunks[current.chunk] + 1),'-', chunks[current.chunk + 1],')'))}
			cprobes <- all.cprobes[(chunks[current.chunk]+1):chunks[current.chunk + 1]]
			nrprobes <- length(cprobes)
		
			cspm <- scaffold.matrix[1:nrprobes, 1:(cl[chromIndex ]/sampleDensity), drop=FALSE]

			probe.locs <- data@probeAnnotation@maploc[cprobes]
			cspm <- abs(cspm - probe.locs)
			
			cspm[cspm > sigma.4] <- NA
			cspm[,virtual] <- NA
			
			cspm <- dnorm(cspm, mean=0, sd=sigma)
			cspm[is.na(cspm)] <- 0
			cspm.pos <- cspm * data@pos[cprobes]
			cspm.neg <- cspm * data@neg[cprobes]
			
			w.regression <- colSums(cspm, na.rm=TRUE) * nrsamples
			
			stored.w.regression <- rbind(stored.w.regression, w.regression)
			rm(cspm)
			rm(w.regression)
			
			csspm.pos <- colSums(cspm.pos, na.rm=TRUE)
			csspm.neg <- colSums(cspm.neg, na.rm=TRUE)
			rm(cspm.pos)
			rm(cspm.neg)
			
			stored.cspm$pos <- rbind(stored.cspm$pos, csspm.pos)
			stored.cspm$neg <- rbind(stored.cspm$neg, csspm.neg)
			
			rm(csspm.pos)
			rm(csspm.neg)
		}
		
		stored.w.regression <- colSums(stored.w.regression, na.rm=TRUE)
		stored.cspm$pos <- colSums(stored.cspm$pos, na.rm=TRUE) / stored.w.regression
		stored.cspm$neg <- colSums(stored.cspm$neg, na.rm=TRUE) / stored.w.regression

		spm[[i]] <- list(pos = stored.cspm$pos, neg = stored.cspm$neg)
		
		attr(spm[[i]], 'chromosome') <- chromIndex 
		rm(stored.cspm)
		
		#gc()
	}
	
	total <- 0
	maxy <- 0
	miny <- 0

	for(i in 1:length(spm@data)){
		total <- total + length(spm[[i]]$pos)
		if(max(spm[[i]]$pos, na.rm=TRUE) > maxy){
			maxy <- max(spm[[i]]$pos, na.rm=TRUE)
		}
		if(min(spm[[i]]$neg, na.rm=TRUE) < miny){
			miny <- min(spm[[i]]$neg, na.rm=TRUE)
		}
	}
	
	spm@totalLength <- as.integer(total)
	spm@maxy <- maxy
	spm@miny <- miny
	spm@sampleDensity <- as.integer(sampleDensity)
	spm@sigma <- as.integer(sigma)
	spm@mirrorLocs <- data@mirrorLocs

	#attach probeAnnotation to spm, less the virtual probes
	virtualProbes <- grep("virtual", data@probeAnnotation@name)
	spm@probeAnnotation <- data@probeAnnotation[-virtualProbes]
	
	if(verbose){cat("\n\n")}
	
	return(spm)
}

getSigSegments <- function(spm, sigLevels, chromosomes=NULL){
    if(!is(spm, "samplePointMatrix")){stop("Need samplePointMatrix object as input")}
    
	sigSegments <- new("sigSegments")
	
	sampleDensity <- spm@sampleDensity
	mirrorLocs <- spm@mirrorLocs
	
	if(is.null(chromosomes)){
		chromosomes <- names(spm@data)
		chromNames <- attr(mirrorLocs, 'chromNames')
		chromosomesOrdered <- chromNames[chromNames %in% chromosomes]
		chromosomesOrdered <- c(chromosomesOrdered, chromosomes[!(chromosomes %in% chromNames)])
		chromosomes <- chromosomesOrdered
	}
	else{
		chromosomes <- as.character(chromosomes)
		spmLengths <- unlist(lapply(spm@data, function(x){return(length(x$pos))}))
		total <- sum(spmLengths[chromosomes])
	}
	
	for(i in chromosomes){
		cprobes <- (spm@probeAnnotation@chromosome == i)
		
		#gains
		sigGains <- spm[[i]]$pos >= sigLevels$pos
		
		if(length(which(sigGains)) > 0){
		
		#now get chromosome region
		#t <- diff(sigGains)
		#t2 <- t - c(2,t[-c(length(t))])
		#t2[1] <- 1
		#t2[length(t2)] <- 1
		#t3 <- which(t2 != 0)
		#t <- t3
		
		t <- .getPeaks(sigGains)
		
		for(j in seq(1,(length(t) - 1), by=2)){
			currentSegment <- vector(mode="list")
			startPosition <- t[j]
			end.position <- t[j+1]
			currentSegment$chromosome <- i
			currentSegment$x <- startPosition:end.position
			currentSegment$y <- signif(spm[[i]]$pos[startPosition:end.position],4)
			currentSegment$avgy <- signif(mean(currentSegment$y, na.rm=TRUE), 4)
			currentSegment$modey <- signif(max(currentSegment$y, na.rm=TRUE), 4)
			currentSegment$start <- (startPosition * sampleDensity) - sampleDensity
			currentSegment$end <- (end.position * sampleDensity) - sampleDensity
			currentSegment$probes <- which((spm@probeAnnotation@maploc[cprobes] >= currentSegment$start) & (spm@probeAnnotation@maploc[cprobes] <= currentSegment$end))
			currentSegment$probenames <- spm@probeAnnotation@name[cprobes][currentSegment$probes]
			sigSegments@gains <- c(sigSegments@gains, list(currentSegment))
		}
		}
		
		
		#losses
		sigLosses <- spm[[i]]$neg <= sigLevels$neg
		
		if(length(which(sigLosses)) > 0){
		
		t <- .getPeaks(sigLosses)
		
		#sigSegments[[i]]$negsegments <- vector(mode='list', length=length(t)/2)

		for(j in seq(1,(length(t) - 1), by=2)){
			currentSegment <- vector(mode="list")
			startPosition <- t[j]
			end.position <- t[j+1]
			currentSegment$chromosome <- i
			currentSegment$x <- startPosition:end.position
			currentSegment$y <- signif(spm[[i]]$neg[startPosition:end.position], 4)
			currentSegment$avgy <- signif(mean(currentSegment$y, na.rm=TRUE), 4)
			currentSegment$modey <- signif(min(currentSegment$y, na.rm=TRUE), 4)
			currentSegment$start <- (startPosition * sampleDensity) - sampleDensity
			currentSegment$end <- (end.position * sampleDensity) - sampleDensity
			currentSegment$probes <- which((spm@probeAnnotation@maploc[cprobes] >= currentSegment$start) & (spm@probeAnnotation@maploc[cprobes] <= currentSegment$end))
			currentSegment$probenames <- spm@probeAnnotation@name[cprobes][currentSegment$probes]
			
			sigSegments@losses <- c(sigSegments@losses, list(currentSegment))
		}
		}
	}
	
	sigSegments@sigma <- spm@sigma
	sigSegments@sigLevels <- sigLevels
	return(sigSegments)
}

.getPeaks <- 	function(x){
		x[is.na(x)] <- F
		trans_vec <- diff(x)
		start_pos <- which(trans_vec == 1) + 1
		end_pos <- which(trans_vec == -1)
		
		#fix for regions starting at first or ending at last position
		if(x[1]){start_pos <- c(1, start_pos)}
		if(x[length(x)]){end_pos <- c(end_pos, length(x))}
		
		peakIntervals <- as.vector(rbind(start_pos, end_pos))
		
		return(peakIntervals)		
}

.getSigRegions <- function(spm, sigLevels, chromosomes=NULL){
	sigRegions <- new("sigRegions")
	mirrorLocs <- spm@mirrorLocs
	
	if(is.null(chromosomes)){
		chromosomes <- names(spm@data)
		chromNames <- attr(mirrorLocs, 'chromNames')
		chromosomesOrdered <- chromNames[chromNames %in% chromosomes]
		chromosomesOrdered <- c(chromosomesOrdered, chromosomes[!(chromosomes %in% chromNames)])
		chromosomes <- chromosomesOrdered
	}
	else{
		chromosomes <- as.character(chromosomes)
		spmLengths <- unlist(lapply(spm@data, function(x){return(length(x$pos))}))
		total <- sum(spmLengths[chromosomes])
	}
	
	for(i in chromosomes){
		sigRegions[[i]]$posx <- which(spm[[i]]$pos >= sigLevels$pos)
		sigRegions[[i]]$posy <- spm[[i]]$pos[sigRegions[[i]]$posx]
		sigRegions[[i]]$negx <- which(spm[[i]]$neg <= sigLevels$neg)
		sigRegions[[i]]$negy <- spm[[i]]$neg[sigRegions[[i]]$negx]
		chromIndex  <- which(attr(mirrorLocs, 'chromNames') == i)
		if(length(chromIndex ) == 0){
			chromIndex  = as.numeric(i)
			warning(paste('Could not find chromosome',i,'in mirror locations object'))
		}
		attr(sigRegions[[i]], 'chromosome') <- chromIndex 
	}
	
	sigRegions@sigma <- spm@sigma
	return(sigRegions)
}

#plot functions
plotScaleSpace <- function(spms, sigLevels, chromosomes=NULL, type='b'){
	mirrorLocs <- spms[[1]]@mirrorLocs
	scaleSpace <- new("scaleSpace")
	
	for(i in 1:length(spms)){
		#since only calculating the gains or the losses (instead of both) in the .getSigRegions function hardly makes any difference at all (performance-wise)
		#the 'type' distinction is not propagated into that function
		scaleSpace@data[[i]] <- KCsmart:::.getSigRegions(spms[[i]], sigLevels[[i]], chromosomes)
	}
	
	#plot sig regions, pass any spm as argument
	plot(scaleSpace, spm=spms[[1]], type=type)
}

idPoints <- function(spm, mode='pos', dev=2, chromosomes=NULL){
	options(locatorBell = FALSE)
	if(!is.null(chromosomes)){
		chromosomes <- as.character(chromosomes)
	}
	else{
		chromosomes <- names(spm@data)
		mirrorLocs <- spm@mirrorLocs
		chromNames <- attr(mirrorLocs, 'chromNames')
		chromosomesOrdered <- chromNames[chromNames %in% chromosomes]
		chromosomesOrdered <- c(chromosomesOrdered, chromosomes[!(chromosomes %in% chromNames)])
		chromosomes <- chromosomesOrdered
	}
	
	spmLengths <- unlist(lapply(spm@data, function(x){return(length(x$pos))}))
	spmLengths <- spmLengths[chromosomes]
	colin <- cumsum(spmLengths)
	
	yvalues <- vector()
	for(i in chromosomes){
		if(mode=='pos'){
			yvalues <- c(yvalues, signif(spm[[i]]$pos, 3))
		}
		else{
			yvalues <- c(yvalues, signif(spm[[i]]$neg, 3))
		}
	}
	totalLength <- length(yvalues)
	
	dev.set(dev)
	identifiedPoints <- identify(seq(1,totalLength), yvalues, labels=yvalues)
	if(length(identifiedPoints) > 0){
		selectedy <- yvalues[identifiedPoints]
		selectedPoints <- data.frame(colin=identifiedPoints, KCscore=selectedy)
		row.names(selectedPoints) <- NULL
		#get chromosome + chromosome position
		chromFind <- apply(matrix(selectedPoints$colin),1, function(x){return(abs(x-colin))})
		chromFind <- apply(matrix(chromFind, ncol=length(selectedPoints$colin)), 2, which.min)
		
		chromDistance <- selectedPoints$colin - colin[chromFind]
		chromFind[chromDistance > 0] <- chromFind[chromDistance > 0] + 1
		selectedPoints$chromosome <- chromosomes[chromFind]
		selectedPoints$chromPosition <- (selectedPoints$colin - c(0,colin)[chromFind]) * (spm@sampleDensity / 1000)
		selectedPoints <- selectedPoints[,c('KCscore', 'chromosome','chromPosition','colin')]
		return(selectedPoints)
	}
}

compareSpmCollection <- function(spmCollection, nperms=20, method=c("siggenes", "perm"), siggenes.args=NULL, altcl=NULL) {

	method <- match.arg(method)
	stopifnot(is(spmCollection,"spmCollection"))
	if(!is.null(altcl)){
		if((sum(altcl == 0 | altcl == 1)) != length(spmCollection@cl)){stop('Invalid class vector given')}
		else{
			spmCollection@cl <- altcl
		}
	}

	res <- switch(method,
		siggenes = KCsmart:::.comparativeKcSiggenes(spmCollection@data, spmCollection@cl, nperms=nperms, siggenes.args),
		perm = KCsmart:::.comparativeKcPerms(spmCollection@data, spmCollection@cl , nperms=nperms))

	new("compKc", spmCollection, method, res)
}

getSigRegionsCompKC <- function(compKc, fdr=.01, maxRegionGap=10) {

	stopifnot(is(compKc,"compKc"))

	#prepare the result stuff
	siglist <- list()
	
	if(compKc@method == "siggenes") {
		#get all dvalues from sam
		a <- compKc@siggenesResult
		siglist$testval <- a@d
		#find delat, suppress output
		sink(tempfile())
		deltas <- findDelta(a,fdr=fdr)
		sink()
		siglist$cutoff <- ifelse(is.matrix(deltas), deltas[2,1], deltas[1])
		siggeneslist <- summary(a, siglist$cutoff)
		siglist$issignificant <- 1:nrow(compKc@spmCollection@data) %in% siggeneslist@mat.sig$Row
	}
	else {
		siglist$cutoff <- KCsmart:::.findfdrcutoff(compKc@snrResult@permutations, compKc@snrResult@snrValues, fdr)
		siglist$testval <- compKc@snrResult@snrValues
		siglist$issignificant <- abs(siglist$testval) > siglist$cutoff
	}
	
	#replace NA in sig vector with FALSE
	siglist$issignificant[is.na(siglist$issignificant)] <- FALSE
	#determine regions
	regions <- KCsmart:::.getRegions(siglist$issignificant, maxRegionGap)

	#add annotation to regiontable
	regions$startchrom <- compKc@spmCollection@annotation@chromosome[regions$startrow]
	regions$endchrom <- compKc@spmCollection@annotation@chromosome[regions$endrow]

	#split regions on chromsome border
	if(any(regions$startchrom != regions$endchrom)) {
		tosplit <- subset(regions, regions$startchrom != regions$endchrom)
		notsplit <- subset(regions, regions$startchrom == regions$endchrom)
	
		for(i in 1:nrow(tosplit)) {
			s <- tosplit[i,]
			done <- F
			repeat {
				chendrow <- max(which(compKc@spmCollection@annotation@chromosome == s$startchrom))
				notsplit <- rbind(notsplit, c(s$startrow, chendrow, s$startchrom, s$startchrom))
				if(done) break;

				s$startchrom <- compKc@spmCollection@annotation@chromosome[chendrow+1]
				s$startrow <- chendrow+1

				if(s$startchrom == s$endchrom) done <- T
			}
		}
		#sort table
		regions <- notsplit[order(notsplit$startrow),]
		
	}
	regions$startrow <- as.numeric(regions$startrow)
  regions$endrow <- as.numeric(regions$endrow)
    
	startposition <- compKc@spmCollection@annotation@maploc[regions$startrow]
	endposition <- compKc@spmCollection@annotation@maploc[regions$endrow]

	new("compKcSigRegions", 
	   regionTable=data.frame(startrow=regions$startrow, endrow=regions$endrow, chromosome=regions$startchrom, startposition, endposition), 
	   method=compKc@method, fdr=fdr, cutoff=siglist$cutoff)
}

.comparativeKcSiggenes <- function(data, cl, nperms, args=NULL) {
	require(siggenes)

	callargs <- c(list(data, cl, B=nperms), args)
	do.call(sam, callargs)
}

.comparativeKcPerms <- function(data, cl, nperms) {

	snrresults <- KCsmart:::.snr(data[,cl==0], data[,cl==1])

	#prepare the permutations
	perms <- KCsmart:::.makePermutations(sum(cl==0), sum(cl==1), nperms)

	m <- matrix(NA, nrow=nrow(data), ncol=length(perms))
	for(p in 1:length(perms)) {
		m[,p] <- KCsmart:::.snr(data[,perms[[p]]$a], data[,perms[[p]]$b], s0=snrresults$s0)
	}
	
	new("snrResult", snrValues=snrresults$snr, fudge=snrresults$s0, permutations=m)
}

.makePermutations <- function(sizeA, sizeB, nperms=2000) {
	#How many perms are there?
	totalperms <- choose(sizeA+sizeB, sizeA)
	n <- sizeA+sizeB
	a <- 1:sizeA
	b <- (sizeA+1):n

	#FIXME: if totalperms !>> nperms we should select perms from allperms to avoid duplicates
	if(totalperms > nperms) {
		return(lapply(1:nperms, function(x){z <- sample(n); return(list(a=z[a], b=z[b]))}))
	}
	else {
		warning("There are only ", totalperms," permutations. Returning all")
		#TODO: implemtent all perms

	}
}

.snr <- function(a, b, s0=NULL) {
	n1 <- ncol(a)
	n2 <- ncol(b)
	
	n1row <- rep(n1, nrow(a)) - rowSums(is.na(a))
	n2row <- rep(n2, nrow(a)) - rowSums(is.na(b))

	m1 <- rowMeans(a,na.rm=T)
	m2 <- rowMeans(b,na.rm=T)

	var1 <- KCsmart:::.varr(a, meanx=m1, n1row)
	var2 <- KCsmart:::.varr(b, meanx=m2, n2row)
	
	sigma12 <- sqrt( (var1/n1row) +  (var2/n2row ) )

	if(is.null(s0)){
		s0 <- quantile(sigma12, .95, na.rm=T)
		return(list(snr=(m1-m2)/(sigma12 + s0), s0=s0))
	} else {
		return(	(m1-m2)/(sigma12 + s0))
	}
}

.varr <- function(x, meanx, nna=NULL) {
	n <- ncol(x)
	p <- nrow(x)
	Y <-matrix(1,nrow=1,ncol=n)
	#calc nna vector if not provided
	if(is.null(nna)) nna <- rep(n, p) - rowSums(is.na(x))
	nnam <- (1/(nna-1))
	xdif <- x - (meanx %*% Y)
	ans <- rowSums(xdif^2, na.rm=T) * nnam
	ans[nnam==1] <- NA
	drop(ans)
}

.findfdrcutoff <- function(permdata, realdata, fdr=.01, tails=c("both", "up", "down")) {
	tails <- match.arg(tails)

	if(tails == "down") {
		realdata <- -realdata
		permdata <- -permdata
	}

	if(tails == "both") {
		permdata <- abs(permdata)
	}

	#sort by default removes NA values
	sr <- switch(tails, 
	   both=sort(abs(realdata), decreasing=T),
	   sort(realdata, decreasing=T))

	l <- length(sr)
	#pick the 50% point to test for fdr
	p <- floor(l/2)

	cat("Start fdr test value =", sr[p])
	tfdr <- mean(colSums(permdata > sr[p], na.rm=T))/p
	cat(sr[p]," fdr =", tfdr,"\n")
	stepfraction <- 0.5
	stepsize <- ceiling(p*stepfraction)
	up <- 0
	while(stepsize > 1) {
		#up or down?
		newup <- ifelse(( tfdr - fdr) > 0,0,1)
		if(newup != up) stepfraction <- stepfraction/2
		up <- newup
		stepsize <- ceiling(p*stepfraction)

		p <- ifelse(up == 0,p-stepsize, p+stepsize)
		tfdr <- mean(colSums(abs(permdata) > sr[p], na.rm=T))/p
		cat("p=", p, "step=", stepsize, ", ", sr[p], " fdr =", tfdr,"\n")
	}
	return (sr[p])
}

.getRegions <- function(v, allowedGapsize=10) {
		ss <- c(0,v) - c(v,0)
		starts <- which(ss == -1)
		ends <- which(ss == 1)-1
		if(length(starts) == 0) {
			return(data.frame())
		}
		
		if(length(starts) == 1) {
			return(data.frame(startrow=starts, endrow=ends))
		}
		gapsizes <- c(0, starts[-1] - ends[-length(ends)] -1)

		joinedstarts <- numeric()
		joinedends <- numeric()
		pos <- 1
		ingap <- 0
		
		if(allowedGapsize > 0) {
			for(i in 1:(length(starts)-1)) {
				if(ingap == 0) joinedstarts[pos] <- starts[i]

				if(gapsizes[i+1] <= allowedGapsize) {
					joinedends[pos] <- ends[i+1]
					ingap <- 1
				}
				else {
					joinedends[pos] <- ends[i]
					ingap <- 0
					pos <- pos+1
					if(i == (length(starts)-1)) {
						joinedstarts[pos] <- starts[i+1]
						joinedends[pos] <- ends[i+1]
					}
				}
			}
			return(data.frame(startrow=joinedstarts, endrow=joinedends)) 
		}
	return(data.frame(startrow=starts, endrow=ends)) 
}

Try the KCsmart package in your browser

Any scripts or data that you put into this service are public.

KCsmart documentation built on Nov. 8, 2020, 7:08 p.m.