R/dataManagementFunctions.r

Defines functions ExpressionSet2order ExpressionSet2subset getSegFeatures matchAnn2Ann uniqGenomicInfo matchCGHcall2ExpressionSet ExpressionSet2weightedSubset expandMatching2singleIDs nBreakpoints merge2ExpressionSets splitMatchingAtBreakpoints cghSeg2order cghCall2order merge2cghCalls cghCall2subset cghCall2maximumSubset cghCall2weightedSubset cghCall2maximumSubset

Documented in cghCall2maximumSubset cghCall2order cghCall2subset cghCall2weightedSubset cghSeg2order expandMatching2singleIDs ExpressionSet2order ExpressionSet2subset ExpressionSet2weightedSubset getSegFeatures matchAnn2Ann matchCGHcall2ExpressionSet merge2cghCalls merge2ExpressionSets nBreakpoints splitMatchingAtBreakpoints uniqGenomicInfo

cghCall2maximumSubset <- function(CNdata, featuresAndWeights, chr, bpstart, bpend, ncpus=1, verbose=TRUE){
   	############################################################################
	# function shrinks cghCall-object to a subset of the features
	############################################################################

	# check input
	if (verbose){ cat("perform input checks...", "\n") }
	if (as.character(class(CNdata)) != "cghCall"){ stop("CNdata not of class cghCall.") }
	# if (!(as.character(class(featureSubset))=="numeric" | as.character(class(featureSubset))=="integer" )){ stop("featureSubset of wrong class.") }
	# if (!(as.character(class(weights))=="numeric" | as.character(class(weights))=="integer" )){ stop("weights of wrong class.") }
	# if ( !(length(featureSubset) >= 1) ){ stop("featureSubset contains no row numbers.") }
	# if ( !(min(featureSubset) >= 1 | max(featureSubset) <= nrow(fData(CNdata))) ){ stop("featureSubset contains illegal row numbers.") }
	# if ( length(featureSubset) != length(weights) ){ stop("featureSubset and weights of unequal length.") }

	if (!(as.character(class(chr))=="numeric" | as.character(class(chr))=="integer" )){ stop("chr of wrong class.") }
	if (length(chr) != 1){ stop("chr of wrong length.") }
	if ( !(chr >= 1 | chr <= ncol(fData(CNdata))) ){ stop("chr exceeds number of columns in featureData.") }
	if (!(as.character(class(bpstart))=="numeric" | as.character(class(bpstart))=="integer" )){ stop("bpstart of wrong class.") }
	if (length(bpstart) != 1){ stop("bpstart of wrong length.") }
	if ( !(bpstart >= 1 | bpstart <= ncol(fData(CNdata))) ){ stop("bpstart exceeds number of columns in featureData.") }
	if (!(as.character(class(bpend))=="numeric" | as.character(class(bpend))=="integer" )){ stop("bpend of wrong class.") }
	if (length(bpend) != 1){ stop("bpend of wrong length.") }
	if ( !(bpend >= 1 | chr <= ncol(fData(CNdata))) ){ stop("bpend exceeds number of columns in featureData.") }

	# disecting the cghCall-object
	amplifications <- FALSE
	copynumberMat <- CGHbase::copynumber(CNdata)
	segmentedMat <- CGHbase::segmented(CNdata)
	problossMat <- CGHbase::probloss(CNdata)
	probnormMat <- CGHbase::probnorm(CNdata)
	probgainMat <- CGHbase::probgain(CNdata)
	if (!is.null(CGHbase::probamp(CNdata))){ probampMat <- CGHbase::probamp(CNdata); amplifications <- TRUE }
	if (!is.null(CGHbase::probdloss(CNdata))){ probdlossMat <- CGHbase::probdloss(CNdata); doubleLoss <- TRUE }
	CNann <- fData(CNdata)
	rm(CNdata)
	gc()

	# identify maximum deviating features
	if (verbose){ cat("identify maximum deviating features ...", "\n") }
	if (ncpus == 1){ idMat <- t(sapply(featuresAndWeights, .slhFuncMaxCGHcallData, segmentedMat, simplify=TRUE)) }
	if (ncpus > 1){ 
		sfInit(parallel=TRUE, cpus=ncpus)
		sfLibrary(package="sigaR", verbose=FALSE, character.only=TRUE)
		idMat <- t(sfSapply(featuresAndWeights, .slhFuncMaxCGHcallData, segmentedMat, simplify=TRUE)) 
	}

	# subsetting of segmented data
	if (verbose){ cat("generate subsetted matrices ...", "\n") }
	if (ncpus == 1){ copynumberMat <- t(sapply(1:nrow(idMat), .slhFunctMaxFeatures, featuresAndWeights, idMat, copynumberMat, simplify=TRUE)) }
	if (ncpus > 1){ copynumberMat <- t(sfSapply(1:nrow(idMat), .slhFunctMaxFeatures, featuresAndWeights, idMat, copynumberMat, simplify=TRUE)) }
	if (ncpus == 1){ segmentedMat <- t(sapply(1:nrow(idMat), .slhFunctMaxFeatures, featuresAndWeights, idMat, segmentedMat, simplify=TRUE)) }
	if (ncpus > 1){ segmentedMat <- t(sfSapply(1:nrow(idMat), .slhFunctMaxFeatures, featuresAndWeights, idMat, segmentedMat, simplify=TRUE)) }
	if (ncpus == 1){ problossMat <- t(sapply(1:nrow(idMat), .slhFunctMaxFeatures, featuresAndWeights, idMat, problossMat, simplify=TRUE)) }
	if (ncpus > 1){ problossMat <- t(sfSapply(1:nrow(idMat), .slhFunctMaxFeatures, featuresAndWeights, idMat, problossMat, simplify=TRUE)) }
	if (ncpus == 1){ probnormMat <- t(sapply(1:nrow(idMat), .slhFunctMaxFeatures, featuresAndWeights, idMat, probnormMat, simplify=TRUE)) }
	if (ncpus > 1){ probnormMat <- t(sfSapply(1:nrow(idMat), .slhFunctMaxFeatures, featuresAndWeights, idMat, probnormMat, simplify=TRUE)) }
	if (ncpus == 1){ probgainMat <- t(sapply(1:nrow(idMat), .slhFunctMaxFeatures, featuresAndWeights, idMat, probgainMat, simplify=TRUE)) }
	if (ncpus > 1){ probgainMat <- t(sfSapply(1:nrow(idMat), .slhFunctMaxFeatures, featuresAndWeights, idMat, probgainMat, simplify=TRUE)) }
	if (amplifications){ 
		if (ncpus == 1){ probampMat <- t(sapply(1:nrow(idMat), .slhFunctMaxFeatures, featuresAndWeights, idMat, probampMat, simplify=TRUE)) }
		if (ncpus > 1){ probampMat <- t(sfSapply(1:nrow(idMat), .slhFunctMaxFeatures, featuresAndWeights, idMat, probampMat, simplify=TRUE)) }
	}
	if (doubleLoss){ 
		if (ncpus == 1){ probdlossMat <- t(sapply(1:nrow(idMat), .slhFunctMaxFeatures, featuresAndWeights, idMat, probdlossMat, simplify=TRUE)) }
		if (ncpus > 1){ probdlossMat <- t(sfSapply(1:nrow(idMat), .slhFunctMaxFeatures, featuresAndWeights, idMat, probdlossMat, simplify=TRUE)) }
	}

	# subsetting of call data
	if (verbose){ cat("generate subsetted call matrices ...", "\n")	}
	callsMat <- numeric()
	for (i in 1:ncol(problossMat)){
		if (!amplifications & !doubleLoss){
			if (ncpus == 1){ callsMat <- cbind(callsMat, apply(cbind(problossMat[,i], probnormMat[,i], probgainMat[,i]), 1, which.max) - 2) }
			if (ncpus > 1){ callsMat <- cbind(callsMat, sfApply(cbind(problossMat[,i], probnormMat[,i], probgainMat[,i]), 1, which.max) - 2) }
		} 
		if (amplifications & !doubleLoss){
			if (ncpus == 1){ callsMat <- cbind(callsMat, apply(cbind(problossMat[,i], probnormMat[,i], probgainMat[,i], probampMat[,i]), 1, which.max) - 2) }
			if (ncpus > 1){ callsMat <- cbind(callsMat, sfApply(cbind(problossMat[,i], probnormMat[,i], probgainMat[,i], probampMat[,i]), 1, which.max) - 2) }
		}
		if (!amplifications & doubleLoss){
			if (ncpus == 1){ callsMat <- cbind(callsMat, apply(cbind(probdlossMat[,i], problossMat[,i], probnormMat[,i], probgainMat[,i]), 1, which.max) - 3) }
			if (ncpus > 1){ callsMat <- cbind(callsMat, sfApply(cbind(probdlossMat[,i], problossMat[,i], probnormMat[,i], probgainMat[,i]), 1, which.max) - 3) }
		}
		if (amplifications & doubleLoss){
			if (ncpus == 1){ callsMat <- cbind(callsMat, apply(cbind(probdlossMat[,i], problossMat[,i], probnormMat[,i], probgainMat[,i], probampMat[,i]), 1, which.max) - 3) }
			if (ncpus > 1){ callsMat <- cbind(callsMat, sfApply(cbind(probdlossMat[,i], problossMat[,i], probnormMat[,i], probgainMat[,i], probampMat[,i]), 1, which.max) - 3) }
		}
	}	

	# weighted subsetting of annotation data
	if (verbose){ cat("reorganize annotation ...", "\n") }
	if (ncpus == 1){ annotation <- t(sapply(featuresAndWeights, .slhFuncWeightedCGHcallExpressionSetAnnotation, CNann, chr, bpstart, bpend, simplify=TRUE)) }
	if (ncpus > 1){	annotation <- t(sfSapply(featuresAndWeights, .slhFuncWeightedCGHcallExpressionSetAnnotation, CNann, chr, bpstart, bpend, simplify=TRUE)) }
	colnames(annotation) <- c("Chromosome", "Start", "End")
	if (ncpus == 1){ rownames(annotation) <- as.character(sapply(featuresAndWeights, .slhFuncCombineFeatureNames, rownames(CNann), simplify=TRUE)) }
	if (ncpus > 1){	rownames(annotation) <- as.character(sfSapply(featuresAndWeights, .slhFuncCombineFeatureNames, rownames(CNann), simplify=TRUE)) }
	if (length(rownames(annotation)) != length(unique(rownames(annotation)))){
		warning("modified feature names are returned")
		newRowNames <- rownames(annotation)
		slh <- rep(1, length(newRowNames))
		for (i in 1:length(unique(newRowNames))){
			ids <- which(newRowNames == unique(newRowNames)[i])
			slh[ids] <- 1:length(ids)
		}
		newRowNames <- paste(newRowNames, slh, sep="__")		
		rownames(annotation) <- newRowNames
	}
	fInfo <- new("AnnotatedDataFrame", data=data.frame(annotation), varMetadata=data.frame(labelDescription=c("Chromosome", "Start", "End")))
	if (ncpus > 1){ sfStop() }

	rownames(copynumberMat) <- rownames(annotation)
	rownames(segmentedMat) <- rownames(annotation)
	rownames(problossMat) <- rownames(annotation)
	rownames(probnormMat) <- rownames(annotation)
	rownames(probgainMat) <- rownames(annotation)
	rownames(callsMat) <- rownames(annotation)
	if (amplifications){ rownames(probampMat) <- rownames(annotation) }
	if (doubleLoss){ rownames(probdlossMat) <- rownames(annotation) }

	# make cghCall object
	if (verbose){ cat("merge into cghCall object ...", "\n") }
	if (!amplifications & !doubleLoss){ 
		CNdata <- new('cghCall', 
			copynumber = data.matrix(data.frame(copynumberMat, row.names=rownames(annotation))), 
			segmented = data.matrix(data.frame(segmentedMat, row.names=rownames(annotation))), 
			calls = data.matrix(data.frame(callsMat, row.names=rownames(annotation))), 
			probloss = data.matrix(data.frame(problossMat, row.names=rownames(annotation))), 
			probnorm = data.matrix(data.frame(probnormMat, row.names=rownames(annotation))), 
			probgain = data.matrix(data.frame(probgainMat, row.names=rownames(annotation))), 
			featureData = fInfo) 
	}
	if (amplifications & !doubleLoss){ 
		CNdata <- new('cghCall', 
			copynumber = data.matrix(data.frame(copynumberMat, row.names=rownames(annotation))), 
			segmented = data.matrix(data.frame(segmentedMat, row.names=rownames(annotation))), 
			calls = data.matrix(data.frame(callsMat, row.names=rownames(annotation))), 
			probloss = data.matrix(data.frame(problossMat, row.names=rownames(annotation))), 
			probnorm = data.matrix(data.frame(probnormMat, row.names=rownames(annotation))), 
			probgain = data.matrix(data.frame(probgainMat, row.names=rownames(annotation))), 
			probamp = data.matrix(data.frame(probampMat, row.names=rownames(annotation))), 
			featureData = fInfo) 
	}
	if (!amplifications & doubleLoss){ 
		CNdata <- new('cghCall', 
			copynumber = data.matrix(data.frame(copynumberMat, row.names=rownames(annotation))), 
			segmented = data.matrix(data.frame(segmentedMat, row.names=rownames(annotation))), 
			calls = data.matrix(data.frame(callsMat, row.names=rownames(annotation))), 
			probdloss = data.matrix(data.frame(probdlossMat, row.names=rownames(annotation))), 
			probloss = data.matrix(data.frame(problossMat, row.names=rownames(annotation))), 
			probnorm = data.matrix(data.frame(probnormMat, row.names=rownames(annotation))), 
			probgain = data.matrix(data.frame(probgainMat, row.names=rownames(annotation))), 
			featureData = fInfo) 
	}
	if (amplifications & doubleLoss){ 
		CNdata <- new('cghCall', 
			copynumber = data.matrix(data.frame(copynumberMat, row.names=rownames(annotation))), 
			segmented = data.matrix(data.frame(segmentedMat, row.names=rownames(annotation))), 
			calls = data.matrix(data.frame(callsMat, row.names=rownames(annotation))), 
			probdloss = data.matrix(data.frame(probdlossMat, row.names=rownames(annotation))), 
			probloss = data.matrix(data.frame(problossMat, row.names=rownames(annotation))), 
			probnorm = data.matrix(data.frame(probnormMat, row.names=rownames(annotation))), 
			probgain = data.matrix(data.frame(probgainMat, row.names=rownames(annotation))), 
			probamp = data.matrix(data.frame(probampMat, row.names=rownames(annotation))), 
			featureData = fInfo) 
	}
	return(CNdata)
}






cghCall2weightedSubset <- function(CNdata, featuresAndWeights, chr, bpstart, bpend, ncpus=1, verbose=TRUE){
   	############################################################################
	# function shrinks cghCall-object to a subset of the features
	############################################################################

	# check input
	if (verbose){ cat("perform input checks...", "\n") }
	if (as.character(class(CNdata)) != "cghCall"){ stop("CNdata not of class cghCall.") }
	# if (!(as.character(class(featureSubset))=="numeric" | as.character(class(featureSubset))=="integer" )){ stop("featureSubset of wrong class.") }
	# if (!(as.character(class(weights))=="numeric" | as.character(class(weights))=="integer" )){ stop("weights of wrong class.") }
	# if ( !(length(featureSubset) >= 1) ){ stop("featureSubset contains no row numbers.") }
	# if ( !(min(featureSubset) >= 1 | max(featureSubset) <= nrow(fData(CNdata))) ){ stop("featureSubset contains illegal row numbers.") }
	# if ( length(featureSubset) != length(weights) ){ stop("featureSubset and weights of unequal length.") }

	if (!(as.character(class(chr))=="numeric" | as.character(class(chr))=="integer" )){ stop("chr of wrong class.") }
	if (length(chr) != 1){ stop("chr of wrong length.") }
	if ( !(chr >= 1 | chr <= ncol(fData(CNdata))) ){ stop("chr exceeds number of columns in featureData.") }
	if (!(as.character(class(bpstart))=="numeric" | as.character(class(bpstart))=="integer" )){ stop("bpstart of wrong class.") }
	if (length(bpstart) != 1){ stop("bpstart of wrong length.") }
	if ( !(bpstart >= 1 | bpstart <= ncol(fData(CNdata))) ){ stop("bpstart exceeds number of columns in featureData.") }
	if (!(as.character(class(bpend))=="numeric" | as.character(class(bpend))=="integer" )){ stop("bpend of wrong class.") }
	if (length(bpend) != 1){ stop("bpend of wrong length.") }
	if ( !(bpend >= 1 | chr <= ncol(fData(CNdata))) ){ stop("bpend exceeds number of columns in featureData.") }

	# disecting the cghCall-object
	amplifications <- FALSE
	doubleLoss <- FALSE
	copynumberMat <- copynumber(CNdata)
	segmentedMat <- segmented(CNdata)
	problossMat <- probloss(CNdata)
	probnormMat <- probnorm(CNdata)
	probgainMat <- probgain(CNdata)
	if (!is.null(probamp(CNdata))){ probampMat <- probamp(CNdata); amplifications <- TRUE }
	if (!is.null(CGHbase::probdloss(CNdata))){ probdlossMat <- CGHbase::probdloss(CNdata); doubleLoss <- TRUE }
	CNann <- fData(CNdata)
	rm(CNdata)
	gc()

	# weighted subsetting of raw copy number data
	if (verbose){ cat("generate subsetted raw copy number matrix ...", "\n") }
	if (ncpus == 1){ copynumberMat <- t(sapply(featuresAndWeights, .slhFuncWeightedCGHcallExpressionSetData, copynumberMat, simplify=TRUE))	}
	if (ncpus > 1){
		sfInit(parallel=TRUE, cpus=ncpus)
		sfLibrary(package="sigaR", verbose=FALSE, character.only=TRUE)
		copynumberMat <- t(sfSapply(featuresAndWeights, .slhFuncWeightedCGHcallExpressionSetData, copynumberMat, simplify=TRUE))	
	}

	# weighted subsetting of segmented data
	if (verbose){ cat("generate subsetted segmented matrix ...", "\n") }
	if (ncpus == 1){ segmentedMat <- t(sapply(featuresAndWeights, .slhFuncWeightedCGHcallExpressionSetData, segmentedMat, simplify=TRUE)) }
	if (ncpus > 1){ segmentedMat <- t(sfSapply(featuresAndWeights, .slhFuncWeightedCGHcallExpressionSetData, segmentedMat, simplify=TRUE)) }

	# weighted subsetting of call probability data
	if (verbose){ cat("generate subsetted call probability matrices ...", "\n") }
	if (ncpus == 1){ problossMat <- t(sapply(featuresAndWeights, .slhFuncWeightedCGHcallExpressionSetData, problossMat, simplify=TRUE)) }
	if (ncpus > 1){ problossMat <- t(sfSapply(featuresAndWeights, .slhFuncWeightedCGHcallExpressionSetData, problossMat, simplify=TRUE)) }
	if (ncpus == 1){ probnormMat <- t(sapply(featuresAndWeights, .slhFuncWeightedCGHcallExpressionSetData, probnormMat, simplify=TRUE)) }
	if (ncpus > 1){ probnormMat <- t(sfSapply(featuresAndWeights, .slhFuncWeightedCGHcallExpressionSetData, probnormMat, simplify=TRUE)) }
	if (ncpus == 1){ probgainMat <- t(sapply(featuresAndWeights, .slhFuncWeightedCGHcallExpressionSetData, probgainMat, simplify=TRUE)) }
	if (ncpus > 1){ probgainMat <- t(sfSapply(featuresAndWeights, .slhFuncWeightedCGHcallExpressionSetData, probgainMat, simplify=TRUE)) }
	if (amplifications){ 
		if (ncpus == 1){ probampMat <- t(sapply(featuresAndWeights, .slhFuncWeightedCGHcallExpressionSetData, probampMat, simplify=TRUE)) }
		if (ncpus > 1){ probampMat <- t(sfSapply(featuresAndWeights, .slhFuncWeightedCGHcallExpressionSetData, probampMat, simplify=TRUE)) }
	}
	if (doubleLoss){ 
		if (ncpus == 1){ probdlossMat <- t(sapply(featuresAndWeights, .slhFuncWeightedCGHcallExpressionSetData, probdlossMat, simplify=TRUE)) }
		if (ncpus > 1){ probdlossMat <- t(sfSapply(featuresAndWeights, .slhFuncWeightedCGHcallExpressionSetData, probdlossMat, simplify=TRUE)) }
	}

	# subsetting of call data
	if (verbose){ cat("generate subsetted call matrices ...", "\n")	}
	callsMat <- numeric()
	for (i in 1:ncol(problossMat)){
		if (!amplifications & !doubleLoss){
			if (ncpus == 1){ callsMat <- cbind(callsMat, apply(cbind(problossMat[,i], probnormMat[,i], probgainMat[,i]), 1, which.max) - 2) }
			if (ncpus > 1){ callsMat <- cbind(callsMat, sfApply(cbind(problossMat[,i], probnormMat[,i], probgainMat[,i]), 1, which.max) - 2) }
		} 
		if (amplifications & !doubleLoss){
			if (ncpus == 1){ callsMat <- cbind(callsMat, apply(cbind(problossMat[,i], probnormMat[,i], probgainMat[,i], probampMat[,i]), 1, which.max) - 2) }
			if (ncpus > 1){ callsMat <- cbind(callsMat, sfApply(cbind(problossMat[,i], probnormMat[,i], probgainMat[,i], probampMat[,i]), 1, which.max) - 2) }
		}
		if (!amplifications & doubleLoss){
			if (ncpus == 1){ callsMat <- cbind(callsMat, apply(cbind(probdlossMat[,i], problossMat[,i], probnormMat[,i], probgainMat[,i]), 1, which.max) - 3) }
			if (ncpus > 1){ callsMat <- cbind(callsMat, sfApply(cbind(probdlossMat[,i], problossMat[,i], probnormMat[,i], probgainMat[,i]), 1, which.max) - 3) }
		}
		if (amplifications & doubleLoss){
			if (ncpus == 1){ callsMat <- cbind(callsMat, apply(cbind(probdlossMat[,i], problossMat[,i], probnormMat[,i], probgainMat[,i], probampMat[,i]), 1, which.max) - 3) }
			if (ncpus > 1){ callsMat <- cbind(callsMat, sfApply(cbind(probdlossMat[,i], problossMat[,i], probnormMat[,i], probgainMat[,i], probampMat[,i]), 1, which.max) - 3) }
		}
	}	

	# weighted subsetting of annotation data
	if (verbose){ cat("reorganize annotation ...", "\n") }
	if (ncpus == 1){ annotation <- t(sapply(featuresAndWeights, .slhFuncWeightedCGHcallExpressionSetAnnotation, CNann, chr, bpstart, bpend, simplify=TRUE)) }
	if (ncpus > 1){	annotation <- t(sfSapply(featuresAndWeights, .slhFuncWeightedCGHcallExpressionSetAnnotation, CNann, chr, bpstart, bpend, simplify=TRUE)) }
	colnames(annotation) <- c("Chromosome", "Start", "End")
	if (ncpus == 1){ rownames(annotation) <- as.character(sapply(featuresAndWeights, .slhFuncCombineFeatureNames, rownames(CNann), simplify=TRUE)) }
	if (ncpus > 1){	rownames(annotation) <- as.character(sfSapply(featuresAndWeights, .slhFuncCombineFeatureNames, rownames(CNann), simplify=TRUE)) }
	if (length(rownames(annotation)) != length(unique(rownames(annotation)))){
		warning("modified feature names are returned")
		newRowNames <- rownames(annotation)
		slh <- rep(1, length(newRowNames))
		for (i in 1:length(unique(newRowNames))){
			ids <- which(newRowNames == unique(newRowNames)[i])
			slh[ids] <- 1:length(ids)
		}
		newRowNames <- paste(newRowNames, slh, sep="__")		
		rownames(annotation) <- newRowNames
	}
	fInfo <- new("AnnotatedDataFrame", data=data.frame(annotation), varMetadata=data.frame(labelDescription=c("Chromosome", "Start", "End")))
	if (ncpus > 1){ sfStop() }

	rownames(copynumberMat) <- rownames(annotation)
	rownames(segmentedMat) <- rownames(annotation)
	rownames(problossMat) <- rownames(annotation)
	rownames(probnormMat) <- rownames(annotation)
	rownames(probgainMat) <- rownames(annotation)
	rownames(callsMat) <- rownames(annotation)
	if (amplifications){ rownames(probampMat) <- rownames(annotation) }
	if (doubleLoss){ rownames(probdlossMat) <- rownames(annotation) }

	# make cghCall object
	if (verbose){ cat("merge into cghCall object ...", "\n") }
	if (!amplifications & !doubleLoss){ 
		CNdata <- new('cghCall', 
			copynumber = data.matrix(data.frame(copynumberMat, row.names=rownames(annotation))), 
			segmented = data.matrix(data.frame(segmentedMat, row.names=rownames(annotation))), 
			calls = data.matrix(data.frame(callsMat, row.names=rownames(annotation))), 
			probloss = data.matrix(data.frame(problossMat, row.names=rownames(annotation))), 
			probnorm = data.matrix(data.frame(probnormMat, row.names=rownames(annotation))), 
			probgain = data.matrix(data.frame(probgainMat, row.names=rownames(annotation))), 
			featureData = fInfo) 
	}
	if (amplifications & !doubleLoss){ 
		CNdata <- new('cghCall', 
			copynumber = data.matrix(data.frame(copynumberMat, row.names=rownames(annotation))), 
			segmented = data.matrix(data.frame(segmentedMat, row.names=rownames(annotation))), 
			calls = data.matrix(data.frame(callsMat, row.names=rownames(annotation))), 
			probloss = data.matrix(data.frame(problossMat, row.names=rownames(annotation))), 
			probnorm = data.matrix(data.frame(probnormMat, row.names=rownames(annotation))), 
			probgain = data.matrix(data.frame(probgainMat, row.names=rownames(annotation))), 
			probamp = data.matrix(data.frame(probampMat, row.names=rownames(annotation))), 
			featureData = fInfo) 
	}
	if (!amplifications & doubleLoss){ 
		CNdata <- new('cghCall', 
			copynumber = data.matrix(data.frame(copynumberMat, row.names=rownames(annotation))), 
			segmented = data.matrix(data.frame(segmentedMat, row.names=rownames(annotation))), 
			calls = data.matrix(data.frame(callsMat, row.names=rownames(annotation))), 
			probdloss = data.matrix(data.frame(probdlossMat, row.names=rownames(annotation))), 
			probloss = data.matrix(data.frame(problossMat, row.names=rownames(annotation))), 
			probnorm = data.matrix(data.frame(probnormMat, row.names=rownames(annotation))), 
			probgain = data.matrix(data.frame(probgainMat, row.names=rownames(annotation))), 
			featureData = fInfo) 
	}
	if (amplifications & doubleLoss){ 
		CNdata <- new('cghCall', 
			copynumber = data.matrix(data.frame(copynumberMat, row.names=rownames(annotation))), 
			segmented = data.matrix(data.frame(segmentedMat, row.names=rownames(annotation))), 
			calls = data.matrix(data.frame(callsMat, row.names=rownames(annotation))), 
			probdloss = data.matrix(data.frame(probdlossMat, row.names=rownames(annotation))), 
			probloss = data.matrix(data.frame(problossMat, row.names=rownames(annotation))), 
			probnorm = data.matrix(data.frame(probnormMat, row.names=rownames(annotation))), 
			probgain = data.matrix(data.frame(probgainMat, row.names=rownames(annotation))), 
			probamp = data.matrix(data.frame(probampMat, row.names=rownames(annotation))), 
			featureData = fInfo) 
	}
	return(CNdata)
}




cghCall2maximumSubset <- function(CNdata, featuresAndWeights, chr, bpstart, bpend, ncpus=1, verbose=TRUE){
   	############################################################################
	# function shrinks cghCall-object to a subset of the features
	############################################################################

	# check input
	if (verbose){ cat("perform input checks...", "\n") }
	if (as.character(class(CNdata)) != "cghCall"){ stop("CNdata not of class cghCall.") }
	# if (!(as.character(class(featureSubset))=="numeric" | as.character(class(featureSubset))=="integer" )){ stop("featureSubset of wrong class.") }
	# if (!(as.character(class(weights))=="numeric" | as.character(class(weights))=="integer" )){ stop("weights of wrong class.") }
	# if ( !(length(featureSubset) >= 1) ){ stop("featureSubset contains no row numbers.") }
	# if ( !(min(featureSubset) >= 1 | max(featureSubset) <= nrow(fData(CNdata))) ){ stop("featureSubset contains illegal row numbers.") }
	# if ( length(featureSubset) != length(weights) ){ stop("featureSubset and weights of unequal length.") }

	if (!(as.character(class(chr))=="numeric" | as.character(class(chr))=="integer" )){ stop("chr of wrong class.") }
	if (length(chr) != 1){ stop("chr of wrong length.") }
	if ( !(chr >= 1 | chr <= ncol(fData(CNdata))) ){ stop("chr exceeds number of columns in featureData.") }
	if (!(as.character(class(bpstart))=="numeric" | as.character(class(bpstart))=="integer" )){ stop("bpstart of wrong class.") }
	if (length(bpstart) != 1){ stop("bpstart of wrong length.") }
	if ( !(bpstart >= 1 | bpstart <= ncol(fData(CNdata))) ){ stop("bpstart exceeds number of columns in featureData.") }
	if (!(as.character(class(bpend))=="numeric" | as.character(class(bpend))=="integer" )){ stop("bpend of wrong class.") }
	if (length(bpend) != 1){ stop("bpend of wrong length.") }
	if ( !(bpend >= 1 | chr <= ncol(fData(CNdata))) ){ stop("bpend exceeds number of columns in featureData.") }

	# disecting the cghCall-object
	amplifications <- FALSE
	copynumberMat <- copynumber(CNdata)
	segmentedMat <- segmented(CNdata)
	problossMat <- probloss(CNdata)
	probnormMat <- probnorm(CNdata)
	probgainMat <- probgain(CNdata)
	if (!is.null(probamp(CNdata))){ probampMat <- probamp(CNdata); amplifications <- TRUE }
	CNann <- fData(CNdata)
	rm(CNdata)
	gc()

	# identify maximum deviating features
	if (verbose){ cat("generate subsetted segmented matrix ...", "\n") }
	if (ncpus == 1){ idMat <- t(sapply(featuresAndWeights, .slhFuncMaxCGHcallData, segmentedMat, simplify=TRUE)) }
	if (ncpus > 1){ 
		sfInit(parallel=TRUE, cpus=ncpus)
		sfLibrary(package="sigaR", verbose=FALSE, character.only=TRUE)
		idMat <- t(sfSapply(featuresAndWeights, .slhFuncMaxCGHcallData, segmentedMat, simplify=TRUE)) 
	}

	# subsetting of segmented data
	if (ncpus == 1){ copynumberMat <- t(sapply(1:nrow(idMat), .slhFunctMaxFeatures, featuresAndWeights, idMat, copynumberMat, simplify=TRUE)) }
	if (ncpus > 1){ copynumberMat <- t(sfSapply(1:nrow(idMat), .slhFunctMaxFeatures, featuresAndWeights, idMat, copynumberMat, simplify=TRUE)) }
	if (ncpus == 1){ segmentedMat <- t(sapply(1:nrow(idMat), .slhFunctMaxFeatures, featuresAndWeights, idMat, segmentedMat, simplify=TRUE)) }
	if (ncpus > 1){ segmentedMat <- t(sfSapply(1:nrow(idMat), .slhFunctMaxFeatures, featuresAndWeights, idMat, segmentedMat, simplify=TRUE)) }
	if (ncpus == 1){ problossMat <- t(sapply(1:nrow(idMat), .slhFunctMaxFeatures, featuresAndWeights, idMat, problossMat, simplify=TRUE)) }
	if (ncpus > 1){ problossMat <- t(sfSapply(1:nrow(idMat), .slhFunctMaxFeatures, featuresAndWeights, idMat, problossMat, simplify=TRUE)) }
	if (ncpus == 1){ probnormMat <- t(sapply(1:nrow(idMat), .slhFunctMaxFeatures, featuresAndWeights, idMat, probnormMat, simplify=TRUE)) }
	if (ncpus > 1){ probnormMat <- t(sfSapply(1:nrow(idMat), .slhFunctMaxFeatures, featuresAndWeights, idMat, probnormMat, simplify=TRUE)) }
	if (ncpus == 1){ probgainMat <- t(sapply(1:nrow(idMat), .slhFunctMaxFeatures, featuresAndWeights, idMat, probgainMat, simplify=TRUE)) }
	if (ncpus > 1){ probgainMat <- t(sfSapply(1:nrow(idMat), .slhFunctMaxFeatures, featuresAndWeights, idMat, probgainMat, simplify=TRUE)) }
	if (amplifications){ 
		if (ncpus == 1){ probampMat <- t(sapply(1:nrow(idMat), .slhFunctMaxFeatures, featuresAndWeights, idMat, probampMat, simplify=TRUE)) }
		if (ncpus > 1){ probampMat <- t(sfSapply(1:nrow(idMat), .slhFunctMaxFeatures, featuresAndWeights, idMat, probampMat, simplify=TRUE)) }
	}

	# subsetting of call data
	if (verbose){ cat("generate subsetted call matrices ...", "\n")	}
	callsMat <- numeric()
	for (i in 1:ncol(problossMat)){
		if (!amplifications){
			if (ncpus == 1){ callsMat <- cbind(callsMat, apply(cbind(problossMat[,i], probnormMat[,i], probgainMat[,i]), 1, which.max) - 2) }
			if (ncpus > 1){ callsMat <- cbind(callsMat, sfApply(cbind(problossMat[,i], probnormMat[,i], probgainMat[,i]), 1, which.max) - 2) }
		} else {
			if (ncpus == 1){ callsMat <- cbind(callsMat, apply(cbind(problossMat[,i], probnormMat[,i], probgainMat[,i], probampMat[,i]), 1, which.max) - 2) }
			if (ncpus > 1){ callsMat <- cbind(callsMat, sfApply(cbind(problossMat[,i], probnormMat[,i], probgainMat[,i], probampMat[,i]), 1, which.max) - 2) }
		}
	}	

	# weighted subsetting of annotation data
	if (verbose){ cat("reorganize annotation ...", "\n") }
	if (ncpus == 1){ annotation <- t(sapply(featuresAndWeights, .slhFuncWeightedCGHcallExpressionSetAnnotation, CNann, chr, bpstart, bpend, simplify=TRUE)) }
	if (ncpus > 1){	annotation <- t(sfSapply(featuresAndWeights, .slhFuncWeightedCGHcallExpressionSetAnnotation, CNann, chr, bpstart, bpend, simplify=TRUE)) }
	colnames(annotation) <- c("Chromosome", "Start", "End")
	if (ncpus == 1){ rownames(annotation) <- as.character(sapply(featuresAndWeights, .slhFuncCombineFeatureNames, rownames(CNann), simplify=TRUE)) }
	if (ncpus > 1){	rownames(annotation) <- as.character(sfSapply(featuresAndWeights, .slhFuncCombineFeatureNames, rownames(CNann), simplify=TRUE)) }
	if (length(rownames(annotation)) != length(unique(rownames(annotation)))){
		warning("modified feature names are returned")
		newRowNames <- rownames(annotation)
		slh <- rep(1, length(newRowNames))
		for (i in 1:length(unique(newRowNames))){
			ids <- which(newRowNames == unique(newRowNames)[i])
			slh[ids] <- 1:length(ids)
		}
		newRowNames <- paste(newRowNames, slh, sep="__")		
		rownames(annotation) <- newRowNames
	}
	fInfo <- new("AnnotatedDataFrame", data=data.frame(annotation), varMetadata=data.frame(labelDescription=c("Chromosome", "Start", "End")))
	if (ncpus > 1){ sfStop() }

	rownames(copynumberMat) <- rownames(annotation)
	rownames(segmentedMat) <- rownames(annotation)
	rownames(problossMat) <- rownames(annotation)
	rownames(probnormMat) <- rownames(annotation)
	rownames(probgainMat) <- rownames(annotation)
	rownames(callsMat) <- rownames(annotation)
	if (amplifications){ rownames(probampMat) <- rownames(annotation) }

	# make cghCall object
	if (verbose){ cat("merge into cghCall object ...", "\n") }
	if (!amplifications){ 
		CNdata <- new('cghCall', 
			copynumber = data.matrix(data.frame(copynumberMat, row.names=rownames(annotation))), 
			segmented = data.matrix(data.frame(segmentedMat, row.names=rownames(annotation))), 
			calls = data.matrix(data.frame(callsMat, row.names=rownames(annotation))), 
			probloss = data.matrix(data.frame(problossMat, row.names=rownames(annotation))), 
			probnorm = data.matrix(data.frame(probnormMat, row.names=rownames(annotation))), 
			probgain = data.matrix(data.frame(probgainMat, row.names=rownames(annotation))), 
			featureData = fInfo) 
	} else { 
		CNdata <- new('cghCall', 
			copynumber = data.matrix(data.frame(copynumberMat, row.names=rownames(annotation))), 
			segmented = data.matrix(data.frame(segmentedMat, row.names=rownames(annotation))), 
			calls = data.matrix(data.frame(callsMat, row.names=rownames(annotation))), 
			probloss = data.matrix(data.frame(problossMat, row.names=rownames(annotation))), 
			probnorm = data.matrix(data.frame(probnormMat, row.names=rownames(annotation))), 
			probgain = data.matrix(data.frame(probgainMat, row.names=rownames(annotation))), 
			probamp = data.matrix(data.frame(probampMat, row.names=rownames(annotation))), 
			featureData = fInfo) 
	}
	return(CNdata)
}


cghCall2subset <- function(CNdata, featureSubset, verbose=TRUE){
   	############################################################################
	# function shrinks cghCall-object to a subset of the features
	############################################################################

	# check input
	if (verbose){ cat("perform input checks...", "\n") }
	if (as.character(class(CNdata)) != "cghCall"){ stop("CNdata not of class cghCall.") }
	if (!(as.character(class(featureSubset))=="numeric" | as.character(class(featureSubset))=="integer" )){ stop("featureSubset of wrong class.") }
	if ( !(length(featureSubset) >= 1) ){ stop("featureSubset contains no row numbers.") }
	if ( !(min(featureSubset) >= 1 | max(featureSubset) <= nrow(fData(CNdata))) ){ stop("featureSubset contains illegal row numbers.") }

	# sort out new annotation info
	fd <- fData(CNdata)[featureSubset, , drop=FALSE]
	newRowNames <- rownames(copynumber(CNdata))[featureSubset]
	if (length(newRowNames) != length(unique(newRowNames))){
		warning("modified feature names are returned")
		slh <- rep(1, length(newRowNames))
		for (i in 1:length(unique(newRowNames))){
			ids <- which(newRowNames == unique(newRowNames)[i])
			slh[ids] <- 1:length(ids)
		}
		newRowNames <- paste(newRowNames, slh, sep="__")		
	}
	rownames(fd) <- newRowNames
	# sNames <- sampleNames(CNdata)
	metaData <- data.frame(labelDescription=colnames(fd)) 
	fd <- new("AnnotatedDataFrame", data=data.frame(fd), varMetadata=metaData)

	if (is.null(CGHbase::probdloss(CNdata)[featureSubset, , drop=FALSE])){ 
		if (is.null(probamp(CNdata)[featureSubset, , drop=FALSE])){ 
			CNdata <- new('cghCall', 
				copynumber = data.matrix(data.frame(copynumber(CNdata)[featureSubset, , drop=FALSE], row.names=newRowNames)), 
				segmented = data.matrix(data.frame(segmented(CNdata)[featureSubset, , drop=FALSE], row.names=newRowNames)), 
				calls = data.matrix(data.frame(calls(CNdata)[featureSubset, , drop=FALSE], row.names=newRowNames)), 
				probloss = data.matrix(data.frame(probloss(CNdata)[featureSubset, , drop=FALSE], row.names=newRowNames)), 
				probnorm = data.matrix(data.frame(probnorm(CNdata)[featureSubset, , drop=FALSE], row.names=newRowNames)), 
				probgain = data.matrix(data.frame(probgain(CNdata)[featureSubset, , drop=FALSE], row.names=newRowNames)), 
				featureData = fd) 
		} else { 
			CNdata <- new('cghCall', 
				copynumber = data.matrix(data.frame(copynumber(CNdata)[featureSubset, , drop=FALSE], row.names=newRowNames)), 
				segmented = data.matrix(data.frame(segmented(CNdata)[featureSubset, , drop=FALSE], row.names=newRowNames)), 
				calls = data.matrix(data.frame(calls(CNdata)[featureSubset, , drop=FALSE], row.names=newRowNames)), 
				probloss = data.matrix(data.frame(probloss(CNdata)[featureSubset, , drop=FALSE], row.names=newRowNames)), 
				probnorm = data.matrix(data.frame(probnorm(CNdata)[featureSubset, , drop=FALSE], row.names=newRowNames)), 
				probgain = data.matrix(data.frame(probgain(CNdata)[featureSubset, , drop=FALSE], row.names=newRowNames)), 
				probamp = data.matrix(data.frame(probamp(CNdata)[featureSubset, , drop=FALSE], row.names=newRowNames)), 
				featureData = fd) 
		}
	} else {
		if (is.null(probamp(CNdata)[featureSubset, , drop=FALSE])){ 
			CNdata <- new('cghCall', 
				copynumber = data.matrix(data.frame(copynumber(CNdata)[featureSubset, , drop=FALSE], row.names=newRowNames)), 
				segmented = data.matrix(data.frame(segmented(CNdata)[featureSubset, , drop=FALSE], row.names=newRowNames)), 
				calls = data.matrix(data.frame(calls(CNdata)[featureSubset, , drop=FALSE], row.names=newRowNames)), 
				probdloss = data.matrix(data.frame(CGHbase::probdloss(CNdata)[featureSubset, , drop=FALSE], row.names=newRowNames)),  
				probloss = data.matrix(data.frame(probloss(CNdata)[featureSubset, , drop=FALSE], row.names=newRowNames)),
				probnorm = data.matrix(data.frame(probnorm(CNdata)[featureSubset, , drop=FALSE], row.names=newRowNames)), 
				probgain = data.matrix(data.frame(probgain(CNdata)[featureSubset, , drop=FALSE], row.names=newRowNames)), 
				featureData = fd) 
		} else { 
			CNdata <- new('cghCall', 
				copynumber = data.matrix(data.frame(copynumber(CNdata)[featureSubset, , drop=FALSE], row.names=newRowNames)), 
				segmented = data.matrix(data.frame(segmented(CNdata)[featureSubset, , drop=FALSE], row.names=newRowNames)), 
				calls = data.matrix(data.frame(calls(CNdata)[featureSubset, , drop=FALSE], row.names=newRowNames)), 
				probdloss = data.matrix(data.frame(CGHbase::probdloss(CNdata)[featureSubset, , drop=FALSE], row.names=newRowNames)),  
				probloss = data.matrix(data.frame(probloss(CNdata)[featureSubset, , drop=FALSE], row.names=newRowNames)), 
				probnorm = data.matrix(data.frame(probnorm(CNdata)[featureSubset, , drop=FALSE], row.names=newRowNames)), 
				probgain = data.matrix(data.frame(probgain(CNdata)[featureSubset, , drop=FALSE], row.names=newRowNames)), 
				probamp = data.matrix(data.frame(probamp(CNdata)[featureSubset, , drop=FALSE], row.names=newRowNames)), 
				featureData = fd) 
		}		
	}
	return(CNdata)
}





merge2cghCalls <- function(CNdata1, CNdata2, verbose=TRUE){
   	############################################################################
	# function merges cghCall-objects into one cghCall-object
	############################################################################

	# check input
	if (verbose){ cat("perform input checks...", "\n") }
	if (as.character(class(CNdata1)) != "cghCall"){ stop("CNdata1 not of class cghCall.") }
	if (as.character(class(CNdata2)) != "cghCall"){ stop("CNdata2 not of class cghCall.") }
	if ( dim(CNdata1)[2] != dim(CNdata2)[2] ){ stop("CNdata1 and CNdata2 have an unequal number of samples.") }
	if ( dim(fData(CNdata1))[2] != dim(fData(CNdata2))[2] ){ stop("CNdata1 and CNdata2 have an unequal annotation columns.") }
	amp1 <- is.null(probamp(CNdata1))
	amp2 <- is.null(probamp(CNdata2))
	if (!(amp1 == amp2)){ stop("One of the cghCall-objects has amplification probabilities whereas the other has not.") }

	# prepare new annotation
	annotation <- rbind(fData(CNdata1), fData(CNdata2))
	newRowNames <- c(rownames(fData(CNdata1)), rownames(fData(CNdata2)))
	if (length(newRowNames) != length(unique(newRowNames))){
		warning("modified feature names are returned")
		slh <- rep(1, length(newRowNames))
		for (i in 1:length(unique(newRowNames))){
			ids <- which(newRowNames == unique(newRowNames)[i])
			slh[ids] <- 1:length(ids)
		}
		newRowNames <- paste(newRowNames, slh, sep="__")		
	}
	rownames(annotation) <- newRowNames
	metaData <- data.frame(labelDescription=colnames(annotation)) 
	annotation <- new("AnnotatedDataFrame", data=data.frame(annotation), varMetadata=metaData)

	if (is.null(CGHbase::probdloss(CNdata1))){ 
		if (is.null(probamp(CNdata1))){ 
			CNdata <- new('cghCall', 
				copynumber = data.matrix(data.frame(rbind(copynumber(CNdata1), copynumber(CNdata2)), row.names=newRowNames)), 
				segmented = data.matrix(data.frame(rbind(segmented(CNdata1), segmented(CNdata2)), row.names=newRowNames)), 
				calls = data.matrix(data.frame(rbind(calls(CNdata1), calls(CNdata2)), row.names=newRowNames)), 
				probloss = data.matrix(data.frame(rbind(probloss(CNdata1), probloss(CNdata2)), row.names=newRowNames)), 
				probnorm = data.matrix(data.frame(rbind(probnorm(CNdata1), probnorm(CNdata2)), row.names=newRowNames)), 
				probgain = data.matrix(data.frame(rbind(probgain(CNdata1), probgain(CNdata2)), row.names=newRowNames)), 
				featureData = annotation) 
		} else { 
			CNdata <- new('cghCall', 
				copynumber = data.matrix(data.frame(rbind(copynumber(CNdata1), copynumber(CNdata2)), row.names=newRowNames)), 
				segmented = data.matrix(data.frame(rbind(segmented(CNdata1), segmented(CNdata2)), row.names=newRowNames)), 
				calls = data.matrix(data.frame(rbind(calls(CNdata1), calls(CNdata2)), row.names=newRowNames)), 
				probloss = data.matrix(data.frame(rbind(probloss(CNdata1), probloss(CNdata2)), row.names=newRowNames)), 
				probnorm = data.matrix(data.frame(rbind(probnorm(CNdata1), probnorm(CNdata2)), row.names=newRowNames)), 
				probgain = data.matrix(data.frame(rbind(probgain(CNdata1), probgain(CNdata2)), row.names=newRowNames)), 
				probamp = data.matrix(data.frame(rbind(probamp(CNdata1), probamp(CNdata2)), row.names=newRowNames)), 
				featureData = annotation) 
		}
	} else {
		if (is.null(probamp(CNdata1))){ 
			CNdata <- new('cghCall', 
				copynumber = data.matrix(data.frame(rbind(copynumber(CNdata1), copynumber(CNdata2)), row.names=newRowNames)), 
				segmented = data.matrix(data.frame(rbind(segmented(CNdata1), segmented(CNdata2)), row.names=newRowNames)), 
				calls = data.matrix(data.frame(rbind(calls(CNdata1), calls(CNdata2)), row.names=newRowNames)), 
				probdloss = data.matrix(data.frame(rbind(CGHbase::probdloss(CNdata1), probloss(CNdata2)), row.names=newRowNames)), 
				probloss = data.matrix(data.frame(rbind(probloss(CNdata1), probloss(CNdata2)), row.names=newRowNames)), 
				probnorm = data.matrix(data.frame(rbind(probnorm(CNdata1), probnorm(CNdata2)), row.names=newRowNames)), 
				probgain = data.matrix(data.frame(rbind(probgain(CNdata1), probgain(CNdata2)), row.names=newRowNames)), 
				featureData = annotation) 
		} else { 
			CNdata <- new('cghCall', 
				copynumber = data.matrix(data.frame(rbind(copynumber(CNdata1), copynumber(CNdata2)), row.names=newRowNames)), 
				segmented = data.matrix(data.frame(rbind(segmented(CNdata1), segmented(CNdata2)), row.names=newRowNames)), 
				calls = data.matrix(data.frame(rbind(calls(CNdata1), calls(CNdata2)), row.names=newRowNames)), 
				probdloss = data.matrix(data.frame(rbind(CGHbase::probdloss(CNdata1), probloss(CNdata2)), row.names=newRowNames)), 
				probloss = data.matrix(data.frame(rbind(probloss(CNdata1), probloss(CNdata2)), row.names=newRowNames)), 
				probnorm = data.matrix(data.frame(rbind(probnorm(CNdata1), probnorm(CNdata2)), row.names=newRowNames)), 
				probgain = data.matrix(data.frame(rbind(probgain(CNdata1), probgain(CNdata2)), row.names=newRowNames)), 
				probamp = data.matrix(data.frame(rbind(probamp(CNdata1), probamp(CNdata2)), row.names=newRowNames)), 
				featureData = annotation) 
		}
	}

	return(CNdata)
}




cghCall2order <- function(CNdata, chr, bpstart, verbose=TRUE){
   	###########################################################################################
	# function that orders cghCall-object genomically
	###########################################################################################

	# check input
	if (verbose){ cat("perform input checks...", "\n") }
	if (as.character(class(CNdata)) != "cghCall"){ stop("CNdata not of class cghCall.") }
	if (!(as.character(class(chr))=="numeric" | as.character(class(chr))=="integer" )){ stop("chr of wrong class.") }
	if (length(chr) != 1){ stop("chr of wrong length.") }
	if ( !(chr >= 1 | chr <= ncol(fData(CNdata))) ){ stop("chr exceeds number of columns in featureData.") }
	if (!(as.character(class(bpstart))=="numeric" | as.character(class(bpstart))=="integer" )){ stop("bpstart of wrong class.") }
	if (length(bpstart) != 1){ stop("bpstart of wrong length.") }
	if ( !(bpstart >= 1 | bpstart <= ncol(fData(CNdata))) ){ stop("GEbpstart exceeds number of columns in featureData.") }

	# extract annotation data for ordering
	CNann <- fData(CNdata)[, c(chr, bpstart), drop=FALSE]

	# in case columns are of wrong class ....
	for (i in 1:2){
		if (is.factor(CNann[,i])){ CNann[,i] <- as.numeric(levels(CNann[,i]))[CNann[,i]] }
		if (is.character(CNann[,i])){ CNann[,i] <- as.numeric(CNann[,i]) }
	}

	# order ExpressionSet-object genomically	
	fData(CNdata) <- fData(CNdata)[order(CNann[,1], CNann[,2]),]
	copynumber(CNdata) <- copynumber(CNdata)[order(CNann[,1], CNann[,2]),]
	segmented(CNdata) <- segmented(CNdata)[order(CNann[,1], CNann[,2]),]
	calls(CNdata) <- calls(CNdata)[order(CNann[,1], CNann[,2]),]
	probloss(CNdata) <- probloss(CNdata)[order(CNann[,1], CNann[,2]),]
	probnorm(CNdata) <- probnorm(CNdata)[order(CNann[,1], CNann[,2]),]
	probgain(CNdata) <- probgain(CNdata)[order(CNann[,1], CNann[,2]),]
	if (is.null(probamp(CNdata))){ } else { probamp(CNdata) <- probamp(CNdata)[order(CNann[,1], CNann[,2]),] }
	if (is.null(CGHbase::probdloss(CNdata))){ } else { CGHbase::probdloss(CNdata) <- CGHbase::probdloss(CNdata)[order(CNann[,1], CNann[,2]),] }
	return(CNdata)
}



cghSeg2order <- function(CNdata, chr, bpstart, verbose=TRUE){
   	###########################################################################################
	# function that orders cghCall-object genomically
	###########################################################################################

	# check input
	if (verbose){ cat("perform input checks...", "\n") }
	if (as.character(class(CNdata)) != "cghSeg"){ stop("CNdata not of class cghSeg.") }
	if (!(as.character(class(chr))=="numeric" | as.character(class(chr))=="integer" )){ stop("chr of wrong class.") }
	if (length(chr) != 1){ stop("chr of wrong length.") }
	if ( !(chr >= 1 | chr <= ncol(fData(CNdata))) ){ stop("chr exceeds number of columns in featureData.") }
	if (!(as.character(class(bpstart))=="numeric" | as.character(class(bpstart))=="integer" )){ stop("bpstart of wrong class.") }
	if (length(bpstart) != 1){ stop("bpstart of wrong length.") }
	if ( !(bpstart >= 1 | bpstart <= ncol(fData(CNdata))) ){ stop("bpstart exceeds number of columns in featureData.") }

	# extract annotation data for ordering
	CNann <- fData(CNdata)[, c(chr, bpstart), drop=FALSE]

	# in case columns are of wrong class ....
	for (i in 1:2){
		if (is.factor(CNann[,i])){ CNann[,i] <- as.numeric(levels(CNann[,i]))[CNann[,i]] }
		if (is.character(CNann[,i])){ CNann[,i] <- as.numeric(CNann[,i]) }
	}

	# order ExpressionSet-object genomically	
	fData(CNdata) <- fData(CNdata)[order(CNann[,1], CNann[,2]),]
	copynumber(CNdata) <- copynumber(CNdata)[order(CNann[,1], CNann[,2]),]
	segmented(CNdata) <- segmented(CNdata)[order(CNann[,1], CNann[,2]),]
	return(CNdata)
}



cghCall2cghSeg <- function (CNdata, verbose = TRUE){
    if (verbose) {
        cat("perform input checks...", "\n")
    }
    if (as.character(class(CNdata)) != "cghCall") {
        stop("CNdata not of class cghCall.")
    }
   

    fd <- fData(CNdata)
    newRowNames <- rownames(copynumber(CNdata))
    if (length(newRowNames) != length(unique(newRowNames))) {
        warning("modified feature names are returned")
        slh <- rep(1, length(newRowNames))
        for (i in 1:length(unique(newRowNames))) {
            ids <- which(newRowNames == unique(newRowNames)[i])
            slh[ids] <- 1:length(ids)
        }
        newRowNames <- paste(newRowNames, slh, sep = "__")
    }
    rownames(fd) <- newRowNames
    metaData <- data.frame(labelDescription = colnames(fd))
    fd <- new("AnnotatedDataFrame", data = data.frame(fd), varMetadata = metaData)
    CNdata <- new("cghSeg", copynumber = data.matrix(data.frame(copynumber(CNdata), row.names = newRowNames)), 
		segmented = data.matrix(data.frame(segmented(CNdata), row.names = newRowNames)), featureData = fd)

    return(CNdata)
}



cghSeg2subset <- function (CNdata, featureSubset, verbose = TRUE){
    if (verbose) {
        cat("perform input checks...", "\n")
    }
    if (as.character(class(CNdata)) != "cghSeg") {
        stop("CNdata not of class cghSeg.")
    }
    if (!(as.character(class(featureSubset)) == "numeric" | as.character(class(featureSubset)) == 
        "integer")) {
        stop("featureSubset of wrong class.")
    }
    if (!(length(featureSubset) >= 1)) {
        stop("featureSubset contains no row numbers.")
    }
    if (!(min(featureSubset) >= 1 | max(featureSubset) <= nrow(fData(CNdata)))) {
        stop("featureSubset contains illegal row numbers.")
    }
    fd <- fData(CNdata)[featureSubset, , drop = FALSE]
    newRowNames <- rownames(copynumber(CNdata))[featureSubset]
    if (length(newRowNames) != length(unique(newRowNames))) {
        warning("modified feature names are returned")
        slh <- rep(1, length(newRowNames))
        for (i in 1:length(unique(newRowNames))) {
            ids <- which(newRowNames == unique(newRowNames)[i])
            slh[ids] <- 1:length(ids)
        }
        newRowNames <- paste(newRowNames, slh, sep = "__")
    }
    rownames(fd) <- newRowNames
    metaData <- data.frame(labelDescription = colnames(fd))
    fd <- new("AnnotatedDataFrame", data = data.frame(fd), varMetadata = metaData)
    CNdata <- new("cghSeg", copynumber = data.matrix(data.frame(copynumber(CNdata)[featureSubset, 
            , drop = FALSE], row.names = newRowNames)), segmented = data.matrix(data.frame(segmented(CNdata)[featureSubset, 
            , drop = FALSE], row.names = newRowNames)), featureData = fd)

    return(CNdata)
}




cghSeg2weightedSubset <- function (CNdata, featuresAndWeights, chr, bpstart, bpend, ncpus = 1, verbose = TRUE){
    if (verbose) {
        cat("perform input checks...", "\n")
    }
    if (as.character(class(CNdata)) != "cghSeg") {
        stop("CNdata not of class cghSeg.")
    }
    if (as.character(class(CNdata)) != "cghSeg") {
        stop("CNdata not of class cghSeg.")
    }
    if (!(as.character(class(chr)) == "numeric" | as.character(class(chr)) == 
        "integer")) {
        stop("chr of wrong class.")
    }
    if (length(chr) != 1) {
        stop("chr of wrong length.")
    }
    if (!(chr >= 1 | chr <= ncol(fData(CNdata)))) {
        stop("chr exceeds number of columns in featureData.")
    }
    if (!(as.character(class(bpstart)) == "numeric" | as.character(class(bpstart)) == 
        "integer")) {
        stop("bpstart of wrong class.")
    }
    if (length(bpstart) != 1) {
        stop("bpstart of wrong length.")
    }
    if (!(bpstart >= 1 | bpstart <= ncol(fData(CNdata)))) {
        stop("bpstart exceeds number of columns in featureData.")
    }
    if (!(as.character(class(bpend)) == "numeric" | as.character(class(bpend)) == 
        "integer")) {
        stop("bpend of wrong class.")
    }
    if (length(bpend) != 1) {
        stop("bpend of wrong length.")
    }
    if (!(bpend >= 1 | chr <= ncol(fData(CNdata)))) {
        stop("bpend exceeds number of columns in featureData.")
    }
    copynumberMat <- copynumber(CNdata)
    segmentedMat <- segmented(CNdata)
    CNann <- fData(CNdata)
    rm(CNdata)
    gc()
    if (verbose) {
        cat("generate subsetted raw copy number matrix ...", 
            "\n")
    }
    if (ncpus == 1) {
        copynumberMat <- t(sapply(featuresAndWeights, .slhFuncWeightedCGHcallExpressionSetData, 
            copynumberMat, simplify = TRUE))
    }
    if (ncpus > 1) {
        sfInit(parallel = TRUE, cpus = ncpus)
        sfLibrary(package="sigaR", verbose = FALSE, character.only=TRUE)
        copynumberMat <- t(sfSapply(featuresAndWeights, .slhFuncWeightedCGHcallExpressionSetData, 
            copynumberMat, simplify = TRUE))
    }
    if (verbose) {
        cat("generate subsetted segmented matrix ...", "\n")
    }
    if (ncpus == 1) {
        segmentedMat <- t(sapply(featuresAndWeights, .slhFuncWeightedCGHcallExpressionSetData, 
            segmentedMat, simplify = TRUE))
    }
    if (ncpus > 1) {
        segmentedMat <- t(sfSapply(featuresAndWeights, .slhFuncWeightedCGHcallExpressionSetData, 
            segmentedMat, simplify = TRUE))
    }
    if (verbose) {
        cat("generate subsetted call matrices ...", "\n")
    }
    if (verbose) {
        cat("reorganize annotation ...", "\n")
    }
    if (ncpus == 1) {
        annotation <- t(sapply(featuresAndWeights, .slhFuncWeightedCGHcallExpressionSetAnnotation, 
            CNann, chr, bpstart, bpend, simplify = TRUE))
    }
    if (ncpus > 1) {
        annotation <- t(sfSapply(featuresAndWeights, .slhFuncWeightedCGHcallExpressionSetAnnotation, 
            CNann, chr, bpstart, bpend, simplify = TRUE))
    }
    colnames(annotation) <- c("Chromosome", "Start", "End")
    if (ncpus == 1) {
        rownames(annotation) <- as.character(sapply(featuresAndWeights, 
            .slhFuncCombineFeatureNames, rownames(CNann), simplify = TRUE))
    }
    if (ncpus > 1) {
        rownames(annotation) <- as.character(sfSapply(featuresAndWeights, 
            .slhFuncCombineFeatureNames, rownames(CNann), simplify = TRUE))
    }
    if (length(rownames(annotation)) != length(unique(rownames(annotation)))) {
        warning("modified feature names are returned")
        newRowNames <- rownames(annotation)
        slh <- rep(1, length(newRowNames))
        for (i in 1:length(unique(newRowNames))) {
            ids <- which(newRowNames == unique(newRowNames)[i])
            slh[ids] <- 1:length(ids)
        }
        newRowNames <- paste(newRowNames, slh, sep = "__")
        rownames(annotation) <- newRowNames
    }
    fInfo <- new("AnnotatedDataFrame", data = data.frame(annotation), 
        varMetadata = data.frame(labelDescription = c("Chromosome", 
            "Start", "End")))
    if (ncpus > 1) {
        sfStop()
    }
    rownames(copynumberMat) <- rownames(annotation)
    rownames(segmentedMat) <- rownames(annotation)
    if (verbose) {
        cat("merge into cghSeg object ...", "\n")
    }
    CNdata <- new("cghSeg", copynumber = data.matrix(data.frame(copynumberMat, 
		row.names = rownames(annotation))), segmented = data.matrix(data.frame(segmentedMat, 
		row.names = rownames(annotation))), featureData = fInfo)
    return(CNdata)
}





splitMatchingAtBreakpoints <- function(matchedIDs, CNdata){
   	############################################################################
	# function splits matching list-object at breakpoints
	############################################################################

	nSamplesWithBreakpoint <- function(X, CNdata){ 
		sum(apply(segmented(CNdata)[X[,2], , drop=FALSE], 2, function(Z){ length(unique(Z)) }) > 1) 
	}

	matrixRows2listwithReg <- function(X, regInfo){
		subList <- sapply(1:nrow(regInfo), function(u, Z, regInfo){ Z[regInfo[u,1]:regInfo[u,2], , drop=FALSE] }, Z=X, regInfo, simplify=FALSE)
		names(subList) <- rep(X[1, 1], nrow(X))
		return(subList)
	}
	
	makeRegions <- function(CNsegData){
		# determine regions
		splitter <- list()
		splitter[[1]] <- c(1)
		index.temp <- 1
		j <- 1
		for (i in 1:(dim(CNsegData)[1]-1)){
			if (all(CNsegData[i,] == CNsegData[i+1,])){
    				index.temp <- c(index.temp,i+1)
	           		splitter[[j]] <- index.temp
			} else {
				index.temp <- i+1
	     			j <- j + 1
      	     		splitter[[j]] <- index.temp
			}
		}
		regDetails <- NULL
		for (i in 1:length(splitter)){
			regDetails <- rbind(regDetails, c(min(splitter[[i]]),max(splitter[[i]])))
		}
		return(regDetails)
	}

	expandMatches <- function(id, matchedIDs, CNdata){
		nEntries <- length(matchedIDs)
		mFeatures <- nrow(matchedIDs[[id]])
		regionsWithinGene <- makeRegions(segmented(CNdata)[as.numeric(matchedIDs[[id]][,2]),,drop=FALSE])
		mIDsInsert <- matrixRows2listwithReg(matchedIDs[[id]], regionsWithinGene)
		if (id < nEntries){ mIDsEnd <- matchedIDs[(id+1):length(matchedIDs)] } else { mIDsEnd <- NULL }
		if (id > 1){ mIDsBegin <- matchedIDs[1:(id-1)] } else { mIDsBegin <- NULL }
		if (id > 1 & id < nEntries){ 
			matchedIDs <- mIDsBegin
			matchedIDs[(length(matchedIDs)+1):(length(matchedIDs)+mFeatures)] <- mIDsInsert
			names(matchedIDs)[(length(matchedIDs)-mFeatures+1):length(matchedIDs)] <- names(mIDsInsert)
			matchedIDs[(length(matchedIDs)+1):(length(matchedIDs)+length(mIDsEnd))] <- mIDsEnd
			names(matchedIDs)[(length(matchedIDs)-length(mIDsEnd)+1):length(matchedIDs)] <- names(mIDsEnd)
		}
		if (id == 1 & id < nEntries){ 
			matchedIDs <- mIDsInsert
			matchedIDs[(length(matchedIDs)+1):(length(matchedIDs)+length(mIDsEnd))] <- mIDsEnd
			names(matchedIDs)[(length(matchedIDs)-length(mIDsEnd)+1):length(matchedIDs)] <- names(mIDsEnd)
		}
		if (id > 1 & id == nEntries){ 
			matchedIDs <- mIDsBegin
			matchedIDs[(length(matchedIDs)+1):(length(matchedIDs)+mFeatures)] <- mIDsInsert
			names(matchedIDs)[(length(matchedIDs)-mFeatures+1):length(matchedIDs)] <- names(mIDsInsert)
		}
		return(matchedIDs)
	}

	# determine matches with breakpoints
	IDs <- sort(which(sapply(matchedIDs, nSamplesWithBreakpoint, CNdata, simplify=TRUE) > 1), decreasing=TRUE)

	# split at breakpoints
	if (length(IDs) > 0){
		for (j in 1:length(IDs)){
			matchedIDs <- expandMatches(IDs[j], matchedIDs, CNdata)
		}
	}
	return(matchedIDs)
}






merge2ExpressionSets <- function(GEdata1, GEdata2, verbose=TRUE){
   	############################################################################
	# function merges ExpressionSet-objects into one ExpressionSet-object
	############################################################################

	# check input
	if (verbose){ cat("perform input checks...", "\n") }
	if ( !(as.character(class(GEdata1)) == "ExpressionSet" | as.character(class(GEdata1)) == "eSet") ){ stop("GEdata1 not of class ExpressionSet.") }
	if ( !(as.character(class(GEdata2)) == "ExpressionSet" | as.character(class(GEdata2)) == "eSet") ){ stop("GEdata2 not of class ExpressionSet.") }
	if ( dim(GEdata1)[2] != dim(GEdata2)[2] ){ stop("GEdata1 and GEdata2 have an unequal number of samples.") }
	if ( dim(fData(GEdata1))[2] != dim(fData(GEdata2))[2] ){ stop("GEdata1 and GEdata2 have an unequal annotation columns.") }

	# prepare new annotation
	annotation <- rbind(fData(GEdata1), fData(GEdata2))
	newRowNames <- c(rownames(fData(GEdata1)), rownames(fData(GEdata2)))
	if (length(newRowNames) != length(unique(newRowNames))){
		warning("modified feature names are returned")
		slh <- rep(1, length(newRowNames))
		for (i in 1:length(unique(newRowNames))){
			ids <- which(newRowNames == unique(newRowNames)[i])
			slh[ids] <- 1:length(ids)
		}
		newRowNames <- paste(newRowNames, slh, sep="__")		
	}
	rownames(annotation) <- newRowNames
	metaData <- data.frame(labelDescription=colnames(annotation)) 
	probeinfo <- new("AnnotatedDataFrame", data=data.frame(annotation), varMetadata=metaData)
	expMat <- rbind(exprs(GEdata1), exprs(GEdata2))
	rownames(expMat) <- newRowNames
	return(new("ExpressionSet", exprs=expMat, featureData = probeinfo))
}





nBreakpoints <- function(featuresAndWeights, CNdata){
   	############################################################################
	# function calculates the number of samples with at least one breakpoint 
	# within the transcript to which the DNA copy number data have been matched
	############################################################################

	nSamplesWithBreakpoint <- function(X, CNdata){ 
		sum(apply(segmented(CNdata)[X[,1], , drop=FALSE], 2, function(Z){ length(unique(Z)) }) > 1) 
	}
	return(sapply(featuresAndWeights, nSamplesWithBreakpoint, CNdata, simplify=TRUE))
}


expandMatching2singleIDs <- function(matchedIDs){
   	############################################################################
	# function unfolds matching list-object 
	############################################################################

	expandMatches <- function(id, matchedIDs){
		nEntries <- length(matchedIDs)
		mFeatures <- nrow(matchedIDs[[id]])
		mIDsInsert <- .matrixRows2list(matchedIDs[[id]])
		if (id < nEntries){ mIDsEnd <- matchedIDs[(id+1):length(matchedIDs)] } else { mIDsEnd <- NULL }
		if (id > 1){ mIDsBegin <- matchedIDs[1:(id-1)] } else { mIDsBegin <- NULL }
		if (id > 1 & id < nEntries){ 
			matchedIDs <- mIDsBegin
			matchedIDs[(length(matchedIDs)+1):(length(matchedIDs)+mFeatures)] <- mIDsInsert
			names(matchedIDs)[(length(matchedIDs)-mFeatures+1):length(matchedIDs)] <- names(mIDsInsert)
			matchedIDs[(length(matchedIDs)+1):(length(matchedIDs)+length(mIDsEnd))] <- mIDsEnd
			names(matchedIDs)[(length(matchedIDs)-length(mIDsEnd)+1):length(matchedIDs)] <- names(mIDsEnd)
		}
		if (id == 1 & id < nEntries){ 
			matchedIDs <- mIDsInsert
			matchedIDs[(length(matchedIDs)+1):(length(matchedIDs)+length(mIDsEnd))] <- mIDsEnd
			names(matchedIDs)[(length(matchedIDs)-length(mIDsEnd)+1):length(matchedIDs)] <- names(mIDsEnd)
		}
		if (id > 1 & id == nEntries){ 
			matchedIDs <- mIDsBegin
			matchedIDs[(length(matchedIDs)+1):(length(matchedIDs)+mFeatures)] <- mIDsInsert
			names(matchedIDs)[(length(matchedIDs)-mFeatures+1):length(matchedIDs)] <- names(mIDsInsert)
		}
		return(matchedIDs)
	}

	# actual expansion
	IDs <- sort(as.numeric(which(sapply(matchedIDs, function(X){ nrow(X) }, simplify=TRUE) > 1)), decreasing=TRUE)
	if (length(IDs) > 1){
		for (j in 1:length(IDs)){
			matchedIDs <- expandMatches(IDs[j], matchedIDs)
		}
	}
	return(matchedIDs)
}





ExpressionSet2weightedSubset <- function(GEdata, featuresAndWeights, chr, bpstart, bpend, ncpus=1, verbose=TRUE){
   	############################################################################
	# function shrinks ExpressionSet-object to a weighted subset of the features
	############################################################################

	# check input
	if (verbose){ cat("perform input checks...", "\n") }
	if ( !(as.character(class(GEdata)) == "ExpressionSet" | as.character(class(GEdata)) == "eSet") ){ stop("GEdata not of class ExpressionSet.") }
	if (!(as.character(class(featuresAndWeights))=="list")){ stop("featuresAndWeights of wrong class.") }

	if (!(as.character(class(chr))=="numeric" | as.character(class(chr))=="integer" )){ stop("chr of wrong class.") }
	if (length(chr) != 1){ stop("chr of wrong length.") }
	if ( !(chr >= 1 | chr <= ncol(fData(GEdata))) ){ stop("chr exceeds number of columns in featureData.") }
	if (!(as.character(class(bpstart))=="numeric" | as.character(class(bpstart))=="integer" )){ stop("bpstart of wrong class.") }
	if (length(bpstart) != 1){ stop("bpstart of wrong length.") }
	if ( !(bpstart >= 1 | bpstart <= ncol(fData(GEdata))) ){ stop("bpstart exceeds number of columns in featureData.") }
	if (!(as.character(class(bpend))=="numeric" | as.character(class(bpend))=="integer" )){ stop("bpend of wrong class.") }
	if (length(bpend) != 1){ stop("bpend of wrong length.") }
	if ( !(bpend >= 1 | chr <= ncol(fData(GEdata))) ){ stop("bpend exceeds number of columns in featureData.") }

	# disecting the ExpressionSet-object
	exprMat <- exprs(GEdata)
	GEann <- fData(GEdata)
	rm(GEdata)
	gc()

	# weighted subsetting of expression data
	if (verbose){ cat("generate matched expression matrix ...", "\n") }
	if (ncpus == 1){ exprMat <- t(sapply(featuresAndWeights, .slhFuncWeightedCGHcallExpressionSetData, exprMat, simplify=TRUE)) }
	if (ncpus > 1){ 
		sfInit(parallel=TRUE, cpus=ncpus)
		sfLibrary(package="sigaR", verbose=FALSE, character.only=TRUE)
		exprMat <- t(sfSapply(featuresAndWeights, .slhFuncWeightedCGHcallExpressionSetData, exprMat, simplify=TRUE))
	}

	# weighted subsetting of annotation data
	if (verbose){ cat("reorganize annotation ...", "\n") }
	if (ncpus == 1){ annotation <- t(sapply(featuresAndWeights, .slhFuncWeightedCGHcallExpressionSetAnnotation, GEann, chr, bpstart, bpend, simplify=TRUE)) }
	if (ncpus > 1){ annotation <- t(sfSapply(featuresAndWeights, .slhFuncWeightedCGHcallExpressionSetAnnotation, GEann, chr, bpstart, bpend, simplify=TRUE)) }
	colnames(annotation) <- c("Chromosome", "Start", "End")
	if (ncpus == 1){ rownames(annotation) <- as.character(sapply(featuresAndWeights, .slhFuncCombineFeatureNames, rownames(GEann), simplify=TRUE)) }
	if (ncpus > 1){ rownames(annotation) <- as.character(sfSapply(featuresAndWeights, .slhFuncCombineFeatureNames, rownames(GEann), simplify=TRUE)) }
	if (length(rownames(annotation)) != length(unique(rownames(annotation)))){
		warning("modified feature names are returned")
		newRowNames <- rownames(annotation)
		slh <- rep(1, length(newRowNames))
		for (i in 1:length(unique(newRowNames))){
			ids <- which(newRowNames == unique(newRowNames)[i])
			slh[ids] <- 1:length(ids)
		}
		newRowNames <- paste(newRowNames, slh, sep="__")		
		rownames(annotation) <- newRowNames
	}
	fInfo <- new("AnnotatedDataFrame", data=data.frame(annotation), varMetadata=data.frame(labelDescription=c("Chromosome", "Start", "End")))
	if (ncpus > 1){ sfStop() }
	rownames(exprMat) <- rownames(annotation)

	# make ExpressionSet object
	if (verbose){ cat("merge into ExpressionSet object ...", "\n") }
	return(new("ExpressionSet", exprs=exprMat, featureData=fInfo))
}






matchCGHcall2ExpressionSet <- function(CNdata, GEdata, CNchr, CNbpstart, CNbpend, GEchr, GEbpstart, GEbpend, method="distance", reference=1, ncpus=1, verbose=TRUE){
   	###########################################################################################
	# function that matches features from cghCall and ExpressionSet-objects on genomic location
	###########################################################################################

	# check input
	if (verbose){ cat("perform input checks...", "\n") }
	if (as.character(class(CNdata)) != "cghCall"){ stop("CNdata not of class cghCall.") }
	if ( !(as.character(class(GEdata)) == "ExpressionSet" | as.character(class(GEdata)) == "eSet") ){ stop("GEdata not of class ExpressionSet.") }
	if ( !(method %in% c("distance", "overlap", "overlapPlus")) ){ stop("method wrongly specified.") }

	if (!(as.character(class(GEchr))=="numeric" | as.character(class(GEchr))=="integer" )){ stop("GEchr of wrong class.") }
	if (length(GEchr) != 1){ stop("GEchr of wrong length.") }
	if ( !(GEchr >= 1 | GEchr <= ncol(fData(GEdata))) ){ stop("GEchr exceeds number of columns in featureData.") }

	if (!(as.character(class(GEbpstart))=="numeric" | as.character(class(GEbpstart))=="integer" )){ stop("GEbpstart of wrong class.") }
	if (length(GEbpstart) != 1){ stop("GEbpstart of wrong length.") }
	if ( !(GEbpstart >= 1 | GEbpstart <= ncol(fData(GEdata))) ){ stop("GEbpstart exceeds number of columns in featureData.") }

	if (!(as.character(class(GEbpend))=="numeric" | as.character(class(GEbpend))=="integer" )){ stop("GEbpend of wrong class.") }
	if (length(GEbpend) != 1){ stop("GEbpend of wrong length.") }
	if ( !(GEbpend >= 1 | GEbpend <= ncol(fData(GEdata))) ){ stop("GEbpend exceeds number of columns in featureData.") }

	if (!(as.character(class(CNchr))=="numeric" | as.character(class(CNchr))=="integer" )){ stop("CNchr of wrong class.") }
	if (length(CNchr) != 1){ stop("CNchr of wrong length.") }
	if ( !(CNchr >= 1 | CNchr <= ncol(fData(CNdata))) ){ stop("CNchr exceeds number of columns in featureData.") }

	if (!(as.character(class(CNbpstart))=="numeric" | as.character(class(CNbpstart))=="integer" )){ stop("CNbpstart of wrong class.") }
	if (length(CNbpstart) != 1){ stop("CNbpstart of wrong length.") }
	if ( !(CNbpstart >= 1 | CNbpstart <= ncol(fData(CNdata))) ){ stop("GEbpstart exceeds number of columns in featureData.") }

	if (!(as.character(class(CNbpend))=="numeric" | as.character(class(CNbpend))=="integer" )){ stop("CNbpend of wrong class.") }
	if (length(CNbpend) != 1){ stop("CNbpend of wrong length.") }
	if ( !(CNbpend >= 1 | CNbpend <= ncol(fData(CNdata))) ){ stop("CNbpend exceeds number of columns in featureData.") }

	# extract annotation info
	CNann <- cbind(fData(CNdata)[, c(CNchr, CNbpstart, CNbpend), drop=FALSE], c(1:nrow(fData(CNdata))))
	GEann <- cbind(fData(GEdata)[, c(GEchr, GEbpstart, GEbpend), drop=FALSE], c(1:nrow(fData(GEdata))))

	# in case columns are of wrong class ....
	for (i in 1:4){
		if (is.factor(CNann[,i])){ CNann[,i] <- as.numeric(levels(CNann[,i]))[CNann[,i]] }
		if (is.factor(GEann[,i])){ GEann[,i] <- as.numeric(levels(GEann[,i]))[GEann[,i]] }			
		if (is.character(CNann[,i])){ CNann[,i] <- as.numeric(CNann[,i]) }
		if (is.character(GEann[,i])){ GEann[,i] <- as.numeric(GEann[,i]) }			
	}

	# ensure data from gene expression platform is genomicly ordered.
	if (verbose){ cat("order both data sets genomically...", "\n") }
	GEdata <- ExpressionSet2order(GEdata, GEchr, GEbpstart, verbose=verbose)
	GEann <- GEann[order(GEann[,1], GEann[,2]), ]

	# ensure data from copy number platform is genomicly ordered.	
	CNdata <- cghCall2order(CNdata, CNchr, CNbpstart, verbose=verbose)
	CNann <- CNann[order(CNann[,1], CNann[,2]), ]

	# filter genes with complete annotation
	if (verbose){ cat("select probes with complete annotation...", "\n") }
	GEfeaturesKept <- which(apply(GEann, 1, function(x){ all(is.na(x) == FALSE) }))
	if (length(GEfeaturesKept) < 1){ stop("no annotation info for GE-array features provided") }
	GEann <- GEann[GEfeaturesKept, , drop=FALSE]
	CNfeaturesKept <- which(apply(CNann, 1, function(x){ all(is.na(x) == FALSE) }))
	if (length(CNfeaturesKept) < 1){ stop("no annotation info for CN-array features provided") }
	CNann <- CNann[CNfeaturesKept, , drop=FALSE]

	# chromosomes with genes on the expression array
	if (verbose){ cat("select probes on chromosomes present in both data sets...", "\n") }
	GEchrs <- sort(unique(GEann[,1]))
	CNfeaturesKept.2 <- which(CNann[,1] %in% GEchrs)
	CNann <- CNann[CNfeaturesKept.2, , drop=FALSE]
	CNfeaturesKept <- CNfeaturesKept[CNfeaturesKept.2]

	# chromosomes with features on the CN array
	CNchrs <- sort(unique(CNann[,1]))
	GEfeaturesKept.2 <- which(GEann[,1] %in% CNchrs)
	GEann <- GEann[GEfeaturesKept.2, , drop=FALSE]
	GEfeaturesKept <- GEfeaturesKept[GEfeaturesKept.2]
	
	# match by percentage of overlap
	if (verbose){ cat("start actual matching procedure...", "\n") }
	if (ncpus > 1){
		sfInit(parallel=TRUE, cpus=ncpus)
		sfLibrary(package="sigaR", verbose=FALSE, character.only=TRUE)
	}
	if (method == "overlap"){
		# find features with biggest overlap and remove features without any overlap
		if (ncpus == 1){ CNGEmatched.features <- apply(GEann[,-4], 1, .overlapPercentage_CGHcall2ExpressionSet, CNann[,-4], reference) }
		if (ncpus > 1){ CNGEmatched.features <- sfApply(GEann[,-4], 1, .overlapPercentage_CGHcall2ExpressionSet, CNann[,-4], reference) }
		no.match.found <- (is.na(CNGEmatched.features) == FALSE)
		GEfeaturesKept <- GEfeaturesKept[no.match.found]		
		CNGEmatched.features <- CNGEmatched.features[no.match.found]
		CNfeaturesKept <- CNfeaturesKept[CNGEmatched.features]
	}

	# match by percentage of overlap with some interpolation
	if (method == "overlapPlus"){
			# find features with biggest overlap and remove features without any overlap
			if (ncpus == 1){ CNGEmatched.features <- apply(GEann[,-4], 1, .overlapPlusInterpolation_CGHcall2ExpressionSet, CNann[,-4], segmented(CNdata)[CNfeaturesKept, , drop=FALSE], reference) }
			if (ncpus > 1){ CNGEmatched.features <- sfApply(GEann[,-4], 1, .overlapPlusInterpolation_CGHcall2ExpressionSet, CNann[,-4], segmented(CNdata)[CNfeaturesKept, , drop=FALSE], reference) }
			no.match.found <- (is.na(CNGEmatched.features) == FALSE)
			GEfeaturesKept <- GEfeaturesKept[no.match.found]		
			CNGEmatched.features <- CNGEmatched.features[no.match.found]
			CNfeaturesKept <- CNfeaturesKept[CNGEmatched.features]
	}

	# match by distance of mid base pair
	if (method == "distance"){
		# determine mid base pair
		CNannTemp <- cbind(CNann[,1], apply(CNann[,2:3,drop=FALSE], 1, mean, na.rm=TRUE))
		GEannTemp <- cbind(GEann[,1], apply(GEann[,2:3,drop=FALSE], 1, mean, na.rm=TRUE))
		
		# per chromosome find CN-features closed to GE-features
		if (ncpus == 1){ CNGEmatched.features <- apply(GEannTemp, 1, .minDistCghFeature_CGHcall2ExpressionSet, CNannTemp) }
		if (ncpus > 1){ CNGEmatched.features <- sfApply(GEannTemp, 1, .minDistCghFeature_CGHcall2ExpressionSet, CNannTemp) }
		CNfeaturesKept <- CNfeaturesKept[CNGEmatched.features]
	}
	if (ncpus > 1){ sfStop() }

	# in the unlikely event that some genes could not be matched: remove
	if (sum(is.na(CNfeaturesKept)) > 0){
		GEfeaturesKept <- GEfeaturesKept[is.na(CNfeaturesKept) == FALSE]		
		CNfeaturesKept <- CNfeaturesKept[is.na(CNfeaturesKept) == FALSE]
	}

	# in the unlikely event that nothing could be matched: stop
	if ((length(CNfeaturesKept) == 0) | (length(GEfeaturesKept) == 0)){
		stop("No features could be matched. Check format of annotation information.")
	}
	matchedFeatures <- cbind(CNann[CNfeaturesKept,4], GEann[GEfeaturesKept,4])
	return(matchedFeatures)
}





uniqGenomicInfo <- function(chr, bpstart, bpend, verbose=FALSE){
   	############################################################################
	# function shrinks ExpressionSet-object to a weighted subset of the features
	############################################################################

	# check input
	if (verbose){ cat("perform input checks ...", "\n") }
	if (!(as.character(class(chr))=="numeric" | as.character(class(chr))=="integer" )){ stop("chr of wrong class.") }
	if (!(as.character(class(bpstart))=="numeric" | as.character(class(bpstart))=="integer" )){ stop("bpstart of wrong class.") }
	if (!(as.character(class(bpend))=="numeric" | as.character(class(bpend))=="integer" )){ stop("bpend of wrong class.") }
	if (length(chr) != length(bpstart) | length(chr) != length(bpend) | length(bpstart) != length(bpend)){ stop("chr, bpstart and bpend do not have identical length.") }

	# create identifier of unique genomic info's
	if (verbose){ cat("find features with identical genomic info ...", "\n") }
	ann <- cbind(chr, bpstart, bpend)[order(chr, bpstart, bpend),]
	uniques <- apply(cbind(ann[-nrow(ann),], ann[-1,]), 1, function(Z){ !all(Z[1:3] == Z[4:6]) })
	uniques <- c(1, cumsum(uniques) + 1)

	# put features with identical genomic info in list object
	ann <- cbind(1:nrow(ann), ann)
	colnames(ann)[1] <- "featureNo"
	slhFunc <- function(Z, uniques, ann){ ann[which(uniques==Z), , drop=FALSE] }
	uniqueGenomicInfo <- lapply(1:max(uniques), slhFunc, uniques, ann)
	return(uniqueGenomicInfo)
}







matchAnn2Ann <- function(chr1, bpstart1, bpend1, chr2, bpstart2, bpend2, method="distance", maxDist=10000, minPerc=0, reference=1, ncpus=1, verbose=TRUE){
   	###########################################################################################
	# function that matches annotation of to sets of features on genomic location
	###########################################################################################

	# check input
	if (verbose){ cat("perform input checks ...", "\n") }
	if (as.character(class(chr1)) != "numeric" & as.character(class(chr1)) != "integer"){ stop("chr1 of wrong class.") }
	if (as.character(class(bpstart1)) != "numeric" & as.character(class(bpstart1)) != "integer"){ stop("bpstart1 of wrong class.") }
	if (as.character(class(bpend1)) != "numeric" & as.character(class(bpend1)) != "integer"){ stop("bpend1 of wrong class.") }
	if ((length(chr1) != length(bpstart1)) & (length(chr1) != length(bpend1)) & (length(bpstart1) != length(bpend1))){ stop("length of chr1, bpstart1, and bpend1 do not match") }
	if (any(bpstart1 > bpend1)){ stop("inconsistency in bpstart1 and bpend1: for some feature bpstart1 > bpend1") }

	if (as.character(class(chr2)) != "numeric" & as.character(class(chr2)) != "integer"){ stop("chr2 of wrong class.") }
	if (as.character(class(bpstart2)) != "numeric" & as.character(class(bpstart2)) != "integer"){ stop("bpstart2 of wrong class.") }
	if (as.character(class(bpend2)) != "numeric" & as.character(class(bpend2)) != "integer"){ stop("bpend2 of wrong class.") }
	if ((length(chr2) != length(bpstart2)) & (length(chr2) != length(bpend2)) & (length(bpstart2) != length(bpend2))){ stop("length of chr2, bpstart2, and bpend2 do not match") }
	if (any(bpstart2 > bpend2)){ stop("inconsistency in bpstart2 and bpend2: for some feature bpstart2 > bpend2") }

	if ( !(method %in% c("distance", "overlap")) ){ stop("method wrongly specified.") }
	if (as.character(class(maxDist)) != "numeric" & as.character(class(maxDist)) != "integer"){ stop("maxDist of wrong class.") }
	if (as.character(class(minPerc)) != "numeric" & as.character(class(minPerc)) != "integer"){ stop("minPerc of wrong class.") }
	if (as.character(class(ncpus)) != "numeric" & as.character(class(ncpus)) != "integer"){ stop("ncpus of wrong class.") }
	if (maxDist < 1){ stop("maxDist wrongly specified.") }
	if (minPerc < 0 & minPerc > 1){ stop("minPerc wrongly specified.") }

	# extract annotation info
	ann1 <- cbind(chr1, bpstart1, bpend1, 1:length(chr1))
	colnames(ann1)[4] <- "probeNo1"
	ann2 <- cbind(chr2, bpstart2, bpend2, 1:length(chr2))
	colnames(ann2)[4] <- "probeNo2"
	# rm(chr1, chr2, bpstart1, bpstart2, bpend1, bpend2)

	# in case columns are of wrong class ....
	for (i in 1:3){
		if (is.factor(ann1[,i])){ ann1[,i] <- as.numeric(levels(ann1[,i]))[ann1[,i]] }
		if (is.factor(ann2[,i])){ ann2[,i] <- as.numeric(levels(ann2[,i]))[ann2[,i]] }			
		if (is.character(ann1[,i])){ ann1[,i] <- as.numeric(ann1[,i]) }
		if (is.character(ann2[,i])){ ann2[,i] <- as.numeric(ann2[,i]) }			
	}

	# ensure data from gene expression platform is genomicly ordered.
	if (verbose){ cat("order both data sets genomically ...", "\n")	}
	ann1 <- ann1[order(ann1[,1], ann1[,2]), ]
	ann2 <- ann2[order(ann2[,1], ann2[,2]), ]

	# filter features with complete annotation for platform 1
	if (verbose){ cat("select features with complete annotation ...", "\n")	}
	featuresKept1 <- which(apply(ann1, 1, function(x){ all(is.na(x) == FALSE) }))
	if (length(featuresKept1) < 1){ stop("no annotation info for features of platform 1 provided") }
	ann1 <- ann1[featuresKept1, , drop=FALSE]
	ann1removed <- ann1[-featuresKept1, , drop=FALSE]

	# filter features with complete annotation for platform 2
	featuresKept2 <- which(apply(ann2, 1, function(x){ all(is.na(x) == FALSE) }))
	if (length(featuresKept2) < 1){ stop("no annotation info for features of platform 2 provided") }
	ann2removed <- ann2[-featuresKept2, , drop=FALSE]
	ann2 <- ann2[featuresKept2, , drop=FALSE]

	# chromosomes with features on platform 2
	if (verbose){ cat("select features on chromosomes present in both data sets ...", "\n")	}
	chrs2 <- sort(unique(ann2[,1]))
	featuresKept1 <- which(ann1[,1] %in% chrs2)
	ann1removed <- rbind(ann1removed, ann1[-featuresKept1, , drop=FALSE])
	ann1 <- ann1[featuresKept1, , drop=FALSE]

	# chromosomes with features on platform 1
	chrs1 <- sort(unique(ann1[,1]))
	featuresKept2 <- which(ann2[,1] %in% chrs1)
	ann2removed <- rbind(ann2removed, ann2[-featuresKept2, , drop=FALSE])
	ann2 <- ann2[featuresKept2, , drop=FALSE]

	# match by distance of mid base pair
	if (verbose){ cat("start actual matching procedure ...", "\n") }
	if (ncpus > 1){	
		sfInit(parallel=TRUE, cpus=ncpus)
		sfLibrary(package="sigaR", verbose=FALSE, character.only=TRUE)
	}
	if (method == "distance"){
		# determine mid base pair
		ann1temp <- cbind(ann1[,1], apply(ann1[,2:3,drop=FALSE], 1, mean, na.rm=TRUE), ann1[,4])
		ann2temp <- cbind(ann2[,1], apply(ann2[,2:3,drop=FALSE], 1, mean, na.rm=TRUE), ann2[,4])
		
		# per chromosome find CN-features closed to GE-features
		if (ncpus == 1){ matchedFeatures <- apply(ann2temp, 1, .minDistFeature, ann1temp, maxDist) }
		if (ncpus > 1){	matchedFeatures <- sfApply(ann2temp, 1, .minDistFeature, ann1temp, maxDist) }
	}

	# match by percentage of overlap
	if (method == "overlap"){
		# find features with biggest overlap and remove features without any overlap
		if (ncpus == 1){ matchedFeatures <- apply(ann2, 1, .overlapPercentage, ann1, minPerc, reference) }
		if (ncpus > 1){ matchedFeatures <- sfApply(ann2, 1, .overlapPercentage, ann1, minPerc, reference) }
	}
	if (ncpus > 1){ sfStop() }

	if (!is.list(matchedFeatures)){ matchedFeatures <- .matrixRows2list(matchedFeatures) }

	# reformatting
	if (verbose){ cat("reformat matching results ...", "\n") }
	names(matchedFeatures) <- as.character(ann2[,4])
	idsNull <- as.numeric(which(sapply(matchedFeatures, is.null, simplify=TRUE)))
	if (length(idsNull) > 1){ matchedFeatures <- matchedFeatures[-idsNull] }

	return(matchedFeatures)
}



getSegFeatures <- function(featureNo, CNdata, verbose=TRUE){
	###########################################################
	# function that determine features with similar CN signature as featureNo 
	###########################################################

	# check input
	if (verbose){ cat("perform input checks...", "\n") }
	if (as.character(class(CNdata)) != "cghCall"){ stop("CNdata not of class cghCall.") }
	if (!(as.character(class(featureNo))=="numeric" | as.character(class(featureNo))=="integer" )){ stop("featureSubset of wrong class.") }
	if ( length(featureNo) != 1 ){ stop("featureNo wrongly specified.") }
	if ( !(featureNo >= 1 | featureNo <= nrow(fData(CNdata))) ){ stop("featureNo out of range.") }

	# get features
	CNfeaturesKept <- which(chromosomes(CNdata) == chromosomes(CNdata)[featureNo])
	segSignature <- segmented(CNdata)[featureNo, , drop=FALSE]
	CNfeaturesKept <- CNfeaturesKept[apply(segmented(CNdata)[CNfeaturesKept, , drop=FALSE], 1, function(Y, Z){ all(as.numeric(Y) == as.numeric(Z)) }, Z=segSignature)]

	# in case possible gap between features with same signature
	idTemp <- which(CNfeaturesKept == featureNo)
	if (idTemp == 1){
		checkSeq <- c(0:(length(CNfeaturesKept)-idTemp))
	} else {
		checkSeq <- c(-c((idTemp-1):1), c(0:(length(CNfeaturesKept)-idTemp)))
	}
	CNfeaturesKept <- CNfeaturesKept[which(checkSeq == (CNfeaturesKept - featureNo))]
	return(CNfeaturesKept)
}



ExpressionSet2subset <- function(GEdata, featureSubset, verbose=TRUE){
   	############################################################################
	# function shrinks ExpressionSet-object to a subset of the features
	############################################################################

	# check input
	if (verbose){ cat("perform input checks...", "\n") }
	if ( !(as.character(class(GEdata)) == "ExpressionSet" | as.character(class(GEdata)) == "eSet") ){ stop("GEdata not of class ExpressionSet.") }
	if (!(as.character(class(featureSubset))=="numeric" | as.character(class(featureSubset))=="integer" )){ stop("featureSubset of wrong class.") }
	if ( !(length(featureSubset) >= 1) ){ stop("featureSubset contains no row numbers.") }
	if ( !(min(featureSubset) >= 1 | max(featureSubset) <= nrow(fData(GEdata))) ){ stop("featureSubset contains illegal row numbers.") }

	# subsetting
	# fData(GEdata) <- fData(GEdata)[featureSubset, , drop=FALSE]	
	# exprs(GEdata) <- exprs(GEdata)[featureSubset, , drop=FALSE]
	GEdata <- GEdata[featureSubset,,drop=FALSE]	
	return(GEdata)
}



ExpressionSet2order <- function(GEdata, chr, bpstart, verbose=TRUE){
   	###########################################################################################
	# function that orders ExpressionSet-object genomically
	###########################################################################################

	# check input
	if (verbose){ cat("perform input checks...", "\n") }
	if ( !(class(GEdata) == "ExpressionSet" | class(GEdata) == "eSet") ){ stop("GEdata not of class ExpressionSet.") }
	if (!(class(chr)=="numeric" | class(chr)=="integer" )){ stop("chr of wrong class.") }
	if (length(chr) != 1){ stop("chr of wrong length.") }
	if ( !(chr >= 1 | chr <= ncol(fData(GEdata))) ){ stop("chr exceeds number of columns in featureData.") }
	if (!(class(bpstart)=="numeric" | class(bpstart)=="integer" )){ stop("bpstart of wrong class.") }
	if (length(bpstart) != 1){ stop("bpstart of wrong length.") }
	if ( !(bpstart >= 1 | bpstart <= ncol(fData(GEdata))) ){ stop("bpstart exceeds number of columns in featureData.") }

	# extract annotation data for ordering
	GEann <- fData(GEdata)[, c(chr, bpstart), drop=FALSE]

	# in case columns are of wrong class ....
	for (i in 1:2){
		if (is.factor(GEann[,i])){ GEann[,i] <- as.numeric(levels(GEann[,i]))[GEann[,i]] }			
		if (is.character(GEann[,i])){ GEann[,i] <- as.numeric(GEann[,i]) }			
	}

	# order ExpressionSet-object genomically
	exprs(GEdata) <- exprs(GEdata)[order(GEann[,1], GEann[,2]), ]
	fData(GEdata) <- fData(GEdata)[order(GEann[,1], GEann[,2]), ]
	return(GEdata)
}

Try the sigaR package in your browser

Any scripts or data that you put into this service are public.

sigaR documentation built on April 28, 2020, 6:05 p.m.