R/Est.PI.R
In HMP: Hypothesis Testing and Power Calculations for Comparing Metagenomic Samples from HMP

Documented in Est.PI

Est.PI <-
function(group.data, conf=.95){
	if(missing(group.data))
		stop("group.data is missing.")
	
	# Check the number of groups
	numGroups <- length(group.data)
	
	# Make sure we have the same columns
	taxaCounts <- sapply(group.data, ncol)
	numTaxa	<- taxaCounts[1]
	if(any(taxaCounts != numTaxa)){
		warning("Group columns do not match, running formatDataSets.")
		group.data <- formatDataSets(group.data)
	}
	
	# Make sure we have group names
	if(is.null(names(group.data))){
		grpNames <- paste("Data Set", 1:numGroups)
	}else{
		grpNames <- names(group.data)
	}
	
	# Calculate the pi and error bars for each group
	allParamsMLE <- data.frame(matrix(0, 0, 6))
	allParamsMOM <- data.frame(matrix(0, 0, 6))
	thetaMLE <- data.frame(matrix(0, numGroups, 3))
	thetaMOM <- data.frame(matrix(0, numGroups, 3))
	for(i in 1:numGroups){
		tempData <- group.data[[i]]
		
		# Check the data has samples
		numSub <- nrow(tempData)
		if(numSub < 1)
			stop("At least one data set in group.data is empty")
		
		tempParam1 <- data.frame(matrix(0, ncol(tempData), 6))
		tempParam2 <- data.frame(matrix(0, ncol(tempData), 6))
		
		tempParam2[,2] <- grpNames[i]
		tempParam1[,2] <- grpNames[i]
		
		# Check for taxa with 0 column sums (add 1 to everything if this happens)
		badTaxa <- which(colSums(tempData) == 0)
		if(length(badTaxa) != 0)
			tempData <- tempData + 1
		
		# Handle having 1 sample
		if(numSub == 1){
			tempParam1[,1] <- colnames(tempData)
			tempParam1[,3] <- unlist(tempData[1,]/sum(tempData))
			tempParam1[,4] <- NA
			tempParam1[,5] <- tempParam1[,3]
			tempParam1[,6] <- tempParam1[,3]
			tempParam1 <- tempParam1[order(tempParam1[,1]),]
			
			tempTheta1 <- c(0, NA)
			
			tempParam2 <- tempParam1
			tempTheta2 <- tempTheta1
		}else{	
			# Get the MoM and MLE for every taxa
			fsum <- dirmult::dirmult.summary(tempData, dirmult::dirmult(tempData, trace=FALSE))
			tempTheta <- fsum[nrow(fsum),]
			fsum <- fsum[-nrow(fsum),] 
			
			# Turn the summary into a data frame we can plot from
			tempParam1[,1] <- rownames(fsum)
			tempParam1[,3] <- fsum$MLE
			tempParam1[,4] <- fsum$se.MLE
			tempTheta1 <- tempTheta[,2:3]
			
			tempParam2[,1] <- rownames(fsum)
			tempParam2[,3] <- fsum$MoM
			tempParam2[,4] <- fsum$se.MOM
			tempTheta2 <- tempTheta[,4:5]
			
			# Calc Upper and Lower bounds for CI
			minSubj <- min(sapply(group.data, function(x) nrow(x)))
			if(minSubj < 30){
				val <- stats::qt(0.5 + conf *0.5, df=minSubj-1)
			}else{
				val <- stats::qnorm(0.5 + conf*0.5)
			}
			
			tempParam1[,5] <- tempParam1[,3] + val*tempParam1[,4]
			tempParam1[,6] <- tempParam1[,3] - val*tempParam1[,4]
			
			tempParam2[,5] <- tempParam2[,3] + val*tempParam2[,4]
			tempParam2[,6] <- tempParam2[,3] - val*tempParam2[,4]
		}
		
		# Save outside of loop
		allParamsMLE <- rbind(allParamsMLE, tempParam1)
		thetaMLE[i,] <- c(grpNames[i], tempTheta1)
		
		allParamsMOM <- rbind(allParamsMOM, tempParam2)
		thetaMOM[i,] <- c(grpNames[i], tempTheta2)
	}
	colnames(allParamsMLE) <- c("Taxa", "Group", "PI", "SE", "Upper", "Lower")
	colnames(thetaMLE) <- c("Group", colnames(tempTheta1))
	colnames(allParamsMOM) <- c("Taxa", "Group", "PI", "SE", "Upper", "Lower")
	colnames(thetaMOM) <- c("Group", colnames(tempTheta2))
	
	# Make sure none of our error bars go over 100 or below 0
	allParamsMLE$Upper <- ifelse(allParamsMLE$Upper > 1, 1, allParamsMLE$Upper)
	allParamsMLE$Lower <- ifelse(allParamsMLE$Lower < 0, 0, allParamsMLE$Lower)
	allParamsMOM$Upper <- ifelse(allParamsMOM$Upper > 1, 1, allParamsMOM$Upper)
	allParamsMOM$Lower <- ifelse(allParamsMOM$Lower < 0, 0, allParamsMOM$Lower)
	
	# Factor the data so it stays in the right order
	allParamsMLE$Group <- factor(allParamsMLE$Group, levels=grpNames)
	allParamsMLE$Taxa <- factor(allParamsMLE$Taxa, levels=unique(colnames(group.data[[1]])))
	allParamsMOM$Group <- factor(allParamsMOM$Group, levels=grpNames)
	allParamsMOM$Taxa <- factor(allParamsMOM$Taxa, levels=unique(colnames(group.data[[1]])))
	
	MLE <- list(params=allParamsMLE, theta=thetaMLE)
	MOM <- list(params=allParamsMOM, theta=thetaMOM)
	
	return(list(MLE=MLE, MOM=MOM))
}

Any scripts or data that you put into this service are public.

HMP documentation built on Aug. 31, 2019, 5:05 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

HMP
Hypothesis Testing and Power Calculations for Comparing Metagenomic Samples from HMP

R/Est.PI.R
In HMP: Hypothesis Testing and Power Calculations for Comparing Metagenomic Samples from HMP

Defines functions Est.PI

Documented in Est.PI

Try the HMP package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

HMP Hypothesis Testing and Power Calculations for Comparing Metagenomic Samples from HMP

R/Est.PI.R In HMP: Hypothesis Testing and Power Calculations for Comparing Metagenomic Samples from HMP

Defines functions Est.PI

Documented in Est.PI

Try the HMP package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

HMP
Hypothesis Testing and Power Calculations for Comparing Metagenomic Samples from HMP

R/Est.PI.R
In HMP: Hypothesis Testing and Power Calculations for Comparing Metagenomic Samples from HMP