R/fitGAPLM.R
In plmDE: Additive partially linear models for differential gene expression analysis

Documented in fitGAPLM

fitGAPLM <- function(dataObject, generalizedLM = FALSE, family = poisson(link = log), NegativeBinomialUnknownDispersion = FALSE, test = "LRT", weights = NULL, offset = NULL, pValueAdjustment = "fdr", significanceLevel = 0.05, indicators.fullModel = as.character(unique(dataObject$sampleInfo[,2])[-1]), continuousCovariates.fullModel = NULL, groups.fullModel = as.character(unique(dataObject$sampleInfo[,2])[-1]), groupFunction.fullModel = rep("AdditiveSpline", length(groups.fullModel)), fitSplineFromData.fullModel = TRUE, splineDegrees.fullModel = rep(3, length(groups.fullModel)), splineKnots.fullModel = rep(0, length(groups.reducedModel)), compareToReducedModel = FALSE, indicators.reducedModel = as.character(unique(dataObject$sampleInfo[,2])[-1]), continuousCovariates.reducedModel = NULL, groups.reducedModel = as.character(unique(dataObject$sampleInfo[,2])[-1]), groupFunction.reducedModel = rep("AdditiveSpline", length(groups.reducedModel)), fitSplineFromData.reducedModel = TRUE, splineDegrees.reducedModel = rep(3, length(groups.reducedModel)), splineKnots.reducedModel = rep(0, length(groups.reducedModel)), splineKnotSpread = "quantile") { 	
	
	if ((length(indicators.fullModel) > 0) && (!(indicators.fullModel %in% dataObject$sampleInfo[,2]))) {
		stop("invalid indicator variable")
	}
	if ((length(indicators.reducedModel) > 0) && (!(indicators.reducedModel %in% dataObject$sampleInfo[,2]))) {
		stop("invalid indicator variable")
	}
	## format the expression levels:
	expression = as.matrix(dataObject$expressionValues)
	## stores formula for call to lm();
	stringFormula.full = ""
    Indicator.full = list()
	if (length(indicators.fullModel) > 0) { 
		for (condition in indicators.fullModel) { 
			## create indicator variables:
			condition = toString(condition)
			Indicator.full[[condition]] = as.numeric(dataObject$sampleInfo[,2] == condition)
			## incorporate indicator into formula:
			stringFormula.full = paste(stringFormula.full, "  Indicator.full[[\'", condition, "\']] + ", sep = "")
		}
	}
	quantitativeCovariateList = list()
	if (length(continuousCovariates.fullModel > 0)) {
		for (covariate in continuousCovariates.fullModel) {
			## get covariate:
			covariate = toString(covariate)
			covariateValues = dataObject$sampleInfo[,covariate]
			## coerce NA values to zero:
			covariateValues[is.na(covariateValues)] = 0
			if (length(groups.fullModel) > 0) {
				## identify the different groups:					
				if (length(groups.fullModel) != length(groupFunction.fullModel)) {
					stop("groups.fullModel must have same length as groupFunction.fullModel")
				}
				GroupsSharingLinearFunction = character(0)
				GroupsSharingSplineFunction = character(0)
				for (i in 1:length(groups.fullModel)) {
					group = groups.fullModel[i]
					groupLocation = grep(group, dataObject$sampleInfo[,2])
					if (length(groupLocation) == 0) {
						stop("Invalid group name")
					}
					if (groupFunction.fullModel[i] == "AdditiveLinear") {
						additiveLinearCovariate = rep(0, length(dataObject$sampleInfo[,2]))
						additiveLinearCovariate[groupLocation] = covariateValues[groupLocation]
						covarName = paste(covariate, ".", group, sep = "")
						quantitativeCovariateList[[covarName]] = additiveLinearCovariate
					} else if (groupFunction.fullModel[i] == "CommonLinear") {							GroupsSharingLinearFunction = c(GroupsSharingLinearFunction, group)
					} else if (groupFunction.fullModel[i] == "CommonSpline") {
						GroupsSharingSplineFunction = c(GroupsSharingSplineFunction, group)
					} else if (groupFunction.fullModel[i] == "AdditiveSpline") {
						if (sum(abs(covariateValues[groupLocation]) > 0 ) == 0) {
							## all covariate values in this group are 0
							stop("Cannot fit spline to group whose only covariate value is zero")
						}
						## fit B spline:
						if (fitSplineFromData.fullModel) {
							splineBasis = fitBspline(covariateValues[groupLocation], continuousCovariates.fullModel, indicators.fullModel, group, covariate); 
						} else {
							numKnots = splineKnots.fullModel[i];
							if (numKnots == 0) {
								KnotLocations = NULL
							} else {
								if (splineKnotSpread == "uniform") {
									KnotInterval = (max(covariateValues[groupLocation]) - min(covariateValues[groupLocation])) / (numKnots + 1)
								KnotLocations = seq(min(covariateValues[groupLocation]), max(covariateValues[groupLocation]), by = KnotInterval)[-c(1, numKnots + 2)]	
								} else if (splineKnotSpread == "quantile") {
									KnotLocations = as.vector(quantile(covariateValues[groupLocation], seq(0, 1, by = 1 / (numKnots + 1))))
									KnotLocations = KnotLocations[-c(1, numKnots + 2)]
								} else {
									stop("splineKnotSpread must be one of: uniform, quantile")
								}
							}
							splineBasis = splines::bs(covariateValues[groupLocation], knots = KnotLocations, degree = splineDegrees.fullModel[i], intercept = FALSE)
						}
						OverallBasis = matrix(rep(0, length(covariateValues) * length(splineBasis[1,])), ncol = length(splineBasis[1,]), nrow = length(covariateValues))
						OverallBasis[groupLocation, ] = splineBasis
						for (k in 1:length(OverallBasis[1,])) {
							covarName = paste(group, "BasisFunction", k, ".", covariate, sep = "")
							quantitativeCovariateList[[covarName]] = OverallBasis[,k]
						} 	
					} else {
						stop("groupFunction must be one of: CommonLinear, CommonSpline, AdditiveLinear, AdditiveSpline")
					}
				}
				## fit common spline to groups that share it:
				groupLocation = integer(0)
				commongroup = character(0)
				for (group in GroupsSharingSplineFunction) {
					groupLocation = c(groupLocation, grep(group, dataObject$sampleInfo[,2]))
					commongroup = c(commongroup, group)
				}
				if (length(groupLocation) > 0) {
					## fit B spline:
					if (sum(abs(covariateValues[groupLocation]) > 0 ) == 0) {
						## all covariate values in this group are 0
						stop("Cannot fit spline to group whose only covariate value is zero")
					}
					if (fitSplineFromData.fullModel) {
						splineBasis = fitBspline(covariateValues[groupLocation], continuousCovariates.fullModel, indicators.fullModel, commongroup, covariate); 
					} else {
						numKnots = splineKnots.fullModel[i];
						if (numKnots == 0) {
							KnotLocations = NULL
						} else {
							if (splineKnotSpread == "uniform") {
								KnotInterval = (max(covariateValues[groupLocation]) - min(covariateValues[groupLocation])) / (numKnots + 1)
								KnotLocations = seq(min(covariateValues[groupLocation]), max(covariateValues[groupLocation]), by = KnotInterval)[-c(1, numKnots + 2)]	
							} else if (splineKnotSpread == "quantile") {
								KnotLocations = as.vector(quantile(covariateValues[groupLocation], seq(0, 1, by = 1 / (numKnots + 1))))
								KnotLocations = KnotLocations[-c(1, numKnots + 2)]
							} else {
								stop("splineKnotSpread must be one of: uniform, quantile")
							}
						}
						splineBasis = splines::bs(covariateValues[groupLocation], knots = KnotLocations, degree = splineDegrees.fullModel[i], intercept = FALSE)
					}					
					OverallBasis = matrix(rep(0, length(covariateValues) * length(splineBasis[1,])), ncol = length(splineBasis[1,]), nrow = length(covariateValues))
					OverallBasis[groupLocation, ] = splineBasis
					for (k in 1:length(OverallBasis[1,])) {
						covarName = paste("commonBasisFunction", k, ".", covariate, sep = "")
						quantitativeCovariateList[[covarName]] = OverallBasis[,k]
					}
				} 		
				## fit common linear function to groups that share it:
				groupLocation = integer(0)
				for (group in GroupsSharingLinearFunction) {
					groupLocation = c(groupLocation, grep(group, dataObject$sampleInfo[,2]))
				}
				if (length(groupLocation) > 0) {
					additiveLinearCovariate = rep(0, length(dataObject$sampleInfo[,2]))
					additiveLinearCovariate[groupLocation] = covariateValues[groupLocation]
					covarName = paste(covariate, ".common", sep = "")
					quantitativeCovariateList[[covarName]] = additiveLinearCovariate
				}
			} else { # length of groups.fullModel is zero
				stop("Each covariate requires specification of group and function fit")
			}
		}
		for (covarName in names(quantitativeCovariateList)) { 
			## add continuous covariates to formula for lm:
			stringFormula.full = paste(stringFormula.full, " quantitativeCovariateList[[\'", covarName, "\']] + ", sep = "")
		}
	}
	## Check if there is a model to fit:
	if (nchar(stringFormula.full) == 0) {
		stop("full model contains no indicators or numerical covariates")
	}
	stringFormula.full = R.oo::trim(stringFormula.full)
	length = nchar(stringFormula.full)
	if (substring(stringFormula.full, length, length) == "+") {
		stringFormula.full = substring(stringFormula.full, 1, length - 1)
	}	
	## Check if Reduced Model is requested:
	if (!compareToReducedModel) { # test all coefficients for significance
		PvaluesForCoefficientsSignificance = rep(0, length(dataObject$genes))
		if (generalizedLM) { # fit generalized linear model
			if (NegativeBinomialUnknownDispersion) {
				SignificanceReport = "Pr(Chi)"
				for (j in 1:length(dataObject$genes)) {
				PvaluesForCoefficientsSignificance[j] = anova(MASS::glm.nb(as.formula(paste("expression[j, ] ~ ", stringFormula.full)), weights = weights), MASS::glm.nb(expression[j, ] ~ 1, weights = weights), test = test)[[SignificanceReport]][2]
				}		
			} else {
				whichTest = grep(test, c("ChiSq", "LRT", "Rao", "F"))
				if (whichTest < 4) {
					SignificanceReport = "Pr(>Chi)"
				} else if (whichTest == 4) {
					SignificanceReport = "Pr(>F)"
				} else {
					stop("test must be one of: ChiSq, LRT, Rao, F")
				}
				for (j in 1:length(dataObject$genes)) {
					PvaluesForCoefficientsSignificance[j] = anova(glm(as.formula(paste("expression[j, ] ~ ", stringFormula.full)), family = family, weights = weights, offset = offset), glm(expression[j, ] ~ 1, weights = weights, offset = offset), test = test)[[SignificanceReport]][2]
				}
			}
		} else { # fit linear model and test for significance
			for (j in 1:length(dataObject$genes)) {
				PvaluesForCoefficientsSignificance[j] = 1 - pf(summary(lm(as.formula(paste("expression[j, ] ~ ", stringFormula.full)), weights, offset))$fstatistic[1], summary(lm(as.formula(paste("expression[j, ] ~ ", stringFormula.full)), weights = weights, offset = offset))$fstatistic[2], summary(lm(as.formula(paste("expression[j, ] ~ ", stringFormula.full)), weights = weights, offset = offset))$fstatistic[3])		
			}
		}
		# data frame of genes and p-values
		DifferentiallyExpressed = data.frame(gene = dataObject$genes, p.val = PvaluesForCoefficientsSignificance)
		stringFormula.reduced = NULL
		
	} else { # Construct another formula for Reduced Model:
        Indicator.reduced = list()
		stringFormula.reduced = ""
		if (length(indicators.reducedModel > 0)) { 
			for (condition in indicators.reducedModel) { 
				## create indicator variables:
				condition = toString(condition)
				Indicator.reduced[[condition]] = as.numeric(dataObject$sampleInfo[,2] == condition)
				## incorporate indicator into formula:
				stringFormula.reduced = paste(stringFormula.reduced, " Indicator.reduced[[\'", condition, "\']] + ", sep = "")
			}
		}
		quantitativeCovariateList.reduced = list()
		if (length(continuousCovariates.reducedModel > 0)) {
			for (covariate in continuousCovariates.reducedModel) {
				## get covariate:
				covariate = toString(covariate)
				covariateValues = dataObject$sampleInfo[,covariate]
				## coerce NA values to zero:
				covariateValues[is.na(covariateValues)] = 0
				if (length(groups.reducedModel) > 0) {
					## identify the different groups:					
					if (length(groups.reducedModel) != length(groupFunction.reducedModel)) {
						stop("groups.reducedModel must have same length as groupFunction.reducedModel")
					}
					GroupsSharingLinearFunction = character(0)
					GroupsSharingSplineFunction = character(0)
					for (i in 1:length(groups.reducedModel)) {
						group = groups.reducedModel[i]
						groupLocation = grep(group, dataObject$sampleInfo[,2])
						if (length(groupLocation) == 0) {
							stop("Invalid group name")
						}
						if (groupFunction.reducedModel[i] == "AdditiveLinear") {
							additiveLinearCovariate = rep(0, length(dataObject$sampleInfo[,2]))
							additiveLinearCovariate[groupLocation] = covariateValues[groupLocation]
							covarName = paste(covariate, ".", group, sep = "")
							quantitativeCovariateList.reduced[[covarName]] = additiveLinearCovariate
						} else if (groupFunction.reducedModel[i] == "CommonLinear") {		
							GroupsSharingLinearFunction = c(GroupsSharingLinearFunction, group)
						} else if (groupFunction.reducedModel[i] == "CommonSpline") {
							GroupsSharingSplineFunction = c(GroupsSharingSplineFunction, group)
						} else if (groupFunction.reducedModel[i] == "AdditiveSpline") {
							if (sum(abs(covariateValues[groupLocation]) > 0 ) == 0) {
								## all covariate values in this group are 0
								stop("Cannot fit spline to group whose only covariate value is zero")
							}
							## fit B spline:
						if (fitSplineFromData.reducedModel) {
							splineBasis = fitBspline(covariateValues[groupLocation], continuousCovariates.reducedModel, indicators.reducedModel, group, covariate); 
						} else {
							numKnots = splineKnots.reducedModel[i];
							if (numKnots == 0) {
								KnotLocations = NULL
							} else {
								if (splineKnotSpread == "uniform") {
									KnotInterval = (max(covariateValues[groupLocation]) - min(covariateValues[groupLocation])) / (numKnots + 1)
								KnotLocations = seq(min(covariateValues[groupLocation]), max(covariateValues[groupLocation]), by = KnotInterval)[-c(1, numKnots + 2)]	
								} else if (splineKnotSpread == "quantile") {
									KnotLocations = as.vector(quantile(covariateValues[groupLocation], seq(0, 1, by = 1 / (numKnots + 1))))
									KnotLocations = KnotLocations[-c(1, numKnots + 2)]
								} else {
									stop("splineKnotSpread must be one of: uniform, quantile")
								}
							}
							splineBasis = splines::bs(covariateValues[groupLocation], knots = KnotLocations, degree = splineDegrees.reducedModel[i], intercept = FALSE)
						}
							OverallBasis = matrix(rep(0, length(covariateValues) * length(splineBasis[1,])), ncol = length(splineBasis[1,]), nrow = length(covariateValues))
							OverallBasis[groupLocation, ] = splineBasis
							for (k in 1:length(OverallBasis[1,])) {
								covarName = paste(group, "BasisFunction", k, ".", covariate, sep = "")
								quantitativeCovariateList.reduced[[covarName]] = OverallBasis[,k]
							} 	
						} else {
							stop("groupFunction must be one of: CommonLinear, CommonSpline, AdditiveLinear, AdditiveSpline")
						}
					}

					## fit common spline to groups that share it:
					groupLocation = integer(0)
					commongroup = character(0)
					for (group in GroupsSharingSplineFunction) {
						groupLocation = c(groupLocation, grep(group, dataObject$sampleInfo[,2]))
						commongroup = c(commongroup, group)
					}
					if (length(groupLocation) > 0) {
						## fit B spline:
						if (sum(abs(covariateValues[groupLocation]) > 0 ) == 0) {
								## all covariate values in this group are 0
								stop("Cannot fit spline to group whose only covariate value is zero")
							}
							## fit B spline:
						if (fitSplineFromData.reducedModel) {
							splineBasis = fitBspline(covariateValues[groupLocation], continuousCovariates.reducedModel, indicators.reducedModel, group, covariate);
						} else {
							numKnots = splineKnots.reducedModel[i];
							if (numKnots == 0) {
								KnotLocations = NULL
							} else {
								if (splineKnotSpread == "uniform") {
									KnotInterval = (max(covariateValues[groupLocation]) - min(covariateValues[groupLocation])) / (numKnots + 1)
								KnotLocations = seq(min(covariateValues[groupLocation]), max(covariateValues[groupLocation]), by = KnotInterval)[-c(1, numKnots + 2)]	
								} else if (splineKnotSpread == "quantile") {
									KnotLocations = as.vector(quantile(covariateValues[groupLocation], seq(0, 1, by = 1 / (numKnots + 1))))
									KnotLocations = KnotLocations[-c(1, numKnots + 2)]
								} else {
									stop("splineKnotSpread must be one of: uniform, quantile")
								}
							}
							splineBasis = splines::bs(covariateValues[groupLocation], knots = KnotLocations, degree = splineDegrees.reducedModel[i], intercept = FALSE)
						}
						OverallBasis = matrix(rep(0, length(covariateValues) * length(splineBasis[1,])), ncol = length(splineBasis[1,]), nrow = length(covariateValues))
						OverallBasis[groupLocation, ] = splineBasis
						for (k in 1:length(OverallBasis[1,])) {
							covarName = paste("commonBasisFunction", k, ".", covariate, sep = "")
							quantitativeCovariateList.reduced[[covarName]] = OverallBasis[,k]
						}
					} 		
					## fit common linear function to groups that share it:
					groupLocation = integer(0)
					for (group in GroupsSharingLinearFunction) {
						groupLocation = c(groupLocation, grep(group, dataObject$sampleInfo[,2]))
					}
					if (length(groupLocation) > 0) {
						additiveLinearCovariate = rep(0, length(dataObject$sampleInfo[,2]))
						additiveLinearCovariate[groupLocation] = covariateValues[groupLocation]
						covarName = paste(covariate, ".common", sep = "")
						quantitativeCovariateList.reduced[[covarName]] = additiveLinearCovariate
					}
				} else { # length of groups.reducedModel is zero
					stop("Each covariate requires specification of group and function fit")
				}
			}
			for (covarName in names(quantitativeCovariateList.reduced)) { 
				## add continuous covariates to formula for lm:
				stringFormula.reduced = paste(stringFormula.reduced, " quantitativeCovariateList.reduced[[\'", covarName, "\']] + ", sep = "")
			}
		}
		## Ensure model is not empty:
		if (nchar(stringFormula.reduced) == 0) {
			stop("reduced model has no indicators or numerical covariates")		
		}
		stringFormula.reduced = R.oo::trim(stringFormula.reduced)
		length = nchar(stringFormula.reduced)
		if (substring(stringFormula.reduced, length, length) == "+") {
			stringFormula.reduced = substring(stringFormula.reduced, 1, length - 1)
		}	
				
		## Comparing the full and reduced models
		PvaluesForSignificanceOfFullModel = rep(0, length(dataObject$genes))
		if (generalizedLM) { # fit generalized linear model
			if (NegativeBinomialUnknownDispersion) {
				SignificanceReport = "Pr(Chi)"
				for (j in 1:length(dataObject$genes)) {
				PvaluesForSignificanceOfFullModel[j] = anova(MASS::glm.nb(as.formula(paste("expression[j, ] ~ ", stringFormula.full)), weights = weights), MASS::glm.nb(as.formula(paste("expression[j, ] ~ ", stringFormula.reduced)), weights = weights), test = test)[[SignificanceReport]][2]
				}
			} else {
				whichTest = grep(test, c("ChiSq", "LRT", "Rao", "F"))
				if (whichTest < 4) {
					SignificanceReport = "Pr(>Chi)"
				} else if (whichTest == 4) {
					SignificanceReport = "Pr(>F)"
				} else {
					stop("test must be one of: ChiSq, LRT, Rao, F")
				}
				for (j in 1:length(dataObject$genes)) {
					PvaluesForSignificanceOfFullModel[j] = anova(glm(as.formula(paste("expression[j, ] ~ ", stringFormula.full)), family = family, weights = weights, offset = offset), glm(as.formula(paste("expression[j, ] ~ ", stringFormula.reduced)), weights = weights, offset = offset), test = test)[[SignificanceReport]][2]
				}
			}
		} else { # use linear model
			for (j in 1:length(dataObject$genes)) {	
				PvaluesForSignificanceOfFullModel[j] = anova(lm(as.formula(paste("expression[j,] ~ ", stringFormula.full)), weights = weights, offset = offset), lm(as.formula(paste("expression[j,] ~ ", stringFormula.reduced)), weights = weights, offset = offset))$"Pr(>F)"[2]	
			}
		}
		DifferentiallyExpressed = data.frame(gene = dataObject$genes, p.val = PvaluesForSignificanceOfFullModel)
	}
	
	## Apply p-value cut-off to determine significance
	## of differential expression level and find DE genes.
	AdjustedP = p.adjust(DifferentiallyExpressed$p.val, method = pValueAdjustment)
	DifferentiallyExpressed$p.val.Adjusted = AdjustedP
	DifferentiallyExpressed$Significant = (AdjustedP < significanceLevel)
	DEgenes = data.frame(gene = DifferentiallyExpressed$gene[DifferentiallyExpressed$Significant], p.val.Adjusted = AdjustedP[DifferentiallyExpressed$Significant])
	
	## order DEgenes in terms of increasing p-value
	## so that most differentially expressed genes appear at the top
	DEgenes = DEgenes[order(DEgenes$p.val.Adjusted), ]
	row.names(DEgenes) = NULL;
	
	## keep track of the model's form:
	modelForm.fullModel = list(Indicator.full, quantitativeCovariateList)
	if (length(stringFormula.reduced) > 0) {
		modelForm.reducedModel = list(Indicator.reduced, quantitativeCovariateList.reduced)
	} else {
		modelForm.reducedModel = NULL
	}
	## keep track of whether model is generalized linear model:
	GLMinfo = list(generalizedLM, family, NegativeBinomialUnknownDispersion)
	
	## store results in class DEresults:
	results = list(allgenes = DifferentiallyExpressed, DEgenes = DEgenes, PredictorFormula.fullModel = stringFormula.full, PredictorFormula.reducedModel = stringFormula.reduced, modelForm.fullModel = modelForm.fullModel, modelForm.reducedModel = modelForm.reducedModel, GLMinfo = GLMinfo)
	class(results) = "DEresults"
	return(results)
}
Any scripts or data that you put into this service are public.
plmDE documentation built on May 29, 2017, 6:37 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
plmDE
Additive partially linear models for differential gene expression analysis

R/fitGAPLM.R
In plmDE: Additive partially linear models for differential gene expression analysis

Defines functions fitGAPLM

Documented in fitGAPLM

Try the plmDE package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

plmDE Additive partially linear models for differential gene expression analysis

R/fitGAPLM.R In plmDE: Additive partially linear models for differential gene expression analysis

Defines functions fitGAPLM

Documented in fitGAPLM

Try the plmDE package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

plmDE
Additive partially linear models for differential gene expression analysis

R/fitGAPLM.R
In plmDE: Additive partially linear models for differential gene expression analysis