R/ABneutralSOMA.R

Defines functions ABneutralSOMA

Documented in ABneutralSOMA

#' Model with no selection (outneutral)
#'
#' This model assumes that somatically heritable gains and losses in cytosine methylation are selectively neutral.
#' The gain rate (alpha), loss rate (beta), a weight splitting the initial methylated
#' fraction between the two non-unmethylated states, and an intercept are estimated by
#' least squares from several random starting points. The objective is the squared
#' difference between observed and predicted divergence plus a penalty that pulls the
#' implied equilibrium proportion of unmethylated cytosines towards \code{eqp}.
#'
#' @param pedigree.data pedigree data with one row per sample pair. Columns 1 to 3 are the
#'   time points (labelled time0, time1 and time2 in the output; column 1 is subtracted
#'   from columns 2 and 3 before use) and column 4 is the observed divergence.
#' @param p0uu initial proportion of unmethylated cytosines. The initial methylated
#'   proportion is taken as \code{1 - p0uu} and the initial intermediate proportion as 0.
#' @param eqp equilibrium proportion of unmethylated cytosines.
#' @param eqp.weight weight assigned to equilibrium function (the penalty term is
#'   additionally multiplied by the number of pedigree rows).
#' @param Nstarts number of random starting points for the non linear LSQ optimization.
#' @param out.dir output directory.
#' @param out.name output file name (without extension; ".Rdata" is appended).
#' @import optimx
#' @import expm
#' @importFrom stats runif
#' @return A list with elements \code{estimates} (fits with positive alpha, beta and
#'   intercept and convergence code 0, ordered by objective value),
#'   \code{estimates.flagged} (all remaining fits), \code{pedigree} (input augmented with
#'   delta.t, div.pred and residual), \code{settings}, \code{model} and
#'   \code{for.fit.plot} (predicted divergence over a grid of time points). The same
#'   list is also written with \code{dput()} to \code{<out.dir>/<out.name>.Rdata}.
#' @export
#' @examples
#' #Get some toy data
#' inFile <- readRDS(system.file("extdata/soma/","outputSoma.rds", package="AlphaBeta"))
#' pedigree <- inFile$Pdata
#' p0uu_in <- inFile$tmpp0
#' eqp.weight <- 0.001
#' Nstarts <- 2
#' out.name <- "ABneutralSOMA_CG_estimates"
#' out <- ABneutralSOMA(pedigree.data = pedigree,
#'                   p0uu=p0uu_in,
#'                   eqp=p0uu_in,
#'                   eqp.weight=eqp.weight,
#'                   Nstarts=Nstarts,
#'                   out.dir=getwd(),
#'                   out.name=out.name)
#'
#' summary(out)
#'




ABneutralSOMA <- function(pedigree.data, p0uu, eqp, eqp.weight, Nstarts, out.dir, out.name) {

  # Hard-coded switch: fits with a non-positive intercept are flagged rather than accepted.
  allow.neg.intercept <- "no"

  ##### Validating the inputs
  # Checked before anything is derived from p0uu: 1 - NULL is numeric(0), not NULL,
  # so a NULL test on the derived p0mm could never fire.
  if (is.null(p0uu) || is.null(eqp)) {
    stop("Both eqp value AND p0mm have to be supplied")
  }

  ##### Calculating the initial proportions
  # We always assume that:
  # 1. p0mm is larger than actually observed. This means if p0um is available from
  #    measurements, it is simply counted as part of p0mm.
  # 2. As a consequence of (1.), p0um = 0.
  p0mm <- 1 - p0uu
  p0um <- 0

  # Tolerance-based comparison: (1 - p0uu) + p0uu is not guaranteed to be exactly 1
  # in floating point.
  if (!isTRUE(all.equal(sum(c(p0mm, p0um, p0uu), na.rm = TRUE), 1))) {
    stop("The initial state probabilities don't sum to 1")
  }

  ##### Defining the divergence function
  # Returns list(<Pr(UU) at equilibrium implied by alpha and beta>,
  #              <predicted divergence for every row of 'pedigree'>).
  # param = c(alpha, beta, weight). p0um is accepted for interface symmetry but is not
  # used: the intermediate state is initialised as weight * p0mm.
  divergence <- function(pedigree, p0mm, p0um, p0uu, param) {
    alpha  <- param[1]
    bet    <- param[2]
    weight <- param[3]

    ## State probabilities at G0, in the order (UU, UM, MM)
    svGzero <- c(p0uu, weight * p0mm, (1 - weight) * p0mm)

    ## Generation (or transition) matrix for the mitotic case, rows/columns in the
    ## order (UU, UM, MM)
    Genmatrix <- matrix(c((1 - alpha)^2,     2 * (1 - alpha) * alpha,                 alpha^2,
                          bet * (1 - alpha), (1 - alpha) * (1 - bet) + alpha * bet,   alpha * (1 - bet),
                          bet^2,             2 * (1 - bet) * bet,                     (1 - bet)^2),
                        nrow = 3, byrow = TRUE)

    ## Expected divergence between two lineages given their state vectors (1 x 3):
    ## adjacent states (UU/UM, UM/MM) count 1/2, opposite states (UU/MM) count 1.
    cond.div <- function(sv1, sv2) {
      1/2 * (sv1[, 1] * sv2[, 2] + sv1[, 2] * sv2[, 1] + sv1[, 2] * sv2[, 3] +
               sv1[, 3] * sv2[, 2]) + 1 * (sv1[, 1] * sv2[, 3] + sv1[, 3] * sv2[, 1])
    }

    from.UU <- t(c(1, 0, 0))
    from.UM <- t(c(0, 1, 0))
    from.MM <- t(c(0, 0, 1))

    n.pairs <- NROW(pedigree)
    Dt1t2 <- numeric(n.pairs)

    for (p in seq_len(n.pairs)) {
      ## Matrix powers (%^% from expm) are computed once per row and shared by the
      ## three starting states; the products below are the same as computing the power
      ## separately for each state.
      Pt0 <- Genmatrix %^% as.numeric(pedigree[p, 1])
      Pt1 <- Genmatrix %^% as.numeric(pedigree[p, 2] - pedigree[p, 1])
      Pt2 <- Genmatrix %^% as.numeric(pedigree[p, 3] - pedigree[p, 1])

      ## State distribution at time0, starting from G0
      svt0 <- t(svGzero) %*% Pt0

      ## Conditional divergences given the state at time0
      dt1t2.MM <- cond.div(from.MM %*% Pt1, from.MM %*% Pt2)
      dt1t2.UM <- cond.div(from.UM %*% Pt1, from.UM %*% Pt2)
      dt1t2.UU <- cond.div(from.UU %*% Pt1, from.UU %*% Pt2)

      ## Total (weighted) divergence
      Dt1t2[p] <- svt0[, 1] * dt1t2.UU + svt0[, 2] * dt1t2.UM + svt0[, 3] * dt1t2.MM
    }

    # Pr(UU) at equilibrium given alpha and beta
    puuinf.est <- (bet^2) / ((alpha + bet)^2)

    list(puuinf.est, Dt1t2)
  }

  ##### Initializing
  optim.method <- "Nelder-Mead"
  pedigree <- pedigree.data

  ###### Defining the Least Square function to be minimized
  ###### Note the equilibrium constraint, which can be made as small as desired.
  # param_int = c(alpha, beta, weight, intercept). The divergence is evaluated once
  # and reused for both the residual term and the equilibrium penalty.
  LSE_intercept <- function(param_int) {
    div <- divergence(pedigree, p0mm, p0um, p0uu, param_int[1:3])
    sum((pedigree[, 4] - param_int[4] - div[[2]])^2) +
      eqp.weight * nrow(pedigree) * ((div[[1]] - eqp)^2)
  }

  ##### Running the optimization from Nstarts random starting points
  final <- vector("list", Nstarts)

  for (s in seq_len(Nstarts)) {

    ## Draw random starting values (rates log-uniform on [1e-9, 1e-2]).
    ## The *.start names are kept as they become column names via cbind() below.
    alpha.start     <- 10^(runif(1, log10(10^-9), log10(10^-2)))
    beta.start      <- 10^(runif(1, log10(10^-9), log10(10^-2)))
    weight.start    <- runif(1, 0, 0.5)
    intercept.start <- runif(1, 0, max(pedigree[, 4]))
    param_int0 <- c(alpha.start, beta.start, weight.start, intercept.start)

    message("Progress: ", s / Nstarts, "\n")

    opt.out <- suppressWarnings(optimx(par = param_int0, fn = LSE_intercept, method = optim.method))

    ## Equilibrium state proportions implied by the fitted rates
    alphafinal <- opt.out[1]
    betfinal   <- opt.out[2]
    PrMMinf <- (alphafinal^2) / ((alphafinal + betfinal)^2)
    PrUMinf <- (2 * alphafinal * betfinal) / ((alphafinal + betfinal)^2)
    PrUUinf <- (betfinal^2) / ((alphafinal + betfinal)^2)

    opt.out <- cbind(opt.out, PrMMinf, PrUMinf, PrUUinf, alpha.start, beta.start, weight.start, intercept.start)
    final[[s]] <- opt.out

  } # End of Nstarts loop

  final <- do.call("rbind", final)
  colnames(final)[1:4] <- c("alpha", "beta", "weight", "intercept")
  # Positions 13:15 rely on the optimx result having 12 columns before the ones
  # appended above.
  colnames(final)[13:15] <- c("PrMMinf", "PrUMinf", "PrUUinf")

  ##### Calculating the least square of the first part of the minimized function
  ##### (i.e. the residual sum of squares without the equilibrium penalty)
  n.fits <- NROW(final)
  lsqpart <- numeric(n.fits)

  for (l in seq_len(n.fits)) {
    fit.par  <- c(final[l, "alpha"], final[l, "beta"], final[l, "weight"])
    fit.pred <- divergence(pedigree, p0mm, p0um, p0uu, fit.par)[[2]]
    lsqpart[l] <- sum((pedigree[, 4] - final[l, "intercept"] - fit.pred)^2)
  }

  final <- cbind(final, lsqpart)
  colnames(final)[ncol(final)] <- c("value.part")
  final <- final[order(final[, "value"]), ]

  ##### Splitting the fits into accepted and flagged ones
  accepted <- final[["alpha"]] > 0 & final[["beta"]] > 0 & final[["convcode"]] == 0
  if (allow.neg.intercept == "no") {
    accepted <- accepted & final[["intercept"]] > 0
  }
  index.1 <- which(accepted)
  index.2 <- setdiff(seq_len(NROW(final)), index.1)
  final.1 <- final[index.1, ]
  final.2 <- final[index.2, ]

  ##### Calculating the predicted values based on the 'best' model
  ##### (first accepted row, i.e. the one with the lowest objective value)
  alpha     <- as.numeric(final.1[1, "alpha"])
  bet       <- as.numeric(final.1[1, "beta"])
  weight    <- as.numeric(final.1[1, "weight"])
  intercept <- as.numeric(final.1[1, "intercept"])
  best.par  <- c(alpha, bet, weight)

  Dt1t2    <- divergence(pedigree, p0mm, p0um, p0uu, best.par)[[2]]
  Residual <- (pedigree[, 4] - intercept - Dt1t2)

  ##### Augmenting pedigree
  delta.t <- pedigree[, 2] + pedigree[, 3] - 2 * pedigree[, 1]
  pedigree <- cbind(pedigree, delta.t, Dt1t2 + intercept, Residual)
  colnames(pedigree)[c(4, 5, 6, 7)] <- c("div.obs", "delta.t", "div.pred", "residual")

  ##### Making info about settings
  info  <- c("p0mm", "p0um", "p0uu", "eqp", "eqp.weight", "Nstarts", "optim.method")
  info2 <- c(p0mm, p0um, p0uu, eqp, eqp.weight, Nstarts, optim.method)
  info.out <- data.frame(info, info2)
  colnames(info.out) <- c("Para", "Setting")

  ###### Generating theoretical fit
  ## Grid of (time1, time2) pairs up to the largest observed time, with time0 = 0;
  ## only one pair per distinct delta.t is kept.
  time.max <- max(c(pedigree[, 2], pedigree[, 3]))
  time.out <- expand.grid(seq(1, time.max), seq(1, time.max))
  time0 <- rep(0, nrow(time.out))
  pedigree.new <- as.matrix(cbind(time0, time.out))
  pedigree.new <- cbind(pedigree.new, c(pedigree.new[, 2] + pedigree.new[, 3] - 2 * pedigree.new[, 1]))
  # drop = FALSE keeps a matrix even when a single row survives (largest time == 1).
  pedigree.new <- pedigree.new[!duplicated(pedigree.new[, 4]), 1:3, drop = FALSE]

  ## Predicted divergence of the best model over the grid. The initial proportions are
  ## used at full precision rather than being read back from the character-valued
  ## settings table.
  div.grid <- divergence(pedigree.new, p0mm, p0um, p0uu, best.par)[[2]]

  pedigree.new <- cbind(pedigree.new, div.grid + intercept,
                        c(pedigree.new[, 2] + pedigree.new[, 3] - 2 * pedigree.new[, 1]))
  colnames(pedigree.new) <- c("time0", "time1", "time2", "div.sim", "delta.t")
  pedigree.new <- pedigree.new[order(pedigree.new[, 5]), , drop = FALSE]

  model <- "ABneutralSOMA.R"

  abfree.out <- list(final.1, final.2, pedigree, info.out, model, pedigree.new)
  names(abfree.out) <- c("estimates", "estimates.flagged", "pedigree", "settings", "model", "for.fit.plot")

  ## Outputting result datasets
  # NOTE: despite the ".Rdata" extension the file is written with dput(), so it has
  # to be read back with dget(), not load().
  dput(abfree.out, paste0(out.dir, "/", out.name, ".Rdata"))

  abfree.out

} #End of function
jlab-code/AlphaBeta documentation built on April 23, 2022, 11:02 a.m.