vecData = function(data,M,N,B,ref,group,ngroup,pattern,rare){
z = NULL
melted <- data.table::melt(data[,unique(c(paste0('ChIP.',1:N),rep('Dsg.Int',N),paste0('offset.',1:N),rep('PostProb1',N),rep('PostProb2',N),rep('PostProb3',N),rep('z',N))),with=F],
measure = list(paste0('ChIP.',1:N),rep('Dsg.Int',N),paste0('offset.',1:N),rep('PostProb1',N),rep('PostProb2',N),rep('PostProb3',N),rep('z',N)),
value.name = c('ChIP','Dsg.Int','offset','PostProb1','PostProb2','PostProb3','z'))[,-1]
if(is.list(pattern)){
model <- lapply(unlist(lapply(pattern,FUN = function(x){
aux <- rep(F,ngroup);aux[x]<-T
which(apply(as.matrix(ref[,1:ngroup])==1,1,FUN = function(x){all(x==aux)}))
})),FUN = function(x){cbind(rep(t(ref[x,1:ngroup]),times=M*table(group)))})
} else{
if(pattern=='all'){
model <- lapply(1+1:B,FUN = function(x) cbind(rep(t(ref[x,1:ngroup]),times=M*table(group))))
} else if(pattern=='cluster'){
propdiff <- which(data[,table(z)/.N]>rare)
model <- lapply(as.numeric(names(propdiff[!names(propdiff)%in%range(data$z)])),
FUN = function(x) cbind(rep(t(ref[x,1:ngroup]),times=M*table(group))))
}
}
return(melted[,paste0('Dsg.Mix',1:B) := model])
}
HMM.enumerate = function(chain){
Window = NULL
ref = NULL
ref[paste0('ChIP',1:ncol(chain))] = list(NULL)
for(i in names(ref)){ref[[i]] = c(0,1)}
ref = as.data.table(expand.grid(ref))
ref = ref[order(rowSums(ref)),]
ref$Group = 1:nrow(ref)
N = ncol(chain)
data.table::setnames(chain,paste0('ChIP',1:N))
#colnames(chain) = paste0('ChIP',1:N)
chain[,Window := (1:nrow(chain))]
chain = merge(chain,ref,by=paste0('ChIP',1:N),all.x=T)
return(list('Group'=chain[order(Window)][,c('Group'),with=F],'Ref'=ref))
}
agg = function(data,data.unique,rows,agg){
# data <- data.table::copy(data)
# setkey(data,Group)
# data.unique <- data.table::copy(data.unique)
# Filtering rows and aggregating
# This will give me the sum of weights per each Group (possible combination of ChIP, Control, etc.)
# data <- data[eval(parse(text=rows)),][,list(weights=sum(get(agg))),by=Group]
# Bringing the variables from data.unique into the aggregated data
# This will bring the variables back to the data
# return(data.unique[data,on='Group'])
Group = NULL
return(data.unique[data[eval(parse(text=rows)),][,list(weights=sum(get(agg))),by=Group],on='Group'])
}
glm.nb = function(par,Y.vec,X.mat,offset.vec,weights.vec){
l = stats::dnbinom(Y.vec,mu=exp(X.mat%*%par[1:ncol(X.mat)]+offset.vec),size=1/par[length(par)],log=T);l[is.infinite(l)] = log(1e-300)
return(-sum(weights.vec*l))
}
deriv.nb = function(par,Y.vec,X.mat,offset.vec,weights.vec){
mu.vec = exp(X.mat%*%par[1:ncol(X.mat)]+offset.vec)
phi = par[length(par)]
return(-c(colSums(as.numeric(weights.vec)*as.numeric((Y.vec-mu.vec)/(1+phi*mu.vec))*X.mat),sum(as.numeric(weights.vec)*(log(1+phi*mu.vec)+phi*(Y.vec-mu.vec)/(1+phi*mu.vec)-digamma(Y.vec+1/phi)+digamma(1/phi))/(phi^2))))
}
inv.par = function(par,model){
if(model=='nb'){return(c(par[1:(length(par)-1)],1/par[length(par)]))}
if(model=='zinb'){
n = length(par)
return(c(par[1:((n-1)/2)],1/par[(n-1)/2+1],par[((n-1)/2+2):n]))}
}
glm.nb2 = function(par,Y.vec,X.mat,offset.vec,weights.vec){
# This function is the -loglikelihood of a NB model with group-specific dispersions
l = stats::dnbinom(Y.vec,mu=exp(X.mat%*%par[1:ncol(X.mat)]+offset.vec),size=1/exp(X.mat%*%par[(ncol(X.mat)+1):(2*ncol(X.mat))]),log=T);l[is.infinite(l)] = log(1e-300)
return(-sum(weights.vec*l))
}
HMM.chain = function(z,K){
if(max(z)>(K-1)){z=round((K-1)*(z-max(z))/(max(z)-min(z))+(K-1))}
MC = matrix(z,nrow = 1,ncol=length(z))
MC = table(c(MC[,-ncol(MC)]),c(MC[,-1]))
MC = as.matrix(MC/rowSums(MC))
MC = matrix(MC,ncol=ncol(MC),nrow=nrow(MC),byrow=F)
MC = check.prob(MC)
return(MC)
}
check.prob = function(P,min.zero=.Machine$double.xmin){
P = pmax(pmin(P,1),0)
if(sum(P==0)>0){P[P==0] = min.zero}
return(P/rowSums(P))
}
HMM.mean = function(X.mat,offset.vec,psi,N,M,idx,min.zero=min.zero){
K=length(psi)
mu = matrix(0,nrow=M,ncol=N*K)
for(k in 1:K){
mu[,k+0:(N-1)*K] = matrix(exp(X.mat[[idx[k]]]%*%psi[[k]]+offset.vec),nrow=M,ncol=N,byrow=F)
}
if(sum(mu==0)>0){mu[mu==0] = min.zero}
return(mu)
}
HMM.LLmix2 = function(Y.vec,X.mat,mu,delta,disp,dispmix,N,M,zeroinfl=NULL,background='nb',min.zero=.Machine$double.xmin){
if(background=='nb'){
# This function implements the HMM loglikelihood using a NB with group-specific dispersions
B=length(delta);Kmu=ncol(mu)/N;K=3
LL = matrix(0,nrow=M,ncol=K)
LLaux = matrix(0,nrow=M,ncol=B)
#LL from Background
LL[,1] = rowSums(matrix(stats::dnbinom(Y.vec,mu=c(mu[,1+0:(N-1)*Kmu]),size=rep(disp[1],N*M),log=T),nrow=M,ncol=N,byrow=F))
#LL from Enrichment
LL[,K] = rowSums(matrix(stats::dnbinom(Y.vec,mu=c(mu[,Kmu+0:(N-1)*Kmu]),size=rep(disp[2],N*M),log=T),nrow=M,ncol=N,byrow=F))
#LL from Mixture
for(b in 2:(Kmu-1)){
LLaux[,(b-1)] = log(delta[(b-1)])+rowSums(matrix(stats::dnbinom(Y.vec,mu=c(mu[,b+0:(N-1)*Kmu]),
size=1/exp(X.mat%*%matrix(dispmix[(b-1),1:(ncol(X.mat))],ncol=1)),log=T),nrow=M,ncol=N,byrow=F))
}
LL[,2] = log(rowSums(exp(LLaux)))
LL[is.infinite(LL)] = log(min.zero)
return(LL)
}
if(background=='zinb'){
# This function implements the HMM loglikelihood using a NB with group-specific dispersions and ZINB in background component
B=length(delta);Kmu=ncol(mu)/N;K=3
Yvec0 = 1*(Y.vec==0)
Zvec = c(zeroinfl)
LL = matrix(0,nrow=M,ncol=K)
LLaux = matrix(0,nrow=M,ncol=B)
#LL from Background
LogLik00 = stats::dnbinom(0,mu=c(mu[,1+0:(N-1)*Kmu]),size=rep(disp[1],N*M),log=T);LogLik00[is.infinite(LogLik00)] = log(1e-300)
LogLik01 = stats::dnbinom(Y.vec,mu=c(mu[,1+0:(N-1)*Kmu]),size=rep(disp[1],N*M),log=T);LogLik01[is.infinite(LogLik01)] = log(1e-300)
LL[,1] = rowSums(matrix((Yvec0)*log(Zvec + exp(log(1-Zvec)+LogLik00)) + (1-Yvec0)*(log(1-Zvec) + LogLik01),nrow=M,ncol=N,byrow=F))
LL[is.infinite(LL[,1]),1] = log(1e-300)
#LL from Enrichment
LL[,K] = rowSums(matrix(stats::dnbinom(Y.vec,mu=c(mu[,Kmu+0:(N-1)*Kmu]),size=rep(disp[2],N*M),log=T),nrow=M,ncol=N,byrow=F))
#LL from Mixture
for(b in 2:(Kmu-1)){
LLaux[,(b-1)] = log(delta[(b-1)])+rowSums(matrix(stats::dnbinom(Y.vec,mu=c(mu[,b+0:(N-1)*Kmu]),
size=1/exp(X.mat%*%matrix(dispmix[(b-1),1:(ncol(X.mat))],ncol=1)),log=T),nrow=M,ncol=N,byrow=F))
}
LL[,2] = log(rowSums(exp(LLaux)))
LL[is.infinite(LL)] = log(min.zero)
return(LL)
}
}
HMM.prob = function(dt){
PostProb1 = PostProb2 = PostProb3 = NULL
JoinProb11 = JoinProb12 = JoinProb13 = NULL
JoinProb21 = JoinProb22 = JoinProb23 = NULL
JoinProb31 = JoinProb32 = JoinProb33 = NULL
pi <- dt[1,c(PostProb1,PostProb2,PostProb3)]
gamma <- unname(check.prob(as.matrix(t(dt[,list(c(sum(JoinProb11,na.rm=T),sum(JoinProb12,na.rm=T),sum(JoinProb13,na.rm=T))/sum(JoinProb11+JoinProb12+JoinProb13,na.rm=T),
c(sum(JoinProb21,na.rm=T),sum(JoinProb22,na.rm=T),sum(JoinProb23,na.rm=T))/sum(JoinProb21+JoinProb22+JoinProb23,na.rm=T),
c(sum(JoinProb31,na.rm=T),sum(JoinProb32,na.rm=T),sum(JoinProb33,na.rm=T))/sum(JoinProb31+JoinProb32+JoinProb33,na.rm=T))]))))
return(list('pi'=pi,'gamma'=gamma))
}
HMM.mixprob2 = function(Y.vec,X.mat,offset.vec,delta,psi,N,M,min.zero=.Machine$double.xmin){
# This function implements the mixture distrubutions using a NB with group-specific dispersions
# it gives the posterior probabilities of the mixing probabilities for window j=1,...,M (row) and mixture b=1,...,B (column)
B = length(delta);
#rep = rep(1:N,each=M)
eta = matrix(0,nrow=M,ncol=B)
lf = list()
for(b in 1:B){
lf[[b]] = log(delta[b])+rowSums(matrix(stats::dnbinom(Y.vec,mu=exp(X.mat%*%matrix(psi[b,1:ncol(X.mat)],ncol=1)+offset.vec),
size=1/exp(X.mat%*%matrix(psi[b,(ncol(X.mat)+1):(2*ncol(X.mat))],ncol=1)),log=T),ncol=N,byrow=F))
}
for(b in 1:B){
eta[,b] = exp(lf[[b]])/rowSums(do.call(cbind,lapply(lf,exp)))
}
if(sum(eta==0)>0){eta[eta==0] = min.zero}
return(check.prob(eta))
}
Q = function(P1,P2,loglik,pi,gamma){
return(sum(P1[1,]*log(pi))+sum(colSums(P1*loglik))+sum(colSums(P2[2:nrow(loglik),]%*%diag(log(c(t(gamma)))))))
}
# glm.nb2_diffConstr = function(par,dt){
# # This function is the -loglikelihood of the differential component that implements a mixture model with NB distributions
# # NB distributions from the mixture model share the same set of parameters for both mean and dispersion models.
# # Each NB distribution represents one out of all possible combinations of differential patterns
# # exp(Int) is the mean (dispersion) of the group with background counts in that particular NB distribution
# # exp(Int + Slope) is the mean (dispersion) of the group with enrichment counts in that particular NB distribution
# Dsg.Mix = weights = ChIP = Dsg.Int = offset = id = NULL
#
# return(sum(unlist(lapply(seq_len(max(dt[,.id])),FUN = function(x){
# subdt <- dt[(.id == x),]
# subdt[,Dsg.Mix := .SD,.SDcols = paste0('Dsg.Mix',x)]
# -sum(subdt[,weights*stats::dnbinom(ChIP,mu = exp(par[1]*Dsg.Int+par[2]*Dsg.Mix+offset),size = exp(par[3]*Dsg.Int+par[4]*Dsg.Mix),log = T)])
# }))))
# }
# deriv.nb2_diffConstr = function(par,dt){
# # This function is the -loglikelihood of the differential component that implements a mixture model with NB distributions
# # NB distributions from the mixture model share the same set of parameters for both mean and dispersion models.
# # Each NB distribution represents one out of all possible combinations of differential patterns
# # exp(Int) is the mean (dispersion) of the group with background counts in that particular NB distribution
# # exp(Int + Slope) is the mean (dispersion) of the group with enrichment counts in that particular NB distribution
# Dsg.Mix = Dsg.Int = offset = weights = ChIP = mu = phi = id = NULL
#
# return(colSums(do.call(rbind,lapply(seq_len(max(dt[,.id])),FUN = function(x){
# subdt <- dt[(.id == x),]
# subdt[,Dsg.Mix := .SD,.SDcols = paste0('Dsg.Mix',x)]
# subdt[,c('mu','phi') := list(exp(par[1]*Dsg.Int+par[2]*Dsg.Mix+offset),exp(par[3]*Dsg.Int+par[4]*Dsg.Mix))]
#
# -colSums(cbind(subdt[,weights*(ChIP/mu-((ChIP+phi)/(mu+phi)))*mu*cbind(Dsg.Int,Dsg.Mix)],
# subdt[,weights*(digamma(ChIP+phi)-digamma(phi)+1+log(phi)-(ChIP+phi)/(mu+phi)-log(mu+phi))*phi*cbind(Dsg.Int,Dsg.Mix)]))
# }))))
# }
HMM.mean_diffConstr = function(dsg,psi,idx,offset.vec,N,M,min.zero=min.zero){
#(X.mat,offset.vec,psi,N,M,idx,min.zero=min.zero)
K = length(dsg)
mu = matrix(0,nrow=M,ncol=N*K)
for(k in 1:K){
mu[,k+0:(N-1)*K] = matrix(exp(as.matrix(dsg[[k]])%*%psi[[idx[k]]]+offset.vec),nrow=M,ncol=N,byrow=F)
}
if(sum(mu==0)>0){mu[mu==0] = min.zero}
return(mu)
}
HMM.LLmix2_diffConstr = function(Y.vec,dsg,mu,delta,disp,N,M,min.zero=.Machine$double.xmin){
# (Y.vec,X.mat,mu,delta,disp,dispmix,N,M,min.zero=.Machine$double.xmin)
# This function implements the HMM loglikelihood using NB distributions in the mixture component
# that share a unique set of parameters
B=length(delta);Kmu=ncol(mu)/N;K=3
LL = matrix(0,nrow=M,ncol=K)
LLaux = matrix(0,nrow=M,ncol=B)
#LL from Background
LL[,1] = rowSums(matrix(stats::dnbinom(Y.vec,mu=c(mu[,1+0:(N-1)*Kmu]),size=rep(disp[[1]],N*M),log=T),nrow=M,ncol=N,byrow=F))
#LL from Enrichment
LL[,K] = rowSums(matrix(stats::dnbinom(Y.vec,mu=c(mu[,Kmu+0:(N-1)*Kmu]),size=rep(disp[[K]],N*M),log=T),nrow=M,ncol=N,byrow=F))
#LL from Mixture
for(b in 2:(Kmu-1)){
LLaux[,(b-1)] = log(delta[(b-1)])+rowSums(matrix(stats::dnbinom(Y.vec,mu=c(mu[,b+0:(N-1)*Kmu]),
size=exp(as.matrix(dsg[[b]])%*%disp[[2]]),log=T),nrow=M,ncol=N,byrow=F))
}
LL[,2] = log(rowSums(exp(LLaux)))
LL[is.infinite(LL)] = log(min.zero)
return(LL)
}
HMM.mixprob2_diffConstr = function(Y.vec,dsg,offset.vec,delta,psi,disp,N,M,min.zero=.Machine$double.xmin){
# This function implements the mixture distrubutions using a NB with constrained parameters commong to all distributions
# it gives the posterior probabilities of the mixing probabilities for window j=1,...,M (row) and mixture b=1,...,B (column)
B = length(delta);
lf = lapply(1:B,FUN = function(b){log(delta[b])+rowSums(matrix(stats::dnbinom(Y.vec,mu=exp(as.matrix(dsg[[(b+1)]])%*%psi+offset.vec),size=exp(as.matrix(dsg[[(b+1)]])%*%disp),log=T),ncol=N,byrow=F))})
sumlf = Reduce(`+`,lapply(lf,exp))
return(check.prob(sapply(1:B,FUN = function(b){exp(lf[[b]])/sumlf},simplify = T)))
}
glm.nb2_Constr = function(par,dt,maxid,min.zero=.Machine$double.xmin){
# This function is the -loglikelihood of the mixHMMConstr that implements a mixture model in the differential component with NB distributions
# NB distributions from the mixture model share the same set of parameters for both mean and dispersion models.
# Each NB distribution represents one out of all possible combinations of differential patterns
# exp(Int) is the mean (dispersion) of the group with background counts in that particular NB distribution
# exp(Int + Slope) is the mean (dispersion) of the group with enrichment counts in that particular NB distribution
# Additionally, the mean (dispersion) for the first and third HMM components are, respectively, exp(Int) and exp(Int + Slope).
# Hence, only 4 parameters drive the entire distributions of this HMM
weights = ChIP = Dsg.Int = Dsg.Mix = offset = id = NULL
return(sum(dt[(id==1),][,-weights*log(stats::dnbinom(ChIP,mu = exp(par[1]*Dsg.Int+offset),size = exp(par[3]*Dsg.Int),log = F)+min.zero)])+
do.call(sum,lapply(2:(maxid-1),FUN = function(x){sum(dt[(id == x),][,Dsg.Mix := .SD,.SDcols = paste0('Dsg.Mix',(x-1))][,-weights*log(stats::dnbinom(ChIP,mu = exp(par[1]*Dsg.Int+par[2]*Dsg.Mix+offset),size = exp(par[3]*Dsg.Int+par[4]*Dsg.Mix),log = F)+min.zero)])}))+
sum(dt[(id==maxid),][,-weights*log(stats::dnbinom(ChIP,mu = exp((par[1]+par[2])*Dsg.Int+offset),size = exp((par[3]+par[4])*Dsg.Int),log = F)+min.zero)]))
}
deriv.nb2_Constr = function(par,dt,maxid,min.zero=.Machine$double.xmin){
# This function is the -loglikelihood of the mixHMMConstr that implements a mixture model in the differential component with NB distributions
# NB distributions from the mixture model share the same set of parameters for both mean and dispersion models.
# Each NB distribution represents one out of all possible combinations of differential patterns
# exp(Int) is the mean (dispersion) of the group with background counts in that particular NB distribution
# exp(Int + Slope) is the mean (dispersion) of the group with enrichment counts in that particular NB distribution
# Additionally, the mean (dispersion) for the first and third HMM components are, respectively, exp(Int) and exp(Int + Slope).
# Hence, only 4 parameters drive the entire distributions of this HMM
# This derivative was checked with numDeriv::grad and it is correct for glm.nb2_Constr (April 10th 2019)
Dsg.Int = offset = weights = ChIP = mu = phi = Dsg.Mix = id = NULL
return(colSums(do.call(rbind,c(lapply(1,FUN = function(x){
subdt = dt[(id == x),][,c('mu','phi') := list(exp(par[1]*Dsg.Int+offset),exp(par[3]*Dsg.Int))]
colSums(cbind(subdt[,-weights*(ChIP/mu-((ChIP+phi)/(mu+phi)))*mu*cbind(Dsg.Int,0)],
subdt[,-weights*(digamma(ChIP+phi)-digamma(phi)+1+log(phi)-(ChIP+phi)/(mu+phi)-log(mu+phi))*phi*cbind(Dsg.Int,0)]))
}),
lapply(2:(maxid-1),FUN = function(x){
subdt = dt[(id == x),][,Dsg.Mix := .SD,.SDcols = paste0('Dsg.Mix',(x-1))][,c('mu','phi') := list(exp(par[1]*Dsg.Int+par[2]*Dsg.Mix+offset),exp(par[3]*Dsg.Int+par[4]*Dsg.Mix))]
colSums(cbind(subdt[,-weights*(ChIP/mu-((ChIP+phi)/(mu+phi)))*mu*cbind(Dsg.Int,Dsg.Mix)],
subdt[,-weights*(digamma(ChIP+phi)-digamma(phi)+1+log(phi)-(ChIP+phi)/(mu+phi)-log(mu+phi))*phi*cbind(Dsg.Int,Dsg.Mix)]))
}),
lapply(maxid,FUN = function(x){
subdt = dt[(id == x),][,c('mu','phi') := list(exp((par[1]+par[2])*Dsg.Int+offset),exp((par[3]+par[4])*Dsg.Int))]
colSums(cbind(subdt[,-weights*(ChIP/mu-((ChIP+phi)/(mu+phi)))*mu*cbind(Dsg.Int,Dsg.Int)],
subdt[,-weights*(digamma(ChIP+phi)-digamma(phi)+1+log(phi)-(ChIP+phi)/(mu+phi)-log(mu+phi))*phi*cbind(Dsg.Int,Dsg.Int)]))
})))))
}
HMM.mean_Constr = function(dsg,psi,idx,offset.vec,N,M,min.zero=min.zero){
#(X.mat,offset.vec,psi,N,M,idx,min.zero=min.zero)
K = length(dsg)
mu = matrix(0,nrow=M,ncol=N*K)
for(k in 1:K){
mu[,k+0:(N-1)*K] = matrix(exp(as.matrix(dsg[[k]])%*%psi[[idx[k]]]+offset.vec),nrow=M,ncol=N,byrow=F)
}
if(sum(mu==0)>0){mu[mu==0] = min.zero}
return(mu)
}
dzinb = function(x,size,mu,zip,log=T,min.zero=.Machine$double.xmin){
# This function calculates the log-transformed probability dist. of a ZINB model
# It agrees with the function VGAM::dzinegbin, except that I add .Machine$double.xmin to
# stats::dnbinom to avoid having -Inf when passing this function to optim-'L-BFGS-B'.
if(log==T){
return(log(zip*(x==0)+(1-zip)*(stats::dnbinom(x,mu = mu,size = size,log = F)+min.zero)))
} else{
return(zip*(x==0)+(1-zip)*(stats::dnbinom(x,mu = mu,size = size,log = F)+min.zero))
}
}
glm.zinb2_Constr = function(par,dt,maxid,min.zero=.Machine$double.xmin){
# This function is the -loglikelihood of the mixZIHMMConstr that implements a mixture model in the differential component with ZINB and NB distributions
# ZINB and NB distributions from the mixture model share the same set of parameters for both mean and dispersion models.
# Each NB distribution in the differential component represents the upward shift in the background counts from ZINB model
# exp(Int) is the mean (dispersion) of the group with background counts in that particular ZINB distribution, which has zero-inflation probability exp(Int)/(1+exp(Int))
# exp(Int + Slope) is the mean (dispersion) of the group with enrichment counts in that particular NB distribution
# Additionally, the mean (dispersion) for the first and third HMM components are, respectively, exp(Int) and exp(Int + Slope).
# Hence, only 5 parameters drive the entire distributions of this HMM
Dsg.Mix = weights = ChIP = Dsg.Int = offset = id = NULL
return(sum(dt[(id==1),][,-weights*dzinb(ChIP,mu = exp(par[2]*Dsg.Int+offset),size = exp(par[3]*Dsg.Int),zip = exp(par[1])/(1+exp(par[1])))])+
do.call(sum,lapply(2:(maxid-1),FUN = function(x){
sum(dt[(id == x),][,Dsg.Mix := .SD,.SDcols = paste0('Dsg.Mix',(x-1))][,-weights*((Dsg.Mix==1)*log(stats::dnbinom(ChIP,mu = exp(par[4]*Dsg.Int+offset),size = exp(par[5]*Dsg.Int),log = F)+min.zero) + (Dsg.Mix==0)*dzinb(ChIP,mu = exp(par[2]*Dsg.Int+offset),size = exp(par[3]*Dsg.Int),zip = exp(par[1])/(1+exp(par[1]))))])
}))+
sum(dt[(id==maxid),][,-weights*log(stats::dnbinom(ChIP,mu = exp(par[4]*Dsg.Int+offset),size = exp(par[5]*Dsg.Int),log = F)+min.zero)]))
}
deriv.zinb2_Constr = function(par,dt,maxid,min.zero=.Machine$double.xmin){
# Derivatives of glm.zinb2_Constr
# This derivative was checked with numDeriv::grad and it is correct for glm.zinb2_Constr (April 20th 2019)
Dsg.Int = offset = weights = ChIP = mu = phi = zip = Dsg.Mix = muzinb = phizinb = munb = phinb = id = NULL
return(colSums(do.call(rbind,c(lapply(1,FUN = function(x){
subdt = dt[(id == x),][,c('zip','mu','phi') := list(exp(par[1]*Dsg.Int)/(1+exp(par[1]*Dsg.Int)),exp(par[2]*Dsg.Int+offset),exp(par[3]*Dsg.Int))]
colSums(cbind(subdt[,-weights*((ChIP==0)*(((1-stats::dnbinom(x=0,mu=mu,size=phi,log=F))/dzinb(0,mu=mu,size=phi,zip=zip,log=F))*zip*(1-zip))+(!ChIP==0)*(-zip))*cbind(Dsg.Int)], #zip
subdt[,-weights*((ChIP==0)*((-(1-zip)*((phi/(mu+phi))^(phi+1))/dzinb(0,mu=mu,size=phi,zip=zip,log=F))*mu)+(!ChIP==0)*((ChIP/mu-(ChIP+phi)/(mu+phi))*mu))*cbind(Dsg.Int)], #int mean background
subdt[,-weights*((ChIP==0)*((1-zip)*((phi/(mu+phi))^phi)*(log(phi/(mu+phi))+mu/(mu+phi))*(1/dzinb(0,mu=mu,size=phi,zip=zip,log=F))*phi)+(!ChIP==0)*((digamma(ChIP+phi)-digamma(phi)+1+log(phi)-(ChIP+phi)/(mu+phi)-log(mu+phi))*phi))*cbind(Dsg.Int)],#int disp background
0, #int mean enrichment
0)) #int disp enrichment
}),
lapply(2:(maxid-1),FUN = function(x){
subdt = dt[(id == x),][,Dsg.Mix := .SD,.SDcols = paste0('Dsg.Mix',(x-1))][,c('zip','muzinb','phizinb','munb','phinb') := list(exp(par[1]*Dsg.Int)/(1+exp(par[1]*Dsg.Int)),exp(par[2]*Dsg.Int+offset),exp(par[3]*Dsg.Int),exp(par[4]*Dsg.Int+offset),exp(par[5]*Dsg.Int))]
colSums(cbind(subdt[,-weights*(Dsg.Mix==0)*((ChIP==0)*(((1-stats::dnbinom(x=0,mu=muzinb,size=phizinb,log=F))/dzinb(0,mu=muzinb,size=phizinb,zip=zip,log=F))*zip*(1-zip))+(!ChIP==0)*(-zip))*cbind(Dsg.Int)],
subdt[,-weights*(Dsg.Mix==0)*((ChIP==0)*((-(1-zip)*((phizinb/(muzinb+phizinb))^(phizinb+1))/dzinb(0,mu=muzinb,size=phizinb,zip=zip,log=F))*muzinb)+(!ChIP==0)*((ChIP/muzinb-(ChIP+phizinb)/(muzinb+phizinb))*muzinb))*cbind(Dsg.Int)],
subdt[,-weights*(Dsg.Mix==0)*((ChIP==0)*((1-zip)*((phizinb/(muzinb+phizinb))^phizinb)*(log(phizinb/(muzinb+phizinb))+muzinb/(muzinb+phizinb))*(1/dzinb(0,mu=muzinb,size=phizinb,zip=zip,log=F))*phizinb)+(!ChIP==0)*((digamma(ChIP+phizinb)-digamma(phizinb)+1+log(phizinb)-(ChIP+phizinb)/(muzinb+phizinb)-log(muzinb+phizinb))*phizinb))*cbind(Dsg.Int)],
subdt[,-weights*(Dsg.Mix==1)*(ChIP/munb-((ChIP+phinb)/(munb+phinb)))*munb*cbind(Dsg.Int)],
subdt[,-weights*(Dsg.Mix==1)*(digamma(ChIP+phinb)-digamma(phinb)+1+log(phinb)-(ChIP+phinb)/(munb+phinb)-log(munb+phinb))*phinb*cbind(Dsg.Int)]))
}),
lapply(maxid,FUN = function(x){
subdt = dt[(id == x),][,c('mu','phi') := list(exp(par[4]*Dsg.Int+offset),exp(par[5]*Dsg.Int))]
colSums(cbind(0,
0,
0,
subdt[,-weights*(ChIP/mu-((ChIP+phi)/(mu+phi)))*mu*cbind(Dsg.Int)],
subdt[,-weights*(digamma(ChIP+phi)-digamma(phi)+1+log(phi)-(ChIP+phi)/(mu+phi)-log(mu+phi))*phi*cbind(Dsg.Int)]))
})))))
}
HMM.LLmix2_Constr = function(dt,psi,delta,N,M,min.zero=.Machine$double.xmin){
# Y.vec,dsg,mu,delta,disp,N,M,min.zero=.Machine$double.xmin
# This function implements the HMM loglikelihood using NB distributions in the mixture component
# that share a unique set of parameters
ChIP = Dsg.Int = offset = Dsg.Mix = NULL
B=length(delta);K=3
LL = matrix(0,nrow=M,ncol=K)
#LL from Background
LL[,1] = rowSums(matrix(dt[,stats::dnbinom(x = ChIP,mu = exp(psi[1]*Dsg.Int+offset),size = exp(psi[3]*Dsg.Int),log=T)],nrow=M,ncol=N,byrow=F))
#LL from Enrichment
LL[,K] = rowSums(matrix(dt[,stats::dnbinom(x = ChIP,mu = exp((psi[1]+psi[2])*Dsg.Int+offset),size = exp((psi[3]+psi[4])*Dsg.Int),log=T)],nrow=M,ncol=N,byrow=F))
#LL from Mixture
LL[,2] = log(base::Reduce(`+`,lapply(2:(B+1),FUN = function(x){
# exp(log(delta[(x-1)])+rowSums(matrix(dt[,Dsg.Mix := .SD,.SDcols = paste0('Dsg.Mix',(x-1))][,((Dsg.Mix==1)*stats::dnbinom(ChIP,mu = exp(psi2[1]*Dsg.Int+offset),size = exp(psi2[2]*Dsg.Int),log = T) + (Dsg.Mix==0)*dzinb(x = ChIP,mu = exp(psi1[2]*Dsg.Int+offset),size = exp(psi1[3]*Dsg.Int),zip = exp(psi1[1])/(1+exp(psi1[1]))))],nrow=M,ncol=N,byrow=F)))
exp(log(delta[(x-1)])+rowSums(matrix(dt[,Dsg.Mix := .SD,.SDcols = paste0('Dsg.Mix',(x-1))][,stats::dnbinom(x = ChIP,mu = exp(psi[1]*Dsg.Int+psi[2]*Dsg.Mix+offset),size = exp(psi[3]*Dsg.Int+psi[4]*Dsg.Mix),log=T)],nrow=M,ncol=N,byrow=F)))
})))
LL[is.infinite(LL)] = log(min.zero)
return(LL)
}
ZIHMM.LLmix2_Constr = function(dt,psi1,psi2,delta,N,M,min.zero=.Machine$double.xmin){
# This function implements the HMM loglikelihood using ZINB and NB distributions in the mixture component
# that share a unique set of parameters
# dt <- data.table::copy(dt)
ChIP = Dsg.Int = offset = NULL
B=length(delta);K=3
LL = matrix(0,nrow=M,ncol=K)
#LL from Background
LL[,1] = rowSums(matrix(dt[,dzinb(x = ChIP,mu = exp(psi1[2]*Dsg.Int+offset),size = exp(psi1[3]*Dsg.Int),zip = exp(psi1[1])/(1+exp(psi1[1])))],nrow=M,ncol=N,byrow=F))
#LL from Enrichment
LL[,K] = rowSums(matrix(dt[,stats::dnbinom(x = ChIP,mu = exp(psi2[1]*Dsg.Int+offset),size = exp(psi2[2]*Dsg.Int),log = T)],nrow=M,ncol=N,byrow=F))
#LL from Mixture
LL[,2] = log(base::Reduce(`+`,lapply(2:(B+1),FUN = function(x){
# exp(log(delta[(x-1)])+rowSums(matrix(dt[,Dsg.Mix := .SD,.SDcols = paste0('Dsg.Mix',(x-1))][,((Dsg.Mix==1)*stats::dnbinom(ChIP,mu = exp(psi2[1]*Dsg.Int+offset),size = exp(psi2[2]*Dsg.Int),log = T) + (Dsg.Mix==0)*dzinb(x = ChIP,mu = exp(psi1[2]*Dsg.Int+offset),size = exp(psi1[3]*Dsg.Int),zip = exp(psi1[1])/(1+exp(psi1[1]))))],nrow=M,ncol=N,byrow=F)))
exp(log(delta[(x-1)])+rowSums(matrix(dt[,((.SD==1)*stats::dnbinom(ChIP,mu = exp(psi2[1]*Dsg.Int+offset),size = exp(psi2[2]*Dsg.Int),log = T) + (.SD==0)*dzinb(x = ChIP,mu = exp(psi1[2]*Dsg.Int+offset),size = exp(psi1[3]*Dsg.Int),zip = exp(psi1[1])/(1+exp(psi1[1])))),.SDcols = paste0('Dsg.Mix',(x-1))],nrow=M,ncol=N,byrow=F)))
})))
LL[is.infinite(LL)] = log(min.zero)
return(LL)
}
ZIHMM.mixprob2_Constr = function(dt,delta,psi1,psi2,N,M,min.zero=.Machine$double.xmin){
# This function implements the mixture distrubutions using ZINB and NB with constrained parameters commong to all distributions
# it gives the posterior probabilities of the mixing probabilities for window j=1,...,M (row) and mixture b=1,...,B (column)
ChIP = Dsg.Int = offset = NULL
B = length(delta);
# dt <- data.table::copy(dt)
lf = lapply(1:B,FUN = function(b){
# log(delta[b])+rowSums(matrix(dt[,Dsg.Mix := .SD,.SDcols = paste0('Dsg.Mix',b)][,((Dsg.Mix==1)*stats::dnbinom(ChIP,mu = exp(psi2[1]*Dsg.Int+offset),size = exp(psi2[2]*Dsg.Int),log = T) + (Dsg.Mix==0)*dzinb(x = ChIP,mu = exp(psi1[2]*Dsg.Int+offset),size = exp(psi1[3]*Dsg.Int),zip = exp(psi1[1])/(1+exp(psi1[1]))))],nrow=M,ncol=N,byrow=F))
log(delta[b])+rowSums(matrix(dt[,((.SD==1)*stats::dnbinom(ChIP,mu = exp(psi2[1]*Dsg.Int+offset),size = exp(psi2[2]*Dsg.Int),log = T) + (.SD==0)*dzinb(x = ChIP,mu = exp(psi1[2]*Dsg.Int+offset),size = exp(psi1[3]*Dsg.Int),zip = exp(psi1[1])/(1+exp(psi1[1])))),.SDcols = paste0('Dsg.Mix',b)],nrow=M,ncol=N,byrow=F))
})
lf <- rapply(lf,function(x){ifelse(exp(x)==0,log(min.zero),x)}, how = "replace")
sumlf = Reduce(`+`,lapply(lf,exp))
return(check.prob(sapply(1:B,FUN = function(b){exp(lf[[b]])/sumlf},simplify = T)))
}
HMM.mixprob2_Constr = function(dt,delta,psi,N,M,min.zero=.Machine$double.xmin){
# This function implements the mixture distrubutions using a NB with constrained parameters commong to all distributions
# it gives the posterior probabilities of the mixing probabilities for window j=1,...,M (row) and mixture b=1,...,B (column)
ChIP = Dsg.Int = offset = Dsg.Mix = NULL
B = length(delta);
lf = lapply(1:B,FUN = function(b){log(delta[b])+rowSums(matrix(dt[,Dsg.Mix := .SD,.SDcols = paste0('Dsg.Mix',b)][,stats::dnbinom(x = ChIP,mu = exp(psi[1]*Dsg.Int+psi[2]*Dsg.Mix+offset),size = exp(psi[3]*Dsg.Int+psi[4]*Dsg.Mix),log=T)],ncol=N,byrow=F))})
lf <- rapply(lf,function(x){ifelse(exp(x)==0,log(min.zero),x)}, how = "replace")
sumlf = Reduce(`+`,lapply(lf,exp))
return(check.prob(sapply(1:B,FUN = function(b){exp(lf[[b]])/sumlf},simplify = T)))
}
getBIC = function(logF,nPar,group){
# This function computes the BIC of the HMM
m = nrow(logF)
c = max(logF[m,])
return(as.numeric(-2*(c+log(sum(exp(logF[m,]-c))))+nPar*log(length(group)*m)))
}
Mode <- function(x) {
# This function computes the model of a vector x
# NOTE: it is only applicable if x is integer or categorical
# NOTE: if multi model, it returns missing (uncomment for return everything)
ux <- unique(x)
tab <- tabulate(match(x, ux))
if(length(ux[tab == max(tab)])==1){
return(as.character(ux[tab == max(tab)]))
} else{
# out = ux[tab == max(tab)]
# return(paste0(out,collapse=','))
return(NA)
}
}
classify <- function(x,names.mixProb,names.Conditions){
if(nchar(x)==1){
return(paste0(names.Conditions[gregexpr('1',names.mixProb[as.numeric(x)])[[1]][1]],collapse = ','))
} else{
return(do.call(paste,c(lapply(lapply(strsplit(x,',')[[1]],FUN = function(z){as.numeric(gregexpr('1',names.mixProb[as.numeric(z)])[[1]])}),FUN = function(w){paste0(names.Conditions[w],collapse = ',')}),sep='-')))
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.