ashnet: Network Learning using adaptive shrinkage

#' @title Adaptive shrinkage of a correlation matrix using Variational EM with Inverse Gamma sigma prior
#'
#' @description This function performs adaptive shrinkage of a correlation matrix
#'
#' @param cormat The estimated sample correlation matrix
#' @param nsamples The number of samples over which the correlation matrix is estimated.
#' @param image if TRUE, plots an image of the shrunk and non-shrunk correlation matrices
#' @param tol The tolerance to check the difference between ash-cor only and ash-cor PD matrices.
#' @param nullweight The weight of the null component of the ash prior.
#' @param null.comp The number of null components. Default is 1.
#'
#' @return Returns a shrunk version of the sample correlation matrix (the output is also a correlation matrix)
#'
#' @keywords adaptive shrinkage, correlation
#' @export

ash_cor3 <- function(cormat, nsamples, image=FALSE, tol=1e-06,
                     nullweight = 10, null.comp =1){
  cor_table <- reshape2::melt(cormat);
  cor_table_non_diag <- cor_table[which(cor_table[,1] !=cor_table[,2]),];
  cor_table_non_diag.val <- cor_table_non_diag[,3];
  cor_table_non_diag.val[which(cor_table_non_diag.val==1)]=(1- 1e-7);
  cor_transform_mean_vec=0.5*log((1+cor_table_non_diag.val)/(1-cor_table_non_diag.val))
  cor_transform_sd_vec=rep(sqrt(1/(nsamples-3)), dim(cor_table_non_diag)[1]);
  options(warn=-1)

  betahat <- cor_transform_mean_vec
  sebetahat <- cor_transform_sd_vec
  completeobs <- which(!is.na(betahat));

  gridmult = sqrt(2);
  mixsd = autoselect.mixsd(betahat[completeobs]-mean(betahat[completeobs]),sebetahat[completeobs],gridmult)
  mixsd = c(0, mixsd[1:(length(mixsd)-1)]);
  null.comp = which.min(mixsd) #which component is the "null"
  k <- length(mixsd)

  prior = "nullbiased";
  prior = setprior(prior,k,nullweight,null.comp)
  randomstart = FALSE
  n = length(betahat)
  pi = initpi(k,n,null.comp,randomstart)

  pi = c(1- (k-1)/100, rep(1/100, k-1))
  g=normalmix(pi,rep(0,k),mixsd)
  df <- NULL
  control=list()


  out <- estimate_mixprop_2(betahat, sebetahat, g, prior)

  ghat = g;
  ghat$pi = out$pi
  ghat$mean = rep(0, k)
  ghat$sd = sqrt(abs(out$nu2/(out$nu1 - 1) - (1/(nsamples -3))))

  postMean.fit <- postmean(ghat,betahat[completeobs],sebetahat[completeobs], v=NULL)

  ash_cor_vec=(exp(2*postMean.fit)-1)/(exp(2*postMean.fit)+1);

  newdata.table <- cor_table_non_diag;
  newdata.table[,3] <- ash_cor_vec;
  ash_cor_only <- reshape2::dcast(newdata.table, Var1~Var2, value.var = "value")[,-1];
  ash_cor_only[is.na(ash_cor_only)]=1;
  pd_completion <- Matrix::nearPD(as.matrix(ash_cor_only), conv.tol=1e-06);
  ash_cor_PD3 <- sweep(pd_completion$mat,diag(as.matrix(pd_completion$mat)), MARGIN=1,"/")

  if(image) {
    image(cormat)
    image(as.matrix(ash_cor_only))
  }

  if(all.equal(target=ash_cor_only, current=ash_cor_PD3, tolerance=tol)==TRUE){
    cat("ash cor only and ash cor PD matrices are same")
  }else{
    cat("ash cor only and ash cor PD matrices are different")
  }

  ll <- list("ash_cor_only" = ash_cor_only, "ash_cor_PD" = ash_cor_PD3)
  return(ll)
}

estimate_mixprop_2 <- function(betahat, sebetahat, g, prior,
                               null.comp=1,df=NULL,control=list()){
  pi_init=NULL
  k=ncomp(g)
  n = length(betahat)
  control$tol = min(0.1/n,1.e-7) # set convergence criteria to be more stringent for larger samples

  matrix_llik = t(log_compdens_conv(g,betahat,sebetahat,df)) #an n by k matrix
  matrix_llik = matrix_llik - apply(matrix_llik,1, max) #avoid numerical issues by subtracting max of each row
  matrix_lik = exp(matrix_llik)

  fit=do.call("mixVBEM2",args = list(matrix_lik= matrix_lik,
                  betahat = betahat, sebetahat = sebetahat,
                  prior=prior, pi_init=pi_init, control=control))


  null.loglik = sum(log(matrix_lik[,null.comp]))
  loglik.final =  penloglik(fit$pihat,matrix_lik,1)
  g$pi=pi

  ll <- list(pi= fit$pihat, nu1 = fit$nu1hat,
             nu2 = fit$nu2hat, loglik=loglik.final,
             null.loglik=null.loglik, matrix_lik=matrix_lik)
  return(ll)
}


mixVBEM2 = function(matrix_lik, betahat, sebetahat, prior, pi_init = NULL,
                    nu1_init = NULL, nu2_init = NULL,
                    nullweight = 10, control=list()){
  control.default=list(K = 1, method=3, square=TRUE, step.min0=1, step.max0=1, mstep=4, kr=1, objfn.inc=1,tol=1.e-07, maxiter=5000, trace=FALSE)
  namc=names(control)
  if (!all(namc %in% names(control.default)))
    stop("unknown names in control: ", namc[!(namc %in% names(control.default))])
  controlinput=modifyList(control.default, control)

  gridmult = sqrt(2);
  mixsd = autoselect.mixsd(betahat-mean(betahat),sebetahat,gridmult)
 # mixsd <- c(0, mixsd[1:(k-1)])
  null.comp = which.min(mixsd)

  k = ncol(matrix_lik)
  n = nrow(matrix_lik)
  if(is.null(pi_init)){  pi_init = rep(1,k)  }# Use as starting point for pi
  if(is.null(nu1_init)){ nu1_init = rep(0.001, k) }
  if(is.null(nu2_init)){ nu2_init = rep(0.001, k) }

  avgpipost = matrix(exp(rep(digamma(pi_init),n)-rep(digamma(sum(pi_init)),k*n)),ncol=k,byrow=TRUE)
  classprob = avgpipost*matrix_lik
  classprob = classprob/rowSums(classprob) # n by k matrix
  pipostnew = colSums(classprob) + prior

  nu1_new <- nu1_init + 0.5*colSums(classprob)
  tmp <- matrix(rep(betahat^2,each=k), ncol=k, byrow=TRUE)*classprob
  nu2_new <- nu2_init + 0.5*colSums(tmp)

  param_init <- c(pipostnew, log(nu1_new + 1e-15, base=2), log(nu2_new+1e-15, base=2))

  res = SQUAREM::squarem(par=param_init,fixptfn=VBfixpoint2, objfn=VBnegpenloglik2,
                matrix_lik=matrix_lik, betahat=betahat, prior=prior,
                control = list(tol=10^(-6), maxiter = 1000))

  pi_new = res$par[1:k]
  nu1_new = 2^(res$par[(k+1):(2*k)]) - 1e-15
  nu2_new = 2^(res$par[(2*k+1):(3*k)]) - 1e-15

  return(list(pihat = pi_new/sum(pi_new),
              nu1hat = nu1_new,
              nu2hat = nu2_new,
              B=res$value.objfn,
              niter = res$iter,
              converged=res$convergence,
              post=res$par))
}


VBfixpoint2 = function(param_post, matrix_lik, betahat, prior){
  n=nrow(matrix_lik)
  k=ncol(matrix_lik)

  pi_post = param_post[1:k];
  nu1_post = 2^(param_post[(k+1):(2*k)]) - 1e-15
  nu2_post = 2^(param_post[(2*k+1):(3*k)]) - 1e-15

  avgpipost = matrix(exp(rep(digamma(pi_post),n)-rep(digamma(sum(pi_post)),k*n)),ncol=k,byrow=TRUE)
  classprob = avgpipost*matrix_lik
  classprob1 = classprob/rowSums(classprob) # n by k matrix
  pi_post_new = colSums(classprob1) + prior


  mat1 <- matrix(exp(rep(digamma(pi_post),n)-rep(digamma(sum(pi_post)),k*n)),ncol=k,byrow=TRUE)
  mat2 <- exp(matrix(0.5*rep(digamma(nu1_post),n) - 0.5* rep(log(nu2_post),n),ncol=k,byrow=TRUE) - (betahat^2/2)%*% t(nu1_post/nu2_post));

  classprob <- mat1*mat2;
  classprob1 = classprob/rowSums(classprob) # n by k matrix

  pi_post_new2 <- prior + colSums(classprob1)
 # cat(pi_post_new2, "\n")

  #  pi_post_new <- pi_post_new/ sum(pi_post_new)

  nu1_new <- nu1_post + 0.5*colSums(classprob1)
  tmp <- matrix(rep(betahat^2,each=k), ncol=k, byrow=TRUE)*classprob1
  nu2_new <- nu2_post + 0.5*colSums(tmp)
 # cat(nu2_new/(nu1_new-1))

  param_new <- c(pi_post_new, log(nu1_new + 1e-15, base=2), log(nu2_new + 1e-15, base=2));
  return(param_new)
}

VBnegpenloglik2=function(param_post,matrix_lik, betahat, prior){
  return(-VBpenloglik2(param_post, matrix_lik, betahat, prior))
}

VBpenloglik2 = function(param_post, matrix_lik, betahat, prior){
  n=nrow(matrix_lik)
  k=ncol(matrix_lik)

  pi_post = param_post[1:k];
  pi_post = pi_post/sum(pi_post);
  nu1_post = exp(param_post[(k+1):(2*k)]) - 1e-15
  nu2_post = exp(param_post[(2*k+1):(3*k)]) - 1e-15

  mat1 <- matrix(exp(rep(digamma(pi_post),n)-rep(digamma(sum(pi_post)),k*n) + 0.5*rep(digamma(nu1_post),n) - 0.5* rep(log(nu2_post),n)),ncol=k,byrow=TRUE)
  mat2 <- exp(- (betahat^2/2)%*% t(nu1_post/nu2_post));

  classprob1 <- mat1*mat2;
  classprob = classprob1/rowSums(classprob1) # n by k matrix

  invgammaloglik = k*(nu1_post + 1)*log(nu2_post) - (nu1_post + 1) *k*digamma(nu1_post) - k * nu1_post

  B= sum(classprob*log(classprob1),na.rm=TRUE) - diriKL(prior,pi_post) - sum(classprob*log(classprob)) + invgammaloglik
  return(B)
}


setprior=function(prior,k,nullweight,null.comp){
  if(!is.numeric(prior)){
    if(prior=="nullbiased"){ # set up prior to favour "null"
      prior = rep(1,k)
      prior[null.comp] = nullweight #prior 10-1 in favour of null by default
    }else if(prior=="uniform"){
      prior = rep(1,k)
    } else if(prior=="unit"){
      prior = rep(1/k,k)
    }
  }
  if(length(prior)!=k | !is.numeric(prior)){
    stop("invalid prior specification")
  }
  return(prior)
}

initpi = function(k,n,null.comp,randomstart){
  if(randomstart){
    pi = stats::rgamma(k,1,1)
  } else {
    if(k<n){
      pi=rep(1,k)/n #default initialization strongly favours null; puts weight 1/n on everything except null
      pi[null.comp] = (n-k+1)/n #the motivation is data can quickly drive away from null, but tend to drive only slowly toward null.
    } else {
      pi=rep(1,k)/k
    }
  }
  pi=normalize(pi)
  return(pi)
}

normalize <- function(x) { return(x/sum(x))}

diriKL = function(p,q){
  p.sum = sum(p)
  q.sum = sum(q)
  k = length(q)
  KL = lgamma(q.sum)-lgamma(p.sum)+sum((q-p)*(digamma(q)-digamma(rep(q.sum,k))))+sum(lgamma(p)-lgamma(q))
  return(KL)
}

autoselect.mixsd = function(betahat,sebetahat,mult){
  sebetahat=sebetahat[sebetahat!=0] #To avoid exact measure causing (usually by mistake)
  sigmaamin = min(sebetahat)/5 #so that the minimum is small compared with measurement precision
  if(all(betahat^2<=sebetahat^2)){
    sigmaamax = 5*sigmaamin #to deal with the occassional odd case where this could happen; 8 is arbitrary
  }else{
    sigmaamax = 2*sqrt(max(betahat^2-sebetahat^2)) #this computes a rough largest value you'd want to use, based on idea that sigmaamax^2 + sebetahat^2 should be at least betahat^2
  }
  if(mult==0){
    return(c(0,sigmaamax/2))
  }else{
    npoint = ceiling(log2(sigmaamax/sigmaamin)/log2(mult))
    return(mult^((-npoint):0) * sigmaamax)
  }
}

################################## GENERIC FUNCTIONS ############################

#' Generic function of calculating the component densities of the
#'     mixture
#' @param m mixture of k components generated by normalmix() or
#'     unimix() or igmix()
#' @param y is an n-vector of location
#' @param log whether to use log-scale on densities
#' @return A k by n matrix of densities
#' @export
compdens = function(m,y,log=FALSE){
  UseMethod("compdens")
}
#' @export
compdens.default = function(m,y,log=FALSE){
  stop(paste("Invalid class", class(m), "for first argument in",  match.call()))
}

#' Generic function of extracting the standard deviations of
#'     components of the mixture
#' @param m a mixture of k components generated by normalmix() or
#'     unimix() or igmix()
#' @return it returns a vector of standard deviations
#' @export
comp_sd = function(m){
  UseMethod("comp_sd")
}
#' @export
comp_sd.default = function(m){
  stop("method comp_sd not written for this class")
}

#' Generic function of calculating the second moment of components of
#'     the mixture
#' @param m a mixture of k components generated by normalmix() or
#'     unimix() or igmix()
#' @return it returns a vector of second moments.
comp_mean2 = function(m){
  UseMethod("comp_mean2")
}
comp_mean2.default = function(m){
  comp_sd(m)^2 + comp_mean(m)^2
}


#' Generic function of calculating the overall mean of the mixture
#'
#'
#' @param m a mixture of k components generated by normalmix() or
#'     unimix() or igmix()
#' @return it returns scalar, the mean of the mixture distribution.
calc_mixmean = function(m){
  UseMethod("calc_mixmean")
}
calc_mixmean.default = function(m){
  sum(m$pi * comp_mean(m))
}


#' Generic function of calculating the overall second moment of the
#'     mixture
#' @param m a mixture of k components generated by normalmix() or
#'     unimix() or igmix()
#' @return it returns scalar
mixmean2 = function(m){
  UseMethod("mixmean2")
}
mixmean2.default = function(m){
  sum(m$pi * comp_mean2(m))
}

#' Generic function of calculating the overall standard deviation of
#'     the mixture
#' @param m a mixture of k components generated by normalmix() or
#'     unimix() or igmix()
#' @return it returns scalar
#' @export
calc_mixsd = function(m){
  UseMethod("calc_mixsd")
}
calc_mixsd.default = function(m){
  sqrt(mixmean2(m)-calc_mixmean(m)^2)
}

#' Generic function of calculating the first moment of components of
#'     the mixture
#' @param m a mixture of k components generated by normalmix() or
#'     unimix() or igmix()
#' @return it returns a vector of means.
#' @export
comp_mean = function(m){
  UseMethod("comp_mean")
}
#' @export
comp_mean.default = function(m){
  stop("method comp_mean not written for this class")
}

#number of components
#' ncomp
#' @param m a mixture of k components generated by normalmix() or
#'     unimix() or igmix()
#' @export
#'
ncomp = function(m){
  UseMethod("ncomp")
}

#' @title ncomp.default
#' @description The default version of \code{\link{ncomp}}.
#' @param m a mixture of k components generated by normalmix() or
#'     unimix() or igmix()
#' @export
#'
ncomp.default = function(m){
  return(length(m$pi))
}


#' Generic function of extracting the mixture proportions
#' @param m a mixture of k components generated by normalmix() or
#'     unimix() or igmix()
#' @return it returns a vector of component probabilities, summing up
#'     to 1.
#' @export
mixprop = function(m){
  UseMethod("mixprop")
}
#' @export
mixprop.default = function(m){
  m$pi
}

#' @title mixcdf
#'
#' @description Returns cdf for a mixture (generic function)
#'
#' @details None
#'
#' @param x a mixture (eg of type normalmix or unimix)
#' @param y locations at which cdf to be computed
#' @param lower.tail boolean indicating whether to report lower tail
#'
#' @return an object of class normalmix
#'
#' @export
#'
#' @examples mixcdf(normalmix(c(0.5,0.5),c(0,0),c(1,2)),seq(-4,4,length=100))
#'
mixcdf = function(x,y,lower.tail=TRUE){
  UseMethod("mixcdf")
}
#' @title mixcdf.default
#' @description The default version of \code{\link{mixcdf}}.
#' @param x a mixture (eg of type normalmix or unimix)
#' @param y locations at which cdf to be computed
#' @param lower.tail boolean indicating whether to report lower tail
#' @export
#'
mixcdf.default = function(x,y,lower.tail=TRUE){
  x$pi %*% comp_cdf(x,y,lower.tail)
}

#find cdf for each component, a generic function
#' Generic function of computing the cdf for each component
#' @param m a mixture (eg of type normalmix or unimix)
#' @param y locations at which cdf to be computed
#' @param lower.tail boolean indicating whether to report lower tail
#' @return it returns a vector of probabilities, with length equals to
#'     number of components in m
#' @export
comp_cdf = function(m,y,lower.tail=TRUE){
  UseMethod("comp_cdf")
}
#' @export
comp_cdf.default = function(m,y,lower.tail=TRUE){
  stop("comp_cdf not implemented for this class")
}


#' Find density at y, a generic function
#' @param x A mixture of k components generated by
#'     \code{\link{normalmix}} or \code{\link{unimix}}.
#' @param y An n-vector of the location.
#' @export
dens = function(x,y){
  UseMethod("dens")
}
#' @export
dens.default = function(x,y){
  return (x$pi %*% compdens(x, y))
}

#find log likelihood of data in x (a vector) for mixture in m
loglik = function(m,x){
  UseMethod("loglik")
}
loglik.default = function(m,x){
  sum(log(dens(m,x)))
}


#' @title loglik_conv
#' @description find log likelihood of data in betahat, when the
#'     mixture m is convolved with a normal with sd betahatsd
#'
#' @param m mixture distribution with k components
#' @param betahat an n vector of observations
#' @param betahatsd an n vector of standard errors
#' @param v degree of freedom of error distribution
#' @param FUN default is "+"
#' @export
#'
loglik_conv = function(m,betahat,betahatsd,v,FUN="+"){
  UseMethod("loglik_conv")
}
#' @title loglik_conv.default
#'
#' @description The default version of \code{\link{loglik_conv}}.
#'
#' @param m mixture distribution with k components
#' @param betahat an n vector of observations
#' @param betahatsd an n vector of standard errors
#' @param v degree of freedom of error distribution
#' @param FUN default is "+"
#' @export
#'
loglik_conv.default = function(m,betahat,betahatsd,v,FUN="+"){
  sum(log(dens_conv(m,betahat,betahatsd,v,FUN)))
}



#' @title compdens_conv
#' @description compute the density of the components of the mixture m
#'     when convoluted with a normal with standard deviation s or a
#'     scaled (se) student.t with df v, the density is evaluated at x
#' @param m mixture distribution
#' @param x an n vector
#' @param s an n vector or integer
#' @param v degree of freedom of error distribution
#' @param FUN default is "+"
#' @return a k by n matrix of densities
compdens_conv = function(m,x,s,v,FUN="+"){
  UseMethod("compdens_conv")
}
compdens_conv.default = function(m,x,s,v,FUN="+"){
  stop(paste("Invalid class", class(m), "for first argument in",  match.call()))
}

#' @title log_compdens_conv
#' @description compute the log density of the components of the
#'     mixture m when convoluted with a normal with standard deviation
#'     s or a scaled (se) student.t with df v, the density is
#'     evaluated at x
#' @param m mixture distribution
#' @param x an n vector
#' @param s an n vector or integer
#' @param v degree of freedom of error distribution
#' @param FUN default is "+"
#' @return a k by n matrix of log densities
log_compdens_conv = function(m,x,s,v,FUN="+"){
  UseMethod("log_compdens_conv")
}
log_compdens_conv.default = function(m,x,s,v,FUN="+"){
  log(compdens_conv(m,x,s,v,FUN))
}



#' @title dens_conv
#' @description compute density of mixture m convoluted with normal of
#'     sd (s) or student t with df v at locations x
#' @param m mixture distribution
#' @param x an n vector
#' @param s an n vector or integer
#' @param v degree of freedom of error distribution
#' @param FUN default is "+"
dens_conv = function(m,x,s,v,FUN="+"){
  UseMethod("dens_conv")
}
dens_conv.default = function(m,x,s,v,FUN="+"){
  colSums(m$pi * compdens_conv(m,x,s,v,FUN))
}


#' @title comppostprob
#' @description compute the posterior prob that each observation came
#'     from each component of the mixture m,output a k by n vector of
#'     probabilities computed by weighting the component densities by
#'     pi and then normalizing
#'
#' @param m mixture distribution
#' @param x an n vector
#' @param s an n vector or integer
#' @param v degree of freedom of error distribution
#' @export
comppostprob=function(m,x,s,v){
  UseMethod("comppostprob")
}

#' The old default version of \code{\link{comppostprob}}.
#'
#' @inheritParams comppostprob
#'
#' @export
#'
old.comppostprob.default = function(m,x,s,v){
  tmp= (t(m$pi * compdens_conv(m,x,s,v))/dens_conv(m,x,s,v))
  ismissing = (is.na(x) | is.na(s))
  tmp[ismissing,]=m$pi
  t(tmp)
}

#' @export
comppostprob.default = function(m,x,s,v){
  lpost = log_compdens_conv(m,x,s,v) + log(m$pi) # lpost is k by n of log posterior prob (unnormalized)
  lpmax = apply(lpost,2,max) #dmax is of length n
  tmp = exp(t(lpost)-lpmax) #subtracting the max of the logs is just done for numerical stability
  tmp = tmp/rowSums(tmp)
  ismissing = (is.na(x) | is.na(s))
  tmp[ismissing,]=m$pi
  t(tmp)
}

#' @title compcdf_post
#' @description evaluate cdf of posterior distribution of beta at c. m
#'     is the prior on beta, a mixture; c is location of evaluation
#'     assumption is betahat | beta ~ t_v(beta,sebetahat)
#' @param m mixture distribution with k components
#' @param c a scalar
#' @param betahat an n vector of observations
#' @param sebetahat an n vector of standard errors
#' @param v degree of freedom of error distribution
#' @return a k by n matrix
#' @examples
#' beta = rnorm(100,0,1)
#' betahat= beta+rnorm(100,0,1)
#' sebetahat=rep(1,100)
#' ash.beta = ash(betahat,1,mixcompdist="normal")
#' compcdf_post(ash.beta$fitted.g,0,betahat,sebetahat,NULL)
#' @export
compcdf_post=function(m,c,betahat,sebetahat,v){
  UseMethod("compcdf_post")
}
#' @export
compcdf_post.default=function(m,c,betahat,sebetahat,v){
  stop("method compcdf_post not written for this class")
}

#' @title cdf_post
#' @description evaluate cdf of posterior distribution of beta at c. m
#'     is the prior on beta, a mixture; c is location of evaluation
#'     assumption is betahat | beta ~ t_v(beta,sebetahat)
#' @param m mixture distribution with k components
#' @param c a scalar
#' @param betahat an n vector of observations
#' @param sebetahat an n vector of standard errors
#' @param v degree of freedom of error distribution
#' @return an n vector containing the cdf for beta_i at c
#' @examples
#' beta = rnorm(100,0,1)
#' betahat= beta+rnorm(100,0,1)
#' sebetahat=rep(1,100)
#' ash.beta = ash(betahat,1,mixcompdist="normal")
#' cdf0 = cdf_post(ash.beta$fitted.g,0,betahat,sebetahat,NULL)
#' graphics::plot(cdf0,1-ash.beta$PositiveProb)
#' @export
cdf_post = function(m,c,betahat,sebetahat,v){
  UseMethod("cdf_post")
}
#' @export
cdf_post.default=function(m,c,betahat,sebetahat,v){
  colSums(comppostprob(m,betahat,sebetahat,v)*compcdf_post(m,c,betahat,sebetahat,v))
}
#' @title vcdf_post
#' @description vectorized version of \code{\link{cdf_post}}
#' @param m mixture distribution with k components
#' @param c a numeric vector
#' @param betahat an n vector of observations
#' @param sebetahat an n vector of standard errors
#' @param v degree of freedom of error distribution
#' @return an n vector containing the cdf for beta_i at c
#' @examples
#' beta = rnorm(100,0,1)
#' betahat= beta+rnorm(100,0,1)
#' sebetahat=rep(1,100)
#' ash.beta = ash(betahat,1,mixcompdist="normal")
#' c = vcdf_post(ash.beta$fitted.g,seq(-5,5,length=1000),betahat,sebetahat,NULL)
#' @export
vcdf_post = function(m,c,betahat,sebetahat,v){
  mapply(cdf_post,c,MoreArgs=list(m=m,betahat=betahat,sebetahat=sebetahat,v=v))
}
#' @title pcdf_post
#' @description ``parallel" vector version of \code{\link{cdf_post}} where c is a vector, of same length as betahat and sebetahat
#' @param m mixture distribution with k components
#' @param c a numeric vector with n elements
#' @param betahat an n vector of observations
#' @param sebetahat an n vector of standard errors
#' @param v degree of freedom of error distribution (scalar)
#' @return an n vector, whose ith element is the cdf for beta_i at c_i
#' @examples
#' beta = rnorm(100,0,1)
#' betahat= beta+rnorm(100,0,1)
#' sebetahat=rep(1,100)
#' ash.beta = ash(betahat,1,mixcompdist="normal")
#' c = pcdf_post(ash.beta$fitted.g,beta,betahat,sebetahat,NULL)
#' @export
pcdf_post = function(m,c,betahat,sebetahat,v){
  mapply(cdf_post,c,betahat,sebetahat,MoreArgs=list(m=m,v=v))
}

#output posterior mean for beta for prior mixture m,
#given observations betahat, sebetahat, df v
#' postmean
#'
#' @param m mixture distribution with k components
#' @param betahat an n vector of observations
#' @param sebetahat an n vector of standard errors
#' @param v degree of freedom of error distribution
#' @export
postmean = function(m, betahat,sebetahat,v){
  UseMethod("postmean")
}

#' @export
postmean.default = function(m,betahat,sebetahat,v){
  colSums(comppostprob(m,betahat,sebetahat,v) * comp_postmean(m,betahat,sebetahat,v))
}



#' @title postmean2
#' @description output posterior mean-squared value for beta for prior
#'     mixture m,given observations betahat, sebetahat, df v
#' @param m mixture distribution with k components
#' @param betahat an n vector of observations
#' @param sebetahat an n vector of standard errors
#' @param v degree of freedom of error distribution
#' @export
postmean2 = function(m, betahat,sebetahat,v){
  UseMethod("postmean2")
}
#' @export
postmean2.default = function(m,betahat,sebetahat,v){
  colSums(comppostprob(m,betahat,sebetahat,v) * comp_postmean2(m,betahat,sebetahat,v))
}


#' @title postsd
#' @description output posterior sd for beta for prior mixture m,given
#'     observations betahat, sebetahat, df v
#' @param m mixture distribution with k components
#' @param betahat an n vector of observations
#' @param sebetahat an n vector of standard errors
#' @param v degree of freedom of error distribution
#' @export
postsd = function(m,betahat,sebetahat,v){
  UseMethod("postsd")
}

#' @export
postsd.default = function(m,betahat,sebetahat,v){
  sqrt(postmean2(m,betahat,sebetahat,v)-postmean(m,betahat,sebetahat,v)^2)
}



#' @title comp_postmean2
#' @description output posterior mean-squared value for beta for prior
#'     mixture m,given observations betahat, sebetahat, df v
#' @param m mixture distribution with k components
#' @param betahat an n vector of observations
#' @param sebetahat an n vector of standard errors
#' @param v degree of freedom of error distribution
#' @export
comp_postmean2 = function(m,betahat,sebetahat,v){
  UseMethod("comp_postmean2")
}
#' @export
comp_postmean2.default = function(m,betahat,sebetahat,v){
  stop("method comp_postmean2 not written for this class")
  #comp_postsd(m,betahat,sebetahat,v)^2 + comp_postmean(m,betahat,sebetahat,v)^2
}



#' @title comp_postmean
#' @description output posterior mean for beta for each component of
#'     prior mixture m,given observations betahat, sebetahat, df v
#' @param m mixture distribution with k components
#' @param betahat an n vector of observations
#' @param sebetahat an n vector of standard errors
#' @param v degree of freedom of error distribution
#' @export
comp_postmean = function(m, betahat,sebetahat,v){
  UseMethod("comp_postmean")
}
#' @export
comp_postmean.default = function(m,betahat,sebetahat,v){
  stop("method comp_postmean not written for this class")
}



#' @title comp_postsd
#' @description output posterior sd for beta for each component of
#'     prior mixture m,given observations betahat, sebetahat, df v
#' @param m mixture distribution with k components
#' @param betahat an n vector of observations
#' @param sebetahat an n vector of standard errors
#' @param v degree of freedom of error distribution
#' @examples
#' beta = rnorm(100,0,1)
#' betahat= beta+rnorm(100,0,1)
#' ash.beta = ash(betahat,1,mixcompdist="normal")
#' comp_postmean(ash.beta$fitted.g,betahat,rep(1,100),NULL)
#' comp_postsd(ash.beta$fitted.g,betahat,rep(1,100),NULL)
#' comppostprob(ash.beta$fitted.g,betahat,rep(1,100),NULL)
#' @export
comp_postsd = function(m, betahat,sebetahat,v){
  UseMethod("comp_postsd")
}
#' @export
comp_postsd.default = function(m,betahat,sebetahat,v){
  stop("method comp_postsd not written for this class")
}

#find nice limits of mixture m for plotting
min_lim = function(m){
  UseMethod("min_lim")
}
min_lim.default=function(m){
  -5
}

max_lim = function(m){
  UseMethod("max_lim")
}
max_lim.default=function(m){
  5
}


#plot density of mixture
plot_dens = function(m,npoints=100,...){
  UseMethod("plot_dens")
}
plot_dens.default = function(m,npoints=100,...){
  x = seq(min_lim(m),max_lim(m),length=npoints)
  graphics::plot(x,dens(m,x),type="l",xlab="density",ylab="x",...)
}

plot_post_cdf = function(m,betahat,sebetahat,v,npoints=100,...){
  UseMethod("plot_post_cdf")
}
plot_post_cdf.default = function(m,betahat,sebetahat,v,npoints=100,...){
  x = seq(min_lim(m),max_lim(m),length=npoints)
  x_cdf = vapply(x,cdf_post,FUN.VALUE=betahat,m=m,betahat=betahat,sebetahat=sebetahat,v=v)
  graphics::plot(x,x_cdf,type="l",xlab="x",ylab="cdf",...)
  # for(i in 2:nrow(x_cdf)){
  #   lines(x,x_cdf[i,],col=i)
  # }
}

############################### METHODS FOR normalmix class ###########################

#' @title Constructor for normalmix class
#'
#' @description Creates an object of class normalmix (finite mixture
#'     of univariate normals)
#'
#' @details None
#'
#' @param pi vector of mixture proportions
#' @param mean vector of means
#' @param sd vector of standard deviations
#'
#' @return an object of class normalmix
#'
#' @export
#'
#' @examples normalmix(c(0.5,0.5),c(0,0),c(1,2))
#'
normalmix = function(pi,mean,sd){
  structure(data.frame(pi,mean,sd),class="normalmix")
}


#' @title comp_sd.normalmix
#' @description returns sds of the normal mixture
#' @param m a normal mixture distribution with k components
#' @return a vector of length k
#' @export
comp_sd.normalmix = function(m){
  m$sd
}

#' @title comp_mean.normalmix
#' @description returns mean of the normal mixture
#' @param m a normal mixture distribution with k components
#' @return a vector of length k
#' @export
comp_mean.normalmix = function(m){
  m$mean
}

#' @export
compdens.normalmix = function(m,y,log=FALSE){
  k=ncomp(m)
  n=length(y)
  d = matrix(rep(y,rep(k,n)),nrow=k)
  return(matrix(stats::dnorm(d, m$mean, m$sd, log),nrow=k))
}


#' @title compdens_conv.normalmix
#' @description returns density of convolution of each component of a
#'     normal mixture with N(0,s^2) or s*t(v) at x. Note that
#'     convolution of two normals is normal, so it works that way
#' @param m mixture distribution with k components
#' @param x an n-vector at which density is to be evaluated
#' @param s an n vector of standard errors
#' @param v degree of freedom of error distribution
#' @param FUN default is "+"
#' @return a k by n matrix
compdens_conv.normalmix = function(m,x,s,v,FUN="+"){
  if(!is.null(v)){
    stop("method compdens_conv of normal mixture not written for df!=NULL")
  }
  if(length(s)==1){s=rep(s,length(x))}
  sdmat = sqrt(outer(s^2,m$sd^2,FUN)) #n by k matrix of standard deviations of convolutions
  return(t(stats::dnorm(outer(x,m$mean,FUN="-")/sdmat)/sdmat))
}

#' @title log_compdens_conv.normalmix
#' @description returns log-density of convolution of each component
#'     of a normal mixture with N(0,s^2) or s*t(v) at x. Note that
#'     convolution of two normals is normal, so it works that way
#' @param m mixture distribution with k components
#' @param x an n-vector at which density is to be evaluated
#' @param s an n vector of standard errors
#' @param v degree of freedom of error distribution
#' @param FUN default is "+"
#' @return a k by n matrix
log_compdens_conv.normalmix = function(m,x,s,v,FUN="+"){
  if(!is.null(v)){
    stop("method compdens_conv of normal mixture not written for df!=NULL")
  }
  if(length(s)==1){s=rep(s,length(x))}
  sdmat = sqrt(outer(s^2,m$sd^2,FUN)) #n by k matrix of standard deviations of convolutions
  return(t(stats::dnorm(outer(x,m$mean,FUN="-")/sdmat,log=TRUE) - log(sdmat)))
}



#' @export
comp_cdf.normalmix = function(m,y,lower.tail=TRUE){
  vapply(y,stats::pnorm,m$mean,m$mean,m$sd,lower.tail)
}




#' @export
compcdf_post.normalmix=function(m,c,betahat,sebetahat,v){
  if(!is.null(v)){
    stop("Error: normal mixture for student-t likelihood is not yet implemented")
  }
  k = length(m$pi)
  n=length(betahat)
  #compute posterior standard deviation (s1) and posterior mean (m1)
  s1 = sqrt(outer(sebetahat^2,m$sd^2,FUN="*")/outer(sebetahat^2,m$sd^2,FUN="+"))
  ismissing = (is.na(betahat) | is.na(sebetahat))
  s1[ismissing,]=m$sd

  m1 = t(comp_postmean(m,betahat,sebetahat,v))
  t(stats::pnorm(c,mean=m1,sd=s1))
}


#' @export
comp_postmean.normalmix = function(m,betahat,sebetahat,v){
  if(!is.null(v)){
    stop("method comp_postmean of normal mixture not written for df!=NULL")
  }
  tmp=(outer(sebetahat^2,m$mean, FUN="*") + outer(betahat,m$sd^2, FUN="*"))/outer(sebetahat^2,m$sd^2,FUN="+")
  ismissing = (is.na(betahat) | is.na(sebetahat))
  tmp[ismissing,]=m$mean #return prior mean when missing data
  t(tmp)
}


#' @export
comp_postsd.normalmix = function(m,betahat,sebetahat,v){
  if(!is.null(v)){
    stop("method comp_postsd of normal mixture not written for df!=NULL")
  }
  t(sqrt(outer(sebetahat^2,m$sd^2,FUN="*")/outer(sebetahat^2,m$sd^2,FUN="+")))
}

#' @export
comp_postmean2.normalmix = function(m,betahat,sebetahat,v){
  comp_postsd(m,betahat,sebetahat,v)^2 + comp_postmean(m,betahat,sebetahat,v)^2
}

#' @title loglik_conv_mixlik
#'
#' @description find log likelihood of data, when the mixture m is convolved with l-comp normal mixture in betahat with mixture sd betahatsd, mixture proportion pilik.
#'
#' @param m mixture distribution
#' @param betahat an n vector
#' @param betahatsd an n-by-l matrix
#' @param v degree of freedom of error distribution
#' @param pilik an l vector
#' @param FUN default is "+"
#' @export
#'
loglik_conv_mixlik = function(m,betahat,betahatsd,v,pilik,FUN="+"){
  UseMethod("loglik_conv_mixlik")
}
#' @title loglik_conv.default
#'
#' @description The default version of \code{\link{loglik_conv}}.
#'
#' @param m mixture distribution
#' @param betahat an n vector
#' @param betahatsd an n-by-l matrix
#' @param v degree of freedom of error distribution
#' @param pilik an l vector
#' @param FUN default is "+"
#' @export
#'
loglik_conv_mixlik.default = function(m,betahat,betahatsd,v,pilik,FUN="+"){
  sum(log(dens_conv_mixlik(m,betahat,betahatsd,v,pilik,FUN)))
}

#compute the density of the components of the mixture m
#when convoluted with l-components normal mixture with standard deviation s
#or C (scale vector) multiplies scaled (se) student.t l-components mixture with df v
#with mixture proportion pilik
#the density is evaluated at x
#x is an n-vector
#s and pilik are n by l matrices
#v and c are l-vectors
#m is a mixture with k components
#output is a (k*l) by n matrix of densities

#' @title compdens_conv_mixlik
#' @description compute the density of the components of the mixture m
#'     when convoluted with l-components normal mixture with standard
#'     deviation s or C (scale vector) multiplies scaled (se)
#'     student.t l-components mixture with df v with mixture
#'     proportion pilik the density is evaluated at x
#'
#' @param m a mixture with k components
#' @param x an n vector
#' @param s normal mixture of sd(s), n-by-l matrix
#' @param v degree of freedom of error distribution
#' @param pilik mixture proportion , n-by-l matrix
#' @param FUN default is "+"
#' @return a (k*l) by n matrix of densities
#' @export
#'
#todo: C is missing for function input
compdens_conv_mixlik = function(m,x,s,v,pilik,FUN="+"){
  UseMethod("compdens_conv_mixlik")
}
compdens_conv_mixlik.default = function(m,x,s,v,pilik,FUN="+"){
  dens=NULL
  for (i in 1:dim(pilik)[2]){
    dens=rbind(dens,pilik[,i]*compdens_conv(m,x,s[,i],v[i],FUN))
  }
  return(dens)
}


#' @title dens_conv_mixlik
#' @description compute density of mixture m convoluted with
#'     l-components normal mixture of sd (s) or student t mixture with
#'     df v with mixture proportion pilik at locations x
#'
#' @param m mixture distribution
#' @param x an n vector
#' @param s normal mixture of sd(s), n-by-l matrix
#' @param v degree of freedom of error distribution
#' @param pilik mixture proportion , n-by-l matrix
#' @param FUN default is "+"
#' @export
#'
dens_conv_mixlik = function(m,x,s,v,pilik,FUN="+"){
  UseMethod("dens_conv_mixlik")
}
dens_conv_mixlik.default = function(m,x,s,v,pilik,FUN="+"){
  l=dim(pilik)[2]
  colSums(rep(m$pi,l) * compdens_conv_mixlik(m,x,s,v,pilik,FUN))
}


#' @title comppostprob_mixlik
#' @description compute the posterior prob that each observation came
#'     from each component of the mixture m,output a k by n vector of
#'     probabilities computed by weighting the component densities by
#'     pi and then normalizing,when likelihood is an l-components
#'     normal mixture or student t mixture with mixture proportion
#'     pilik
#'
#' @param m mixture distribution
#' @param x an n vector
#' @param s normal mixture of sd(s), n-by-l matrix
#' @param v degree of freedom of error distribution
#' @param pilik mixture proportion , n-by-l matrix
#' @export
comppostprob_mixlik=function(m,x,s,v,pilik){
  UseMethod("comppostprob_mixlik")
}
#' @export
comppostprob_mixlik.default = function(m,x,s,v,pilik){
  l=dim(pilik)[2]
  k=length(m$pi)
  tmp= (t(rep(m$pi,l) * compdens_conv_mixlik(m,x,s,v,pilik))/dens_conv_mixlik(m,x,s,v,pilik))
  ismissing = (is.na(x) | apply(is.na(s),1,sum))
  tmp[ismissing,]=rep(m$pi,l)/l
  group=rep(1:k,l)
  return(rowsum(t(tmp),group))
}


#' @title comppostprob_mixlik2
#' @description compute the posterior prob that each observation came
#'     from each component of the mixture m and the likelihood
#'     mixture, output a (k*l) by n vector of probabilities computed
#'     by weighting the component densities by pi and then
#'     normalizing, when likelihood is an l-components normal mixture
#'     or student t mixture with mixture proportion pilik.
#'
#' @param m mixture distribution
#' @param x an n vector
#' @param s normal mixture of sd(s), n-by-l matrix
#' @param v degree of freedom of error distribution
#' @param pilik mixture proportion , n-by-l matrix
#' @export
comppostprob_mixlik2=function(m,x,s,v,pilik){
  UseMethod("comppostprob_mixlik2")
}
#' @export
comppostprob_mixlik2.default = function(m,x,s,v,pilik){
  l=dim(pilik)[2]
  k=length(m$pi)
  tmp= (t(rep(m$pi,l) * compdens_conv_mixlik(m,x,s,v,pilik))/dens_conv_mixlik(m,x,s,v,pilik))
  ismissing = (is.na(x) | apply(is.na(s),1,sum))
  tmp[ismissing,]=rep(m$pi,l)/l
  return(t(tmp))
}



#' @title compcdf_post_mixlik
#' @description evaluate cdf of posterior distribution of beta at c
#' @param m prior on beta, a mixture
#' @param c location of evaluation
#' @param betahat the data
#' @param sebetahat the observed standard errors
#' @param v degree of freedom of error distribution
#' @param pilik mixture proportion
#' @return it returns a (k*l) by n matrix
#' @export
compcdf_post_mixlik=function(m,c,betahat,sebetahat,v,pilik){
  UseMethod("compcdf_post_mixlik")
}
#' @export
compcdf_post_mixlik.default=function(m,c,betahat,sebetahat,v,pilik){
  cdf=NULL
  for (i in 1:dim(pilik)[2]){
    cdf=rbind(cdf,pilik[,i]*compcdf_post(m,c,betahat,sebetahat[,i],v[i]))
  }
  cdf
}

cdf_post_mixlik = function(m,c,betahat,sebetahat,v,pilik){
  UseMethod("cdf_post_mixlik")
}
cdf_post_mixlik.default=function(m,c,betahat,sebetahat,v,pilik){
  colSums(comppostprob_mixlik2(m,betahat,sebetahat,v,pilik)*
            compcdf_post_mixlik(m,c,betahat,sebetahat,v,pilik))
}


#' @title postmean_mixlik
#' @description output posterior mean for beta for prior mixture m
#' @param m mixture distribution
#' @param betahat the data
#' @param sebetahat the observed standard errors
#' @param v degree of freedom of error distribution
#' @param pilik mixture proportion
#' @export
#'
postmean_mixlik = function(m, betahat,sebetahat,v,pilik){
  UseMethod("postmean_mixlik")
}
postmean_mixlik.default = function(m,betahat,sebetahat,v,pilik){
  colSums(comppostprob_mixlik2(m,betahat,sebetahat,v,pilik) * comp_postmean_mixlik(m,betahat,sebetahat,v,pilik))
}


#' @title postmean2_mixlik
#' @description output posterior mean-squared value for beta for prior mixture m,given observations betahat, sebetahat, df v, from l-components mixture likelihood with mixture proportion pilik
#' @param m mixture distribution
#' @param betahat the data
#' @param sebetahat the observed standard errors
#' @param v degree of freedom of error distribution
#' @param pilik mixture proportion
#' @export
postmean2_mixlik = function(m, betahat,sebetahat,v,pilik){
  UseMethod("postmean2_mixlik")
}
#' @export
postmean2_mixlik.default = function(m,betahat,sebetahat,v,pilik){
  colSums(comppostprob_mixlik2(m,betahat,sebetahat,v,pilik) * comp_postmean2_mixlik(m,betahat,sebetahat,v,pilik))
}

#' @title postsd_mixlik
#' @description output posterior sd for beta for prior mixture m
#' @param m mixture distribution
#' @param betahat the data
#' @param sebetahat the observed standard errors
#' @param v degree of freedom of error distribution
#' @param pilik mixture proportion
#' @export
#'
postsd_mixlik = function(m,betahat,sebetahat,v,pilik){
  UseMethod("postsd_mixlik")
}
postsd_mixlik.default = function(m,betahat,sebetahat,v,pilik){
  sqrt(postmean2_mixlik(m,betahat,sebetahat,v,pilik)-postmean_mixlik(m,betahat,sebetahat,v,pilik)^2)
}


#' @title comp_postmean2_mixlik
#' @description output posterior mean-squared value for beta
#' @param m mixture distribution
#' @param betahat the data
#' @param sebetahat the observed standard errors
#' @param v degree of freedom of error distribution
#' @param pilik mixture proportion
#' @export
comp_postmean2_mixlik = function(m,betahat,sebetahat,v,pilik){
  UseMethod("comp_postmean2_mixlik")
}
#' @export
comp_postmean2_mixlik.default = function(m,betahat,sebetahat,v,pilik){
  comp_postsd_mixlik(m,betahat,sebetahat,v,pilik)^2 +
    comp_postmean_mixlik(m,betahat,sebetahat,v,pilik)^2
}


#' @title comp_postmean_mixlik
#' @description output posterior mean for beta for each component of
#'     prior mixture m,given observations betahat, sebetahat, df v
#'     from l-components mixture likelihood with mixture proportion
#'     pilik
#' @param m mixture distribution
#' @param betahat the data
#' @param sebetahat the observed standard errors
#' @param v degree of freedom of error distribution
#' @param pilik mixture proportion
#' @export
#'
comp_postmean_mixlik=function(m,betahat,sebetahat,v,pilik){
  UseMethod("comp_postmean_mixlik")
}
comp_postmean_mixlik.default=function(m,betahat,sebetahat,v,pilik){
  mean=NULL
  for (i in 1:dim(pilik)[2]){
    mean=rbind(mean,comp_postmean(m,betahat,sebetahat[,i],v[i]))
  }
  return(mean)
}


#' @title comp_postsd_mixlik
#'
#' @description output posterior sd for beta for each component of
#'     prior mixture m,given observations betahat, sebetahat, df v
#'     from l-components mixture likelihood with mixture proportion
#'     pilik
#' @param m mixture distribution
#' @param betahat the data
#' @param sebetahat the observed standard errors
#' @param v degree of freedom of error distribution
#' @param pilik mixture proportion
#' @export
#'
comp_postsd_mixlik=function(m,betahat,sebetahat,v,pilik){
  UseMethod("comp_postsd_mixlik")
}
comp_postsd_mixlik.default=function(m,betahat,sebetahat,v,pilik){
  sd=NULL
  for (i in 1:dim(pilik)[2]){
    sd=rbind(sd,comp_postsd(m,betahat,sebetahat[,i],v[i]))
  }
  return(sd)
}




negpenloglik = function(pi,matrix_lik,prior){return(-penloglik(pi,matrix_lik,prior))}

penloglik = function(pi, matrix_lik, prior){
  pi = normalize(pmax(0,pi))
  m  = t(pi * t(matrix_lik)) # matrix_lik is n by k; so this is also n by k
  m.rowsum = rowSums(m)
  loglik = sum(log(m.rowsum))
  subset = (prior != 1.0)
  priordens = sum((prior-1)[subset]*log(pi[subset]))
  return(loglik+priordens)
}