R/helper.R

Defines functions INTERNAL_glmfit simulate.disclapmixfit plot.disclapmixfit summary.disclapmixfit print.disclapmixfit predict.disclapmixfit get_BIC get_AICc get_AIC get_loglikelihood_marginal get_loglikelihood_full move_centers convert_coef_to_disclap_parameters_internal convert_coef_to_disclap_parameters create_initial_y create_response_vector create_model_matrix check_y check_x

Documented in plot.disclapmixfit predict.disclapmixfit print.disclapmixfit simulate.disclapmixfit summary.disclapmixfit

check_x <- function(x) {
  if (!is.matrix(x) || !is.integer(x)) {
    stop("x is not an integer matrix, please use the str function to ensure that the object is a matrix and only as integer columns (and not numeric)")
  } else if (nrow(x) <= 1) {
    stop("x must have more than one row")
  }
  
  if (any(is.na(x))) {
    stop("x contains NA's")
  }
}

check_y <- function(y, clusters, loci) {
  if (!is.matrix(y) || !is.integer(y)) {
    stop("y is not an integer matrix, please use the str function to ensure that the object is a matrix and only as integer columns (and not numeric)")
  } 
  #else if (nrow(y) <= 1) {
  #  stop("y must have more than one row")
  #} 
  else if (nrow(y) != clusters) {
    stop("y must have exactly as many rows as the specified number of clusters")
  } else if (ncol(y) != loci) {
    stop("y must have the same number of columns as x")
  }
}

create_model_matrix <- function(x, clusters, model.matrix.function) {
  mat <- rcpp_create_design_matrix(x, clusters)
  colnames(mat) <- c("cluster", "locus")
  
  mat <- as.data.frame(mat)
  mat$cluster <- factor(mat$cluster)
  mat$locus <- factor(mat$locus)
  
  if (clusters == 1L && ncol(x) == 1L) {
    mat$cluster <- NULL
    mat$locus <- NULL
    mat <- model.matrix.function(~ 1, mat)
  } else if (clusters == 1L) {
    mat$cluster <- NULL
    mat <- model.matrix.function(~ locus - 1, mat, contrasts = list(locus = "contr.treatment"))
  } else if (ncol(x) == 1L) {
    mat$locus <- NULL
    mat <- model.matrix.function(~ cluster - 1, mat, contrasts = list(cluster = "contr.treatment"))
  } else {
    mat <- model.matrix.function(~ cluster + locus - 1, mat, contrasts = list(cluster = "contr.treatment", locus = "contr.treatment"))
  }
  
  return(mat)
}

create_response_vector <- function(x, y) {
  response <- rcpp_create_response_vector(x, y)
  return(response)
}

#' @importFrom stats dist hclust cutree
#' @importFrom cluster medoids
create_initial_y <- function(x, clusters, init_y_method = "pam") {  
  if (is.null(init_y_method) | !is.character(init_y_method) | length(init_y_method) != 1) {
    stop("Invalid init_y_method argument.")
  }
  
  y <- NULL
  
  if (clusters == 1L) {
    y <- matrix(as.integer(round(apply(x, 2L, median))), nrow = 1L)
  } else {
    centers.result <- NULL
    
    if (init_y_method == "pam") {      
      centers.result <- cluster::pam(x, k = clusters, diss = FALSE, stand = FALSE, metric = "manhattan", do.swap = TRUE)
      y <- centers.result$medoids
    } else if (init_y_method == "clara") {
      centers.result <- cluster::clara(x, k = clusters, metric = "manhattan", 
                                       stand = FALSE, samples = 100, 
                                       sampsize = min(ceiling(nrow(x)/2), 100 + 2*clusters),
                                       medoids.x = TRUE, keep.data = FALSE,
                                       rngR = TRUE, pamLike = TRUE)
      y <- centers.result$medoids
    } else if (init_y_method == "hclust") {
      # FIXME: Re-use d...
      d <- stats::dist(x, method = "manhattan")
      wc <- stats::hclust(d, method = "ward.D2")
      cl <- stats::cutree(wc, clusters)
      y <- x[cluster::medoids(x, cl), ]
    } else {
      stop("Unsupported init_y_method chosen")
    }
    
    y <- as.matrix(apply(y, 2L, function(r) as.integer(round(r))))
  }
  
  return(y)
}

convert_coef_to_disclap_parameters <- function(beta, clusters) {
  if (clusters == 1L) {
    pcl <- matrix(exp(beta), nrow = 1)
    return(pcl)  
  } else {
    theta_cs <- beta[1L:clusters]
    theta_ls <- c(0L, beta[-(1L:clusters)])
    theta <- outer(theta_cs, theta_ls, "+")
    pcl <- exp(theta)
    return(pcl)
  }
}

convert_coef_to_disclap_parameters_internal <- function(beta, clusters) {
  loci <- length(beta) - clusters
  theta_ls <- beta[1L:loci]
  theta_cs <- beta[-(1L:loci)]
  theta <- outer(theta_cs, theta_ls, "+")
  pcl <- exp(theta)
  return(pcl)
}

move_centers <- function(x, y, v_matrix) {
  y_candidates <- apply(x, 2L, range)
  
  new_y <- sapply(1L:ncol(x), function(l) {
    ycls <- y_candidates[1L, l]:y_candidates[2L, l]
    res <- sapply(ycls, function(ycl) {
      sapply(1L:nrow(y), function(cluster) {
        sum(v_matrix[, cluster]*abs(x[, l] - ycl))
      })
    })
    
    if (nrow(y) == 1L) {
      indices <- which.min(res)
    } else {
      indices <- apply(res, 1L, which.min)
    }
    
    return(ycls[indices])
  })
  
  if (nrow(y) == 1L) {
    new_y <- matrix(new_y, nrow = 1L)
  }
  
  colnames(new_y) <- colnames(y)
  return(new_y)
}

get_loglikelihood_full <- function(fit, clusters, loci, response_vector, weight_vector, tau_norm) { 
  theta <- fit$linear.predictors
  p <- exp(theta)
  #print(head(theta))
  #print(head(p))
  #print(theta[p < 0 | p >= 1])
  #print(p[p < 0 | p >= 1])
  
  den <- ddisclap(response_vector, p)
  #zi <- rep(1L:clusters, each = length(theta)/clusters)
  zi <- rep(1L:clusters, length.out = length(weight_vector), each = loci)
  
  tau_norm <- tau_norm[zi]
  logL <- sum(weight_vector*log(tau_norm*den))
  
  return(logL)
}

get_loglikelihood_marginal <- function(x, y, disclap_parameters, tau_vector) {
  xprobs <- rcpp_calculate_haplotype_probabilities(x, y, disclap_parameters, tau_vector)
  logL <- sum(log(xprobs))
  return(logL)
}

get_AIC <- function(logL, individuals, clusters, loci) {
  # coord             # glm                  # tau (sums to 1)
  k <- (clusters * loci) + (loci + clusters - 1) + (clusters - 1)
  
  return(2*k - 2*logL)
}

get_AICc <- function(logL, individuals, clusters, loci) {
  # coord             # glm                  # tau (sums to 1)
  k <- (clusters * loci) + (loci + clusters - 1) + (clusters - 1)
  
  # Our observations is individuals' haplotype
  # We have a model for the haplotype
  n <- individuals*loci
  
  AIC <- 2*k - 2*logL
  correction <- 2*k*(k+1) / (n-k-1)
  
  return(AIC + correction)
}

get_BIC <- function(logL, individuals, clusters, loci) {
  # coord             # glm                  # tau (sums to 1)
  k <- (clusters * loci) + (loci + clusters - 1) + (clusters - 1)
  
  return(log(individuals*loci)*k - 2*logL)
}



#' Predict from a disclapmixfit
#' 
#' Is able to predict haplotype frequencies using a \code{\link{disclapmixfit}}
#' object.
#' 
#' Note that `NA` values give rise to an error unless the `marginalise` argument 
#' is set to `TRUE`.
#' 
#' @param object a \code{\link{disclapmixfit}} object
#' @param newdata the haplotypes in matrix format to estimate haplotype
#' probabilities for
#' @param marginalise Should loci with `NA` be dealt with by marginalising out 
#'                    that locus?
#' @param ... not used
#' @seealso \code{\link{disclapmix}} \code{\link{disclapmixfit}}
#' \code{\link{print.disclapmixfit}} \code{\link{summary.disclapmixfit}}
#' \code{\link{simulate.disclapmixfit}} \code{\link{plot.disclapmixfit}}
#' %\code{\link{haplotype_diversity}} \code{\link{clusterdist}}
#' \code{\link{clusterprob}}
#' @keywords predict
#' @export
predict.disclapmixfit <-
  function(object, newdata, marginalise = FALSE, ...) {
    if (!is(object, "disclapmixfit")) {
      stop("object must be a disclapmixfit")
    }
    
    if (marginalise == FALSE && any(is.na(newdata))) {
      stop("newdata contains NA values - if you meant to marginalising out ", 
           "those, you need to set marginalise = TRUE")
    }
    
    probs <- rcpp_calculate_haplotype_probabilities(newdata, object$y, object$disclap_parameters, object$tau)
    return(probs)
    
    #if (se.fit && !(object$glm_method %in% c("internal_coef", "internal_dev"))) {
    #  stop("Cannot predict se when fitting with other than internal_coef or internal_dev")
    #}
    #return(NA)
  }


#predict_increase_at_alleles <-
#function(object, newdata, constants, increase_at_alleles, ...) {
#  if (!is(object, "disclapmixfit")) stop("object must be a disclapmixfit")
#  if (any(constants < 0)) stop("constants must be >= 0")
#  if (length(constants) == 1L) constants <- rep(constants, length(object$tau))
#  if (length(constants) != length(object$tau)) stop("constants must have length 1 or number of clusters")
#  if (!is.vector(increase_at_alleles) || !is.integer(increase_at_alleles) || length(increase_at_alleles) != ncol(object$y)) stop("increase_at_alleles must be an integer vector with length corresponding to number of loci")
#  #dbsize <- object$model_observations / ncol(object$y)
#  #constants <- dbsize * object$tau
#  probs <- rcpp_calculate_haplotype_probabilities_increase_at_alleles(newdata, object$y, object$disclap_parameters, object$tau, constants, increase_at_alleles) 
#  return(probs)
#}

#predictNEW <-
#function(object, newdata, ...) {
#  if (!is(object, "disclapmixfit")) stop("object must be a disclapmixfit")
#  probs <- rcpp_calculate_haplotype_probabilities_NEW(t.default(newdata), t.default(object$y), object$disclap_parameters, object$tau)
#  return(probs)
#}



#' Print a disclapmixfit
#' 
#' Prints a \code{\link{disclapmixfit}} object.
#' 
#' @param x a \code{\link{disclapmixfit}} object, usually from a result of a
#' call to \code{disclapmix}.
#' @param ... not used
#' @seealso \code{\link{disclapmix}} \code{\link{disclapmixfit}}
#' \code{\link{predict.disclapmixfit}} \code{\link{summary.disclapmixfit}}
#' \code{\link{simulate.disclapmixfit}} \code{\link{plot.disclapmixfit}}
#' %\code{\link{haplotype_diversity}} \code{\link{clusterdist}}
#' @keywords print
#' @export
print.disclapmixfit <-
  function(x, ...) {
    if (!is(x, "disclapmixfit")) stop("x must be a disclapmixfit")
    
    summary.disclapmixfit(x)
    
    return(invisible(x))
  }



#' Summary of a disclapmixfit
#' 
#' Summary of a \code{\link{disclapmixfit}} object.
#' 
#' @param object a \code{\link{disclapmixfit}} object, usually from a result of
#' a call to \code{disclapmix}.
#' @param ... not used
#' @seealso \code{\link{disclapmix}} \code{\link{disclapmixfit}}
#' \code{\link{predict.disclapmixfit}} \code{\link{print.disclapmixfit}}
#' \code{\link{simulate.disclapmixfit}} %\code{\link{haplotype_diversity}}
#' \code{\link{clusterdist}}
#' @keywords print
#' @export
summary.disclapmixfit <-
  function(object, ...) {
    if (!is(object, "disclapmixfit")) stop("object must be a disclapmixfit")
    
    cat("disclapmixfit from ", 
        formatC(nrow(object$v_matrix)), " observations on ", 
        formatC(ncol(object$y)), " loci with ", 
        formatC(nrow(object$y)), " clusters.\n", sep = "")
    
    cat("\n")
    cat("EM converged:                                                       ", object$converged, "\n", sep = "")
    cat("Number of central haplotype changes:                                ", length(object$changed_center), "\n", sep = "")
    cat("Total number of EM iterations:                                      ", formatC(object$iterations), "\n", sep = "")
    cat("Model observations (n*loci*clusters):                               ", formatC(object$model_observations), "\n", sep = "")
    cat("Model parameters ((clusters*loci)+(loci+clusters-1)+(clusters-1)):  ", formatC(object$model_parameters), "\n", sep = "")
    cat("GLM method:                                                         ", formatC(object$glm_method), "\n", sep = "")
    cat("Initial central haplotypes supplied:                                ", !is.null(object$init_y), "\n", sep = "")
    
    if (is.null(object$init_y)) {
      cat("Method to find initial central haplotypes:                          ", object$init_y_method, "\n", sep = "")
    }
    
    return(invisible(object))
  }




#' Plot a disclapmixfit
#' 
#' Plot a \code{\link{disclapmixfit}} object.
#' 
#' 
#' @param x a \code{\link{disclapmixfit}} object, usually from a result of a
#' call to \code{disclapmix}.
#' @param which What plot to make. 1L = clusters and their distances.
#' @param clusdist To use previously computed cluster distances to avoid doing
#' the same computations twice.
#' @param ... not used
#' @return A data frame with discrete Laplace distributions for each cluster
#' and locus. Side effect: A plot.
#' @seealso \code{\link{disclapmix}} \code{\link{disclapmixfit}}
#' \code{\link{predict.disclapmixfit}} \code{\link{print.disclapmixfit}}
#' \code{\link{simulate.disclapmixfit}} \code{\link{summary.disclapmixfit}}
#' %\code{\link{haplotype_diversity}} \code{\link{clusterdist}}
#' @keywords plot
#' @examples
#' 
#' data(danes)
#' db <- as.matrix(danes[rep(1:nrow(danes), danes$n), 1:(ncol(danes)-1)])
#' fit <- disclapmix(db, clusters = 4L)
#' plot(fit)
#' 
#' @export
plot.disclapmixfit <-
  function(x, which = 1L, clusdist = clusterdist(x), ...) {
    if (!is(x, "disclapmixfit")) stop("x must be a disclapmixfit")
    
    object <- x
    
    #--
    
    prob_masses <- do.call(rbind, lapply(1L:ncol(object$disclap_parameters), function(locus_index) {
      ps <- object$disclap_parameters[, locus_index]
      ys <- object$y[, locus_index]
      xs <- (-1L):1L
      xs_probs <- lapply(ps, ddisclap, x = xs)
      
      while (any(unlist(lapply(xs_probs, sum)) < 0.99)) {
        xs <- c(min(xs) - 1L, xs, max(xs) + 1L)
        xs_probs <- lapply(ps, ddisclap, x = xs)
      }
      
      #rownames(xs_probs) <- names(ps)
      #colnames(xs_probs) <- xs
      
      df <- vector("list", length(ps)) 
      
      for (df_i in seq_along(ps)) {
        df[[df_i]] <- data.frame(x = xs + ys[df_i], probX = xs_probs[[df_i]], cluster = rownames(object$disclap_parameters)[df_i], locus = colnames(object$disclap_parameters)[locus_index])
      }
      
      df <- do.call(rbind, df)
      
      return(df)
    }))
    
    #--
    
    packages_needed <- c("ggplot2", "gridExtra", "ggdendro", "scales", "seriation")
    missing <- c()
    
    for (pkg in packages_needed) {
      #pkgAvail <- require(pkg, character.only = TRUE)
      pkgAvail <- requireNamespace(pkg, quietly = TRUE)  
      
      if (!pkgAvail) {
        missing <- c(missing, pkg)
      }
    }
    
    if (length(missing) != 0L) {
      warning(paste("Packages ", paste(missing, collapse = ", "), " are not available, hence the plot cannot be created.\nPlease install these if you wish to plot using this function.\nAlternatively, you can use the return value of this function to obtain the necesary data needed to create your own plot function if you for some reason do not want to install the packages.", sep = ""))
      return(invisible(prob_masses))
    }
    
    #--
    
    integer_breaks <- function(n = 5, ...) {
      breaker <- scales::pretty_breaks(n, ...)
      function(x) {
        breaks <- breaker(x)
        breaks[breaks == floor(breaks)]
      }
    }
    
    # To satisfy R CMD check
    probX <- NULL  
    #--
    
    if (nrow(object$y) == 1L) {
      p_dists <- ggplot2::ggplot(prob_masses, ggplot2::aes(x = x, y = probX)) + 
        ggplot2::geom_bar(stat = "identity") + 
        ggplot2::facet_grid(~ locus, scales = "free_x") + 
        ggplot2::scale_x_continuous(breaks = integer_breaks()) +
        ggplot2::labs(x = "X", y = "P(X)") 
      
      print(p_dists)
    } else {
      hc <- hclust(clusdist, method = "complete")
      
      if (nrow(object$y) > 2L) {
        #y_ser_vec <- seriation::seriate(clusdist, margin = 1, method = "OLO", hclust = hc)
        y_ser_vec <- seriation::seriate(clusdist, method = "OLO", hclust = hc)
        y_order <- seriation::get_order(y_ser_vec)
        hc_reordered <- y_ser_vec[[1L]]
      } else {
        hc_reordered <- hc
        y_order <- 1L:nrow(object$y)
      }
      
      prob_masses$ordered_cluster <- factor(prob_masses$cluster,
                                            levels = rev(levels(prob_masses$cluster)[y_order]))
      
      p_dists <- ggplot2::ggplot(prob_masses, ggplot2::aes(x = x, y = probX)) + 
        ggplot2::geom_bar(stat = "identity") + 
        ggplot2::facet_grid(ordered_cluster ~ locus, scales = "free_x") + 
        ggplot2::scale_x_continuous(breaks = integer_breaks()) +
        ggplot2::labs(x = "X", y = "P(X)") 
      
      # same ^
      
      #clusdist <- clusterdist(object)
      
      hcdata <- ggdendro::dendro_data(hc_reordered, type = "rectangle")
      
      hcdata$labels$label <- ''
      p_dendo <- ggdendro::ggdendrogram(hcdata, rotate = TRUE, leaf_labels = FALSE)
      
      #--
      
      gridExtra::grid.arrange(ggplot2::ggplotGrob(p_dists), ggplot2::ggplotGrob(p_dendo), 
                              ncol = 2, widths = c(4, 2))
      
      #--
    }
    
    return(invisible(prob_masses))
  }



#' Simulate from a disclapmixfit
#' 
#' Simulate from a \code{\link{disclapmixfit}} object.
#' 
#' 
#' @param object a \code{\link{disclapmixfit}} object, usually from a result of
#' a call to \code{disclapmix}.
#' @param nsim number of haplotypes to generate.
#' @param seed not used
#' @param cluster which cluster to simulate from (if `NULL`, the default, take 
#'                a random according to the a priori probabilities)
#' @param ... not used
#' @return A matrix where the rows correspond to the simulated haplotypes.
#' @seealso \code{\link{disclapmix}} \code{\link{disclapmixfit}}
#' \code{\link{predict.disclapmixfit}} \code{\link{print.disclapmixfit}}
#' \code{\link{plot.disclapmixfit}} \code{\link{summary.disclapmixfit}}
#' %\code{\link{haplotype_diversity}} \code{\link{clusterdist}}
#' @keywords print
#' @export
simulate.disclapmixfit <- function(object, nsim = 1L, seed = NULL, cluster = NULL, ...) {
  if (!is(object, "disclapmixfit")) stop("object must be a disclapmixfit")
  
  if (!is.null(seed)) {
    stop("seed must be null or else ")
  }
  
  if (is.null(nsim) || length(nsim) != 1L || !is.integer(nsim) || nsim <= 0L) {
    stop("nsim must be >= 1L (note the L postfix for integer)")
  }
  
  res <- if (is.null(cluster)) {
    tau_cumsum <- cumsum(object$tau)
    rcpp_simulate(        nsim, object$y, tau_cumsum, object$disclap_parameters)
  } else if (length(cluster) == 1L && cluster >= 1 && cluster <= nrow(object$y)) {
    cluster <- as.integer(cluster)
    rcpp_simulate_cluster(nsim, object$y, cluster, object$disclap_parameters) 
  } else {
    stop("Unexpected")
  }
  
  return(res)
}

INTERNAL_glmfit <- function(loci, clusters, individuals, response_vector, apriori_probs, weight_vector, vmat, 
                            verbose = FALSE, stop_by_deviance = TRUE, epsilon = 1e-4, maxit = 25L) {
  
  converged <- FALSE
  devold <- Inf
  dev <- 0
  
  ks <- 1:loci
  js <- 1:clusters
  
  mi <- min(loci, clusters)
  di <- max(loci, clusters) - mi
  
  ############################################################
  # Initialisation
  ############################################################
  tmp_d <- array(response_vector, c(loci, clusters, individuals))
  
  RkSj <- matrix(0, nrow = loci, ncol = clusters)
  for (k in ks) {
    for (j in js) {
      RkSj[k, j] <- RkSj[k, j] + sum(vmat[, j]*tmp_d[k, j, ])
    }
  }
  
  Rk <- apply(RkSj, 1, sum)
  Sj <- apply(RkSj, 2, sum)
  ###########################################################
  
  d_vec <- as.numeric(response_vector)
  
  beta <- c(rep(0.5, loci), rep(-1, clusters))
  beta_correction <- beta
  beta_correction_old <- beta
  
  lin.pred <- rep(beta[ks], clusters * individuals)
  lin.pred <- lin.pred + rep(rep(beta[-(ks)], each = loci), individuals)
  
  for (iter in 1L:maxit) {
    if (stop_by_deviance) {
      mu_m <- disclapglm_linkinv(lin.pred)
      
      # Deviance:
      dev <- disclapglm_deviance(d_vec, mu_m, weight_vector)
      
      if (verbose == TRUE) {
        cat("  IRLS iteration ", iter, ", deviance = ", dev, "\n", sep = "")
      }
      
      # Stop if devience is NaN
      if (is.null(dev) || is.na(dev) || is.nan(dev) || is.infinite(dev)) {
        converged <- FALSE
        break
      }
      
      if (abs(dev - devold)/(0.1 + abs(dev)) < epsilon) {
        converged <- TRUE
        break
      }
      
      devold <- dev
    } else if (iter > 1) {
      beta_change <- max(abs(beta_correction - beta_correction_old)/(0.1 + abs(beta_correction)))
      
      if (verbose == TRUE) {
        cat("  IRLS iteration ", iter, ", max relative coefficient change = ", beta_change, "\n", sep = "")
      }
      
      if (beta_change < epsilon) {
        converged <- TRUE
        break
      }
      
      beta_correction_old <- beta_correction
    }
    ############################################################################
    
    lin_pred_generic <- outer(beta[ks], beta[(loci+1):(loci+clusters)], "+")
    
    mu_generic <- apply(lin_pred_generic, 2, disclapglm_linkinv)
    if (!is.matrix(mu_generic)) { # == 1 cluster
      mu_generic <- matrix(mu_generic, nrow = loci, ncol = clusters) 
    }
    
    var_generic <- apply(mu_generic, 2, disclapglm_varfunc)    
    if (!is.matrix(var_generic)) { # == 1 cluster
      var_generic <- matrix(var_generic, nrow = loci, ncol = clusters)
    }
    
    apriori_probs_indv <- apriori_probs * individuals
    
    offD <- matrix(0, nrow = loci, ncol = clusters)
    mu_generic_apriori_probs <- matrix(0, nrow = loci, ncol = clusters)
    for (k in ks) {
      offD[k, ] <- apriori_probs_indv * var_generic[k, ]
      mu_generic_apriori_probs[k, ] <- apriori_probs_indv * mu_generic[k, ]
    }
    
    D1 <- apply(offD, 1, sum)
    D2 <- apply(offD, 2, sum)
    y1 <- Rk - apply(mu_generic_apriori_probs, 1, sum)
    y2 <- Sj - apply(mu_generic_apriori_probs, 2, sum)    
    
    H <- D1^-.5*offD
    H <- t(D2^-.5*t(H))
    
    dec <- svd(H, loci, clusters)
    
    OD <- 1-dec$d^2
    OD[1] <- 0.25
    
    if (mi >= 2L) { # > 1 cluster
      OD[2:mi] <- 1/OD[2:mi]
    }
    
    if (loci == mi) {
      dr <- OD
    } else {
      dr <- c(OD, rep(1, di))
    }
    
    if (clusters == mi) {
      ds <- OD
    } else {
      ds <- c(OD, rep(1, di))
    }
    
    dia <- (dec$d-2*dec$d*OD)/(1+dec$d^2)
    if (length(dia) > 1) { # > 1 cluster
      dia <- diag(dia)
    }
    
    K <- matrix(0, loci, clusters)
    
    if (loci <= clusters) {
      K[ks, ks] <- dia
    } else {
      K[1:clusters, 1:clusters] <- dia
    }
    
    if (length(dr) > 1) {
      dr <- diag(dr)
    }
    
    if (length(ds) > 1) {
      ds <- diag(ds)
    }
    
    mI <- matrix(0, loci+clusters, loci+clusters)
    U <- D1^-.5*dec$u
    V <- D2^-.5*dec$v
    
    mI[ks, ks] <- U %*% dr %*% t(U)
    mI[ks, (loci+1):(loci+clusters)] <- U %*% K %*% t(V)
    mI[(loci+1):(loci+clusters), ks] <- t(mI[ks,(loci+1):(loci+clusters)])
    mI[(loci+1):(loci+clusters), (loci+1):(loci+clusters)] <- V %*% ds %*% t(V)  
    beta_correction <- mI %*% c(y1, y2)
    lin_pred_correction <- rep(beta_correction[ks, 1], clusters * individuals)
    lin_pred_correction <- lin_pred_correction + rep(rep(beta_correction[(loci+1):(loci+clusters), 1], each = loci), individuals)
    ##################    
    beta <- beta + beta_correction
    lin.pred <- lin.pred + lin_pred_correction
    ##################
  }
  
  coefficients <- as.numeric(beta)
  
  if (stop_by_deviance == FALSE) {
    mu_m <- disclapglm_linkinv(lin.pred)
    dev <- disclapglm_deviance(d_vec, mu_m, weight_vector)
  }
  
  ans <- list(
    coefficients = coefficients,
    converged = converged,
    deviance = dev,
    linear.predictors = lin.pred,
    P = mI
  )
  
  return(ans)
}
mikldk/disclapmix documentation built on Aug. 22, 2023, 10:56 a.m.