R/gom_ml.R

#' Grade of Membership Mixture Model
#'
#' This function takes a data object and creates a fixed, small set of extreme profiles, each characterized
#' by the probabilities of answering a category of each question, along with a set of scores (or grades
#' of membership) that measures each subject’s degree of similarity to each extreme profile (reference group).
#'
#'
#' @param data.object A data frame or an object that can be coerced to a data frame.
#' @param initial.K An integer indicating the initial number of the set of pure type probabilities to be estimated.
#' @param final.K An integer indicating the largest number of the set of pure type probabilities to be estimated.
#' @param gamma.algorithm A character that specifies the algorithm to be used to estimate the gamma values.
#' @param initial.gamma A character that specifies how the initial gamma values should be specified. If "gamma.object", then initial.gamma.object is needed.
#' @param initial.gamma.object A data frame with the initial gamma values for each observation.
#' @param gamma.fit A logical value indicating if the gamma values need to be estimated.
#' @param lambda.algorithm A character that specifies the algorithm to be used to estimate the lambda values.
#' @param initial.lambda A character that specifies how the initial lambda values should be specified. If "lambda.object", then initial.lambda.object is needed.
#' @param initial.lambda.object An array with the initial lambda values for each category of each variable.
#' @param lambda.fit A logical value indicating if the lambda values need to be estimated.
#' @param case.id A character with the name of the variable with each observation ID.
#' @param case.weight A character with the name of the variable which contains the weight for each observation.
#' @param internal.var A character vector with the name of the variables to be used by the model.
#' @param order.K A logical value indicating if the lambdas should be organized with 99 percent confidence interval.
#' @param omega.fit A logical value specifying if the model should generate missing patterns of the data before compiling.
#' @param dec.char A character with the decimal symbol to be used in the output files.
#' @param MC_iter The number of iterations to be used within the Monte Carlo Simulation.
#'
#' @return gom_ml saves two files, one containing a table with unique data configuration along with the grades of membership, and another file
#' with the initial and final pure type probabilities along with some summary statistics. The gom_ml function also returns an object of class \emph{gom_ml} with the following components:
#'
#' \describe{
#'   \item{Data}{A data frame with the original data given by the user and the gamma for each observation.}
#'   \item{Pkjl}{An array with the pure type probabilities. The array dimensions equal the number of extreme profiles, variables and categories, respectively.}
#'   \item{Likelihood}{The maximum log-likelihood achieved by the model.}
#'   \item{AIC}{The Akaike Information Criterion.}
#'   \item{Table}{A table with the posterior lambdas organized by variables and their categories.}
#' }
#' @export
#' @examples
#'
#' data <- data.frame(x1 = round(stats::runif(n = 100, 1, 2), 0),
#'                    x2 = round(stats::runif(n = 100, 1, 3), 0),
#'                    Id = 1:100)
#'
#' gom_ml(data.object = data, case.id = "Id", initial.lambda = "random", MC_iter = 300)
#'
gom_ml <- function (data.object = NULL,
                    initial.K = 2, final.K = initial.K,
                    gamma.algorithm = c("gradient.1992", "woodbury.1974"),
                    initial.gamma = c("equal.values", "random", "pure1", "gamma.object"),
                    initial.gamma.object = NULL,
                    gamma.fit = TRUE,
                    lambda.algorithm = c("gradient.1992", "woodbury.1974"),
                    initial.lambda = c("random", "pure1", "equal.values", "lambda.object"),
                    initial.lambda.object = NULL,
                    lambda.fit = TRUE,
                    case.id = NA,
                    case.weight = NA,
                    internal.var = NULL,
                    order.K = TRUE,
                    omega.fit = FALSE,
                    dec.char = ".",
                    MC_iter = 1000) {

  data.object <- as.data.frame(data.object)
  tempdir <- tempdir()

  GoM <- '
    using namespace std;
    using namespace Rcpp;

    const int MAXITER_MODEL = 500;
    const int MAXITER_PARAMETERS = 25;

    const double ZERO = 1.0E-20;
    const double REALBIG = 1.0E+30;
    const double CTOL = 1.0E-07;

    IntegerVector   baselevel       (baselevel_);
    IntegerVector   ljlevels        (ljlevels_);
    NumericMatrix   cell            (cell_);
    CharacterVector gammaalgorithm  (gammaalgorithm_);
    NumericMatrix   G               (FG_);
    LogicalVector   gammafit        (gammafit_);
    CharacterVector lambdaalgorithm (lambdaalgorithm_);
    NumericVector   P               (FP_);
    LogicalVector   lambdafit       (lambdafit_);
    IntegerMatrix   FITP            (FITP_);

    int I = (cell.nrow() - 1);
    int J = (cell.ncol() - 2);
    int K = (G.ncol() - 1);

    int i, k, k_k, j, l, iter, subiter;
    double ploglik, difflik;
    double sumlik, p_ijl, part, partlik, newpartlik, sumG, sumP, startlik, curlik;

    double old_G[(K + 1)];
    double new_G[(K + 1)];

    NumericVector old_P(clone(FP_));
    NumericVector new_P(clone(FP_));

    vector<double> loglik(2);
    char buffer[255];

    // ## WOODBURY VARIABLES ####################

    double numer, denom;

    // ##########################################

    // ## GRADIENT VARIABLES ####################

    const int HALFSTEPS = MAXITER_PARAMETERS;

    const double ITOL = 0.0005;

    const double MAXSTEP = 1.0;
    const double MINSTEP = ZERO;

    int l_l, converged, halfstep, somefree;

    double f0, f1, norm, g_ij, bestlik, stepsize;

    int    freeG[(K + 1)];
    double dL_dG[(K + 1)];

    NumericVector dL_dP(clone(FP_));
    NumericVector freeP(clone(FP_));

    // ##########################################

    loglik[0] = 0.0;
    ploglik = loglik[0];
    // ## BEGIN loglikelihood FUNCTION ## //
    sumlik = 0.0;
    for (i = 1; i <= I; i++) {
      partlik = 0.0;
      for (j = 1; j <= J; j++) {
        l = cell(i, j) + 1 - baselevel(j);
        p_ijl = 0.0;
        for (k = 1; k <= K; k++) {
          p_ijl += (G(i, k) * P((((l * ((J + 1) * (K + 1))) + ((K + 1) * j)) + k)));
        }
        if (p_ijl < ZERO) {
          p_ijl = ZERO;
        }
        if (p_ijl < REALBIG) {
          partlik += log(p_ijl);
        }
      }
      sumlik += (cell(i, (J + 1)) * partlik);
    }
    // ## END loglikelihood FUNCTION ## //
    loglik[0] = sumlik;
    sprintf(buffer, "%-10.4f", loglik[0]);
    Rcout << "Primal Log-Likelihood is:\t" << buffer << endl << endl;
    loglik[1] = loglik[0];
    for (iter = 0; iter < MAXITER_MODEL; iter++) {
     difflik = (loglik[1] - ploglik);
     ploglik = loglik[1];
     if (iter) {
      if (fabs(difflik / loglik[1]) < CTOL) {
       break;
      }
     }
     if (gammafit[0] && gammaalgorithm[0] == "woodbury.1974") {
      // ## BEGIN Fit_G_Woodbury_1974 FUNCTION ## //
      subiter = 0;
      for (i = 1; i <= I; i++) {
       for (k = 1; k <= K; k++) {
        old_G[k] = G(i, k);
       }
       // ## BEGIN partlikelihood FUNCTION ## //
       part = 0.0;
       for (j = 1; j <= J; j++) {
        l = cell(i, j) + 1 - baselevel[j];
        p_ijl = 0.0;
        for (k = 1; k <= K; k++) {
         p_ijl += (G(i, k) * P((((l * ((J + 1) * (K + 1))) + ((K + 1) * j)) + k)));
        }
        if (p_ijl < ZERO) {
         p_ijl = ZERO;
        }
        if (p_ijl < REALBIG) {
         part += log(p_ijl);
        }
       }
       // ## END partlikelihood FUNCTION ## //
       partlik = part;
       for (subiter = 0; subiter < MAXITER_PARAMETERS; subiter++) {
        for (k = 1; k <= K; k++) {
         new_G[k] = 0.0;
         for (j = 1; j <= J; j++) {
          l = cell(i, j) + 1 - baselevel[j];
          p_ijl = 0.0;
          for (k_k = 1; k_k <= K; k_k++) {
           p_ijl += (G(i, k_k) * P((((l * ((J + 1) * (K + 1))) + ((K + 1) * j)) + k_k)));
          }
          if (p_ijl > ZERO) {
           new_G[k] += ((G(i, k) * P((((l * ((J + 1) * (K + 1))) + ((K + 1) * j)) + k))) / p_ijl);
          }
         }
        }
        for (k = 1; k <= K; k++) {
         G(i, k) = (new_G[k] / J);
        }
        // ## BEGIN rescale_G FUNCTION ## //
        sumG = 0.0;
        for (k = 1; k <= K; k++) {
         if (G(i, k) < ZERO) {
          G(i, k) = 0.0;
         }
         sumG += G(i, k);
        }
        if (sumG < ZERO) {
         sumG = (double)K;
          for (k = 1; k <= K; k++) {
           G(i, k) = 1.0;
          }
        }
        for (k = 1; k <= K; k++) {
         G(i, k) = (G(i, k) / sumG);
        }
        // ## END rescale_G FUNCTION ## //
        // ## BEGIN partlikelihood FUNCTION ## //
        part = 0.0;
        for (j = 1; j <= J; j++) {
         l = cell(i, j) + 1 - baselevel[j];
         p_ijl = 0.0;
         for (k = 1; k <= K; k++) {
          p_ijl += (G(i, k) * P((((l * ((J + 1) * (K + 1))) + ((K + 1) * j)) + k)));
         }
         if (p_ijl < ZERO) {
          p_ijl = ZERO;
         }
         if (p_ijl < REALBIG) {
          part += log(p_ijl);
         }
        }
        // ## END partlikelihood FUNCTION ## //
        newpartlik = part;
        if (newpartlik < partlik) {
         for (k = 1; k <= K; k++) {
          G(i, k) = old_G[k];
         }
         break;
        } else {
         if (partlik > ZERO) {
          if ((fabs(newpartlik - partlik) / partlik) < CTOL) {
           break;
          }
         }
        }
       }
      }
      sprintf(buffer, "%03d.%03d%s%015.5f%s%015.7f", iter, subiter, "\t", loglik[1], "\t", fabs((loglik[1] - ploglik) / ploglik));
      Rcout << "Fit G (Woodbury 1974):\t" << buffer << endl << endl;
      // ## END Fit_G_Woodbury_1974 FUNCTION ## //
     } else if (gammafit[0] && gammaalgorithm[0] == "gradient.1992") {
      // ## BEGIN Fit_G_Gradient_1992 FUNCTION ## //
      subiter = 0;
      startlik = loglik[1];
      for (i = 1; i <= I; i++) {
       converged = 0;
       for (subiter = 0; converged == 0; subiter++) { //##//
        // ## BEGIN cellgradient_G FUNCTION ## //
        norm = 0.0;
        for (k = 1; k <= K; k++) {
         dL_dG[k] =- (double)J;
         for (j = 1; j <= J; j++) {
          l = cell(i, j) + 1 - baselevel(j);
          p_ijl = 0.0;
          for (k_k = 1; k_k <= K; k_k++) {
           p_ijl = p_ijl + (G(i, k_k) * P(((l * ((J + 1) * (K + 1))) + ((K + 1) * j)) + k_k));
          }
          if (p_ijl > ZERO) {
           dL_dG[k] += (P(((l * ((J + 1) * (K + 1))) + ((K + 1) * j)) + k) / p_ijl);
          }
         }
         if ((G(i, k) <= ZERO && dL_dG[k] < 0.0) || ((1.0 - G(i, k)) <= ZERO && dL_dG[k] > 0.0)) {
          freeG[k] = 0;
         } else {
          freeG[k] = 1;
          norm += (dL_dG[k] * dL_dG[k]);
         }
        }
        // ## END cellgradient_G FUNCTION ## //
        if (norm <= ZERO) {
         converged = 1;
         break;
        }
        somefree = 0;
        for (k = 1; k <= K; k++) {
         old_G[k] = G(i, k);
         new_G[k] = G(i, k);
         if (freeG[k]) {
          somefree++;
         }
        }
        if (!somefree) {
         break;
        }
        // ## BEGIN partlikelihood FUNCTION ## //
        part = 0.0;
        for (j = 1; j <= J; j++) {
         l = cell(i, j) + 1 - baselevel[j];
         p_ijl = 0.0;
         for (k = 1; k <= K; k++) {
          p_ijl += (G(i, k) * P((((l * ((J + 1) * (K + 1))) + ((K + 1) * j)) + k)));
         }
         if (p_ijl < ZERO) {
          p_ijl = ZERO;
         }
         if (p_ijl < REALBIG) {
          part += log(p_ijl);
         }
        }
        // ## END partlikelihood FUNCTION ## //
        f0 = part;
        stepsize = MAXSTEP;
        bestlik = f0;
        for (halfstep = 0; halfstep < HALFSTEPS && stepsize > MINSTEP; halfstep++) {
         for (k = 1;k <= K; k++) {
          G(i, k) = old_G[k] + stepsize * dL_dG[k];
         }
         // ## BEGIN rescale_G FUNCTION ## //
         sumG = 0.0;
         for (k = 1; k <= K; k++) {
          if (G(i, k) < ZERO) {
           G(i, k) = 0.0;
          }
          sumG += G(i, k);
         }
         if (sumG < ZERO) {
          sumG = (double)K;
          for (k = 1; k <= K; k++) {
           G(i, k) = 1.0;
          }
         }
         for (k = 1; k <= K; k++) {
          G(i, k) = (G(i, k) / sumG);
         }
         // ## END rescale_G FUNCTION ## //
         // ## BEGIN partlikelihood FUNCTION ## //
         part = 0.0;
         for (j = 1; j <= J; j++) {
          l = cell(i, j) + 1 - baselevel[j];
          p_ijl = 0.0;
          for (k = 1; k <= K; k++) {
           p_ijl += (G(i, k) * P((((l * ((J + 1) * (K + 1))) + ((K + 1) * j)) + k)));
          }
          if (p_ijl < ZERO) {
           p_ijl = ZERO;
          }
          if (p_ijl < REALBIG) {
           part += log(p_ijl);
          }
         }
         // ## END partlikelihood FUNCTION ## //
         f1 = part;
         if (f1 > bestlik) {
          bestlik = f1;
          for (k = 1; k <= K; k++) {
           new_G[k] = G(i, k);
          }
         } else {
          if (bestlik > (f0 + ZERO) && bestlik > (f1 + ZERO)) {
           break;
          }
         }
         stepsize = (stepsize / 2.0);
        }
        f1 = bestlik;
        for (k = 1;k <= K; k++) {
         G(i, k) = new_G[k];
        }
        if (fabs(f0) > ZERO && fabs((f1 - f0) / f0) < CTOL) {
         converged = 1;
        }
        if (fabs(f0) > ZERO && fabs((f1-f0)/f0) < ITOL && subiter > MAXITER_PARAMETERS) {
         break;
        }
       }
      }
      sprintf(buffer, "%03d.%03d%s%015.5f%s%015.7f", iter, subiter, "\t", loglik[1], "\t", fabs((loglik[1] - ploglik) / ploglik));
      Rcout << "Fit G (Gradient 1992):\t" << buffer << endl << endl;
      // ## END Fit_G_Gradient_1992 FUNCTION ## //
     }
     // ## BEGIN loglikelihood FUNCTION ## //
     sumlik = 0.0;
     for (i = 1; i <= I; i++) {
      partlik = 0.0;
      for (j = 1; j <= J; j++) {
       l = cell(i, j) + 1 - baselevel(j);
       p_ijl = 0.0;
       for (k = 1; k <= K; k++) {
        p_ijl += (G(i, k) * P((((l * ((J + 1) * (K + 1))) + ((K + 1) * j)) + k)));
       }
       if (p_ijl < ZERO) {
        p_ijl = ZERO;
       }
       if (p_ijl < REALBIG) {
        partlik += log(p_ijl);
       }
      }
      sumlik += (cell(i, (J + 1)) * partlik);
     }
     // ## END loglikelihood FUNCTION ## //
     loglik[1] = sumlik;
     if (lambdafit[0] && lambdaalgorithm[0] == "woodbury.1974") {
     // ## BEGIN Fit_P_Woodbury_1974 FUNCTION ## //
     startlik = loglik[1];
     for (subiter = 0; subiter < MAXITER_PARAMETERS; subiter++) {
      for (k = 1; k <= K; k++) {
       for (j = 1; j <= J; j++) {
        for (l = 1; l <= ljlevels(j); l++) {
         old_P((((l * ((J + 1) * (K + 1))) + ((K + 1) * j)) + k)) = P((((l * ((J + 1) * (K + 1))) + ((K + 1) * j)) + k));
        }
       }
      }
      for (k = 1; k <= K; k++) {
       for (j = 1; j <= J; j++) {
        if (FITP(k, j)) {
         for (l = 1; l <= ljlevels(j); l++) {
          numer = 0.0;
          denom = 0.0;
          for (i = 1; i <= I; i++) {
           p_ijl = 0.0;
           for (k_k = 1; k_k <= K; k_k++) {
            p_ijl += (G(i, k_k) * P((((l * ((J + 1) * (K + 1))) + ((K + 1) * j)) + k_k)));
           }
           if (p_ijl > ZERO) {
            g_ij = ((G(i, k) * P((((l * ((J + 1) * (K + 1))) + ((K + 1) * j)) + k))) / p_ijl);
           } else {
            g_ij = 0.0;
           }
           denom += g_ij;
           if (l == (cell(i, j) + 1 - baselevel(j))) {
            numer += g_ij;
           }
          }
          if (denom > ZERO) {
           new_P((((l * ((J + 1) * (K + 1))) + ((K + 1) * j)) + k)) = (numer / denom);
          } else {
           new_P((((l * ((J + 1) * (K + 1))) + ((K + 1) * j)) + k)) = 0.0;
          }
         }
        }
       }
      }
      for (k = 1; k <= K; k++) {
       for (j = 1; j <= J; j++) {
        if (FITP(k, j)) {
         for (l = 1; l <= ljlevels(j); l++) {
          P((((l * ((J + 1) * (K + 1))) + ((K + 1) * j)) + k)) = new_P((((l * ((J + 1) * (K + 1))) + ((K + 1) * j)) + k));
         }
         // ## BEGIN rescale_P FUNCTION ## //
         sumP = 0.0;
         for (l = 1; l <= ljlevels(j); l++) {
          if (P((((l * ((J + 1) * (K + 1))) + ((K + 1) * j)) + k)) < ZERO) {
           P((((l * ((J + 1) * (K + 1))) + ((K + 1) * j)) + k)) = 0.0;
          }
          sumP += P((((l * ((J + 1) * (K + 1))) + ((K + 1) * j)) + k));
         }
         if (sumP < ZERO) {
          sumP = (double)ljlevels(j);
          for (l = 1; l <= ljlevels(j); l++) {
           P((((l * ((J + 1) * (K + 1))) + ((K + 1) * j)) + k)) = 1.0;
          }
         }
         for (l = 1; l <= ljlevels(j); l++) {
          P((((l * ((J + 1) * (K + 1))) + ((K + 1) * j)) + k)) = (P((((l * ((J + 1) * (K + 1))) + ((K + 1) * j)) + k)) / sumP);
         }
         // ## END rescale_P FUNCTION ## //
        }
       }
      }
      // ## BEGIN loglikelihood FUNCTION ## //
      sumlik = 0.0;
      for (i = 1; i <= I; i++) {
       partlik = 0.0;
       for (j = 1; j <= J; j++) {
        l = cell(i, j) + 1 - baselevel(j);
        p_ijl = 0.0;
        for (k = 1; k <= K; k++) {
         p_ijl += (G(i, k) * P((((l * ((J + 1) * (K + 1))) + ((K + 1) * j)) + k)));
        }
        if (p_ijl < ZERO) {
         p_ijl = ZERO;
        }
        if (p_ijl < REALBIG) {
         partlik += log(p_ijl);
        }
       }
       sumlik += (cell(i, (J + 1)) * partlik);
      }
      // ## END loglikelihood FUNCTION ## //
      curlik = sumlik;
      sprintf(buffer, "%03d.%03d%s%015.5f%s%015.7f", iter, subiter, "\t", startlik, "\t", fabs((startlik - ploglik) / ploglik));
      Rcout << "Fit P (Woodbury 1974):\t" << buffer << endl;
      if (curlik < startlik) {
       for (k = 1; k <= K; k++) {
        for (j = 1; j <= J; j++) {
         for (l = 1; l <= ljlevels(j); l++) {
          P((((l * ((J + 1) * (K + 1))) + ((K + 1) * j)) + k)) = old_P((((l * ((J + 1) * (K + 1))) + ((K + 1) * j)) + k));
         }
        }
       }
       loglik[1] = startlik;
       break;
      } else {
       if ((startlik > ZERO) && (fabs(curlik - startlik) / startlik) < CTOL) {
        break;
       }
      }
      startlik = curlik;
     }
     Rcout << endl;
     // ## END Fit_P_Woodbury_1974 FUNCTION ## //
     } else if (lambdafit[0] && lambdaalgorithm[0] == "gradient.1992") {
      // ## BEGIN Fit_P_Gradient_1992 FUNCTION ## //
      converged = 0;
      for (subiter = 0; converged == 0; subiter++) { //##//
       // ## BEGIN gradient_P FUNCTION ## //
       norm = 0.0;
       for (k = 1; k <= K; k++) {
        for (j = 1; j <= J; j++) {
         for (l = 1; l <= ljlevels(j); l++) {
          dL_dP(((l * ((J + 1) * (K + 1))) + ((K + 1) * j)) + k) = 0.0;
          for (i = 1; i <= I; i++) {
           l_l = cell(i, j) + 1 - baselevel(j);
           p_ijl = 0.0;
           for (k_k = 1; k_k <= K; k_k++) {
            p_ijl += (G(i, k_k) * P(((l * ((J + 1) * (K + 1))) + ((K + 1) * j)) + k_k));
           }
           if (l == l_l) {
            if (p_ijl > ZERO) {
             dL_dP(((l * ((J + 1) * (K + 1))) + ((K + 1) * j)) + k) += (cell(i, (J + 1)) * G(i, k) * ((1.0 / p_ijl) - 1.0));
            }
           } else {
            dL_dP(((l * ((J + 1) * (K + 1))) + ((K + 1) * j)) + k) -= (cell(i, (J + 1)) * G(i, k));
           }
          }
          if (((P(((l * ((J + 1) * (K + 1))) + ((K + 1) * j)) + k) <= ZERO) && (dL_dP(((l * ((J + 1) * (K + 1))) + ((K + 1) * j)) + k) < 0.0)) || (((1.0 - P(((l * ((J + 1) * (K + 1))) + ((K + 1) * j)) + k)) <= ZERO) && (dL_dP(((l * ((J + 1) * (K + 1))) + ((K + 1) * j)) + k) > 0.0))) {
           freeP(((l * ((J + 1) * (K + 1))) + ((K + 1) * j)) + k) = 0;
          } else {
           freeP(((l * ((J + 1) * (K + 1))) + ((K + 1) * j)) + k) = 1;
           norm += dL_dP(((l * ((J + 1) * (K + 1))) + ((K + 1) * j)) + k) * dL_dP(((l * ((J + 1) * (K + 1))) + ((K + 1) * j)) + k);
          }
         }
        }
       }
       // ## END gradient_P FUNCTION ## //
       if (norm <= ZERO) {
        converged = 1;
        break;
       }
       somefree = 0;
       for (k = 1; k <= K; k++) {
        for (j = 1; j <= J; j++) {
         for (l = 1; l <= ljlevels(j); l++) {
          old_P(((l * ((J + 1) * (K + 1))) + ((K + 1) * j)) + k) = P(((l * ((J + 1) * (K + 1))) + ((K + 1) * j)) + k);
          new_P(((l * ((J + 1) * (K + 1))) + ((K + 1) * j)) + k) = P(((l * ((J + 1) * (K + 1))) + ((K + 1) * j)) + k);
          if (freeP(((l * ((J + 1) * (K + 1))) + ((K + 1) * j)) + k)) {
           somefree++;
          }
         }
        }
       }
       if (!somefree) {
        break;
       }
       stepsize = MAXSTEP;
       // ## BEGIN loglikelihood FUNCTION ## //
       sumlik = 0.0;
       for (i = 1; i <= I; i++) {
        partlik = 0.0;
        for (j = 1; j <= J; j++) {
         l = cell(i, j) + 1 - baselevel(j);
         p_ijl = 0.0;
         for (k = 1; k <= K; k++) {
          p_ijl += (G(i, k) * P((((l * ((J + 1) * (K + 1))) + ((K + 1) * j)) + k)));
         }
         if (p_ijl < ZERO) {
          p_ijl = ZERO;
         }
         if (p_ijl < REALBIG) {
          partlik += log(p_ijl);
         }
        }
        sumlik += (cell(i, (J + 1)) * partlik);
       }
       // ## END loglikelihood FUNCTION ## //
       f0 = sumlik;
       bestlik = f0;
       for (halfstep = 0; halfstep < HALFSTEPS && converged == 0; halfstep++) {
        for (k = 1; k <= K; k++) {
         for (j = 1; j <= J; j++) {
          if (FITP(k, j)) {
           for (l = 1; l <= ljlevels(j); l++) {
            P(((l * ((J + 1) * (K + 1))) + ((K + 1) * j)) + k) = old_P(((l * ((J + 1) * (K + 1))) + ((K + 1) * j)) + k) + stepsize * dL_dP(((l * ((J + 1) * (K + 1))) + ((K + 1) * j)) + k);
           }
           // ## BEGIN rescale_P FUNCTION ## //
           sumP = 0.0;
           for (l = 1; l <= ljlevels(j); l++) {
            if (P((((l * ((J + 1) * (K + 1))) + ((K + 1) * j)) + k)) < ZERO) {
             P((((l * ((J + 1) * (K + 1))) + ((K + 1) * j)) + k)) = 0.0;
            }
            sumP += P((((l * ((J + 1) * (K + 1))) + ((K + 1) * j)) + k));
           }
           if (sumP < ZERO) {
            sumP = (double)ljlevels(j);
            for (l = 1; l <= ljlevels(j); l++) {
             P((((l * ((J + 1) * (K + 1))) + ((K + 1) * j)) + k)) = 1.0;
            }
           }
           for (l = 1; l <= ljlevels(j); l++) {
            P((((l * ((J + 1) * (K + 1))) + ((K + 1) * j)) + k)) = (P((((l * ((J + 1) * (K + 1))) + ((K + 1) * j)) + k)) / sumP);
           }
           // ## END rescale_P FUNCTION ## //
          }
         }
        }
        // ## BEGIN loglikelihood FUNCTION ## //
        sumlik = 0.0;
        for (i = 1; i <= I; i++) {
         partlik = 0.0;
         for (j = 1; j <= J; j++) {
          l = cell(i, j) + 1 - baselevel(j);
          p_ijl = 0.0;
          for (k = 1; k <= K; k++) {
           p_ijl += (G(i, k) * P((((l * ((J + 1) * (K + 1))) + ((K + 1) * j)) + k)));
          }
          if (p_ijl < ZERO) {
           p_ijl = ZERO;
          }
          if (p_ijl < REALBIG) {
           partlik += log(p_ijl);
          }
         }
         sumlik += (cell(i, (J + 1)) * partlik);
        }
        // ## END loglikelihood FUNCTION ## //
        f1 = sumlik;
        if (f1 > bestlik) {
         bestlik = f1;
         for (k = 1; k <= K; k++) {
          for (j = 1; j <= J; j++) {
           for (l = 1; l <= ljlevels(j); l++) {
            new_P(((l * ((J + 1) * (K + 1))) + ((K + 1) * j)) + k) = P(((l * ((J + 1) * (K + 1))) + ((K + 1) * j)) + k);
           }
          }
         }
        } else {
         if ((bestlik > (f0 + ZERO)) && (bestlik > (f1 + ZERO))) {
          break;
         }
        }
        stepsize = (stepsize / 2.0);
       }
       f1 = bestlik;
       loglik[1] = bestlik;
       for (k = 1; k <= K; k++) {
        for (j = 1; j <= J; j++) {
         for (l = 1; l <= ljlevels(j); l++) {
          P(((l * ((J + 1) * (K + 1))) + ((K + 1) * j)) + k) = new_P(((l * ((J + 1) * (K + 1))) + ((K + 1) * j)) + k);
         }
        }
       }
       sprintf(buffer, "%03d.%03d%s%015.5f%s%015.7f", iter, subiter, "\t", loglik[1], "\t", fabs((loglik[1] - ploglik) / ploglik));
       Rcout << "Fit P (Gradient 1992):\t" << buffer << endl;
       if ((fabs(f0) > ZERO) && (fabs((f1 - f0) / f0) < CTOL)) {
        converged = 1;
       }
       if ((fabs(f0) > ZERO) && (fabs((f1 - f0) / f0) < ITOL) && (subiter >= MAXITER_PARAMETERS)) {
        break;
       }
      }
      Rcout << endl;
      // ## END Fit_P_Gradient_1992 FUNCTION ## //
     }
    }
    // ## BEGIN loglikelihood FUNCTION ## //
    sumlik = 0.0;
    for (i = 1; i <= I; i++) {
     partlik = 0.0;
     for (j = 1; j <= J; j++) {
      l = cell(i, j) + 1 - baselevel(j);
      p_ijl = 0.0;
      for (k = 1; k <= K; k++) {
       p_ijl += (G(i, k) * P((((l * ((J + 1) * (K + 1))) + ((K + 1) * j)) + k)));
      }
      if (p_ijl < ZERO) {
       p_ijl = ZERO;
      }
      if (p_ijl < REALBIG) {
       partlik += log(p_ijl);
      }
     }
     sumlik += (cell(i, (J + 1)) * partlik);
    }
    // ## END loglikelihood FUNCTION ## //
    loglik[1] = sumlik;
    sprintf(buffer, "%-10.4f", loglik[1]);
    Rcout << "Latter Log-Likelihood is:\t" << buffer << endl;
    return(wrap(loglik));
  '

  #####################BEGIN verify.parameters function#


  GoM_Model <- inline::cxxfunction (methods::signature(
    baselevel_ = "numeric",
    ljlevels_ = "numeric",
    cell_ = "matrix",
    gammaalgorithm_ = "character",
    FG_ = "matrix",
    gammafit_ = "bool",
    lambdaalgorithm_ = "character",
    FP_ = "numeric",
    lambdafit_ = "bool",
    FITP_ = "matrix"),
    body = GoM,
    includes = "#include <cstdio>",
    plugin = "Rcpp")


  verify.parameters <- function (case.id,
                                 case.weight,
                                 data.object,
                                 internal.var,
                                 initial.gamma, initial.lambda,
                                 gamma.algorithm, lambda.algorithm,
                                 initial.gamma.object, initial.lambda.object) {
    if (!(gamma.algorithm %in% c("woodbury.1974", "gradient.1992"))) {
      stop("The gamma.algorithm information is wrong ...")
    }
    if (!(initial.gamma %in% c("random", "pure1", "equal.values", "gamma.object"))) {
      stop("The initial.gamma information is wrong ...")
    }
    if (initial.gamma == c("gamma.object") & missing(initial.gamma.object)) {
      stop("The initial GoM scores object is missing ...")
    }
    if (!(lambda.algorithm %in% c("woodbury.1974", "gradient.1992"))) {
      stop("The lambda.algorithm information is wrong ...")
    }
    if (!(initial.lambda %in% c("random", "pure1", "equal.values", "lambda.object"))) {
      stop("The initial.lambda information is wrong ...")
    }
    if (initial.lambda == c("lambda.object") & missing(initial.lambda.object)) {
      stop("The initial pure type probabilities object is missing ...")
    }
    if (is.na(case.id) | !(case.id %in% names(data.object))) {
      stop("The case.id is missing ...")
    }
    if (!is.na(case.weight) && !(case.weight %in% names(data.object))) {
      stop("The case.weight is missing ...")
    }
    if (missing(internal.var)) {
      stop("The internal.var is missing ...")
    } else if (length(internal.var[(c(internal.var) %in% names(data.object) == FALSE)]) > 0) {
      stop("The internal.var information is wrong ...")
    }
  }
  #####################END verify.parameters function#

  #####################BEGIN summary.parameters function#
  summary.parameters <- function (case.id,
                                  case.weight,
                                  data.object,
                                  internal.var,
                                  initial.K, final.K,
                                  initial.gamma, initial.lambda,
                                  gamma.algorithm, lambda.algorithm,
                                  gamma.fit, lambda.fit, order.K, omega.fit) {
    cat(paste("\n*GoMRcpp Summary***********************\n", sep = "", collapse = NULL))
    cat(paste("Input data object---------------------: ", "From input data object", "\n", sep = "", collapse = NULL))
    cat(paste("Number of profiles in initial model---: ", initial.K, "\n", sep = "", collapse = NULL))
    cat(paste("Number of profiles in final model-----: ", final.K, "\n", sep = "", collapse = NULL))
    if(gamma.fit == TRUE) {
      cat(paste("GoM scores algorithm------------------: ", gamma.algorithm, "\n", sep = "", collapse = NULL))
    } else {
      cat(paste("GoM scores algorithm------------------: None\n", sep = "", collapse = NULL))
    }
    if (initial.gamma == "gamma.object") {
      cat(paste("Initial GoM scores--------------------: From input gamma object\n", sep = "", collapse = NULL))
    } else {
      cat(paste("Initial GoM scores--------------------: ", initial.gamma, "\n", sep = "", collapse = NULL))
    }
    cat(paste("Fit GoM scores------------------------: ", gamma.fit, "\n", sep = "", collapse = NULL))
    if (lambda.fit == TRUE) {
      cat(paste("Pure type probabilities algorithm-----: ", lambda.algorithm, "\n", sep = "", collapse = NULL))
    } else {
      cat(paste("Pure type probabilities algorithm-----: None\n", sep = "", collapse = NULL))
    }
    if (initial.lambda == "lambda.object") {
      cat(paste("Initial pure type probabilities-------: From input lambda object\n", sep = "", collapse = NULL))
    } else {
      cat(paste("Initial pure type probabilities-------: ", initial.lambda, "\n", sep = "", collapse = NULL))
    }
    cat(paste("Fit pure type probabilities-----------: ", lambda.fit, "\n", sep = "", collapse = NULL))
    cat(paste("Sort profiles-------------------------: ", order.K, "\n", sep = "", collapse = NULL))
    cat(paste("Records in data object----------------: ", nrow(data.object), "\n", sep = "", collapse = NULL))
    if (omega.fit == TRUE) {
      cat(paste("Unique data patterns------------------: All patterns", "\n", sep = "", collapse = NULL))
    } else {
      cat(paste("Unique data patterns------------------: From input data object", "\n", sep = "", collapse = NULL))
    }
    cat(paste("Case label----------------------------: ", case.id, "\n", sep = "", collapse = NULL))
    if (!is.na(case.weight)) {
      cat(paste("Case weight---------------------------: ", case.weight, "\n", sep = "", collapse = NULL))
    } else {
      cat(paste("Case weight---------------------------: None\n", sep = "", collapse = NULL))
    }
    cat("Internal variables--------------------:", c(internal.var), "\n")
    if (length(names(data.object)[(names(data.object) %in% c(c(case.id), c(case.weight), c(internal.var)) == FALSE)] > 0)) {
      external.var <- names(data.object)[(names(data.object) %in% c(c(case.id), c(case.weight), c(internal.var)) == FALSE)]
    } else {
      external.var <- c("------")
    }
    cat("External variables--------------------:", external.var, "\n")
  }
  #####################END summary.parameters function#

  #####################BEGIN cell.data function#
  cell.data <- function (data.object, case.weight, internal.var, omega.fit) {
    cell <- data.frame(lapply(data.object[, internal.var], as.factor))
    cell <- data.frame(lapply(cell[, internal.var], as.numeric))
    cell <- data.frame(lapply(cell[, internal.var], as.factor))
    if (!is.na(case.weight)) {
      cell[c(case.weight)] <- data.frame(sapply(data.object[, case.weight], as.numeric))
    }
    if (max(mapply(nlevels, cell[, c(internal.var)])) == 2) {
      cat(paste("\n*Note: All internal variables are dichotomous.\n", sep = "", collapse = NULL))
    }
    cell <- cell[do.call(order, cell[c(internal.var)]), ]
    cell$Patterns <- do.call(paste, c(as.list(cell[c(internal.var)]), sep=""))
    if (omega.fit == TRUE) {
      cellomega <- as.data.frame(stats::ftable(cell[c(internal.var)]))
      cellomega$Freq <- as.numeric(cellomega$Freq + 1)
      cellomega <- cellomega[do.call(order, cellomega[c(internal.var)]), ]
      cellomega$Patterns <- do.call(paste, c(as.list(cellomega[c(internal.var)]), sep=""))
      if (is.na(case.weight)) {
        cell <- cellomega[, c(c("Patterns"), c(internal.var), c("Freq"))]
      } else if (!is.na(case.weight)) {
        cellStringFreq <- stats::aggregate(cell[c(case.weight)], list(cell$Patterns), FUN = sum, simplify = TRUE)
        names(cellStringFreq) <- c("Patterns", "Freqomega")
        cellomega <- merge(cellomega, cellStringFreq, by = c("Patterns"), all.x = TRUE)
        cellomega$Freqomega <- as.numeric(cellomega$Freqomega + 1)
        cellomega$Freqomega[is.na(cellomega$Freqomega)] <- 1
        cell <- cellomega[, c(c("Patterns"), c(internal.var), c("Freqomega"))]
        names(cell) <- c("Patterns", c(internal.var), "Freq")
      }
      cat(paste("\n*Note: ", nrow(cell), " unique data patterns has found for combinations of the all patterns.\n\n", sep = "", collapse = NULL))
    }
    if (omega.fit == FALSE) {
      if (is.na(case.weight)) {
        cell$FreqCell <- sequence(rle(cell$Patterns)$lengths)
        cellStringFreq <- stats::aggregate(cell$FreqCell, list(cell$Patterns), FUN = max, simplify = TRUE)
        names(cellStringFreq) <- c("Patterns", "Freq")
      } else if (!is.na(case.weight)) {
        cellStringFreq <- stats::aggregate(cell[c(case.weight)], list(cell$Patterns), FUN = sum, simplify = TRUE)
        names(cellStringFreq) <- c("Patterns", "Freq")
      }
      cell <- merge(cell, cellStringFreq, by = c("Patterns"))
      cell <- cell[, c(c("Patterns"), c(internal.var), c("Freq"))]
      cell <- unique(cell)
      cat(paste("\n*Note: ", nrow(cell), " unique data patterns has found in data object.\n\n", sep = "", collapse = NULL))
    }
    cell <- rbind(rep(NA, (length(c(internal.var)) + 2)), cell)
    row.names(cell) <- NULL
    return (cell)
  }
  #####################END cell.data function#

  #####################BEGIN ljlevels.function function#
  ljlevels.function <- function (cell, internal.var) {
    ljlevels <- sapply(c(c(NA), c(cell[c(internal.var)])), nlevels)
    return (ljlevels)
  }
  #####################END ljlevels.function function#

  #####################BEGIN l.levels.j.function function#
  l.levels.j.function <- function (cell, internal.var) {
    l.levels.j <- (sapply(sapply(c(c(NA), c(cell[c(internal.var)])), levels), as.numeric))
    for (i in 1:length(internal.var)) {
      if (min(l.levels.j[[i+1]]) < 1) {
        stop(paste0("\n\nThe data.object can only encompass codes values from 1 (one):\n\n",
                    "\tTherefore, you must to avoid to use the 0 (zero) code ...\n\n\n"))
      }
    }
    return (l.levels.j)
  }
  #####################END l.levels.j.function function#

  #####################BEGIN parameter.FG function#
  parameter.FG <- function (initial.K, initial.gamma, cell, initial.gamma.object) {
    if (initial.gamma == c("random")) {
      FG <- as.data.frame(matrix(NA, nrow(cell), initial.K, byrow = T))
      for (i in 1:nrow(cell)) {
        ki <- sample(c(1:initial.K), initial.K, replace = FALSE, prob = NULL)
        random.gamma <- c(rep(as.double(0), initial.K))
        for (k in 1:initial.K) {
          if (k == 1) {
            random.gamma[k] <- as.double(stats::runif(1, min = as.double(0), max = as.double(1)))
          } else {
            random.gamma[k] <- as.double(stats::runif(1, min = as.double(0), max = (as.double(1) - sum(random.gamma))))
          }
        }
        for (k in 1:initial.K) {
          if (k == 1) {
            FG[i, ki[k]] <- random.gamma[k]
          } else if (k != 1) {
            if (sum(FG[i, 1:initial.K], random.gamma[k], na.rm = TRUE) <= as.double(1)) {
              if (k != length(ki)) {
                FG[i, ki[k]] <- random.gamma[k]
              } else if (k == length(ki)) {
                FG[i, ki[k]] <- (as.double(1) - (sum(FG[i, 1:initial.K], na.rm = TRUE)))
              }
            } else if (sum(FG[i, 1:initial.K], random.gamma[k], na.rm = TRUE) > as.double(1)) {
              FG[i, ki[k]] <- (as.double(1) - (sum(FG[i, 1:initial.K], na.rm = TRUE)))
            }
          }
        }
      }
    } else if (initial.gamma == c("pure1")) {
      FG <- as.data.frame(matrix(c(as.double(1), rep(as.double(0), (initial.K - 1))), nrow(cell), initial.K, byrow = T))
    } else if (initial.gamma == c("equal.values")) {
      FG <- as.data.frame(matrix(c(as.double(1) / initial.K), nrow(cell), initial.K, byrow = T))
    } else if (initial.gamma == c("gamma.object")) {
      FG <- initial.gamma.object
    }
    if (initial.gamma != c("gamma.object")) {
      FG <- cbind(rep(NA, nrow(cell)), FG)
      FG[1, ] <- NA
    } else {
      FG <- cbind(rep(NA, (nrow(cell) - 1)), FG)
      FG <- rbind(rep(NA, (initial.K + 1)), FG)
    }
    names(FG) <- c("Patterns", paste("k", 1:initial.K, sep = ""))
    FG[, "Patterns"] <- NA
    return (FG)
  }
  #####################END parameter.FG function#

  #####################BEGIN fit.P function#
  fit.P <- function (initial.K, initial.lambda, ljlevels, initial.lambda.object) {
    FITP <- matrix(as.integer(1), (initial.K + 1), length(ljlevels), byrow = T)
    FITP[1, ] <- NA
    FITP[, 1] <- NA
    if (initial.lambda == c("lambda.object")) {
      for (k in 2:(initial.K + 1)) {
        for (j in 2:length(ljlevels)) {
          for (l in 2:(ljlevels[[j]] + 1)) {
            if (initial.lambda.object[(k - 1), (j - 1), (l - 1)] < as.double(0)) {
              FITP[k, j] <- as.integer(0)
            }
          }
        }
      }
    }
    return (FITP)
  }
  #####################BEGIN fit.P function#

  #####################BEGIN parameter.FP function#
  parameter.FP <- function (initial.K, initial.lambda, ljlevels, initial.lambda.object) {
    FP <- array(c((as.double(1) / initial.K)), c((initial.K + 1), length(ljlevels), (max(ljlevels) + 1)), dimnames = list(c(paste("k", 0:initial.K, sep = "")), c(paste("j", 0:(length(ljlevels) - 1), sep = "")), c(paste("l", 0:max(ljlevels), sep = ""))))
    FP[1, , ] <- NA
    FP[, 1, ] <- NA
    FP[, , 1] <- NA
    if (initial.lambda == c("pure1")) {
      for (j in 2:length(ljlevels)) {
        FP[2, j, 2] <- as.double(1)
        for (l in 3:(ljlevels[[j]] + 1)) {
          FP[2, j, l] <- as.double(0)
        }
      }
    }
    if (initial.lambda == c("random")) {
      for (k in 2:(initial.K + 1)) {
        for (j in 2:length(ljlevels)) {
          li <- sample(c(2:(ljlevels[[j]] + 1)), ljlevels[[j]], replace = FALSE, prob = NULL)
          random.lambda <- c(rep(as.double(0), (ljlevels[[j]] + 1)))
          for (l in 2:(ljlevels[[j]] + 1)) {
            if (l == 2) {
              random.lambda[l] <- as.double(stats::runif(1, min = as.double(0), max = as.double(1)))
            } else {
              random.lambda[l] <- as.double(stats::runif(1, min = as.double(0), max = (as.double(1) - sum(random.lambda))))
            }
          }
          for (l in 2:(ljlevels[[j]] + 1)) {
            if (l == 2) {
              FP[k, j, 2:(ljlevels[[j]] + 1)] <- NA
              FP[k, j, li[l - 1]] <- random.lambda[l]
            } else if (l != 2) {
              if (sum(FP[k, j, 2:(ljlevels[[j]] + 1)], random.lambda[l], na.rm = TRUE) <= as.double(1)) {
                if ((l - 1) != length(li)) {
                  FP[k, j, li[l - 1]] <- random.lambda[l]
                } else if ((l - 1) == length(li)) {
                  FP[k, j, li[l - 1]] <- as.double(1) - sum(FP[k, j, 2:(ljlevels[[j]] + 1)], na.rm = TRUE)
                }
              } else if (sum(FP[k, j, 2:(ljlevels[[j]] + 1)], random.lambda[l], na.rm = TRUE) > as.double(1)) {
                FP[k, j, li[l - 1]] <- as.double(1) - sum(FP[k, j, 2:(ljlevels[[j]] + 1)], na.rm = TRUE)
              }
            }
          }
        }
      }
    }
    if (initial.lambda == c("lambda.object")) {
      for (k in 2:(initial.K + 1)) {
        for (j in 2:length(ljlevels)) {
          for (l in 2:(ljlevels[[j]] + 1)) {
            if (initial.lambda.object[(k - 1), (j - 1), (l - 1)] < as.double(0)) {
              FP[k, j, l] <- -(initial.lambda.object[(k - 1), (j - 1), (l - 1)])
            } else {
              FP[k, j, l] <- initial.lambda.object[(k - 1), (j - 1), (l - 1)]
            }
          }
        }
      }
    }
    for (k in 2:(initial.K + 1)) {
      for (j in 2:length(ljlevels)) {
        for (l in 2:(max(ljlevels) + 1)) {
          if (l > (max(ljlevels[[j]]) + 1)) {
            FP[k, j, l] <- NA
          }
        }
      }
    }
    return (FP)
  }
  #####################END parameter.FP function#

  #####################BEGIN v.order.K function#
  v.order.K <- function (initial.K, cell, ljlevels, FP) {
    N <- sum(cell$Freq, na.rm = TRUE)
    Zc <- 2.58
    for (l in 2:((max(ljlevels) + 1) - 1)) {
      v.order <- c(rep(as.double(0), initial.K))
      for (k in 2:(initial.K + 1)) {
        Pc1 <- 0
        Pc2 <- 0
        for (j in 2:length(ljlevels)) {
          if (l < (max(ljlevels[[j]]) + 1)) {
            if ((as.double(sum(FP[k, j, 2:l])) != as.double(0)) == TRUE) {
              if (v.order[k - 1] == as.double(0)) {
                v.order[k - 1] <- as.double(sum(FP[k, j, 2:l]) ^ (1 / N))
              } else {
                v.order[k - 1] <- prod(v.order[k - 1], as.double(sum(FP[k, j, 2:l]) ^ (1 / N)))
              }
            } else {
              Pc1 <- Pc1 + 1
              Pc2 <- Pc2 + ((length(ljlevels) - 1) + 2) - j
            }
          } else {
            l_l <- ((max(ljlevels[[j]]) + 1) - 1)
            if ((as.double(sum(FP[k, j, 2:l_l])) != as.double(0)) == TRUE) {
              if (v.order[k - 1] == as.double(0)) {
                v.order[k - 1] <- as.double(sum(FP[k, j, 2:l_l]) ^ (1 / N))
              } else {
                v.order[k - 1] <- prod(v.order[k - 1], as.double(sum(FP[k, j, 2:l_l]) ^ (1 / N)))
              }
            } else {
              Pc1 <- Pc1 + 1
              Pc2 <- Pc2 + ((length(ljlevels) - 1) + 2) - j
            }
          }
        }
        v.order[k - 1] <- (v.order[k - 1] / (1 + ((Pc2 / sum((length(ljlevels) - 1):1)) * Pc1)))
      }
      p.v.order <- matrix(NA, initial.K, initial.K)
      for (k in 1:initial.K) {
        for (k_k in 1:initial.K) {
          if (k != k_k) {
            p.v.order[k, k_k] <- ((v.order[k] - v.order[k_k]) / sqrt((((N * v.order[k]) + (N * v.order[k_k])) / (N + N)) * (1 - (((N * v.order[k]) + (N * v.order[k_k])) / (N + N))) * ((N + N) / (N * N))))
          }
        }
      }
      if (min(abs(p.v.order), na.rm = TRUE) > Zc) {
        break
      }
    }
    if (min(abs(p.v.order), na.rm = TRUE) > Zc) {
      v.order <- order(v.order, decreasing = TRUE)
    } else {
      v.order <- NULL
    }
    return (v.order)
  }
  #####################END v.order.K function#

  #####################BEGIN v.order.P function#
  v.order.P <- function (initial.K, ljlevels, v.order, beforeP) {
    afterP <- array(NA, c((initial.K + 1), length(ljlevels), (max(ljlevels) + 1)), dimnames = list(c(paste("k", 0:initial.K, sep = "")), c(paste("j", 0:(length(ljlevels) - 1), sep = "")), c(paste("l", 0:max(ljlevels), sep = ""))))
    k <- 1
    for (k_k in v.order) {
      k <- k + 1
      for (j in 2:length(ljlevels)) {
        for (l in 2:(max(ljlevels[[j]]) + 1)) {
          afterP[k, j, l] <- beforeP[(k_k + 1), j, l]
        }
      }
    }
    return(afterP)
  }
  #####################END v.order.P function#

  #####################BEGIN data.gamma function#
  data.gamma <- function (case.id, data.object, internal.var, case.weight,
                          initial.K, cell, ljlevels, l.levels.j,
                          order.K, v.order, IG, FG,
                          omega.fit, dec.char) {
    IG[, "Patterns"] <- cell$Patterns
    IG <- IG[-1, ]
    row.names(IG) <- NULL
    FG[, "Patterns"] <- cell$Patterns
    FG$FreqPatterns <- cell$Freq
    FG <- FG[-1,]
    row.names(FG) <- NULL
    if ((order.K == TRUE) && (!is.null(v.order))){
      v.order <- (v.order + 1)
      IG <- IG[, c(1, v.order)]
      FG <- FG[, c(1, v.order, (max(v.order) + 1))]
    }
    names(IG) <- c("Patterns", paste("initial_gik", 1:initial.K, sep = ""))
    names(FG) <- c("Patterns", paste("final_gik", 1:initial.K, sep = ""), "FreqPatterns")
    names.object <- names(data.object)
    data.object <- data.object[do.call(order, data.object[, internal.var]), ]
    data.object$Patterns <- do.call(paste, c(as.list(data.object[, internal.var]), sep=""))
    if (omega.fit == TRUE) {
      data.object <- merge(data.object, IG, by = c("Patterns"), all.y = TRUE)
      data.object <- merge(data.object, FG, by = c("Patterns"), all.y = TRUE)
    } else {
      data.object <- merge(data.object, IG, by = c("Patterns"))
      data.object <- merge(data.object, FG, by = c("Patterns"))
    }
    data.object <- data.object[c(c(names.object), c("Patterns"), c("FreqPatterns"), c(paste("initial_gik", 1:initial.K, sep = "")), c(paste("final_gik", 1:initial.K, sep = "")))]
    data.object <- data.object[do.call(order, list(data.object[, case.id])), ]
    if (omega.fit == TRUE) {
      if (!is.na(case.weight)) {
        data.object[case.weight][is.na(data.object[case.weight])] <- 1
      }
      st <- 0
      for (j in 1:(length(ljlevels) - 1)) {
        st <- (st + as.numeric((nchar(max(l.levels.j[[j + 1]])))))
        data.object[, c(internal.var[j])] <- as.data.frame(substr(data.object[, "Patterns"], st, ((st + as.numeric(nchar(max(l.levels.j[[j + 1]])))) - 1)))
      }
    }
    row.names(data.object) <- NULL
    nf <- 0
    repeat {
      nf <- (nf + 1)
      dataoutput <- paste (tempdir, "/GoMK", initial.K, "(", nf, ")", ".TXT", sep = "", collapse = NULL)
      logname <- paste (tempdir, "/LogGoMK", initial.K, "(", nf, ")", ".TXT", sep = "", collapse = NULL)
      if ((file.exists(dataoutput) == FALSE) && (file.exists(logname) == FALSE)) {
        break
      }
    }
    utils::write.table(data.object, file = dataoutput, quote = FALSE, sep = " ", eol = "\n", na = ".", dec = dec.char, row.names = FALSE, col.names = TRUE, qmethod = c("escape", "double"), fileEncoding = "")
    return (nf)
  }
  #####################END data.gamma function#

  #####################BEGIN loggom function#
  loggom <- function (case.id,
                      case.weight,
                      data.object,
                      internal.var,
                      initial.K, final.K,
                      initial.gamma, initial.lambda,
                      gamma.algorithm, lambda.algorithm,
                      gamma.fit, lambda.fit, order.K, omega.fit,
                      cell, ljlevels, l.levels.j, IP, FP, loglik, nf, dec.char, v.order) {
    output <- paste (tempdir, "/LogGoMK", initial.K, "(", nf, ")", ".TXT", sep = "", collapse = NULL)
    file.create(output)
    sink(output)
    cat(paste(date(), "\n\n", sep = "", collapse = NULL))
    summary.parameters(case.id,
                       case.weight,
                       data.object,
                       internal.var,
                       initial.K, final.K,
                       initial.gamma, initial.lambda,
                       gamma.algorithm, lambda.algorithm,
                       gamma.fit, lambda.fit, order.K, omega.fit)
    if (max(ljlevels, na.rm = TRUE) == 2) {
      cat(paste("\n\n*Note: All internal variables are dichotomous.\n", sep = "", collapse = NULL))
    }
    if (omega.fit == TRUE) {
      cat(paste("\n\n*Note ", (nrow(cell) - 1), " unique data patterns (I) has found for combinations of the all patterns.\n", sep = "", collapse = NULL))
    } else if (omega.fit == FALSE) {
      cat(paste("\n\n*Note ", (nrow(cell) - 1), " unique data patterns (I) has found in data object.\n", sep = "", collapse = NULL))
    }
    charnamevar <- max(sapply(internal.var, nchar)) + 1
    cat(paste("\n\nFrequency Table Original Data:\n", sep = "", collapse = NULL))
    for (j in 1:(length(ljlevels) - 1)) {
      if (!is.na(case.weight)) {
        n <- stats::xtabs(data.object[, case.weight] ~ data.object[, internal.var[j]], data.object)
      } else {
        aux <- internal.var[j]
        n <- table(data.object[, aux])
      }
      p <- prop.table(n)
      for (l in l.levels.j[[(j + 1)]]) {
        if (l == (min(l.levels.j[[(j + 1)]]))) {
          if (j == 1) {
            t <- do.call(paste, as.list(rep("", charnamevar)))
            cat(paste(t, " \t", "   ", "\tn\t%\n", sep = "", collapse = NULL))
          }
          t <- do.call(paste, as.list(rep("", (charnamevar - (nchar(internal.var[j]))))))
          cat(paste(internal.var[j], t, sep = "", collapse = NULL))
          cat(paste("\t", "l", l, "\t", sep = "", collapse = NULL))
        } else {
          t <- do.call(paste, as.list(rep("", charnamevar)))
          cat(paste(t, " \t", "l", l, "\t", sep = "", collapse = NULL))
        }
        if ((p[[l]] * 100) < 10) {
          cat(paste(format(n[[l]], nsmall = 0, decimal.mark = dec.char), "\t", "0", format(round((p[[l]] * 100), 3), nsmall = 3, decimal.mark = dec.char), "\n", sep = "", collapse = NULL))
        } else {
          cat(paste(format(n[[l]], nsmall = 0, decimal.mark = dec.char), "\t", format(round((p[[l]] * 100), 3), nsmall = 3, decimal.mark = dec.char), "\n", sep = "", collapse = NULL))
        }
      }
      cat(paste("\n", sep = "", collapse = NULL))
    }
    LJ <- 0
    for (i in 1:2) {
      if (i == 1) {
        cat(paste("\nPrimal Pure Type Probabilities:\n", sep = "", collapse = NULL))
      } else {
        cat(paste("\nLatter Pure Type Probabilities:\n", sep = "", collapse = NULL))
      }
      for (j in 2:length(ljlevels)) {
        if (i == 1) {
          LJ <- sum(LJ, max(l.levels.j[[j]]))
        }
        for (l in 2:(ljlevels[[j]] + 1)) {
          if ((j == 2) && (l == (min(l.levels.j[[j]]) + 1))) {
            t <- do.call(paste, as.list(rep("", charnamevar)))
            cat(paste(t, "\t", "   ", sep = "", collapse = NULL))
            for (k in 2:(initial.K + 1)) {
              cat(paste("\tk", (k - 1), "    ", sep = "", collapse = NULL))
            }
            cat(paste("\n", sep = "", collapse = NULL))
            t <- do.call(paste, as.list(rep("", (charnamevar - (nchar(internal.var[j - 1]))))))
            cat(paste(internal.var[j - 1], t, sep = "", collapse = NULL))
            cat(paste("\t", "l", l - 1, sep = "", collapse = NULL))
          } else if (l == (min(l.levels.j[[j]]) + 1)) {
            t <- do.call(paste, as.list(rep("", (charnamevar - (nchar(internal.var[j - 1]))))))
            cat(paste(internal.var[j - 1], t, sep = "", collapse = NULL))
            cat(paste("\t", "l", l - 1, sep = "", collapse = NULL))
          }
          for (k in 2:(initial.K + 1)) {
            if((k == 2) && (l != (min(l.levels.j[[j]]) + 1))) {
              t <- do.call(paste, as.list(rep("", charnamevar)))
              cat(paste(t, " \t", "l", l - 1, sep = "", collapse = NULL))
            }
            if (i == 1) {
              cat(paste("\t", format(round(IP[k, j, l], 4), nsmall = 4, decimal.mark = dec.char), sep = "", collapse = NULL))
            } else {
              cat(paste("\t", format(round(FP[k, j, l], 4), nsmall = 4, decimal.mark = dec.char), sep = "", collapse = NULL))
            }
          }
          cat(paste("\n", sep = "", collapse = NULL))
        }
        cat(paste("\n", sep = "", collapse = NULL))
      }
    }
    if (is.null(v.order)) {
      cat(paste("\n*Note: Could not organize pure type probabilities with a confidence interval of 99.0%.\n\n", sep = "", collapse = NULL))
    }
    AIC <- ((2 * ((initial.K * LJ) + (initial.K * (sum(cell$Freq, na.rm = TRUE))))) - (2 * loglik[2]))
    cat(paste("\nPrimal Log-Likelihood is:       \t", format(round(loglik[1], 4), nsmall = 4, decimal.mark = dec.char) , "\n", sep = "", collapse = NULL))
    cat(paste("\n\nLatter Log-Likelihood is:       \t", format(round(loglik[2], 4), nsmall = 4, decimal.mark = dec.char) , "\n", sep = "", collapse = NULL))
    cat(paste("\n\nAkaike Information Criterion:   \t", format(round(AIC, 4), nsmall = 4, decimal.mark = dec.char) , "\n", sep = "", collapse = NULL))
    cat(paste("\n\nLambda-Marginal Frequency Ratio (LMFR):\n", sep = "", collapse = NULL))
    table <- utils::capture.output({
      for (j in 2:length(ljlevels)) {
        if (omega.fit == FALSE) {
          if (!is.na(case.weight)) {
            n <- stats::xtabs(data.object[, case.weight] ~ data.object[, internal.var[(j - 1)]], data.object)
          } else {
            aux <- internal.var[(j-1)]
            n <- table(data.object[, aux])
          }
        } else {
          n <- stats::xtabs(cell[-1, "Freq"] ~ cell[-1, internal.var[(j - 1)]], cell)
        }
        p <- prop.table(n)
        for (l in l.levels.j[[j]]) {
          if (l == (min(l.levels.j[[j]]))) {
            if (j == 2) {
              t <- do.call(paste, as.list(rep("", charnamevar)))
              cat(paste(t, "\t", "   ", "\tn\t%", sep = "", collapse = NULL))
              for (k in 2:(initial.K + 1)) {
                cat(paste("\tk", (k - 1), "    ", sep = "", collapse = NULL))
              }
              for (k in 2:(initial.K + 1)) {
                cat(paste("\tk", (k - 1), "/%lj", sep = "", collapse = NULL))
              }
              cat(paste("\n", sep = "", collapse = NULL))
            }
            t <- do.call(paste, as.list(rep("", (charnamevar - (nchar(internal.var[(j - 1)]))))))
            cat(paste(internal.var[(j - 1)], t, sep = "", collapse = NULL))
            cat(paste("\t", "l", l, "\t", sep = "", collapse = NULL))
          } else {
            t <- do.call(paste, as.list(rep("", charnamevar)))
            cat(paste(t, " \t", "l", l, "\t", sep = "", collapse = NULL))
          }
          if ((p[[l]] * 100) < 10) {
            cat(paste(format(n[[l]], nsmall = 0, decimal.mark = dec.char), "\t", "0", format(round((p[[l]] * 100), 3), nsmall = 3, decimal.mark = dec.char), sep = "", collapse = NULL))
          } else {
            cat(paste(format(n[[l]], nsmall = 0, decimal.mark = dec.char), "\t", format(round((p[[l]] * 100), 3), nsmall = 3, decimal.mark = dec.char), sep = "", collapse = NULL))
          }
          for (k in 2:(initial.K + 1)) {
            cat(paste("\t", format(round(FP[k, j, (l + 1)], 4), nsmall = 4, decimal.mark = dec.char), sep = "", collapse = NULL))
          }
          for (k in 2:(initial.K + 1)) {
            cat(paste("\t", format(round((FP[k, j, (l + 1)] / p[[l]]), 4), nsmall = 4, decimal.mark = dec.char), sep = "", collapse = NULL))
          }
          cat(paste("\n", sep = "", collapse = NULL))
        }
        if (j != length(ljlevels)) {
          cat(paste("\n", sep = "", collapse = NULL))
        }
      }
    })
    writeLines(table)
    sink()
    if (is.null(v.order)) {
      cat(paste("\n*Note: Could not organize pure type probabilities with a confidence interval of 99.0%.\n", sep = "", collapse = NULL))
    }
    cat(paste("\n*Note: Saved frequency table, pure type probabilities and loglikelihood values in ", output, ".\n", sep = "", collapse = NULL))
    cat(paste("\n*Note: Saved original data with GoM scores in " , tempdir, "/GoMK", initial.K, "(", nf, ")", ".TXT", ".\n", sep = "", collapse = NULL))
    return(table)
  }
  #####################END loggom function#

  if (!(is.data.frame (data.object))) {
    stop("The data.object is not a data frame ...")
  }
  if (initial.K < 2) {
    stop("The initial.K information is wrong ...")
  }
  if (final.K < 2) {
    stop("The final.K information is wrong ...")
  }
  gamma.algorithm <- gamma.algorithm[1]
  initial.gamma <- initial.gamma[1]
  if ((gamma.fit != TRUE) & (gamma.fit != FALSE)) {
    stop("The gamma.fit information is wrong ...")
  }
  lambda.algorithm <- lambda.algorithm[1]
  initial.lambda <- initial.lambda[1]
  if ((lambda.fit != TRUE) & (lambda.fit != FALSE)) {
    stop("The lambda.fit information is wrong ...")
  }
  if (is.null(internal.var)) {
    internal.var <- names(data.object)[!(names(data.object) %in% c(c(case.id), c(case.weight)))]
  }
  if ((order.K != TRUE) & (order.K != FALSE)) {
    stop("The order.K information is wrong ...")
  }
  if ((omega.fit != TRUE) & (omega.fit != FALSE)) {
    stop("The omega.fit information is wrong ...")
  }
  dec.char <- substr(dec.char, 1, 1)
  if ((dec.char != ",") && (dec.char != ".")) {
    dec.char <- "."
  }
  pathfolder <- tempdir
  oldoptions <- options()
  on.exit(options(oldoptions))
  options(digits = 15)
  FINAL.PARAMETERS <- vector("list")
  for (initial.K in initial.K:final.K) {
    data.aux = data.object
    verify.parameters(case.id,
                      case.weight,
                      data.object,
                      internal.var,
                      initial.gamma, initial.lambda,
                      gamma.algorithm, lambda.algorithm,
                      initial.gamma.object, initial.lambda.object)
    summary.parameters(case.id,
                       case.weight,
                       data.object,
                       internal.var,
                       initial.K, final.K,
                       initial.gamma, initial.lambda,
                       gamma.algorithm, lambda.algorithm,
                       gamma.fit, lambda.fit, order.K, omega.fit)
    cell <- cell.data(data.object, case.weight, internal.var, omega.fit)
    ljlevels <- ljlevels.function(cell, internal.var)
    l.levels.j <- l.levels.j.function(cell, internal.var)
    baselevel <- c(rep(0, length(ljlevels)))
    for (j in 2:length(ljlevels)) {
      baselevel[j] <- l.levels.j[[j]][1]
    }
    names(baselevel) <- names(ljlevels)
    if (final.K > (nrow(cell) - 1)) {
      stop("The final.K information can not be greater than value of (I) ...")
    }
    cell <- as.matrix(cell)
    row.names(cell) <- NULL
    cell[, 1] <- NA
    cell <- apply(cell, 2, as.numeric)
    if (initial.gamma == c("gamma.object")) {
      if (!is.data.frame(initial.gamma.object)) {
        stop("The initial.gamma.object is not a data frame ...")
      }
    }

    ###############################C++###############################
    loglik <- vector(mode = "list", length = MC_iter)
    FG_list <- vector(mode = "list", length = MC_iter)
    FP_list <- vector(mode = "list", length = MC_iter)
    FITP_list <- vector(mode = "list", length = MC_iter)
    IG_list <- vector(mode = "list", length = MC_iter)
    cat("\n Monte Carlo Iteration: \n")
    pb = utils::txtProgressBar(min = 0, max = MC_iter, initial = 0, style = 3, width = 60)
    for(i in 1:MC_iter){
      FG <- parameter.FG(initial.K, initial.gamma, cell, initial.gamma.object)
      FG <- as.matrix(FG)
      IG <- as.data.frame(round(FG, 4))
      if (nrow(IG) != nrow(cell)) {
        stop("The number of lines of initial.gamma.object is wrong ...")
      }
      if (initial.lambda == c("lambda.object")) {
        if (!is.array(initial.lambda.object)) {
          stop("The initial.lambda.object is not a array ...")
        }
      }
      FITP <- fit.P(initial.K, initial.lambda, ljlevels, initial.lambda.object)
      FP <- parameter.FP(initial.K, initial.lambda, ljlevels, initial.lambda.object)
      FP <- as.array(FP)

      FG_list[[i]] <- FG
      FP_list[[i]] <- FP
      FITP_list[[i]] <- FITP
      IG_list[[i]] <- IG

      invisible(utils::capture.output(loglik[[i]] <- GoM_Model(baselevel, ljlevels, cell,
                                                               gamma.algorithm, FG, gamma.fit,
                                                               lambda.algorithm, FP, lambda.fit, FITP)))

      utils::setTxtProgressBar(pb,i)



    }
    cat("\n \n")
    maxloglik <- vector(length = MC_iter)
    for(i in 1:MC_iter){
      maxloglik[i] <- loglik[[i]][2]
    }
    FG <- FG_list[[which.max(maxloglik)]]
    FP <- FP_list[[which.max(maxloglik)]]
    FITP <- FITP_list[[which.max(maxloglik)]]
    IG <- IG_list[[which.max(maxloglik)]]
    loglik <- loglik[[which.max(maxloglik)]]
    IP <- array(NA, c((initial.K + 1), length(ljlevels), (max(ljlevels) + 1)), dimnames = list(c(paste("k", 0:initial.K, sep = "")), c(paste("j", 0:(length(ljlevels) - 1), sep = "")), c(paste("l", 0:max(ljlevels), sep = ""))))
    for (k in 2:(initial.K + 1)) {
      for (j in 2:length(ljlevels)) {
        for (l in 2:(ljlevels[[j]] + 1)) {
          IP[k, j, l] <- round(FP[k, j, l], 4)
        }
      }
    }
    #################################################################
    newfolder <- paste(pathfolder, "/K", initial.K,"/", sep = "", collapse = NULL)
    if (file.exists(newfolder) == FALSE) {
      dir.create(newfolder, showWarnings = TRUE, recursive = FALSE, mode = "0777")
    }

    cell <- as.data.frame(cell)
    cell$Patterns <- do.call(paste, c(as.list(cell[c(internal.var)]), sep=""))
    FG <- as.data.frame(round(FG, 4))
    FP <- round(FP, 4)
    if (order.K == TRUE) {
      v.order <- v.order.K(initial.K, cell, ljlevels, FP)
      if (!is.null(v.order)) {
        IP <- v.order.P(initial.K, ljlevels, v.order, IP)
        FP <- v.order.P(initial.K, ljlevels, v.order, FP)
      }
    } else {
      v.order <- as.integer(0)
    }
    options(scipen = 9999999)
    options(digits = 4)
    nf <- data.gamma(case.id, data.object, internal.var, case.weight,
                     initial.K, cell, ljlevels, l.levels.j,
                     order.K, v.order, IG, FG,
                     omega.fit, dec.char)
    table <- loggom(case.id,
                    case.weight,
                    data.object,
                    internal.var,
                    initial.K, final.K,
                    initial.gamma, initial.lambda,
                    gamma.algorithm, lambda.algorithm,
                    gamma.fit, lambda.fit, order.K, omega.fit,
                    cell, ljlevels, l.levels.j, IP, FP, loglik, nf, dec.char, v.order)
    FG <- FG[-1, -1]
    row.names(FG) <- NULL
    if ((order.K == TRUE) && (!is.null(v.order))){
      FG <- FG[, v.order]
    }
    names(FG) <- c(paste("gik", 1:initial.K, sep = ""))
    data.aux$place <- row.names(data.aux)
    aux <- data.frame(patterns = do.call(paste0, c(as.list(data.aux[, internal.var]))),
                      rows = 1:nrow(data.aux))
    aux <- aux %>% dplyr::arrange(.data$patterns)
    data.aux <- data.aux[aux$rows,]
    data.aux$patterns <- aux$patterns
    FG$patterns = sort(unique(aux$patterns))
    data.aux <- dplyr::inner_join(data.aux,
                                  FG, by = "patterns")
    data.aux <- data.aux %>% dplyr::select(-.data$place, -.data$patterns)

    FINAL.PARAMETERS[[paste0("K", initial.K)]]$Data <- data.aux
    FP <- FP[-1, -1, -1]
    FINAL.PARAMETERS[[paste0("K", initial.K)]]$Pkjl <- FP
    FINAL.PARAMETERS[[paste0("K", initial.K)]]$Likelihood <- loglik[2]
    LJ <- 0
    for(i in 2:length(ljlevels)){
      LJ <- sum(LJ, max(l.levels.j[[i]]))
    }
    AIC <- ((2 * ((initial.K * LJ) + (initial.K * (sum(cell$Freq, na.rm = TRUE))))) - (2 * loglik[2]))
    FINAL.PARAMETERS[[paste0("K", initial.K)]]$AIC <- AIC
    FINAL.PARAMETERS[[paste0("K", initial.K)]]$Table <- table
  }

  class(FINAL.PARAMETERS) <- "gom_ml"

  return(FINAL.PARAMETERS)
}
epopea/gom documentation built on March 1, 2023, 1:54 a.m.