# function name and purpose: 'BPL_neg_loglik' calculates the negative log-likelihood of a BPL-type model when the dampening function
# is as parameterized in Section 3.1.
#
# author: Phil Boonstra (philb@umich.edu)
#
# date: 3/5/2019, 10:00am EST
#
# Function inputs:
#
### param: (vector with 'names' attribute, numeric) the parameter vector to evaluate with respect to the data. Must be named exactly
#as follows: '0' (optional, if the stopping process is to be modeled), then consecutive positive integer labels, one for each
#unique item, then 'log_delta1', then 'log_delta2'
#
### dat: (data.frame, integers, positive) A data.frame of ordered lists of positive integers. Each row is a different list, each column
# is a rank, each cell is the integer label of an item. So for example the [1,1] element gives the label of the item that the first
# ranker ordered first, i.e. ranked highest. The constraints are as follows: If any cell is non-missing, then every cell in that same
# row to the left should also be non-missing. Second, for any number appearing anywhere in this data.frame, every single number that is
# less than this number should appear at least once somewhere (not necessarily in the same list)
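# For example, rbind(c(1,2,3), c(2,1,NA), c(3,NA,NA)) would be a valid 'dat': three lists over items 1, 2, and 3,
# where the second list ranks only two items and the third ranks only one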
#
### obs_weights: (vector, integers, positive) A vector with length equal to the number of rows of 'dat' giving the number of observations that
# each list represents. This gives a simple way to considerably speed up the calculation if there are a large number of duplicated lists. Just
# provide a vector of 1's if each list represents a single ranker
#
### fixed: (vector with 'names' attribute, numeric) a numeric vector whose names are a subset of the names of 'param'; the corresponding
# values in 'param' will be over-written by the values in 'fixed'. This is not useful in isolation but helpful for the coordinate ascent algorithm
#
### safe_mode: (logical) if TRUE, then perform error checks to make sure that all inputs are as expected. Set to FALSE to speed things up a bit
#
### num_uniq: (integer, positive) number of unique items in 'dat'. Only required if safe_mode == FALSE, otherwise will be equal to sum(!is.na(unique(as.numeric(dat))))
#
### num_obs: (integer, positive) number of observations in 'dat'. Only required if safe_mode == FALSE, otherwise will be equal to nrow(dat)
#
### num_ranks: (integer, positive) 1 + length of longest observed list. Only required if safe_mode == FALSE, otherwise will be equal to 1 + ncol(dat)
#
### max_columns: (integer, positive) number of columns to loop over. Only required if safe_mode == FALSE, otherwise will be equal to min(num_uniq, num_ranks)
#
### list_lengths: (vector, positive) vector of individual list lengths. Only required if safe_mode == FALSE, otherwise will be equal to unlist(lapply(apply(apply(cbind(dat,NA),1,is.na),2,which),"[",1)) - 1
#
### Value
#The sum of the negative BPL log-likelihood function (Equation 3.2) over the n observations, weighted by 'obs_weights'
BPL_neg_loglik = function(param,
dat,
obs_weights,
fixed = NULL,
safe_mode = TRUE,
num_uniq = NULL,
num_obs = NULL,
num_ranks = NULL,
max_columns = NULL,
list_lengths = NULL) {
#augment the data with a column of NAs so that there is always room to add a zero after the end of an actual list
aug_dat = cbind(dat,NA);
#conduct the checks that all inputs are as expected
if(safe_mode) {
num_obs = nrow(aug_dat);
num_ranks = ncol(aug_dat);
num_uniq = sum(!is.na(unique(as.numeric(dat))));
max_columns = min(num_uniq, num_ranks);
list_lengths = unlist(lapply(apply(apply(aug_dat,1,is.na),2,which),"[",1)) - 1;
if(!isTRUE(all(apply(apply(is.na(aug_dat),1,cumsum),2,diff) | (apply(is.na(dat),1,cumsum) == 0))) |
isTRUE(any(apply(dat,1,duplicated) & !apply(dat,1,is.na)))) {
stop("'dat' must contain uninterrupted left-to-right sets of unique integers, i.e. no NAs between items");
}
if(max(colSums(is.na(dat))) == num_obs) {
stop("One or more columns of 'dat' contain only NAs");
}
if(!isTRUE(all.equal(names(param),c(0:num_uniq,"log_delta1","log_delta2"))) && !isTRUE(all.equal(names(param),c(1:num_uniq,"log_delta1","log_delta2")))) {
stop("'param' must be named exactly as follows: '0' (optional, if the stopping process is to be modeled), then consecutive positive integer labels, one for each unique item, then 'log_delta1', then 'log_delta2'");
}
if(!all(names(fixed)%in%names(param))) {stop("'fixed' must be NULL or a subset of named elements from param");}
if(num_uniq != max(dat,na.rm = TRUE)) {stop("Item labels in 'dat' must be consecutively labeled integers starting with 1");}
if(num_uniq != length(param) - 2 - ("0"%in%names(param))) {stop("Length of 'param' must be equal to the number of unique items ranked plus 2 (for 'log_delta1' and 'log_delta2') plus 1 (if '0' is modeled)");}
if("log_delta1"%in%names(param) && param["log_delta1"] > 0) {stop("'log_delta1' must be non-positive");}
if("log_delta2"%in%names(param) && param["log_delta2"] > 0) {stop("'log_delta2' must be non-positive");}
}
#Overwrite with fixed values
param[names(fixed)] = fixed;
log_item_weights = param[as.character(1:num_uniq)];
item_weights = exp(log_item_weights);
delta1 = exp(param["log_delta1"]);
delta2 = exp(param["log_delta2"]);
#Insert zero after end of every list to indicate 'stop'
aug_dat[cbind(1:num_obs,list_lengths + 1)] = 0;
#Check if fatigue is to be monitored
if("0"%in%names(param)) {
log_fatigue_weight = param["0"];
fatigue_weight = exp(log_fatigue_weight);
} else {
log_fatigue_weight = fatigue_weight = 0;
}
#Contribution from Pr(xis=k|ui,Ois,xis!=0)
log_num = log_denom = matrix(0,nrow=num_obs,ncol=num_ranks);
log_num[,1] = log_item_weights[aug_dat[,1]];
#Denominator is same for everyone at rank 1 (and does not include the constant '1' since no list can stop at rank 1)
log_denom[,1] = log(sum(item_weights));
index_to_loop = 1:max_columns;
#compute all dampening exponents d(s) = delta2 * delta1^(s-1) + (1 - delta2)^(2s - 1) at once
dampening = (delta2 * delta1^(index_to_loop-1) + (1-delta2)^(2 * index_to_loop - 1))
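#e.g., with delta1 = delta2 = 1 there is no dampening, since then dampening = 1 * 1^(s-1) + 0^(2s-1) = 1 at every stage s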
for(s in index_to_loop[-1]) {
#which lists ranked an item at this stage?
which_ranked_item = which(aug_dat[,s] > 0);
#which lists ended precisely at this stage?
which_ended = which(aug_dat[,s] == 0);
if(length(which_ranked_item)) {
log_num[which_ranked_item,s] = log_item_weights[aug_dat[which_ranked_item,s]] * dampening[s];
}
if(length(which_ended)) {
log_num[which_ended,s] = log_fatigue_weight;
}
which_either = which(aug_dat[,s] >= 0);
log_denom[which_either,s] = log(fatigue_weight + sum(item_weights^dampening[s]) - rowSums(matrix(item_weights[aug_dat[which_either,1:(s-1),drop = FALSE]]^dampening[s], ncol = s-1),na.rm = TRUE));
}
sum(obs_weights * log_denom) - sum(obs_weights * log_num);
}
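#A minimal usage sketch (guarded by if(FALSE) so that sourcing this file does not run it). The data and parameter values
#below are illustrative only, not estimates; 'dat' is supplied as a matrix, which also satisfies the coercions used in safe mode
if(FALSE) {
  toy_dat = rbind(c(1, 2, 3),
                  c(2, 1, NA),
                  c(3, NA, NA),
                  c(1, 3, 2));
  toy_param = c("0" = 0, "1" = 0.5, "2" = 1, "3" = 0,
                "log_delta1" = -0.1, "log_delta2" = -0.1);
  #negative log-likelihood at these parameter values, each list weighted equally
  BPL_neg_loglik(toy_param, toy_dat, obs_weights = rep(1, nrow(toy_dat)));
}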
# function name and purpose: 'BPL_neg_loglik_CA' calculates a grid of negative log-likelihood values of a BPL-type model
# when the dampening function is as parameterized in Section 3.1. However, one element of 'param' can be missing (NA), and
# in this case the function calculates and returns multiple values of the negative log-likelihood by plugging in
# each element of 'CA_param' into the missing value
#
# author: Phil Boonstra (philb@umich.edu)
#
# date: 3/5/2019, 10:00am EST
#
# Function inputs:
#
### param: (vector with 'names' attribute, numeric) the parameter vector to evaluate with respect to the data. Must be named exactly
#as follows: '0' (optional, if the stopping process is to be modeled), then consecutive positive integer labels, one for each
#unique item, then 'log_delta1', then 'log_delta2'. Up to one element of 'param' can be missing (NA), in which case CA_param
# should be non-missing
#
### CA_param (vector, numeric) a grid of values to plug in for the missing element of 'param'. The function will return a vector
# of negative log-likelihoods as long as this grid
#
### dat: (data.frame, integers, positive) A data.frame of ordered lists of positive integers. Each row is a different list, each column
# is a rank, each cell is the integer label of an item. So for example the [1,1] element gives the label of the item that the first
# ranker ordered first, i.e. ranked highest. The constraints are as follows: If any cell is non-missing, then every cell in that same
# row to the left should also be non-missing. Second, for any number appearing anywhere in this data.frame, every single number that is
# less than this number should appear at least once somewhere (not necessarily in the same list)
#
### obs_weights: (vector, integers, positive) A vector with length equal to the number of rows of 'dat' giving the number of observations that
# each list represents. This gives a simple way to considerably speed up the calculation if there are a large number of duplicated lists. Just
# provide a vector of 1's if each list represents a single ranker
#
### safe_mode: (logical) if TRUE, then perform error checks to make sure that all inputs are as expected. Set to FALSE to speed things up a bit
#
### num_uniq: (integer, positive) number of unique items in 'dat'. Only required if safe_mode == FALSE, otherwise will be equal to sum(!is.na(unique(as.numeric(dat))))
#
### num_obs: (integer, positive) number of observations in 'dat'. Only required if safe_mode == FALSE, otherwise will be equal to nrow(dat)
#
### num_ranks: (integer, positive) 1 + length of longest observed list. Only required if safe_mode == FALSE, otherwise will be equal to 1 + ncol(dat)
#
### max_columns: (integer, positive) number of columns to loop over. Only required if safe_mode == FALSE, otherwise will be equal to min(num_uniq, num_ranks)
#
### list_lengths: (vector, positive) vector of individual list lengths. Only required if safe_mode == FALSE, otherwise will be equal to unlist(lapply(apply(apply(cbind(dat,NA),1,is.na),2,which),"[",1)) - 1
#
### CA_param_counts_by_rank: (vector, integer, positive) vector of counts, weighted by 'obs_weights', of how often the item corresponding to the
# missing element of 'param' is ranked at each stage. Only required if 'param' contains a missing element, that missing element is an item,
# and safe_mode == FALSE. Otherwise, will be equal to c(colSums(obs_weights * (dat == as.numeric(CA_flag)), na.rm = TRUE),0)[1:max_columns]
#
### Value
#A vector as long as 'CA_param', with each element giving the sum of the negative BPL log-likelihood function (Equation 3.2) over the n observations,
# weighted by obs_weights, after plugging in the corresponding element of 'CA_param' for the missing value of 'param'. If 'param' has no missing elements,
# then this just returns the negative BPL log-likelihood
BPL_neg_loglik_CA = function(param,
CA_param,
dat,
obs_weights,
safe_mode = TRUE,
num_uniq = NULL,
num_obs = NULL,
num_ranks = NULL,
max_columns = NULL,
list_lengths = NULL,
CA_param_counts_by_rank = NULL) {
#name of parameter to grid-search over
CA_flag = names(param)[which(is.na(param))];
#length of grid
length_CA_param = length(CA_param);
#augment the data with a column of NAs so that there is always room to add a zero after the end of an actual list
aug_dat = cbind(dat,NA);
#conduct the checks that all inputs are as expected
if(safe_mode) {
num_obs = nrow(aug_dat);
num_ranks = ncol(aug_dat);
num_uniq = sum(!is.na(unique(as.numeric(dat))));
max_columns = min(num_uniq, num_ranks);
list_lengths = unlist(lapply(apply(apply(aug_dat,1,is.na),2,which),"[",1)) - 1;
if(length(CA_flag) > 1) {stop("coordinate ascent cannot maximize with respect to more than one parameter");}
if(max(colSums(is.na(dat))) == num_obs) {
stop("One or more columns of 'dat' contain only NAs");
}
if(!isTRUE(all.equal(names(param),c(0:num_uniq,"log_delta1","log_delta2"))) && !isTRUE(all.equal(names(param),c(1:num_uniq,"log_delta1","log_delta2")))) {
stop("'param' must be named exactly as follows: '0' (optional, if the stopping process is to be modeled), then consecutive positive integer labels, one for each unique item, then 'log_delta1', then 'log_delta2'");
}
if(num_uniq != max(dat,na.rm = TRUE)) {stop("Item labels in 'dat' must be consecutively labeled integers starting with 1");}
if(num_uniq!=length(param) - 2 - ("0"%in%names(param))) {stop("Length of 'param' must be equal to the number of unique items ranked plus 2 (for 'log_delta1' and 'log_delta2') plus 1 (if '0' is modeled)");}
if(length(CA_flag) && !(CA_flag %in% c("0","log_delta1","log_delta2"))) {
CA_param_counts_by_rank = c(colSums(obs_weights * (dat == as.numeric(CA_flag)), na.rm = TRUE),0)[1:max_columns];#Last rank indicates termination, so no items are ever ranked there
}
if("log_delta1"%in%names(param) && CA_flag != "log_delta1" && param["log_delta1"] > 0) {stop("'log_delta1' must be non-positive");}
if("log_delta2"%in%names(param) && CA_flag != "log_delta2" && param["log_delta2"] > 0) {stop("'log_delta2' must be non-positive");}
if(length(CA_flag) && CA_flag == "log_delta1" && any(CA_param > 0)) {stop("'log_delta1' must be non-positive");}
if(length(CA_flag) && CA_flag == "log_delta2" && any(CA_param > 0)) {stop("'log_delta2' must be non-positive");}
}
log_item_weights = param[as.character(1:num_uniq)];
item_weights = exp(log_item_weights);
delta1 = exp(param["log_delta1"]);
delta2 = exp(param["log_delta2"]);
#Insert zero after end of every list to indicate 'stop'
aug_dat[cbind(1:num_obs,list_lengths + 1)] = 0;
#Check if fatigue is to be monitored
if("0"%in%names(param)) {
log_fatigue_weight = param["0"];
fatigue_weight = exp(log_fatigue_weight);
} else {
log_fatigue_weight = fatigue_weight = 0;
}
#Do this if a grid of dampening parameters is provided
if(length(CA_flag) && (CA_flag %in% c("log_delta1","log_delta2"))) {
log_num =
log_denom = array(0, dim = c(num_obs,num_ranks,length_CA_param));
log_num[,1,] = log_item_weights[aug_dat[,1]];
#Denominator is same for everyone at rank 1 (and does not include the constant '1' since no list can stop at rank 1)
log_denom[,1,] = log(sum(item_weights));
index_to_loop = 1:max_columns;
expanded_index_to_loop = matrix(index_to_loop,nrow = length_CA_param, ncol = max_columns, byrow = TRUE);
if(CA_flag %in% "log_delta1") {
dampening_matrix = delta2 * matrix(exp(CA_param),nrow = length_CA_param, ncol = max_columns)^(expanded_index_to_loop - 1) + (1-delta2)^(2 * expanded_index_to_loop - 1);
} else {
matrix_delta2 = matrix(exp(CA_param), nrow = length_CA_param, ncol = max_columns);
dampening_matrix = matrix_delta2 * delta1^(expanded_index_to_loop - 1) + (1 - matrix_delta2)^(2 * expanded_index_to_loop - 1);
}
dampened_item_weights = array(rep(item_weights, each = prod(dim(dampening_matrix))), dim = c(length_CA_param,num_ranks,num_uniq)) ^ array(dampening_matrix,dim = c(length_CA_param,num_ranks,num_uniq));
sum_dampened_item_weights = rowSums(dampened_item_weights,dims = 2);
for(s in index_to_loop[-1]) {
which_ranked_item = which(aug_dat[,s] > 0);
which_ended = which(aug_dat[,s] == 0);
if(length(which_ranked_item)) {
log_num[which_ranked_item,s,] = tcrossprod(log_item_weights[aug_dat[which_ranked_item,s]],dampening_matrix[,s]);
}
if(length(which_ended)) {
log_num[which_ended,s,] = log_fatigue_weight;
}
which_either = which(aug_dat[,s] >= 0);
num_either = sum(aug_dat[,s] >= 0, na.rm = TRUE);
dampened_weight_loss = array(dampened_item_weights[,s,][cbind(rep(1:length_CA_param, each = num_either*(s-1)), rep(as.numeric(aug_dat[which_either,1:(s-1),drop = FALSE]), times = length_CA_param))], dim = c(num_either, s - 1, length_CA_param));
log_denom[which_either,s,] = log(fatigue_weight +
matrix(sum_dampened_item_weights[,s], nrow = num_either, ncol = length_CA_param, byrow = TRUE) -
rowSums(aperm(dampened_weight_loss,c(1,3,2)), dims = 2))
}
colSums(obs_weights * log_denom - obs_weights * log_num, dims = 2);
#Do this if a grid of fatigue parameters is provided
} else if(length(CA_flag) && (CA_flag == "0")) {
log_num =
log_denom = array(0, dim = c(num_obs,num_ranks,length_CA_param));
log_num[,1,] = log_item_weights[aug_dat[,1]];
#Denominator is same for everyone at rank 1 (and does not include the constant '1' since no list can stop at rank 1)
log_denom[,1,] = log(sum(item_weights));
index_to_loop = 1:max_columns;
dampening = (delta2 * delta1^(index_to_loop-1) + (1-delta2)^(2 * index_to_loop - 1));
for(s in index_to_loop[-1]) {
which_ranked_item = which(aug_dat[,s] > 0);
which_ended = which(aug_dat[,s] == 0);
if(length(which_ranked_item)) {
log_num[which_ranked_item,s,] = log_item_weights[aug_dat[which_ranked_item,s]] * dampening[s];
}
if(length(which_ended)) {
log_num[which_ended,s,] = rep(CA_param, each = length(which_ended));
}
which_either = which(aug_dat[,s] >= 0);
num_either = sum(aug_dat[,s] >= 0, na.rm = TRUE);
log_denom[which_either,s,] = log(rep(exp(CA_param), each = num_either) + rep(sum(item_weights^dampening[s]) - rowSums(matrix(item_weights[aug_dat[which_either,1:(s-1),drop = FALSE]]^dampening[s],ncol=s-1),na.rm = TRUE), times = length_CA_param));
}
colSums(obs_weights * log_denom - obs_weights * log_num, dims = 2);
#Do this if a grid of item weights is provided, or if 'param' is complete and no grid is provided
} else {
index_to_loop = 1:max_columns;
log_num =
denom_flag = matrix(0,nrow=num_obs,ncol=max_columns);
denom = matrix(1,nrow=num_obs,ncol=max_columns);
log_num[,1] = log_item_weights[aug_dat[,1]];
denom[,1] = sum(item_weights,na.rm = TRUE);
denom_flag[,1] = 1;
dampening = (delta2 * delta1^(index_to_loop-1) + (1-delta2)^(2 * index_to_loop - 1));
as_numeric_CA_flag = as.numeric(CA_flag);
for(s in index_to_loop[-1]) {
which_ranked_item = which(aug_dat[,s] > 0);
which_ended = which(aug_dat[,s] == 0);
if(length(which_ranked_item)) {
log_num[which_ranked_item,s] = log_item_weights[aug_dat[which_ranked_item,s]] * dampening[s];
}
if(length(which_ended)) {
log_num[which_ended,s] = log_fatigue_weight;
}
which_either = which(aug_dat[,s] >= 0);
aug_dat_reduced = aug_dat[which_either,1:(s-1),drop = FALSE];
dampened_item_weights = item_weights ^ dampening[s];
denom[which_either,s] =
fatigue_weight +
sum(dampened_item_weights, na.rm = TRUE) -
rowSums(matrix(dampened_item_weights[aug_dat_reduced], ncol = s-1), na.rm = TRUE);
if(length(CA_flag)) {
denom_flag[which_either,s] = 1 - rowSums(aug_dat_reduced == as_numeric_CA_flag);
}
}
dampening_denom_flag = denom_flag * matrix(dampening, nrow = num_obs, ncol = max_columns, byrow = TRUE) + (1 - denom_flag);
if(length(CA_flag)) {
rowSums(matrix(obs_weights, nrow = length_CA_param, ncol = prod(dim(denom)), byrow = TRUE) * log(tcrossprod(rep(1,length_CA_param),as.numeric(denom)) + tcrossprod(exp(CA_param),as.numeric(denom_flag))^tcrossprod(rep(1,length_CA_param),as.numeric(dampening_denom_flag)))) -
sum(obs_weights * log_num,na.rm = TRUE) -
sum(CA_param_counts_by_rank * dampening) * CA_param;
} else {
sum(obs_weights * log(denom)) - sum(obs_weights * log_num);
}
}
}
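#An illustrative sketch of the grid evaluation, reusing 'toy_dat' and 'toy_param' from the sketch following 'BPL_neg_loglik'
#(again guarded by if(FALSE); the grid is arbitrary): item 2's parameter is left missing (NA) and the negative
#log-likelihood is profiled over the grid of candidate values
if(FALSE) {
  toy_param_CA = toy_param;
  toy_param_CA["2"] = NA;
  #returns one negative log-likelihood per element of CA_param
  BPL_neg_loglik_CA(toy_param_CA,
                    CA_param = seq(0, 2, by = 0.5),
                    dat = toy_dat,
                    obs_weights = rep(1, nrow(toy_dat)));
}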
# function name and purpose: 'over_identified_cost' is a self-contained check to see if the parameter violates any of
# the constraints needed to identify a BPL-type model. It is designed to accompany 'BPL_neg_loglik_CA', thus the user may provide
# a vector 'CA_param' to separately determine whether any of the elements of 'CA_param' result in a constraint violation.
#
# author: Phil Boonstra (philb@umich.edu)
#
# date: 3/5/2019, 10:00am EST
#
# Function inputs:
#
### param: (vector with 'names' attribute, numeric) the parameter vector to evaluate with respect to the data. Must be named exactly
#as follows: '0' (optional, if the stopping process is to be modeled), then consecutive positive integer labels, one for each
#unique item, then 'log_delta1', then 'log_delta2'. Up to one element of 'param' can be missing (NA), in which case CA_param
# should be non-missing
#
### CA_param (vector, numeric) a grid of values to plug in for the missing element of 'param'. The function will return a vector
# of costs as long as this grid
#
### safe_mode: (logical) if TRUE, then perform error checks to make sure that all inputs are as expected. Set to FALSE to speed things up a bit
#
### tiny_positive: (numeric, positive) numerical tolerance for zero; values within 'tiny_positive' of zero are treated as exactly zero
#
### Value
# A vector as long as 'CA_param', if it is provided and non-missing, otherwise a scalar if 'param' is fully observed. Each element will
# be 0 if the function determines that the BPL is identified under this configuration, or a very large positive cost (1/tiny_positive) otherwise.
over_identified_cost = function(param,
CA_param,
safe_mode = TRUE,
tiny_positive = .Machine$double.eps^0.5) {
#name of parameter to grid-search over
CA_flag = names(param)[which(is.na(param))];
item_names = intersect(1:length(param), names(param));
if(safe_mode) {
if(tiny_positive <= 0) {warning("In 'over_identified_cost', 'tiny_positive' seems not to be a tiny positive...");}
if("log_delta1"%in%names(param) && CA_flag != "log_delta1" && param["log_delta1"] > 0) {stop("'log_delta1' must be non-positive");}
if("log_delta2"%in%names(param) && CA_flag != "log_delta2" && param["log_delta2"] > 0) {stop("'log_delta2' must be non-positive");}
if(length(CA_flag) && CA_flag == "log_delta1" && any(CA_param > 0)) {stop("'log_delta1' must be non-positive");}
if(length(CA_flag) && CA_flag == "log_delta2" && any(CA_param > 0)) {stop("'log_delta2' must be non-positive");}
}
#This block handles the case that CA_flag is an actual item
if(length(CA_flag) && !(CA_flag %in% c("0","log_delta1","log_delta2"))) {
theta_min = pmin(min(param[item_names],na.rm = TRUE), CA_param);
} else if(length(CA_flag) && (CA_flag %in% c("0","log_delta1","log_delta2"))) {
theta_min = rep(min(param[item_names]),length(CA_param));
} else {
#Otherwise we assume param is completely provided
theta_min = min(param[item_names]);
}
(abs(theta_min) > tiny_positive | theta_min < 0) / tiny_positive;
}
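#A quick sketch (illustrative values, guarded by if(FALSE)): the smallest item weight below is exactly zero, so the
#returned cost is 0; shifting both item weights up by, say, 0.5 would instead return the large positive cost
if(FALSE) {
  over_identified_cost(c("1" = 0, "2" = 1, "log_delta1" = -0.1, "log_delta2" = -0.1),
                       CA_param = NA);
}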
# function name and purpose: 'seamless_L0' counts the effective number of non-zero parameters in a given BPL-type model
# for a given value of tau.
#
# author: Phil Boonstra (philb@umich.edu)
#
# date: 3/5/2019, 10:00am EST
#
# Function inputs:
#
### param: (vector with 'names' attribute, numeric) the parameter vector to evaluate with respect to the data. Must be named exactly
#as follows: '0' (optional, if the stopping process is to be modeled), then consecutive positive integer labels, one for each
#unique item, then 'log_delta1', then 'log_delta2'. Up to one element of 'param' can be missing (NA), in which case CA_param
# should be non-missing
#
### CA_param (vector, numeric) a grid of values to plug in for the missing element of 'param'. The function will return a vector
# of effective parameter counts as long as this grid
#
### tau (numeric, non-negative) When positive, this makes each parameter's contribution to the count vary smoothly between 0 and 1.
#
### penalty_pow (numeric, positive) defaults to 1. This was not changed in the paper, but it is the power to which
# both 'tau' and each parameter are raised, which tends to make the shape of the seamless penalty more ridge-like close to zero.
#
### safe_mode: (logical) if TRUE, then perform error checks to make sure that all inputs are as expected. Set to FALSE to speed things up a bit
#
### num_uniq: (integer, positive) number of unique items. Only required if safe_mode == FALSE, otherwise will be set to the number of item labels in 'param'
#
### Value
# A vector as long as 'CA_param', if it is provided and non-missing, otherwise a scalar if 'param' is fully observed. Each element gives
# the effective number of non-zero parameters (the seamless-L0 count) under this configuration.
seamless_L0 = function(param,
CA_param,
tau,
penalty_pow = 1,
safe_mode = TRUE,
num_uniq = NULL) {
#name of parameter to grid-search over
CA_flag = names(param)[which(is.na(param))];
item_names = intersect(1:length(param), names(param));
fatigue_param_exists = ("0"%in%names(param));
if(safe_mode) {
num_uniq = length(item_names);
if("log_delta1"%in%names(param) && CA_flag != "log_delta1" && param["log_delta1"] > 0) {stop("'log_delta1' must be non-positive");}
if("log_delta2"%in%names(param) && CA_flag != "log_delta2" && param["log_delta2"] > 0) {stop("'log_delta2' must be non-positive");}
if(length(CA_flag) && CA_flag == "log_delta1" && any(CA_param > 0)) {stop("'log_delta1' must be non-positive");}
if(length(CA_flag) && CA_flag == "log_delta2" && any(CA_param > 0)) {stop("'log_delta2' must be non-positive");}
if(penalty_pow <= 0) {stop("'penalty_pow' must be strictly positive");}
if(tau < 0) {stop("'tau' must be non-negative");}
}
#This block handles the case that 'CA_param' is a grid with more than one value; the first sub-block is for when CA_flag is an actual item
if(length(CA_flag) && (length_CA_param <- length(CA_param)) > 1) {
if(!(CA_flag %in% c("0","log_delta1","log_delta2"))) {
theta_min = pmin(min(param[item_names],na.rm = TRUE), CA_param);
delta1 = exp(param["log_delta1"]);
delta2 = exp(param["log_delta2"]);
#foo is a three-dimensional array:
#first dimension (rows) has length equal to length of CA_param
#second dimension (columns) has length equal to length of item_names
#third dimension also has length equal to number of items to rank and corresponds to rank
foo = array(rep(sapply(param[item_names],"-",theta_min), each = num_uniq),
dim = c(length_CA_param, num_uniq, num_uniq),
dimnames = list(NULL,item_names,item_names));
foo[,,CA_flag] = CA_param - theta_min;
foo3 = array(rep(0:(num_uniq-1),each = length_CA_param) ,dim = dim(foo));
foo4 = foo * (delta2 * (delta1 ^ foo3) + (1 - delta2)^(2 * foo3 + 1));
#Final count:
result = fatigue_param_exists +
log2(1/(1 + abs(tau/param["log_delta1"])^penalty_pow) + 1) +
log2(1/(1 + abs(tau/param["log_delta2"])^penalty_pow) + 1) +
rowMeans(rowSums(log2(1/(1+(tau/foo4)^penalty_pow) + 1), dims = 2));
#This block handles the case that CA_flag is log_delta1
} else if(CA_flag == "log_delta1") {
theta_min = rep(min(param[item_names]), length(CA_param));
delta1 = exp(CA_param);
delta2 = exp(param["log_delta2"]);
foo = array(rep(sapply(param[item_names], "-", theta_min), each = num_uniq),
dim = c(length_CA_param, num_uniq, num_uniq),
dimnames = list(NULL, item_names, item_names));
#foo is a 3-dimensional array:
#first dimension has length equal to length of CA_param
#second dimension has length equal to number of items to rank and corresponds to rank (this is the dimension that CA is exploring)
#third dimension also has length equal to number of items to rank
foo2 = matrix(0:(num_uniq-1),nrow = length_CA_param, ncol = num_uniq, byrow = TRUE);
foo3 = matrix(delta1,nrow = length_CA_param, ncol = num_uniq);
foo4 = foo * array(delta2 * foo3 ^ foo2 + (1 - delta2)^(2 * foo2 + 1), dim = dim(foo));
#Final count:
result = fatigue_param_exists +
log2(1/(1 + abs(tau/CA_param)^penalty_pow) + 1) +
log2(1/(1 + abs(tau/param["log_delta2"])^penalty_pow) + 1) +
rowSums(rowMeans(log2(1/(1+(tau/foo4)^penalty_pow)+1), dims = 2));
#This block handles the case that CA_flag is log_delta2
} else if(CA_flag == "log_delta2") {
theta_min = rep(min(param[item_names]), length(CA_param));
delta1 = exp(param["log_delta1"]);
delta2 = exp(CA_param);
foo = array(rep(sapply(param[item_names], "-", theta_min), each = num_uniq),
dim = c(length_CA_param, num_uniq, num_uniq),
dimnames = list(NULL, item_names, item_names));
#foo is a 3-dimensional array:
#first dimension has length equal to length of CA_param
#second dimension has length equal to number of items to rank and corresponds to rank (this is the dimension that CA is exploring)
#third dimension also has length equal to number of items to rank
foo2 = matrix(0:(num_uniq-1),nrow = length_CA_param, ncol = num_uniq, byrow = TRUE);
foo3 = matrix(delta2,nrow = length_CA_param, ncol = num_uniq);
foo4 = foo * array(foo3 * delta1 ^ foo2 + (1 - foo3)^(2 * foo2 + 1), dim = dim(foo));
#Final count:
result = fatigue_param_exists +
log2(1/(1 + abs(tau/param["log_delta1"])^penalty_pow) + 1) +
log2(1/(1 + abs(tau/CA_param)^penalty_pow) + 1) +
rowSums(rowMeans(log2(1/(1+(tau/foo4)^penalty_pow)+1),dims=2));
} else if(CA_flag == "0") {
delta1 = exp(param["log_delta1"]);
delta2 = exp(param["log_delta2"]);
#
foo = matrix(param[item_names] - min(param[item_names]), nrow = num_uniq, ncol = num_uniq, byrow = TRUE);
foo2 = 0:(num_uniq-1);
foo3 = foo * matrix((delta2 * delta1 ^ (foo2) + (1 - delta2) ^ (2 * foo2 + 1)), nrow = num_uniq, ncol = num_uniq);
#Final count:
result = fatigue_param_exists +
log2(1/(1 + abs(tau/param["log_delta1"])^penalty_pow) + 1) +
log2(1/(1 + abs(tau/param["log_delta2"])^penalty_pow) + 1) +
mean(rowSums(log2(1/(1+(tau/foo3)^penalty_pow) + 1)));
}
} else {
#Otherwise we assume param is completely provided
if(length(CA_flag)) {
param[CA_flag] = CA_param;
}
theta_min = min(param[item_names]);
delta1 = exp(param["log_delta1"]);
delta2 = exp(param["log_delta2"]);
#
foo = matrix(param[item_names] - min(param[item_names]), nrow = num_uniq, ncol = num_uniq, byrow = TRUE);
foo2 = 0:(num_uniq-1);
foo3 = foo * matrix((delta2 * delta1 ^ (foo2) + (1 - delta2) ^ (2 * foo2 + 1)), nrow = num_uniq, ncol = num_uniq);
result = fatigue_param_exists +
log2(1/(1 + abs(tau/param["log_delta1"])^penalty_pow) + 1) +
log2(1/(1 + abs(tau/param["log_delta2"])^penalty_pow) + 1) +
mean(rowSums(log2(1/(1+(tau/foo3)^penalty_pow) + 1)));
}
result;
}
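#A quick sketch (illustrative values, guarded by if(FALSE)): item weights well above 'tau' each contribute roughly 1
#to the count, exact zeros contribute 0, and log_delta2 = 0 contributes 0
if(FALSE) {
  seamless_L0(c("1" = 0, "2" = 1, "3" = 2, "log_delta1" = -0.1, "log_delta2" = 0),
              CA_param = NA,
              tau = 1e-3);
}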
# function name and purpose: 'penRank_path' calculates the full solution path of a penalized BPL model as well as estimating
# the optimal penalty, namely the value of lambda minimizing the small-sample AIC or the BIC.
#
# author: Phil Boonstra (philb@umich.edu)
#
# date: 3/5/2019, 10:00am EST
#
# Function inputs:
#
### dat: (data.frame, integers, positive) A data.frame of ordered lists of positive integers. Each row is a different list, each column
# is a rank, each cell is the integer label of an item. So for example the [1,1] element gives the label of the item that the first
# ranker ordered first, i.e. ranked highest. The constraints are as follows: If any cell is non-missing, then every cell in that same
# row to the left should also be non-missing. Second, for any number appearing anywhere in this data.frame, every single number that is
# less than this number should appear at least once somewhere (not necessarily in the same list)
#
### obs_weights: (vector, integers, positive) A vector with length equal to the number of rows of 'dat' giving the number of observations that
# each list represents. This gives a simple way to considerably speed up the calculation if there are a large number of duplicated lists. Just
# provide a vector of 1's if each list represents a single ranker
#
### fixed: (either NULL or matrix with dimnames[[1]] being a subset of c('0', positive integers that are item labels, 'log_delta1',
# and 'log_delta2')). Each column is a set of fixed parameter values for the respective rowname. Leave any element of a column missing
# that you don't want to fix. So, for example, fixed = matrix(c(0, NA, 0, NA, 3, 3), nrow = 2, byrow = T, dimnames = list(c("0","2"),NULL)),
# means that you want to run the algorithm when the fatigue parameter (theta0) is fixed at 0 and everything else is estimated, and you
# want to run the algorithm when the item 2 parameter (theta2) is fixed at 3 and everything else is estimated, and you want to run the
# algorithm when both theta0 and theta2 are fixed at 0 and 3, respectively, with everything else to be estimated. This is all duplicated
# for each set of initial values provided. So, if num_inits = 2, this will imply six separate runs of the algorithm.
#
### num_inits: (integer, positive) the number of initial starting values
#
### conv_tol: (numeric, positive) convergence tolerance; looking back over the previous 'stable_reps' (default 5) iterations
# of the coordinate ascent algorithm, if the maximum absolute difference between any current parameter value and the previous
# 'stable_reps' values of the same parameter is no more than 'conv_tol', then numerical convergence is satisfied (as long as
# at least 'min_reps' [default 5] iterations have been run)
#
### tau (numeric, non-negative) When positive, this makes each parameter's contribution to the effective parameter count vary smoothly between 0 and 1.
#
### num_lambda (integer, positive) the maximum length of the grid of lambda values to evaluate over. The algorithm moves in
# increasing order, starting with the smallest value of lambda; not all lambdas will necessarily be evaluated, however:
# the algorithm will break after a minimum AIC is reached and it encounters a lambda that yields an AIC increase of 10% relative
# to the minimum. See 'consider_truncating_lambda_seq'
#
### lambda_min_ratio (numeric, positive) if 'lambda_seq' is not provided, the algorithm will calculate its own. First, it
# estimates the value lambda_max that results in the most parsimonious model, then calculates lambda_min = lambda_max * lambda_min_ratio * tau^penalty_pow,
# and then takes the equally spaced (on the log-scale) sequence of values between these two extremes.
#
### lambda_seq (numeric, positive) optional user-provided set of tuning parameters lambda. If not provided, the algorithm will
# calculate its own set.
#
### consider_truncating_lambda_seq (logical) defaults to TRUE. The algorithm starts with the smallest value of lambda and then
# calculates the solution path for increasing lambda. It is expected that the AIC will go down but then start to go up as lambda gets
# very large. Let lambda_best be the value in lambda_seq that gives the smallest (best) AIC. If consider_truncating_lambda_seq == TRUE,
# the algorithm will drop all values in lambda_seq greater than the smallest value of lambda_star such that
# (i) lambda_star > lambda_best and (ii) AIC(lambda_star) > 1.1 * AIC(lambda_best).
#
### penalty_pow (numeric, positive) defaults to 1. This was not changed in the paper, but it is the power to which
# both 'tau' and each parameter are raised, which tends to make the shape of the seamless penalty more ridge-like close to zero.
#
### proposal_seq (vector) the proposal sequence (Gamma in the manuscript) to use. The default behavior is as described in the manuscript.
# If provided, at a minimum 'proposal_seq' should be a set of at least two numbers, with at least one element >= conv_tol and one element
# <= -conv_tol.
#
### proposal_seq_half_length (integer, positive) an alternative to specifying 'proposal_seq'. This tells the algorithm to use as
# the set of positive numbers in the proposal sequence the set of equally log-spaced values between 'conv_tol' and 'max_proposal_seq'.
#
### max_proposal_seq (numeric, positive) the largest element of 'proposal_seq' to use
#
### stable_reps (integer, positive) the number of iterations to keep track of and look backwards over to determine when
# convergence has been achieved. See also 'conv_tol'
#
### min_reps (integer, positive) the minimum number of iterations to conduct, regardless of convergence
#
### multivariable_proposals (logical) should the multivariable proposals described in the manuscript be conducted? Some people are
# concerned about introducing a stochastic element into the CA algorithm, and setting this to FALSE removes the stochastic element of the
# optimization. Empirically, leaving it as TRUE speeds up convergence without changing any results.
#
### delta_lb (numeric, positive) this is a parameter that provides a hard lower bound for the values of delta1 and delta2. In theory this
# should not be needed, i.e. it should just be left at its natural lower bound of 0, but the algorithm seemed to misbehave sometimes without
# a positive lower bound.
#
### max_reps_kgt1 (integer, positive) the maximum allowed number of iterations for all but the smallest value of lambda.
#
### max_reps_keq1 (integer, positive) the maximum allowed number of iterations for the smallest value of lambda.
#
### random_seed (integer, positive) the random seed to use for initializing the starting parameter values
#
### verbose (logical) if TRUE, the algorithm's progress is reported, which tends to slow everything down
#
### safe_mode: (logical) if TRUE, then perform error checks to make sure that all inputs are as expected. Set to FALSE to speed things up a bit
#
### init_params: (data.frame with 'row.names' attribute) optional user-provided matrix of initial parameter values, one column per
# starting value; the number of columns implicitly gives the number of starting values. Must have rownames exactly as follows: '0' (optional, if the stopping process
# is to be modeled), then consecutive positive integer labels, one for each unique item, then 'log_delta1', then 'log_delta2'
#
### Value
#
###A list of many results from the fitting process:
## control = a list of the parameters used by the algorithm
## monitor_joint_shifts = a matrix with dimension c(4, length(lambda_seq)), giving the counts of the number of each type of multivariable
# proposal that was accepted. This is just to see if the multivariable proposals are actually offering sensible proposals
## hit_max_reps = matrix with num_inits * ncol(fixed) rows and num_lambda columns, indicating whether the algorithm quit due to hitting
# the maximum allowed number of iterations (either max_reps_keq1 for the first lambda or max_reps_kgt1 for other lambdas) instead
# of converging
## actual_reps = matrix with the same dimension as hit_max_reps giving the number of iterations until convergence or quitting
## num_eff_params = matrix with the same dimension as hit_max_reps giving the effective number of parameters, equivalently the L0 penalty
# before being scaled by lambda
## num_actual_params = matrix with the same dimension as hit_max_reps giving the actual number of parameters in the model
## all_neg_loglikelihood = matrix with the same dimension as hit_max_reps giving the negative log-likelihood values
## all_bic = matrix with the same dimension as hit_max_reps giving the bic values
## all_aic = matrix with the same dimension as hit_max_reps giving the aic values
## all_params = list as long as num_inits * ncol(fixed), with each element being a matrix with number of rows equal to the largest possible
# number of parameters and number of columns equal to the number of lambdas evaluated.
## best_fit_neg_loglikelihood = matrix with number of rows equal to the largest possible number of parameters and number of columns equal to
# num_inits * ncol(fixed), giving the best solution according to maximized log-likelihood
## best_neg_loglikelihood = matrix with four rows and number of columns equal to num_inits * ncol(fixed), giving the value of the negative
# log-likelihood, the value of lambda, the number of effective parameters, and the number of actual parameters corresponding to the fit with
# the largest log-likelihood
## best_fit_bic = similar to best_fit_neg_loglikelihood but using BIC as the objective function
## best_bic = similar to best_neg_loglikelihood but using BIC as the objective function
## best_fit_aic = similar to best_fit_neg_loglikelihood but using AIC as the objective function
## best_aic = similar to best_neg_loglikelihood but using AIC as the objective function
## runtime = the total runtime of the function
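#A sketch of a typical call to the function defined below (guarded by if(FALSE)); 'toy_dat' is the illustrative matrix
#from the sketch following 'BPL_neg_loglik', and the argument values here are arbitrary
if(FALSE) {
  path_fit = penRank_path(dat = toy_dat,
                          obs_weights = rep(1, nrow(toy_dat)),
                          num_inits = 2,
                          random_seed = 1,
                          verbose = TRUE);
  #fit selected by AIC and its summary statistics
  path_fit$best_fit_aic;
  path_fit$best_aic;
}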
penRank_path = function(dat,
obs_weights = rep(1, nrow(dat)),
fixed = NULL,
num_inits = 4,
conv_tol = 1e-3,
tau = NULL,
num_lambda = 200,
lambda_min_ratio = 1e-5,
lambda_seq = NULL,
consider_truncating_lambda_seq = TRUE,
penalty_pow = 1,
proposal_seq = NULL,
proposal_seq_half_length = NULL,
max_proposal_seq = 1,
stable_reps = 5,
min_reps = 5,
multivariable_proposals = TRUE,
delta_lb = 0.25,
max_reps_kgt1 = 1e3,
max_reps_keq1 = 250,
random_seed = sample(.Machine$integer.max,1),
verbose = FALSE,
safe_mode = FALSE,
init_params = NULL) {
# + Initialize ----
begin = Sys.time();
min_reps = max(min_reps, stable_reps);
#All parameter estimates are always rounded to the specified convergence tolerance
sig_digits = ceiling(abs(log10(conv_tol)));
#Translate the lower bound of the dampening function's parameters appropriately;
log_delta_lb = floor(log(delta_lb) * (10^sig_digits))/(10^sig_digits);
#Numerical tolerance for zero;
tiny_positive = .Machine$double.eps^0.5;
num_obs = nrow(dat);
num_ranks = ncol(dat) + 1;
num_uniq = sum(!is.na(unique(as.numeric(dat))));
max_columns = min(num_uniq, num_ranks);
item_names = as.character(1:num_uniq);
if(any(is.na(dat))) {
param_names = c("0",item_names,"log_delta1","log_delta2");
} else {
param_names = c(item_names,"log_delta1","log_delta2");
}
#Check for invalid / problematic arguments
if(tiny_positive > conv_tol || conv_tol > 1) {stop(c("'conv_tol' should be between 'tiny_positive', i.e. ",formatC(tiny_positive,format="e",digits = 2),", and 1"));}
if(num_obs != length(obs_weights)) {stop("'obs_weights' must have length equal to 'nrow(dat)'");}
if(num_uniq != max(dat,na.rm = TRUE)) {stop("Item labels in 'dat' must be consecutively labeled integers starting with 1");}
if(!isTRUE(all(apply(apply(is.na(cbind(dat,NA)),1,cumsum),2,diff) | (apply(is.na(dat),1,cumsum) == 0))) |
isTRUE(any(apply(dat,1,duplicated) & !apply(dat,1,is.na)))) {
stop("'dat' must contain uninterrupted left-to-right sets of unique integers, i.e. no NAs between items");
}
if(max(colSums(is.na(dat))) == num_obs) {
stop("One or more columns of 'dat' contain only NAs");
}
if(!is.null(init_params) && !all(rownames(init_params) == param_names)) {stop("If provided, 'init_params' must have rownames exactly as follows: '0' (but only if any lists stop early), then consecutive positive integer labels, one for each unique item, then 'log_delta1', then 'log_delta2'");}
if(!is.null(init_params) && sum(init_params[item_names,] < 0)) {stop("There are negative item weights in the user-provided matrix of 'init_params'; all item weights should be non-negative")}
if((is.null(rownames(fixed)) || !all(rownames(fixed)%in%param_names)) && !is.null(fixed)) {stop("'fixed' must be NULL or a matrix with rownames being a subset of the parameter names");}
if(is.null(proposal_seq) && !is.null(max_proposal_seq) && max_proposal_seq < conv_tol) {stop("'max_proposal_seq' must be greater than 'conv_tol'");}
if(is.null(proposal_seq) && !is.null(proposal_seq_half_length) && (proposal_seq_half_length < 1 || proposal_seq_half_length %% 1 != 0)) {stop("'proposal_seq_half_length' must be a positive integer");}
if(!is.null(proposal_seq) && !(any(proposal_seq <= -conv_tol) && any(proposal_seq >= conv_tol))) {stop(paste0("At a minimum, 'proposal_seq' should be a set of at least two numbers, with at least one element >= ", formatC(conv_tol,format="e", digits = 2), " and one element <= ", formatC(-conv_tol,format="e", digits = 2)));}
if(penalty_pow <= 0) {stop("'penalty_pow' must be strictly positive");}
if(!is.null(tau) && tau <= 0) {stop("'tau', if provided, must be strictly positive");}
#Additional setup
if(is.null(tau)) {
tau = conv_tol;
}
if(!is.null(lambda_seq)) {
num_lambda = length(lambda_seq);
}
length_params = length(param_names);
list_lengths = unlist(lapply(apply(apply(cbind(dat,NA),1,is.na),2,which),"[",1)) - 1;
sum_obs_weights = sum(obs_weights);
#If fixed is null, then all parameters are to be estimated
if(is.null(fixed)) {
fixed = matrix(numeric(0));
#If init_params is provided but does not match num_inits, force it to be so;
if(!is.null(init_params) && ncol(init_params) != num_inits) {
num_inits = ncol(init_params);
cat("Forcing 'num_inits = ncol(init_params)'\n");
}
#If init_params is provided but does not match num_inits, force it to be so and note that any provided initial values that are also in fixed will be overwritten
} else if(!is.null(init_params) && ncol(init_params) != num_inits) {
num_inits = ncol(init_params);
cat("Forcing 'num_inits = ncol(init_params)' and overwriting all values in rows of 'init_param' that are provided in matched rows of 'fixed'\n");
}
if(!(length(multivariable_proposals) %in% c(1, num_inits))) {stop("'multivariable_proposals' should either be a single logical or a vector of logicals as long as the number of initial values ('num_inits')");}
multivariable_proposals = rep(multivariable_proposals, length = num_inits * ncol(fixed));
#Set up 'init_params'
set.seed(random_seed);
if(is.null(init_params)) {
init_params = matrix(0, nrow = length_params,ncol = num_inits * ncol(fixed), dimnames = list(param_names,NULL));
if("0"%in%param_names) {
init_params["0", ] = rnorm(num_inits, sd = 2.5);
}
init_params[item_names, ] = rexp(num_uniq * num_inits, rate = 0.5);
init_params["log_delta1",] = rep(log(runif(num_inits, delta_lb, 1)), each = ncol(init_params) / num_inits);
init_params["log_delta2",] = rep(log(runif(num_inits, delta_lb, 1)), each = ncol(init_params) / num_inits);
} else {
#If 'init_params' is provided and fixed is also provided, 'init_params' will be replicated, once for each column of fixed
init_params = matrix(rep(init_params,ncol(fixed)),
nrow = length_params,
dimnames = list(rownames(init_params), NULL));
}
#'fixed_indicators' matches the dimensions of 'init_params' and indicates which parameters in which column are to be left alone, i.e. fixed
fixed_indicators = matrix(0, nrow = nrow(init_params), ncol = ncol(init_params), dimnames = list(param_names,NULL));
if(length(fixed)) {
for(m in rownames(fixed)) {
foo = rep(fixed[m,], each = num_inits);
#This is the overwriting step; any fixed parameters are initialized as such and left alone
init_params[m,] = ifelse(is.na(foo),init_params[m,], foo);
fixed_indicators[m,] = ifelse(is.na(foo),fixed_indicators[m,], 1);
}
rm(foo);
}
#Start off everything so that the smallest initial item weight is always exactly zero, while ensuring that fixed parameters are not overwritten
if(!any(item_names %in% rownames(fixed))) {
init_params[item_names,] = init_params[item_names,,drop = FALSE] - matrix(apply(init_params[item_names,,drop = FALSE],2,min),nrow = num_uniq, ncol = ncol(init_params),byrow = TRUE);
} else {
for(i in 1:ncol(init_params)) {
item_names_to_estimate = setdiff(item_names,param_names[which(fixed_indicators[,i] == 1)]);
init_params[item_names_to_estimate,i] = init_params[item_names_to_estimate,i,drop = FALSE] - min(init_params[item_names,i,drop = FALSE]);
}
rm(item_names_to_estimate);
}
#Check that the dampening parameters are in their proper support; note: the random initial values are constructed to always pass this check
if(any(init_params["log_delta1",] > 0) || any(init_params["log_delta1",] < log_delta_lb)) {stop("All initial values of 'log_delta1' must be in ['log_delta_lb', 0]");}
if(any(init_params["log_delta2",] > 0) || any(init_params["log_delta2",] < log_delta_lb)) {stop("All initial values of 'log_delta2' must be in ['log_delta_lb', 0]");}
init_params = round(init_params, sig_digits);
#Calculate this now to avoid re-calculating at each call to the lower-level functions
all_item_counts_by_rank = matrix(NA, nrow = num_uniq, ncol = max_columns, dimnames = list(item_names, NULL));
for(m in as.numeric(item_names)) {
all_item_counts_by_rank[m,] = c(colSums(obs_weights * (dat == m), na.rm = TRUE), 0)[1:max_columns]
}
store_bic = store_aic = matrix(Inf, nrow = num_inits * ncol(fixed), ncol = num_lambda);
hit_max_reps = actual_reps = matrix(0, nrow = num_inits * ncol(fixed), ncol = num_lambda);
best_fit_bic = best_fit_aic = best_fit_neg_loglikelihood =
init_params * 0;
store_params = vector("list",num_inits * ncol(fixed));
if(is.null(proposal_seq)) {
if(is.null(proposal_seq_half_length)) {
proposal_seq_half_length = max(2, sig_digits);
}
proposal_seq = round(exp(seq(log(conv_tol), log(max_proposal_seq), length = proposal_seq_half_length)), sig_digits);
proposal_seq = sort(c(-proposal_seq, proposal_seq));
}
positive_proposal_seq = proposal_seq[proposal_seq > tiny_positive];
#The initial 'lambda_seq' is equally spaced (on the log-scale) with distance 1/2, and then after an initial
#lambda_max is identified, it is further refined by looking at equally spaced increments of 1/8 above and below
if(is.null(lambda_seq)) {
#Remember all of the true values of these tuning parameters; we will reset them after 'lambda_seq' has been constructed
identifying_max_lambda =
first_pass = TRUE;
num_lambda_requested = num_lambda;
stable_reps_actual = stable_reps;
min_reps_actual = min_reps;
tau_actual = tau;
stable_reps = min_reps = 3;
tau = min(1e-2, tau);
construction_message = "\n Constructing 'lambda_seq' (first pass) \n";
lambda_seq = exp(seq(log(length_params * sum_obs_weights),log(length_params * sum_obs_weights) + 2, length = 5));
} else {
identifying_max_lambda = FALSE;
}
#There are potentially multiple passes through this top loop. All but the last pass are for constructing the value of 'lambda_seq'.
#Thus if it's provided, there will only be one pass through this top loop
while(T) {
num_lambda = length(lambda_seq);
store_num_eff_params =
store_num_actual_params =
store_neg_loglikelihood = matrix(Inf, nrow = num_inits * ncol(fixed), ncol = num_lambda);
#the values of j_start and j_end depend upon whether this 'lambda_seq' is still being constructed or if this is the actual
#solution path. If the former, then we don't care about the multiple initial starts and only focus on the first.
j_start = ifelse(identifying_max_lambda, which.min(colSums(fixed_indicators)), 1);
j_end = ifelse(identifying_max_lambda, j_start, num_inits * ncol(fixed));
j = j_start;
#Count the number of times that a multivariate proposal was accepted
monitor_joint_shifts = matrix(0, nrow = 4, ncol = num_lambda, dimnames = list(c("shift up","shift down", "permute estimates","swap delta"),lambda_seq));
#Loop through the initial starting values of the parameters
for(j in j_start:j_end) {
min_possible_num_param = seamless_L0(init_params[,j] * fixed_indicators[,j], tau = tau, penalty_pow = penalty_pow, safe_mode = TRUE, num_uniq = num_uniq);
which_to_estimate = setdiff(param_names,param_names[which(fixed_indicators[,j] == 1)]);
which_to_estimate_except_dampening = setdiff(which_to_estimate,c("log_delta1","log_delta2"));
which_dampening_to_estimate = intersect(which_to_estimate,c("log_delta1","log_delta2"));
which_items_to_estimate = setdiff(which_to_estimate,c("0","log_delta1","log_delta2"));
num_items_to_estimate = length(which_items_to_estimate);
store_params[[j]] = matrix(NA, nrow = length_params, ncol = num_lambda, dimnames = list(param_names,formatC(lambda_seq,format="f",digits=3)));
if(!identifying_max_lambda && verbose) {
cat("\n Initial Value ", j, "\n");
} else if(verbose) {
cat(construction_message);
}
param_for_compare = matrix(NA,nrow = length_params, ncol = stable_reps + 1, dimnames = list(param_names,NULL));
param_for_compare[,1] = init_params[,j];
k = 1;
while(k <= num_lambda) {
if(verbose && !identifying_max_lambda) {cat("\n====",j,"--",k,"====\n");}
lambda = lambda_seq[k];
i = curr_column = 1;
previous_column = stable_reps + 1;
#curr_best_cost keeps track of the smallest achieved penalized log-likelihood that we've achieved so far with this lambda;
#the corresponding parameter values that yield this best cost will be kept in param_for_compare[,curr_column];
curr_best_cost = BPL_neg_loglik_CA(param_for_compare[,curr_column],NA,dat,obs_weights,safe_mode = safe_mode, num_uniq, num_obs, num_ranks, max_columns, list_lengths) +
lambda * seamless_L0(param_for_compare[,curr_column],NA, tau = tau, penalty_pow = penalty_pow, safe_mode = safe_mode, num_uniq = num_uniq) +
over_identified_cost(param_for_compare[,curr_column],NA, safe_mode = safe_mode, tiny_positive = tiny_positive);
while((i <= min_reps) || max(abs(param_for_compare[which_to_estimate,-curr_column] - param_for_compare[which_to_estimate,curr_column])) >= conv_tol - tiny_positive) {
#First consider coordinate-wise maximization (skipping if the previous full iteration didn't result in any changes)
if((i <= min_reps) || max(abs(param_for_compare[which_to_estimate,ifelse(previous_column == 1, stable_reps + 1, previous_column - 1)] -
param_for_compare[which_to_estimate,curr_column])) > tiny_positive) {
curr_column = 1 + (curr_column)%%(stable_reps + 1);
previous_column = 1 + (previous_column)%%(stable_reps + 1);
curr_try = param_for_compare[,previous_column];
for(m in sample(which_to_estimate)) {
#start with the current values, and for exactly one parameter, propose a new sequence of values to consider, subject to the natural bounds,
#and always ensuring that the 'null' value is proposed
if(m %in% c("log_delta1","log_delta2")) {
CA_curr_try = unique(pmax(log_delta_lb, pmin(0, curr_try[m] + proposal_seq)));
CA_param_counts_by_rank = NULL;
} else if(m == "0") {
CA_curr_try = curr_try[m] + proposal_seq;
CA_param_counts_by_rank = NULL;
} else {
CA_curr_try = unique(pmax(0, c(0, curr_try[m] + proposal_seq)));
CA_param_counts_by_rank = all_item_counts_by_rank[m,];
}
curr_try[m] = NA;
curr_try_cost =
BPL_neg_loglik_CA(curr_try,
CA_curr_try,
dat = dat,
obs_weights = obs_weights,
safe_mode = safe_mode,
num_uniq = num_uniq,
num_obs = num_obs,
num_ranks = num_ranks,
max_columns = max_columns,
list_lengths = list_lengths,
CA_param_counts_by_rank = CA_param_counts_by_rank) +
lambda * seamless_L0(curr_try,CA_curr_try, tau = tau, penalty_pow = penalty_pow, safe_mode = safe_mode, num_uniq = num_uniq) +
over_identified_cost(curr_try,CA_curr_try, safe_mode = safe_mode, tiny_positive = tiny_positive);
if(curr_try_cost[which_min_curr_try_cost <- which.min(curr_try_cost)] < curr_best_cost) {
curr_try[m] = CA_curr_try[which_min_curr_try_cost];
curr_best_cost = curr_try_cost[which_min_curr_try_cost];
} else {
curr_try[m] = param_for_compare[m,previous_column]
}
}
param_for_compare[,curr_column] = curr_try
} else {
if(verbose) {cat("skipping univariate CA\n\n");}
curr_column = 1 + (curr_column)%%(stable_reps + 1);
previous_column = 1 + (previous_column)%%(stable_reps + 1);
param_for_compare[,curr_column] = param_for_compare[,previous_column];
}
if(multivariable_proposals[j]) {
#Now consider some multivariable adjustments to discourage getting stuck at local maxima
shift_up = shift_down = permute_estimates = swap_delta =
param_for_compare[,curr_column];
#shift_down: Bring all item weights down by a constant amount (but item parameters should be non-negative); make dampening parameter closer to zero
shift_down_amount = sample(positive_proposal_seq,size = 1)
shift_down[which_items_to_estimate] = pmax(0, shift_down[which_items_to_estimate] - shift_down_amount);
#shift_up: Shift up all positive weights by a constant amount and one random zero weight by that same amount (but only if there is more than one); make dampening parameter closer to zero
shift_up_amount = sample(positive_proposal_seq,size = 1);
zero_items = item_names[which(param_for_compare[item_names,curr_column] <= tiny_positive)];
if(length(zero_items) > 1 && length(estimated_zero_items <- intersect(which_items_to_estimate, zero_items))) {
items_to_shift_up = c(sample(estimated_zero_items, 1), setdiff(which_items_to_estimate, zero_items));
} else {
items_to_shift_up = setdiff(which_items_to_estimate, zero_items);
}
shift_up[items_to_shift_up] = shift_up[items_to_shift_up] + shift_up_amount;
#permute_estimates: randomly permute estimated item weights up or down one place; make dampening parameter closer to zero
old_order = order(permute_estimates[which_items_to_estimate]);
new_order = old_order[order((1:num_items_to_estimate) + sample(c(-1,0,1), num_items_to_estimate, replace = TRUE,prob = c(1,2,1)))];
permute_estimates[which_items_to_estimate[old_order]] = permute_estimates[which_items_to_estimate[new_order]];
rm(new_order,old_order);
if("0" %in% which_to_estimate) {
shift_up["0"] = shift_up["0"] + sample(proposal_seq,size = 1);
shift_down["0"] = shift_down["0"] + sample(proposal_seq,size = 1);
permute_estimates["0"] = permute_estimates["0"] + sample(proposal_seq,size = 1);
}
if(length(which_dampening_to_estimate)) {
shift_up[which_dampening_to_estimate] =
pmax(log_delta_lb, pmin(0, shift_up[which_dampening_to_estimate] + sample(proposal_seq,size = 1)));
shift_down[which_dampening_to_estimate] =
pmin(0, shift_down[which_dampening_to_estimate] + sample(positive_proposal_seq,size = 1));
permute_estimates[which_dampening_to_estimate] =
pmin(0, permute_estimates[which_dampening_to_estimate] + sample(positive_proposal_seq,size = 1));
}
#Calculate, store the penalized log-likelihoods of these multivariable proposals.
shift_down_cost = BPL_neg_loglik(shift_down,dat,obs_weights,fixed = NULL,safe_mode = safe_mode, num_uniq, num_obs, num_ranks, max_columns, list_lengths) +
lambda * seamless_L0(shift_down, NA, tau = tau, penalty_pow = penalty_pow, safe_mode = safe_mode, num_uniq = num_uniq) +
over_identified_cost(shift_down, NA, safe_mode = safe_mode, tiny_positive = tiny_positive);
shift_up_cost = BPL_neg_loglik(shift_up,dat,obs_weights,fixed = NULL,safe_mode = safe_mode, num_uniq, num_obs, num_ranks, max_columns, list_lengths) +
lambda * seamless_L0(shift_up, NA, tau = tau, penalty_pow = penalty_pow, safe_mode = safe_mode, num_uniq = num_uniq) +
over_identified_cost(shift_up, NA, safe_mode = safe_mode, tiny_positive = tiny_positive);
permute_estimates_cost = BPL_neg_loglik(permute_estimates, dat, obs_weights, fixed = NULL,safe_mode = safe_mode, num_uniq, num_obs, num_ranks, max_columns, list_lengths) +
lambda * seamless_L0(permute_estimates, NA, tau = tau, penalty_pow = penalty_pow, safe_mode = safe_mode, num_uniq = num_uniq) +
over_identified_cost(permute_estimates, NA, safe_mode = safe_mode, tiny_positive = tiny_positive);
#swap_delta: if both dampening parameters are to be estimated, consider a simpler dampening model that sets log_delta2 = 0 while providing an equivalent amount
#of dampening in the first two stages (stage-1 dampening always equals exactly 1; see the standalone check following this function)
if(length(which_dampening_to_estimate) == 2) {
stage2_dampening = exp(sum(swap_delta[c("log_delta1","log_delta2")])) + (1-exp(swap_delta["log_delta2"]))^3;
swap_delta[c("log_delta1","log_delta2")] =
pmax(log_delta_lb, pmin(0,c(round(log(stage2_dampening),sig_digits),0)));
swap_delta_cost = BPL_neg_loglik(swap_delta,dat,obs_weights,fixed = NULL,safe_mode = safe_mode, num_uniq, num_obs, num_ranks, max_columns, list_lengths) +
lambda * seamless_L0(swap_delta, NA, tau = tau, penalty_pow = penalty_pow, safe_mode = safe_mode, num_uniq = num_uniq) +
over_identified_cost(swap_delta, NA, safe_mode = safe_mode, tiny_positive = tiny_positive);
} else {
swap_delta_cost = Inf;
}
if(permute_estimates_cost < min(swap_delta_cost, shift_down_cost, shift_up_cost, curr_best_cost)) {
if(verbose) {cat("permuting estimates\n\n");}
curr_best_cost = permute_estimates_cost;
param_for_compare[,curr_column] = permute_estimates;
monitor_joint_shifts["permute estimates", k] = monitor_joint_shifts["permute estimates", k] + 1;
} else if(swap_delta_cost < min(shift_down_cost, shift_up_cost, curr_best_cost)) {
if(verbose) {cat("swapping delta\n\n");}
curr_best_cost = swap_delta_cost;
param_for_compare[,curr_column] = swap_delta;
monitor_joint_shifts["swap delta", k] = monitor_joint_shifts["swap delta", k] + 1;
} else if (shift_down_cost < min(shift_up_cost, curr_best_cost)) {
if(verbose) {cat("shifting down\n\n");}
curr_best_cost = shift_down_cost;
param_for_compare[,curr_column] = shift_down;
monitor_joint_shifts["shift down", k] = monitor_joint_shifts["shift down", k] + 1;
} else if(shift_up_cost < curr_best_cost) {
if(verbose) {cat("shifting up\n\n");}
curr_best_cost = shift_up_cost;
param_for_compare[,curr_column] = shift_up;
monitor_joint_shifts["shift up", k] = monitor_joint_shifts["shift up", k] + 1;
}
}
if(i >= ifelse(k == 1, max_reps_keq1, max_reps_kgt1)) {
i = i + 1;
break;
}
i = i + 1;
}
if(verbose && !identifying_max_lambda) {
if(k == 1) {
cat(i - 1,formatC((param_for_compare[which_to_estimate,curr_column] - init_params[which_to_estimate,j]),format="f"),"\n");
} else {
cat(i - 1,formatC((param_for_compare[which_to_estimate,curr_column] - store_params[[j]][which_to_estimate,k-1]),format="f"),"\n");
}
}
store_num_actual_params[j,k] = seamless_L0(param_for_compare[,curr_column],CA_param = NULL, tau = tiny_positive, penalty_pow = penalty_pow, safe_mode = safe_mode, num_uniq = num_uniq);
store_num_eff_params[j,k] = seamless_L0(param_for_compare[,curr_column],CA_param = NULL, tau = tau, penalty_pow = penalty_pow, safe_mode = safe_mode, num_uniq = num_uniq);
#If we are actually calculating the solution path (as opposed to constructing 'lambda_seq'), then store the results
if(!identifying_max_lambda) {
store_neg_loglikelihood[j,k] = BPL_neg_loglik(param_for_compare[,curr_column],dat,obs_weights,fixed = NULL,safe_mode = safe_mode, num_uniq, num_obs, num_ranks, max_columns, list_lengths);
store_bic[j,k] = 2 * store_neg_loglikelihood[j,k] +
log(sum_obs_weights) * store_num_actual_params[j,k];
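#the AIC below uses an AICc-type small-sample correction: the usual penalty of 2 * (number of actual parameters) is
#inflated by sum_obs_weights / (sum_obs_weights - num_eff_params - 1), with the denominator floored at tiny_positive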
store_aic[j,k] = 2 * store_neg_loglikelihood[j,k] +
2 * store_num_actual_params[j,k] * (sum_obs_weights / pmax(tiny_positive, sum_obs_weights - store_num_eff_params[j,k] - 1));
store_params[[j]][,k] = param_for_compare[,curr_column];
actual_reps[j,k] = i - 1;
if(i >= ifelse(k == 1, max_reps_keq1, max_reps_kgt1)) {
hit_max_reps[j,k] = 1;
}
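#optional early termination of the lambda path: once the AIC exceeds its running minimum by more than 10%,
#jump k to num_lambda so that the remaining (larger) values of lambda are skipped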
if(consider_truncating_lambda_seq && store_aic[j,k] > (1.1 * min(store_aic[j,]))) {
k = num_lambda;
}
}
k = k + 1;
}
if(!identifying_max_lambda) {
store_params[[j]] = round(store_params[[j]], sig_digits);
best_fit_bic[,j] = store_params[[j]][,which.min(store_bic[j,])];
best_fit_aic[,j] = store_params[[j]][,which.min(store_aic[j,])];
best_fit_neg_loglikelihood[,j] = store_params[[j]][,which.min(store_neg_loglikelihood[j,])];
}
}
#If we are actually calculating the solution path, then we're done!
if(!identifying_max_lambda) {
break;
} else if(first_pass) {
#Grow the largest value of lambda until maximum parsimony is achieved
which_lambda_max = min(c(which(store_num_eff_params[j,1:num_lambda] <= min_possible_num_param + tiny_positive)),num_lambda + 1);
if(which_lambda_max == 1) {
lambda_seq = c(exp(seq(log(lambda_seq[1]) - 2.5, log(lambda_seq[1]) - 0.5, length = 5)),lambda_seq);
} else if(which_lambda_max == num_lambda + 1) {
lambda_seq = c(lambda_seq, exp(seq(log(lambda_seq[which_lambda_max - 1]) + 0.5, log(lambda_seq[which_lambda_max - 1]) + 2.5, length = 5)));
} else {
lambda_seq = exp(seq(log(lambda_seq[1]), log(lambda_seq[which_lambda_max]), by = 1/4));
first_pass = FALSE;
construction_message = "\n Constructing 'lambda_seq' (second pass) \n";
}
rm(which_lambda_max);
} else {
#Refine the value of lambda_max to get a more precise estimate of the smallest value that still yields maximum parsimony
which_lambda_max = min(c(which(store_num_eff_params[j,1:num_lambda] <= min_possible_num_param + tiny_positive)),num_lambda + 1);
if(which_lambda_max == 1) {
log_lambda_max = log(lambda_seq[1]) - 1/4;
} else if(which_lambda_max == num_lambda + 1) {
log_lambda_max = log(lambda_seq[num_lambda]) + 1/4;
} else {
log_lambda_max = log(lambda_seq[which_lambda_max]);
}
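#final grid: num_lambda_requested values equally spaced on the log scale, running from
#log_lambda_max + log(lambda_min_ratio) + penalty_pow * log(tau_actual) up to log_lambda_max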
lambda_seq = exp(seq(log_lambda_max + log(lambda_min_ratio) + penalty_pow * log(tau_actual), log_lambda_max, length = num_lambda_requested));
identifying_max_lambda = FALSE;
stable_reps = stable_reps_actual;
min_reps = min_reps_actual;
tau = tau_actual;
store_params = vector("list",num_inits * ncol(fixed));
rm(log_lambda_max, which_lambda_max, stable_reps_actual, min_reps_actual, num_lambda_requested);
}
}
#If any of the prepopulated results weren't filled in because 'consider_truncating_lambda_seq == TRUE', then drop these
if(any(colSums(store_neg_loglikelihood == Inf) == nrow(store_neg_loglikelihood))) {
keep_columns = which(colSums(store_neg_loglikelihood == Inf) < nrow(store_neg_loglikelihood));
lambda_seq = lambda_seq[keep_columns];
store_bic = store_bic[, keep_columns, drop = FALSE];
store_aic = store_aic[, keep_columns, drop = FALSE];
store_neg_loglikelihood = store_neg_loglikelihood[, keep_columns, drop = FALSE];
store_num_eff_params = store_num_eff_params[, keep_columns, drop = FALSE];
store_num_actual_params = store_num_actual_params[, keep_columns, drop = FALSE];
monitor_joint_shifts = monitor_joint_shifts[, keep_columns, drop = FALSE];
hit_max_reps = hit_max_reps[, keep_columns, drop = FALSE];
actual_reps = actual_reps[, keep_columns, drop = FALSE];
for(j in 1:length(store_params)) {
store_params[[j]] = store_params[[j]][, keep_columns, drop = FALSE];
}
}
colnames(store_aic) =
colnames(store_bic) =
colnames(monitor_joint_shifts) =
colnames(store_neg_loglikelihood) =
colnames(store_num_eff_params) =
colnames(store_num_actual_params) =
colnames(hit_max_reps) =
colnames(actual_reps) =
formatC(lambda_seq, format = "f", digits = sig_digits);
best_neg_loglikelihood = rbind(apply(store_neg_loglikelihood,1,min),
lambda_seq[apply(store_neg_loglikelihood,1,which.min)],
store_num_eff_params[cbind(1:nrow(store_num_eff_params),apply(store_neg_loglikelihood,1,which.min))],
store_num_actual_params[cbind(1:nrow(store_num_actual_params),apply(store_neg_loglikelihood,1,which.min))]);
best_bic = rbind(apply(store_bic,1,min),
store_neg_loglikelihood[cbind(1:nrow(store_num_eff_params),apply(store_bic,1,which.min))],
lambda_seq[apply(store_bic,1,which.min)],
store_num_eff_params[cbind(1:nrow(store_num_eff_params),apply(store_bic,1,which.min))],
store_num_actual_params[cbind(1:nrow(store_num_actual_params),apply(store_bic,1,which.min))]);
best_aic = rbind(apply(store_aic,1,min),
store_neg_loglikelihood[cbind(1:nrow(store_num_eff_params),apply(store_aic,1,which.min))],
lambda_seq[apply(store_aic,1,which.min)],
store_num_eff_params[cbind(1:nrow(store_num_eff_params),apply(store_aic,1,which.min))],
store_num_actual_params[cbind(1:nrow(store_num_actual_params),apply(store_aic,1,which.min))]);
rownames(best_neg_loglikelihood) =
c("neg_loglikelihood", "lambda","num_eff_params", "num_actual_params");
rownames(best_bic) =
c("bic","neg_loglikelihood", "lambda","num_eff_params", "num_actual_params");
rownames(best_aic) =
c("aic", "neg_loglikelihood","lambda","num_eff_params", "num_actual_params");
if(num_lambda > 1 && (any(apply(store_bic,1,which.min) == 1) || any(apply(store_aic,1,which.min) == 1))) {
cat("The smallest AIC and/or BIC occurred at the minimum value of lambda; if this is unexpected, consider decreasing the minimum value of lambda\n");
}
return(list(control = list(conv_tol = conv_tol,
tau = tau,
penalty_pow = penalty_pow,
lambda_seq = lambda_seq,
consider_truncating_lambda_seq = consider_truncating_lambda_seq,
proposal_seq = proposal_seq,
stable_reps = stable_reps,
min_reps = min_reps,
max_reps_keq1 = max_reps_keq1,
max_reps_kgt1 = max_reps_kgt1,
random_seed = random_seed,
delta_lb = delta_lb,
init_params = init_params,
fixed = fixed),
monitor_joint_shifts = monitor_joint_shifts,
hit_max_reps = hit_max_reps,
actual_reps = actual_reps,
num_eff_params = store_num_eff_params,
num_actual_params = store_num_actual_params,
all_neg_loglikelihood = store_neg_loglikelihood,
all_bic = store_bic,
all_aic = store_aic,
all_params = store_params,
best_fit_neg_loglikelihood = best_fit_neg_loglikelihood,
best_neg_loglikelihood = best_neg_loglikelihood,
best_fit_bic = best_fit_bic,
best_bic = best_bic,
best_fit_aic = best_fit_aic,
best_aic = best_aic,
runtime = difftime(Sys.time(), begin, units = "secs")));
}
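#A standalone check of the 'swap_delta' proposal used above (illustrative values only): under the Section 3.1
#parameterization, the stage-s dampening is exp(log_delta2 + (s - 1) * log_delta1) + (1 - exp(log_delta2))^(2 * s - 1),
#which equals 1 at s = 1 for any values of the dampening parameters. Setting log_delta2 = 0 and
#log_delta1 = log(stage-2 dampening) therefore leaves the dampening of the first two stages unchanged.
#Wrapped in if(FALSE) so that sourcing this file does not execute it:
if(FALSE) {
  ld1 = -0.4; ld2 = -0.3; #hypothetical dampening parameters
  stage2_dampening = exp(ld1 + ld2) + (1 - exp(ld2))^3;
  #under the swapped model, i.e. log_delta1 = log(stage2_dampening) and log_delta2 = 0:
  exp(0 + 0 * log(stage2_dampening)) + (1 - exp(0))^1; #stage 1: equals 1, matching the original model
  exp(0 + 1 * log(stage2_dampening)) + (1 - exp(0))^3; #stage 2: equals stage2_dampening, matching the original model
}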
# function name and purpose: 'bpl_probs' calculates the model-based choice probabilities based upon the provided parameter values and the
# given stage. Useful for simulating from fitted models.
#
# author: Phil Boonstra (philb@umich.edu)
#
# date: 3/5/2019, 10:00am EST
#
# Function inputs:
#
### param: (vector with 'names' attribute, numeric) the parameter vector from which to calculate the probabilities. Must be named exactly
# as follows: '0' (optional, if the stopping process is to be modeled), then a set of item labels, then 'log_delta1', then 'log_delta2'.
# In contrast to other functions, the item labels need be neither complete nor consecutive. You would leave out '7', for example, if
# item '7' was put first at the first stage and you want the stage-2 probabilities conditional upon item 7 having already been ranked.
#
### stage: (integer, positive) the stage at which to calculate the probabilities. Remember that the dampening function implies that the
# same parameter values will give different probabilities at different stages.
#
### Value
#
# A named vector of probabilities with length equal to length(param) - 2, containing one probability for each element of param except
# 'log_delta1' and 'log_delta2'. The vector sums to 1 (up to machine precision), and these are the model-estimated probabilities of
# choosing the corresponding item (including item 0, when present) at the next stage.
bpl_probs = function(param, stage = 1) {
param_names = names(param);
item_names = setdiff(param_names, c("0","log_delta1","log_delta2"));
fatigue_weight = if("0" %in% param_names && stage > 1) exp(param["0"]) else 0;
fitted_probs = numeric(length(param) - 2);
names(fitted_probs) = setdiff(param_names, c("log_delta1","log_delta2"));
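#dampening at stage s (Section 3.1): exp(log_delta2 + (s - 1) * log_delta1) + (1 - exp(log_delta2))^(2 * s - 1); equals 1 at s = 1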
dampening = exp(param["log_delta2"] + (stage - 1) * param["log_delta1"]) +
(1-exp(param["log_delta2"]))^(2 * stage - 1) ;
fitted_probs[item_names] = exp(dampening * param[item_names]) / (fatigue_weight + sum(exp(dampening * param[item_names])));
if("0"%in%param_names) {fitted_probs["0"] = 1 - sum(fitted_probs) + .Machine$double.eps;}
return(fitted_probs);
}
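#Illustrative usage sketch for 'bpl_probs' (hypothetical parameter values, not from a fitted model); wrapped in
#if(FALSE) so that sourcing this file has no side effects:
if(FALSE) {
  example_param = c("0" = -1.5, "1" = 1.2, "2" = 0.4, "3" = 0, "log_delta1" = -0.2, "log_delta2" = -0.1);
  bpl_probs(example_param, stage = 1); #stage-1 probabilities: dampening equals 1 and item 0 gets essentially zero weight
  bpl_probs(example_param, stage = 3); #stage-3 probabilities: dampening < 1 pulls the item probabilities toward one another
}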
ordered_barplot = function(data_ranked,
col_palette_range,
file_path = "foo.pdf",
title_text = "",
skip_lonely = TRUE) {
require(RColorBrewer);
require(dplyr); #provides pull() and the %>% pipe used below
if(skip_lonely && any(colSums(!is.na(data_ranked)) == 1)) {
data_ranked_lonely = data_ranked[,which(colSums(!is.na(data_ranked)) == 1), drop = FALSE];
lonely_column_order_to_plot = order(-as.numeric(colSums(!is.na(data_ranked_lonely))),as.numeric(apply(data_ranked_lonely,2,mean,na.rm=TRUE)));
data_ranked = data_ranked[,which(colSums(!is.na(data_ranked)) > 1), drop = FALSE];
} else {
data_ranked_lonely = data.frame(NULL);
}
column_order_to_plot = order(-as.numeric(colSums(!is.na(data_ranked))),as.numeric(apply(data_ranked,2,mean,na.rm=TRUE)));
xmax = max(max(colSums(!is.na(data_ranked))), sum(!is.na(data_ranked_lonely)));
ymax = ncol(data_ranked) + as.logical(ncol(data_ranked_lonely));
col_func = colorRampPalette(col_palette_range);
plot_colors = col_func(5);
plot_pch = c(15, 16, 17, 18, 15);
plot_cex = c(1.15, 1.25, 1.05, 1.10, 0.85);
pdf(file = file_path,
width = 8,
height = 1.4 + 2.6 * (ymax/28),
family = "serif");
par(mar=c(3.5,1.5 + 8 * (max(nchar(colnames(data_ranked)[column_order_to_plot]))/24),1.0,0.6),
oma = c(0, 0, 1, 0));
plot.new();
plot.window(xlim = c(1.25,xmax-0.5), ylim = c(1,ymax+0.1));
rect(par("usr")[1],par("usr")[3],par("usr")[2],par("usr")[4],col = "#99999950",border=NA);
if(ncol(data_ranked_lonely)) {
axis(2,at=ymax:1,labels=c(colnames(data_ranked)[column_order_to_plot],"(PROBLEMS LISTED ONCE)"),lwd=1, las = 1, cex.axis = 0.75,hadj =0.96)
} else {
axis(2,at=ymax:1,labels=colnames(data_ranked)[column_order_to_plot],lwd=1, las = 1, cex.axis = 0.75,hadj =0.96)
}
mtext(side = 1,"Count",line=2.5, cex = 1.2);
abline(v = seq(2, xmax, by = 4), col = "#FFFFFF");
abline(v = seq(4, xmax, by = 4), col = "#FFFFFF", lty = "44");
axis(1,las = 1,at = seq(2,xmax,by = 4),cex.axis = 1.2);
axis(3,las = 1,at = seq(2,xmax,by = 4),labels = NA, cex.axis = 1.2);
y = ymax + 1;
for(prob in column_order_to_plot) {
y = y - 1;
index = data_ranked %>% pull(prob);
index[which(index>4)] = 5;
cumsum_width = 0;
for(k in unique(sort(index))) {
width = sum(index == k, na.rm = TRUE);
points(x = cumsum_width + (1:width),
y = rep(y,width),
cex = plot_cex[k],
pch = plot_pch[k],
col = plot_colors[k]);
cumsum_width = cumsum_width + width;
}
}
if(ncol(data_ranked_lonely)) {
y = y - 1;
cumsum_width = 0;
for(prob in lonely_column_order_to_plot) {
index = data_ranked_lonely %>% pull(prob);
index[which(index>4)] = 5;
for(k in unique(sort(index))) {
width = sum(index == k, na.rm = TRUE);
points(x = cumsum_width + (1:width),
y = rep(y,width),
cex = plot_cex[k],
pch = plot_pch[k],
col = plot_colors[k]);
cumsum_width = cumsum_width + width;
}
}
}
legend(x = xmax - 0.5,y = 9,xjust = 1, legend=c("1", "2", "3", "4", ">4"),title=expression(underline(Rank)),ncol=5,pt.cex = max(plot_cex),pch = plot_pch,col=plot_colors,bg="#BBBBBB");
title(main = title_text, outer = TRUE, adj = 0);
dev.off();
}
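#Illustrative usage sketch for 'ordered_barplot' (hypothetical data, column names, and file path). Here 'data_ranked'
#has one column per problem and one row per ranker, with each cell giving the rank assigned (NA if unranked) -- the
#complement of the 'dat' layout used elsewhere. Wrapped in if(FALSE) so that sourcing this file does not write a PDF:
if(FALSE) {
  example_ranked = data.frame(fatigue   = c(1, 2, 1, 3, NA),
                              pain      = c(2, 1, 3, 1, 1),
                              nausea    = c(3, NA, 2, 2, NA),
                              insomnia  = c(NA, 3, NA, NA, 2),
                              dizziness = c(NA, NA, NA, NA, 3)); #'dizziness' is listed once, exercising the skip_lonely branch
  ordered_barplot(example_ranked,
                  col_palette_range = c("darkblue", "orange"),
                  file_path = "example_barplot.pdf",
                  title_text = "Example ranking data");
}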
matrix_to_list = function(x) {
#convert each row of x into a vector of its leading non-missing entries; seq_len() guards against the 1:0 sequence that would arise from an all-NA row
lapply(seq_len(nrow(x)), function(i) x[i, seq_len(sum(!is.na(x[i,])))])
}
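#Illustrative usage sketch for 'matrix_to_list' (hypothetical values): convert a ranked-list matrix in the 'dat'
#format (one row per list, NAs padding the right) into a list of variable-length vectors. Wrapped in if(FALSE):
if(FALSE) {
  example_dat = rbind(c(2, 1, 3),
                      c(1, 3, NA),
                      c(3, NA, NA));
  matrix_to_list(example_dat); #returns list(c(2, 1, 3), c(1, 3), 3)
}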