R/functions_estimation_engine.R

Defines functions reduceStatisticsList modifyStatisticsList getMultinomialProbabilities getMultinomialInformationMatrixM getMultinomialInformationMatrix getLikelihoodMM getIterationStepState getInformationMatrixREM getFirstDerivativeREM getFirstDerivativeMM getFirstDerivativeM getEventValues estimate_int

##################### ##
#
# Goldfish package
# Internal estimation routine
#
#################### ###

# Estimation
estimate_int <- function(
    statsList,
    nodes, nodes2,
    defaultNetworkName,
    modelType = c(
      "DyNAM-MM", "DyNAM-M", "REM-ordered",
      "DyNAM-M-Rate", "REM", "DyNAM-M-Rate-ordered"
    ),
    initialParameters = NULL,
    fixedParameters = NULL,
    excludeParameters = NULL,
    initialDamping = 1,
    maxIterations = 20,
    dampingIncreaseFactor = 2,
    dampingDecreaseFactor = 3,
    maxScoreStopCriterion = 0.001,
    # additional return objects
    returnEventProbabilities = FALSE,
    # additional parameter for DyNAM-MM
    allowReflexive = FALSE,
    isTwoMode = FALSE,
    # additional parameter for DyNAM-M-Rate
    hasIntercept = FALSE,
    returnIntervalLogL = FALSE,
    parallelize = FALSE,
    cpus = 6,
    verbose = FALSE,
    progress = FALSE,
    impute = TRUE,
    ignoreRepParameter,
    # restrictions of opportunity sets
    opportunitiesList = NULL,
    prepEnvir = new.env()) {
  ## SET VARIABLES

  minDampingFactor <- initialDamping
  # CHANGED MARION
  # nParams: number of effects + 1 (if has intercept)
  nParams <- dim(statsList$initialStats)[3] - length(excludeParameters) +
    hasIntercept
  #
  parameters <- initialParameters
  if (is.null(initialParameters)) parameters <- numeric(nParams)
  # deal with fixedParameters
  idUnfixedCompnents <- seq_len(nParams)
  idFixedCompnents <- NULL
  likelihoodOnly <- FALSE
  if (!is.null(fixedParameters)) {
    if (length(fixedParameters) != nParams) {
      stop(
        "The length of fixedParameters is inconsistent with",
        "the number of the parameters.",
        "\n\tLength ", dQuote("fixedParameters"), " vector:",
        length(fixedParameters), "\n\tNumber of parameters:", nParams,
        call. = FALSE
      )
    }

    if (all(!is.na(fixedParameters))) likelihoodOnly <- TRUE
    parameters[!is.na(fixedParameters)] <-
      fixedParameters[!is.na(fixedParameters)]
    idUnfixedCompnents <- which(is.na(fixedParameters))
    idFixedCompnents <- which(!is.na(fixedParameters))
  }

  modelType <- match.arg(modelType)

  ## PARAMETER CHECKS

  if (length(parameters) != nParams) {
    stop(
      " Wrong number of initial parameters passed to function.",
      "\n\tLength ", dQuote("parameters"), " vector:",
      length(parameters), "\n\tNumber of parameters:", nParams,
      call. = FALSE
    )
  }

  if (!(length(minDampingFactor) %in% c(1, nParams))) {
    stop(
      "minDampingFactor has wrong length:",
      "\n\tLength ", dQuote("minDampingFactor"), " vector:",
      length(minDampingFactor), "\n\tNumber of parameters:", nParams,
      "\nIt should be length 1 or same as number of parameters.",
      call. = FALSE
    )
  }

  if (dampingIncreaseFactor < 1 || dampingDecreaseFactor < 1) {
    stop(
      "Damping increase / decrease factors cannot be smaller than one.",
      call. = FALSE
    )
  }

  ## REDUCE STATISTICS LIST

  if (verbose) cat("Reducing data\n")

  # CHANGED MARION: add colOnly and rowOnly in a smart way for the estimation
  reduceMatrixToVector <- FALSE
  reduceArrayToMatrix <- FALSE
  if (modelType %in% c("DyNAM-M-Rate", "DyNAM-M-Rate-ordered")) {
    reduceMatrixToVector <- TRUE
  } else if (modelType == "DyNAM-M") {
    reduceArrayToMatrix <- TRUE
  }

  # CHANGED MARION: updated function
  # for rate model with intercept,
  # add a table of all 1 to the statsList$initStats
  statsList <- modifyStatisticsList(
    statsList = statsList,
    modelType = modelType,
    reduceMatrixToVector = reduceMatrixToVector,
    reduceArrayToMatrix = reduceArrayToMatrix,
    excludeParameters = excludeParameters,
    addInterceptEffect = hasIntercept
  )

  # CHANGED MARION: handle composition changes for
  # counting average number of actors
  # and remove absent actors for each estimation step

  ## GET COMPOSITION CHANGES
  compChangeName1 <- attr(nodes, "events")[
    "present" == attr(nodes, "dynamicAttribute")
  ]
  hasCompChange1 <- !is.null(compChangeName1) && length(compChangeName1) > 0

  compChangeName2 <- attr(nodes2, "events")[
    "present" == attr(nodes2, "dynamicAttribute")
  ]
  hasCompChange2 <- !is.null(compChangeName2) && length(compChangeName2) > 0 &&
    !modelType %in% c("DyNAM-M-Rate", "DyNAM-M-Rate-ordered")

  if (hasCompChange1) {
    compChange1 <- get(compChangeName1, envir = prepEnvir) # add prepEnvir
    compChange1 <- sanitizeEvents(compChange1, nodes, envir = prepEnvir)
  } else {
    compChange1 <- NULL
  }

  if (hasCompChange2) {
    compChange2 <- get(compChangeName2, envir = prepEnvir) # add prepEnvir
    compChange2 <- sanitizeEvents(compChange2, nodes2, envir = prepEnvir)
  } else {
    compChange2 <- NULL
  }

  if (!is.null(nodes$present)) {
    presence <- nodes$present
  } else {
    presence <- rep(TRUE, length(nodes))
  }
  if (!is.null(nodes2$present)) {
    presence2 <- nodes2$present
  } else {
    presence2 <- rep(TRUE, length(nodes2))
  }

  nEvents <- length(statsList$orderEvents)

  nEvents <- length(statsList$orderEvents)

  ## ADD INTERCEPT
  # CHANGED MARION
  # replace first parameter with an initial estimate of the intercept
  if (modelType %in% c("REM", "DyNAM-M-Rate") && hasIntercept &&
    is.null(initialParameters) &&
    (is.null(fixedParameters) || is.na(fixedParameters[1]))) {
    totalTime <- sum(unlist(statsList$intervals), na.rm = TRUE) +
      sum(unlist(statsList$rightCensoredIntervals), na.rm = TRUE)

    nActors <- sum(presence)

    if (hasCompChange1) {
      # CHANGED MARION: remove the use of the events object
      time <- statsList$startTime
      previoustime <- -Inf
      currentInterval <- 1
      currentRCInterval <- 1
      nAvgActors <- 0

      for (i in seq_len(nEvents)) {
        if (statsList$orderEvents[[i]] == 1) {
          time <- time + statsList$intervals[[currentInterval]]
          currentInterval <- currentInterval + 1
        } else {
          time <- time + statsList$rightCensoredIntervals[[currentRCInterval]]
          currentRCInterval <- currentRCInterval + 1
        }

        changesAtTime <- compChange1$replace[
          intersect(
            which(compChange1$time > previoustime),
            which(compChange1$time <= time)
          )
        ]

        # add new present actors and substract non-present
        nActors <- nActors + sum(changesAtTime) - sum(!changesAtTime)
        nAvgActors <- nAvgActors + nActors
        previoustime <- time
      }
      nAvgActors <- nAvgActors / nEvents
    } else {
      nAvgActors <- nActors
    }

    # log crude rate event, estimate when not covariates
    initialInterceptEstimate <- log(nEvents / totalTime / nAvgActors)
    parameters[1] <- initialInterceptEstimate
  }
  ## SET VARIABLES BASED ON STATSLIST

  ## ESTIMATION: INITIALIZATION

  if (verbose) cat("Estimating model type: ", modelType)

  iIteration <- 1
  informationMatrix <- matrix(0, nParams, nParams)
  score <- rep(0, nParams)
  logLikelihood <- 0
  isConverged <- FALSE
  isInitialEstimation <- TRUE
  logLikelihood.old <- -Inf
  parameters.old <- initialParameters
  score.old <- NULL
  informationMatrix.old <- NULL

  # if (parallelize && require("snowfall", quietly = TRUE)) {
  #   snowfall::sfStop()
  #   snowfall::sfInit(parallel = TRUE, cpus = cpus)
  #   snowfall::sfExport(
  #   "getMultinomialProbabilities", "getLikelihoodMM", "getFirstDerivativeMM",
  #   "getMultinomialInformationMatrix", namespace = "goldfish")
  # }

  ## ESTIMATION: ITERATIONS
  while (TRUE) {
    # MARION to be make: update this function for the new stat list
    # calculate logL, score and information; pass parallel computing parameters
    res <- getIterationStepState(
      statsList = statsList,
      nodes = nodes,
      nodes2 = nodes2,
      defaultNetworkName = defaultNetworkName,
      updatepresence = hasCompChange1,
      presence = presence,
      compChange1 = compChange1,
      updatepresence2 = hasCompChange2,
      presence2 = presence2,
      compChange2 = compChange2,
      hasIntercept = hasIntercept,
      parameters = parameters,
      parallelize = parallelize,
      cpus = cpus,
      modelType = modelType, # model type case distinction
      returnIntervalLogL = returnIntervalLogL,
      returnEventProbabilities = returnEventProbabilities,
      allowReflexive = allowReflexive,
      isTwoMode = isTwoMode,
      reduceMatrixToVector = reduceMatrixToVector,
      reduceArrayToMatrix = reduceArrayToMatrix,
      ignoreRepParameter = ignoreRepParameter,
      impute = impute,
      verbose = verbose,
      opportunitiesList = opportunitiesList,
      prepEnvir = prepEnvir
    )

    logLikelihood <- res[[1]]
    score <- res[[2]]
    informationMatrix <- res[[3]]
    if (returnIntervalLogL) intervalLogL <- res[[4]]
    # add a possibility to return the whole probability matrix: to be make
    if (returnEventProbabilities) {
      eventProbabilities <- if (is.null(res$pMatrix)) {
        paste("not implemented for model type", modelType)
      } else {
        res$pMatrix
      }
    }

    if (isInitialEstimation && any(is.na(unlist(res))) &&
      !all(parameters[-1] == 0)) { # # Check
      stop(
        "Estimation not possible with initial parameters.",
        " Try using zeros instead.",
        call. = FALSE
      )
    }

    # If we only want the likelihood break here
    if (likelihoodOnly) {
      inverseInformationUnfixed <- matrix(0, nParams, nParams)
      score <- rep(0, nParams)
      isConverged <- TRUE
      break
    }

    # we don't consider the fixed components of the score.
    # It's for the fixing parameter feature. \
    score[idFixedCompnents] <- 0

    if (!verbose && progress) {
      cat(
        "\rMax score: ",
        round(
          max(abs(score)),
          round(-logb(maxScoreStopCriterion / 1, 10)) + 1
        ),
        " (", iIteration, ").        "
      )
    }

    if (verbose) {
      cat(
        "\n\nLikelihood:", logLikelihood, "in iteration", iIteration,
        "\nParameters:", toString(parameters),
        "\nScore:", toString(score)
      )
      # print(informationMatrix)
    }

    if (logLikelihood <= logLikelihood.old || any(is.na(unlist(res)))) {
      if (verbose) {
        cat(
          "\nNo improvement in estimation.",
          " Resetting values and adjusting damping."
        )
      }
      # reset values
      logLikelihood <- logLikelihood.old
      parameters <- parameters.old
      score <- score.old
      informationMatrix <- informationMatrix.old
      minDampingFactor <- minDampingFactor * dampingIncreaseFactor
    } else {
      logLikelihood.old <- logLikelihood
      parameters.old <- parameters
      score.old <- score
      informationMatrix.old <- informationMatrix
      minDampingFactor <- max(
        1,
        minDampingFactor / ifelse(isInitialEstimation, 1, dampingDecreaseFactor)
      )
    }

    # end of initial estimation
    isInitialEstimation <- FALSE

    # Calculate the UPDATE distance taking into account the DAMPING
    dampingFactor <- minDampingFactor

    # INVERT information matrix
    # We only invert the unfixed part of the parameter.
    # The fixed components of the score have already be set to be 0.
    # It's for the fixing parameter feature.
    informationMatrixUnfixed <-
      informationMatrix[idUnfixedCompnents, idUnfixedCompnents]
    inverseInformationUnfixed <- try(
      solve(informationMatrixUnfixed),
      silent = TRUE
    )
    if (inherits(inverseInformationUnfixed, "try-error")) {
      stop(
        "Matrix cannot be inverted;",
        " probably due to collinearity between parameters."
      )
    }

    update <- rep(0, nParams)
    update[idUnfixedCompnents] <-
      (inverseInformationUnfixed %*% score[idUnfixedCompnents]) / dampingFactor

    if (verbose) {
      cat(
        "\nUpdate: ", toString(update),
        "\nDamping factor: ", toString(dampingFactor)
      )
    }

    # check for stop criteria
    if (max(abs(score)) <= maxScoreStopCriterion) {
      isConverged <- TRUE
      if (progress) {
        cat(
          "\nStopping as maximum absolute score is below ",
          maxScoreStopCriterion, ".\n",
          sep = ""
        )
      }

      break
    }
    if (iIteration > maxIterations) {
      if (progress) {
        cat(
          "\nStopping as maximum of ",
          maxIterations,
          " iterations have been reached. No convergence.\n"
        )
      }
      break
    }

    parameters <- parameters + update

    iIteration <- iIteration + 1
  } # end of while

  ## ESTIMATION: END

  # if (parallelize && require("snowfall", quietly = TRUE)) {
  #   snowfall::sfStop()
  # }

  # calculate standard errors
  # the variance for the fixed compenents should be 0
  stdErrors <- rep(0, nParams)
  stdErrors[idUnfixedCompnents] <- sqrt(diag(inverseInformationUnfixed))

  # define, type and return result
  estimationResult <- list(
    parameters = parameters,
    standardErrors = stdErrors,
    logLikelihood = logLikelihood,
    finalScore = score,
    finalInformationMatrix = informationMatrix,
    convergence = list(
      isConverged = isConverged, maxAbsScore = max(abs(score))
    ),
    nIterations = iIteration,
    nEvents = nEvents
  )
  if (returnIntervalLogL) estimationResult$intervalLogL <- intervalLogL
  if (returnEventProbabilities) {
    estimationResult$eventProbabilities <- eventProbabilities
  }
  attr(estimationResult, "class") <- "result.goldfish"
  estimationResult
}


# Function to return log likelihood, score, information matrix,
# and p vector for each event
# CHANGED SIWEI: add three parameters: isRightCensored, timespan and
#  allowReflexive
getEventValues <- function(
    statsArray,
    activeDyad,
    parameters,
    modelType,
    isRightCensored,
    timespan,
    allowReflexive,
    isTwoMode) {
  if (modelType == "DyNAM-MM") {
    multinomialProbabilities <-
      getMultinomialProbabilities(
        statsArray, activeDyad, parameters,
        allowReflexive = allowReflexive
      )
    eventLikelihoods <- getLikelihoodMM(multinomialProbabilities)
    logLikelihood <- log(eventLikelihoods[activeDyad[1], activeDyad[2]])
    firstDerivatives <- getFirstDerivativeMM(
      statsArray, eventLikelihoods, multinomialProbabilities
    )
    score <- firstDerivatives[activeDyad[1], activeDyad[2], ]
    informationMatrix <- getMultinomialInformationMatrix(
      eventLikelihoods, firstDerivatives
    )
    pMatrix <- eventLikelihoods
  }

  if (modelType == "DyNAM-M") {
    eventProbabilities <-
      getMultinomialProbabilities(
        statsArray, activeDyad, parameters,
        actorNested = TRUE, allowReflexive = allowReflexive,
        isTwoMode = isTwoMode
      )
    logLikelihood <- log(eventProbabilities[activeDyad[2]])
    firstDerivatives <- getFirstDerivativeM(statsArray, eventProbabilities)
    score <- firstDerivatives[activeDyad[2], ]
    informationMatrix <- getMultinomialInformationMatrixM(
      eventProbabilities, firstDerivatives
    )
    pMatrix <- eventProbabilities
  }

  if (modelType == "REM-ordered") {
    eventProbabilities <-
      getMultinomialProbabilities(
        statsArray, activeDyad, parameters,
        actorNested = FALSE, allowReflexive = FALSE
      )
    logLikelihood <- log(eventProbabilities[activeDyad[1], activeDyad[2]])
    firstDerivatives <- getFirstDerivativeREM(statsArray, eventProbabilities)
    score <- firstDerivatives[activeDyad[1], activeDyad[2], ]
    informationMatrix <- getInformationMatrixREM(
      eventProbabilities, firstDerivatives
    )
    pMatrix <- eventProbabilities
  }

  if (modelType == "DyNAM-M-Rate-ordered") {
    # statsMatrix <- reduceArrayToMatrix(statsArray)
    statsMatrix <- statsArray
    activeActor <- activeDyad[1]
    parameters <- c(parameters)

    rates <- exp(rowSums(t(t(statsMatrix) * parameters)))
    eventProbabilities <- rates / sum(rates)
    expectedStatistics <- colSums(statsMatrix * eventProbabilities)
    # statsMatrix[activeActor, ] * parameters: rate for actor i (i=activeActor)
    logLikelihood <- sum(statsMatrix[activeActor, ] * parameters) -
      log(sum(rates))
    # deviation from actual statistics
    deviations <- t(t(statsMatrix) - expectedStatistics)
    score <- deviations[activeActor, ]
    # Fisher information matrix
    informationMatrix <- matrix(
      rowSums(t(
        t(matrix(
          apply(deviations, 1, function(x) outer(x, x)),
          ncol = length(eventProbabilities)
        ))
        * eventProbabilities
      )),
      length(parameters), length(parameters)
    )
    pMatrix <- eventProbabilities
  }

  if (modelType %in% c("DyNAM-M-Rate", "REM")) {
    activeActor <- activeDyad[1]
    dimMatrix <- dim(statsArray)
    if (modelType == "REM") {
      activeActor <- activeDyad[1] + (activeDyad[2] - 1) * dimMatrix[1]
      statsArray <- apply(statsArray, 3, c)
    }

    parameters <- as.numeric(parameters)

    # test if time interval is NA, to be make
    if (is.na(timespan)) timespan <- 0

    # Don't consider self-connecting edge when both allowReflexive
    # and  isTwoMode are false
    dontConsiderSelfConnecting <- (modelType == "REM") && !allowReflexive &&
      !isTwoMode
    if (dontConsiderSelfConnecting) {
      idEdgeNotConsidered <- (seq_len(dimMatrix[1]) - 1) * dimMatrix[1] +
        seq_len(dimMatrix[1])
    } else {
      idEdgeNotConsidered <- numeric(0)
    }
    # vector of rates
    objectiveFunctions <- (statsArray %*% parameters)[, 1] # a vector
    objectiveFunctionOfSender <- objectiveFunctions[activeActor]
    statsOfSender <- statsArray[activeActor, ]
    rates <- exp(objectiveFunctions)
    rates[idEdgeNotConsidered] <- 0
    ratesSum <- sum(rates)
    # k vector with all rho * s_k summed over all actors i
    ratesStats <- rates * statsArray
    ratesStatsSum <- colSums(rates * statsArray)

    ratesStatsStatsSum <- colSums(t(
      apply(statsArray, 1, function(x) outer(x, x))
    ) *
      rates)
    if (length(parameters) == 1 && modelType == "DyNAM-M-Rate") {
      v <- as.vector(statsArray)
      sum <- 0
      for (i in seq_along(v)) {
        sum <- sum + v[i] * v[i] * rates[i]
      }
      ratesStatsStatsSum <- sum
    }
    dim(ratesStatsStatsSum) <- rep(length(parameters), 2)

    logL <- -timespan * ratesSum +
      if (!isRightCensored) objectiveFunctionOfSender else 0

    score <- -timespan * ratesStatsSum +
      if (!isRightCensored) statsOfSender else 0

    hessian <- -timespan * ratesStatsStatsSum
    pVector <- objectiveFunctions + (-timespan * ratesSum)
    if (modelType == "REM") dim(pVector) <- c(dimMatrix[1], dimMatrix[2])

    # cat("\ntimespan:", timespan)
    # cat("\nderivative:")
    # print(ratesStatsSum)
    # cat("\nfisher:")
    # print(ratesStatsStatsSum)
    return(list(
      logLikelihood = logL,
      score = score,
      informationMatrix = -hessian,
      pMatrix = pVector
    ))
  }

  return(list(
    logLikelihood = logLikelihood,
    score = score,
    informationMatrix = informationMatrix,
    pMatrix = pMatrix
  ))
}


# calculate the score contribution of one event of the M model
# to the log(!) likelihood
# The scores are the differences between expected and observed statistics
getFirstDerivativeM <- function(statsArray, eventProbabilities) {
  nParams <- dim(statsArray)[2]
  nActors <- dim(statsArray)[1]

  expectedStatistics <- colSums(statsArray * eventProbabilities)
  firstDerivatives <- sweep(statsArray, MARGIN = 2, expectedStatistics, "-")

  firstDerivatives
}


# Function to calculate the array of first derivatives of the log likelihood
# 1) get the likelihoods for each i-j combination
# 2) calculate the ratios of
#     multP_{ij}' / multP_{ij} = s_{ij1} - \sum_{h} multP_{ih} s_{ih1}
#     for the derivative of the first parameter (deviation from expectation)
#     in the multinomial part
# 3) calculate the constant that is substracted from each first derivative
# 4) calculate the derivative based on the values above (see paper)
getFirstDerivativeMM <- function(
    statsArray,
    likelihoods,
    multinomialProbabilities) {
  nParams <- dim(statsArray)[3]
  nActors <- dim(statsArray)[1]
  matrixSize <- nActors * nActors
  # 2)
  # multiply each "parameter slice" (third dimension)
  # with the multinomial probability matrix
  weightedValues <- statsArray * rep(multinomialProbabilities, nParams)
  expectedStatistics <- apply(weightedValues, 3, rowSums)
  # deviation from expected statistic
  deviationFromExpectation <- statsArray -
    c(apply(expectedStatistics, 2, rep, nActors))

  # 3)
  # symmetrize the deviations from expectations
  symDeviations <- deviationFromExpectation +
    aperm(deviationFromExpectation, c(2, 1, 3))
  # the likelihoods have to divided by 2
  # as the matrix sum is 2 (it includes all likelihoods twice)
  constantWeightedDeviations <-
    apply(symDeviations * c(likelihoods / 2), 3, sum)

  # 4)
  derivatives <- symDeviations -
    rep(constantWeightedDeviations, each = matrixSize)

  derivatives
}

# calculate the score contribution of one event of the M model
# to the log(!) likelihood
# The scores are the differences between expected and observed statistics
getFirstDerivativeREM <- function(statsArray, eventProbabilities) {
  # nActors <- dim(statsArray)[1]
  # nParams <- dim(statsArray)[3]

  expectedStatistics <- apply(
    apply(statsArray, 3, function(m) m * eventProbabilities),
    2, sum
  )
  firstDerivatives <- sweep(statsArray, MARGIN = 3, expectedStatistics, "-")

  firstDerivatives
}


getInformationMatrixREM <- function(eventProbabilities, firstDerivatives) {
  nParams <- dim(firstDerivatives)[3]
  # nActors <- dim(firstDerivatives)[1]

  # all indexes: 1-1, 1-2, ..., nParams-nParams
  indexes <- expand.grid(seq_len(nParams), seq_len(nParams))
  # indexes <- indexes[indexes[, 1] <= indexes[, 2], ]

  values <- colSums(apply(
    indexes, 1,
    \(ind) {
      firstDerivatives[, , ind[1]] * firstDerivatives[, , ind[2]] *
        eventProbabilities
    }
  ))
  information <- matrix(values, nParams, nParams)
  # symmetrize
  # information[lower.tri(information)] <- information[upper.tri(information)]

  information
}


# Function to return log likelihood, score and information matrix
# for all events, given a set of parameters
# for the MM, M, REM, and M-Rate function, and REM-ordered
getIterationStepState <- function(
    statsList,
    nodes,
    nodes2,
    defaultNetworkName,
    updatepresence,
    presence,
    compChange1,
    updatepresence2,
    presence2,
    compChange2,
    hasIntercept,
    parameters,
    modelType,
    parallelize = FALSE, cpus = 4,
    returnIntervalLogL = FALSE,
    returnEventProbabilities = FALSE,
    allowReflexive = TRUE,
    isTwoMode = FALSE,
    reduceMatrixToVector = FALSE,
    reduceArrayToMatrix = FALSE,
    ignoreRepParameter = NULL,
    impute = TRUE,
    verbose = FALSE,
    opportunitiesList = NULL,
    prepEnvir = new.env()) {
  # CHANGED MARION: changed dims
  nEvents <- length(statsList$orderEvents)
  nParams <- dim(statsList$initialStats)[3]

  # iterate over all events
  informationMatrix <- matrix(0, nParams, nParams) # n_effects * n_effects
  score <- rep(0, nParams)
  logLikelihood <- 0

  if (returnEventProbabilities) {
    EventProbabilities <- vector(mode = "list", length = nEvents)
  }
  if (returnIntervalLogL) {
    eventLogL <- numeric(nEvents)
  }

  # check for parallelization
  # if (parallelize && require("snowfall", quietly = TRUE)) {
  #   snowfall::sfStop()
  #   snowfall::sfInit(parallel = TRUE, cpus = cpus)
  #   snowfall::sfExport("getEventValues", namespace = "goldfish")
  # }

  # initialize progressbar output
  # showProgressBar <- FALSE
  # progressEndReached <- FALSE
  # if(!silent) {
  #  showProgressBar <- T
  #  dotEvents <- seq(1, nEvents, round(nEvents/min(nEvents, 50)))
  # }

  # CHANGED Marion: fill in the loop! stats array needs to be computed
  # also changes for dependent and rc events!
  statsArray <- statsList$initialStats # 84*84*6
  # CHANGED Marion: remove the use of events object
  time <- statsList$startTime
  # timespan <- ifelse(modelType %in% c("DyNAM-M-Rate", "REM"), NA, 0)
  idep <- 1L
  irc <- 1L

  hasIgnoreRep <- any(ignoreRepParameter)
  if (hasIgnoreRep) {
    net <- get(defaultNetworkName, envir = prepEnvir) # add prepEnvir
    ignoreRepIds <- which(ignoreRepParameter) + hasIntercept
    # with intercept, the first effect is the intercept without
    # ignoreRep option
    startTime <- -Inf
  }

  # opportunities list initialization
  opportunities <- rep(TRUE, nrow(nodes2))
  updateopportunities <- !is.null(opportunitiesList) &&
    !modelType %in% c("DyNAM-M-Rate", "DyNAM-M-Rate-ordered")

  # utility function for the statistics update
  updFun <- function(stat, change) {
    # stat: current statistics (for one effect only)
    # change: statsList$dependentStatsChange[[current event]][[current effect]]
    if (!is.null(change)) {
      stat[cbind(change[, "node1"], change[, "node2"])] <- change[, "replace"]
    }
    return(stat)
  }

  # IMPUTE missing statistics with current mean
  imputeFun <- function(m) {
    m[is.na(m)] <- mean(m, na.rm = TRUE)
    m
  }
  oldTime <- -Inf

  for (i in seq_len(nEvents)) {
    # get current event stats from preprocessing results based on time order
    # CHANGED SIWEI: update statistics, timespan, isDependent and activeDyad
    #  in diff cases with or without intercept
    if (statsList$orderEvents[[i]] == 1) {
      # dependent event
      isDependent <- TRUE
      pars2update <- !vapply(
        statsList$dependentStatsChange[[idep]],
        is.null,
        logical(1)
      )
      for (j in which(pars2update)) {
        statsArray[, , j + hasIntercept] <-
          updFun(
            statsArray[, , j + hasIntercept],
            statsList$dependentStatsChange[[idep]][[j]]
          )
      }
      # with intercept in the model
      if (hasIntercept) {
        time <- time + statsList$intervals[[idep]]
        timespan <- statsList$intervals[[idep]]
      }
      # CHANGED Marion: remove the use of events object
      activeDyad <- c(
        statsList$eventSender[[i]],
        statsList$eventReceiver[[i]]
      )
      idep <- idep + 1
    } else {
      # right-censored
      isDependent <- FALSE
      pars2update <- !vapply(
        statsList$rightCensoredStatsChange[[irc]],
        is.null,
        logical(1)
      )
      for (j in which(pars2update)) {
        statsArray[, , j + hasIntercept] <-
          updFun(
            statsArray[, , j + hasIntercept],
            statsList$rightCensoredStatsChange[[irc]][[j]]
          )
      }
      if (hasIntercept) {
        time <- time + statsList$rightCensoredIntervals[[irc]]
        timespan <- statsList$rightCensoredIntervals[[irc]]
      }

      activeDyad <- NULL
      irc <- irc + 1
    }

    # IMPUTE missing statistics with current mean
    if (impute && anyNA(statsArray)) {
      for (j in which(apply(statsArray, 3, anyNA))) {
        statsArray[, , j] <- imputeFun(statsArray[, , j])
      }
    }

    statsArrayComp <- statsArray
    # Handle the ignoreRep option
    if (hasIgnoreRep) {
      mat <- as.matrix(
        net,
        time = statsList$eventTime[[i]],
        startTime = startTime
      )
      ones <- which(mat > 0, arr.ind = TRUE)
      statsArrayComp[cbind(
        ones[, 1],
        ones[, 2],
        rep(ignoreRepIds, each = length(ignoreRepIds) * nrow(ones))
      )] <- 0
      # CHANGED SIWEI
      startTime <- statsList$eventTime[[i]]
      net[seq_len(dim(net)[1]), seq_len(dim(net)[2])] <- mat
    }

    # update opportunity set
    if (updateopportunities) {
      opportunities <- seq_len(nrow(nodes2)) %in% opportunitiesList[[i]]
    }

    # update composition
    # CHANGED SIWEI: fixed errors for composition change update
    # CHANGED MARION: fixed wrong initialization of compositions,
    #   removed next lines

    current_time <- statsList$eventTime[[i]]
    if (updatepresence) {
      update <-
        compChange1[compChange1$time <= current_time &
          compChange1$time > oldTime, ]
      presence[update$node] <- update$replace
    }

    if (updatepresence2) {
      update2 <-
        compChange2[compChange2$time <= current_time &
          compChange2$time > oldTime, ]
      presence2[update2$node] <- update2$replace
    }
    oldTime <- current_time

    # patch to avoid collision with dropping absent people
    if (!isTwoMode) {
      for (parmPos in seq_len(dim(statsArrayComp)[3])) {
        diag(statsArrayComp[, , parmPos]) <- 0
      }
    }

    # remove potential absent lines and columns from the stats array
    if (updatepresence) { # || (updateopportunities && !isTwoMode)
      keepIn <- presence
      # if (updateopportunities && !isTwoMode)
      #   keepIn <- presence & opportunities
      statsArrayComp <- statsArrayComp[keepIn, , , drop = FALSE]
      if (isDependent) {
        position <- which(activeDyad[1] == which(keepIn))
        if (length(position) == 0) {
          stop("Active node ", activeDyad[1], " not present in event ", i,
            call. = FALSE
          )
        }

        posSender <- activeDyad[1]
        activeDyad[1] <- position
      }
    } else {
      posSender <- activeDyad[1]
    }
    if ((updatepresence2 || updateopportunities)) {
      keepIn <- presence2 & opportunities
      # reducing stats array alters the correspondence between row/col
      # it needs to consider the reflexive case to avoid wrong calculation
      # excludes REM and DyNAM-MM
      if (!allowReflexive && grepl("DyNAM-M(-|$)?", modelType)) {
        if (!isTwoMode) keepIn[posSender] <- FALSE
        allowReflexiveCorrected <- TRUE
      } else {
        allowReflexiveCorrected <- FALSE
      }
      statsArrayComp <- statsArrayComp[, keepIn, , drop = FALSE]
      if (isDependent) {
        position <- which(activeDyad[2] == which(keepIn))
        if (length(position) == 0) {
          stop("Active node ", activeDyad[2], " not available in event ", i,
            call. = FALSE
          )
        }

        activeDyad[2] <- position
      }
    } else {
      allowReflexiveCorrected <- allowReflexive
    }

    # TEMPORARY: handle the reductions here for now
    # CHANGED SIWEI: reduce the matrix to vector for rate model
    # here in each step seperately
    # CHANGED SIWEI: treat one-mode and two-mode cases seperately
    # handle the reductions in one step outside the iteration loop, to be make
    if (reduceMatrixToVector) {
      if (verbose && i == 1) {
        cat("\nReplacing effects statistics by row means")
      }
      # statsArrayComp: n_nodes1*n_nodes2*num_statistics matrix
      arr <- apply(
        statsArrayComp,
        3,
        \(stat) {
          if (!isTwoMode) {
            rowSums(stat, na.rm = TRUE) / (dim(stat)[2] - 1)
          } else {
            rowMeans(stat, na.rm = TRUE)
          }
        }
      )
      statsArrayComp <- arr # statsArrayComp: n_nodes*n_effects matrix
    }
    # reduce array to matrices
    if (reduceArrayToMatrix) {
      oldDim <- dim(statsArrayComp)
      statsArrayComp <- matrix(
        statsArrayComp[activeDyad[1], , ],
        oldDim[2],
        oldDim[3]
      )
    }
    # compute loglikelihood, score, information matrix and
    # pmatrix for the current event
    # CHANGED SIWEI: add three arguments
    #  (isRightCensored, timespan and allowReflexive) to eventValues function
    isRightCensored <- !isDependent
    eventValues <- getEventValues(
      statsArray = statsArrayComp,
      activeDyad = activeDyad,
      parameters = parameters,
      modelType = modelType,
      isRightCensored = isRightCensored,
      timespan = timespan,
      allowReflexive = allowReflexiveCorrected,
      isTwoMode = isTwoMode
    )

    # update return list
    if (returnIntervalLogL) eventLogL[i] <- eventValues$logLikelihood
    if (returnEventProbabilities) EventProbabilities[[i]] <- eventValues$pMatrix

    logLikelihood <- logLikelihood + eventValues$logLikelihood
    score <- score + eventValues$score
    informationMatrix <- informationMatrix + eventValues$informationMatrix

    # update progress bar
    # if (showProgressBar && !progressEndReached) {
    #   if (i %in% dotEvents) {
    #     pos <- which(i == dotEvents)
    #     n <- length(dotEvents)
    #     cat("\r[", rep(".", pos), rep(" ", n - pos), "]", sep = "")
    #   }
    #   if (i == nEvents) {
    #     cat("\n")
    #     progressEndReached <- T
    #   }
    # }
  }

  returnList <- list(
    logLikelihood = logLikelihood,
    score = score,
    informationMatrix = informationMatrix
  )


  # if (parallelize && require("snowfall", quietly = TRUE)) {
  #   snowfall::sfStop()
  # }

  # if(returnIntervalLogL)
  #   returnList$eventLogL <- sapply(
  #     eventValues, function(v) v$logLikelihood, simplify = TRUE)
  if (returnIntervalLogL) returnList$eventLogL <- eventLogL
  # if(returnEventProbabilities)
  #   returnList$pMatrix <- lapply(eventValues, getElement, "pMatrix")
  if (returnEventProbabilities) returnList$pMatrix <- EventProbabilities

  return(returnList)
}

# Function to calculate the log likelihoods for each tie i<->j
getLikelihoodMM <- function(multinomialProbabilities) {
  symP <- multinomialProbabilities * t(multinomialProbabilities)
  diag(symP) <- 0
  denominator <- sum(symP) / 2
  return(symP / denominator)
}
# likelihoods <- symP / denominator


# Calculate the information matrix for multinomial models based on
# the schema in Cramer (2003), page 111 and others
# The derivatives are of the log likelihood and need to be transformed by
#  multiplying with P
getMultinomialInformationMatrix <- function(likelihoods, derivatives) {
  nParams <- dim(derivatives)[3]
  nActors <- dim(derivatives)[1]
  matrixSize <- nActors * nActors

  # take upper triangle of the likelihoods matrix so
  # that we do not count probabilities twice
  likelihoodsTriangle <- likelihoods * upper.tri(likelihoods)

  # In the Hessian formula H_{ijh} (7.11), p.111,
  # we include the log likelihood derivatives and
  #  multiply each of the two with P_{is}

  # Multiply each pair of slices of the log likelihood with
  # each other times the likelihood
  indexes <- cbind(seq_len(nParams), rep(seq_len(nParams), each = nParams))

  values <- apply(
    indexes, 1,
    \(ind) {
      sum(derivatives[, , ind[1]] * derivatives[, , ind[2]] *
        likelihoodsTriangle)
    }
  )
  informationMatrix <- matrix(values, nParams, nParams, byrow = FALSE)

  informationMatrix
}


getMultinomialInformationMatrixM <- function(
    eventProbabilities,
    firstDerivatives) {
  nParams <- dim(firstDerivatives)[2]
  # nActors <- dim(firstDerivatives)[1]

  # all indexes: 1-1, 1-2, ..., nParams-nParams
  indexes <- expand.grid(seq_len(nParams), seq_len(nParams))

  temp <- apply(
    indexes, 1,
    \(ind) {
      firstDerivatives[, ind[1]] * firstDerivatives[, ind[2]] *
        eventProbabilities
    }
  )
  if (!is.null(dim(temp))) {
    values <- colSums(temp)
  } else {
    # in case that temp is a scalar
    values <- temp
  }
  information <- matrix(values, nParams, nParams)
}


# Function to calculate a matrix of i->j multinomial choice probabilities
# (non-logged)
# for one term of the
getMultinomialProbabilities <- function(
    statsArray,
    activeDyad,
    parameters,
    actorNested = TRUE,
    allowReflexive = TRUE,
    isTwoMode = FALSE) {
  # allow this for a two- OR a three-dimensional array provided as input,
  # to be make
  nDimensions <- length(dim(statsArray))
  if (!(nDimensions %in% c(2, 3))) {
    stop(
      "StatsArray in getMultinomialProbabilities has to be",
      " two- or three-dimensional.",
      call. = FALSE
    )
  }

  # nParams <- dim(statsArray)[nDimensions]
  nActors1 <- dim(statsArray)[1]
  nActors2 <- dim(statsArray)[2]
  if (nDimensions == 3) {
    matrixSize <- nActors1 * nActors2
    # multiply parameters with the statistics; slice by slice
    # the cube has to be transposed for third-dimension-wise multyplication
    weightedStatsArray <- statsArray * rep(parameters, each = matrixSize)
    # get utility = exp( value of objective function )
    utility <- exp(apply(weightedStatsArray, c(1, 2), sum))
    if (!allowReflexive && !isTwoMode) diag(utility) <- 0
    if (actorNested) {
      denominators <- rowSums(utility)
    } else {
      # for REM
      denominators <- sum(utility)
    }
  }
  if (nDimensions == 2) {
    weightedStatsArray <- sweep(statsArray, MARGIN = 2, parameters, "*")
    utility <- exp(rowSums(weightedStatsArray))
    # allow reflexive?
    if (!allowReflexive && !isTwoMode) utility[activeDyad[1]] <- 0
    denominators <- sum(utility)
  }
  utility / denominators
}


# Utility function to modify the statistics list
modifyStatisticsList <- function(
    statsList,
    modelType,
    reduceMatrixToVector = FALSE,
    reduceArrayToMatrix = FALSE,
    excludeParameters = NULL,
    addInterceptEffect = FALSE) {
  # exclude effect statistics
  if (!is.null(excludeParameters)) {
    unknownIndexes <- setdiff(
      excludeParameters,
      seq_len(dim(statsList$initialStats)[3])
    )
    if (length(unknownIndexes) > 0) {
      stop(
        "Unknown parameter indexes in 'excludeIndexes': ",
        paste(unknownIndexes, collapse = " ")
      )
    }

    statsList$initialStats <- statsList$initialStats[, , -excludeParameters]
  }

  # reduce for dynam-m RATE model
  if (modelType == "DyNAM-M-Rate") {
    statsList <- reduceStatisticsList(statsList,
      # dropZeroTimespans = TRUE,
      reduceMatrixToVector = TRUE,
      addInterceptEffect = addInterceptEffect
    )
  }

  if (modelType == "DyNAM-M-Rate-ordered") {
    statsList <- reduceStatisticsList(statsList,
      reduceMatrixToVector = TRUE,
      dropRightCensored = FALSE
    )
  }


  # reduce for REM (continuous-time)
  if (modelType == "REM") {
    statsList <- reduceStatisticsList(statsList,
      addInterceptEffect = addInterceptEffect
    )
  }

  # reduce for dynam-m CHOICE model
  if (modelType == "DyNAM-M") {
    # drop the diagonal elements??
    statsList <- reduceStatisticsList(statsList,
      reduceArrayToMatrix = TRUE,
      dropRightCensored = FALSE
    )
  }

  # reduce for ORDERED REM
  if (modelType == "REM-ordered") {
    statsList <- reduceStatisticsList(statsList,
      dropRightCensored = FALSE
    )
  }

  return(statsList)
}

reduceStatisticsList <- function(
    statsList,
    addInterceptEffect = FALSE,
    dropRightCensored = FALSE,
    reduceMatrixToVector = FALSE,
    reduceArrayToMatrix = FALSE,
    dropZeroTimespans = FALSE) {
  if (dropRightCensored) {
    # reorder the intervals with only dependent events
    idep <- 1
    irc <- 1
    newintervals <- list()
    for (i in seq_along(statsList$orderEvents)) {
      if (statsList$orderEvents[[i]] == 1) {
        newintervals <- append(newintervals, statsList$intervals[[idep]])
        idep <- idep + 1
      }
      if (statsList$orderEvents[[i]] > 1) {
        newintervals[[length(newintervals)]] <-
          newintervals[[length(newintervals)]] +
          statsList$rightCensoredIntervals[[irc]]
        irc <- irc + 1
      }
    }
    statsList$orderEvents <- rep(1, length(statsList$intervals))

    # information on right censoring and intervals is no longer needed
    statsList$rightCensoredStatsChange <- list()
    statsList$rightCensoredIntervals <- list()
  }

  # This part should be uncommented once we are sure about how to use
  # these reductions through the whole estimation.
  # For now we reduce at each step

  # # reduce statistics matrix to a vector
  # if(reduceMatrixToVector) {
  #   apply(statsList$initialStats, c(3, 1), function(row) {
  #     if(min(row, na.rm = TRUE) != max(row, na.rm = TRUE))
  #       stop("Rate variable varies within event senders.")
  #   })
  # generates a matrix from an array
  #   statsList$initialStats <-
  #   apply(statsList$initialStats, 3, rowMeans, na.rm = TRUE)
  # }
  #
  # # reduce array to matrices
  # if(reduceArrayToMatrix) {
  #   oldDim <- dim(statsList$initialStats)
  #   statsList$initialStats <-
  #   matrix(statsList$initialStats[1, , ], oldDim[2], oldDim[3])
  # }

  # add a rate intercept that is 1 for everyone (dummy for \theta_0)
  # The format may be a vector or a matrix (see above)
  if (addInterceptEffect) {
    dimensions <- dim(statsList$initialStats)
    # data is in matrix format
    if (length(dimensions) == 3) {
      oldValues <- statsList$initialStats
      newValues <- matrix(1, dimensions[1], dimensions[2])
      statsList$initialStats <-
        array(c(newValues, oldValues), dim = dimensions + c(0, 0, 1))
    }
    # data is in vector format
    if (length(dimensions) == 2) {
      statsList$initialStats <- cbind(1, statsList$initialStats)
    }
  }

  # drop statistics with a time span of zero
  if (dropZeroTimespans) {
    hasZeroTime <- which(statsList$intervals == 0)
    statsList$dependentStatsChange <-
      statsList$dependentStatsChange[-hasZeroTime]
    statsList$intervals <- statsList$intervals[-hasZeroTime]
    hasZeroTime <- which(statsList$rightCensoredIntervals == 0)
    statsList$rightCensoredStatsChange <-
      statsList$rightCensoredStatsChange[-hasZeroTime]
    statsList$rightCensoredIntervals <-
      statsList$rightCensoredIntervals[-hasZeroTime]
  }

  return(statsList)
}

Try the goldfish package in your browser

Any scripts or data that you put into this service are public.

goldfish documentation built on Sept. 14, 2024, 9:08 a.m.