R/data.R

# NOTE(review): the @format line below says 8 variables, but 9 \item entries
# are listed -- confirm the column count against the shipped data object.
#' Example parameter table
#'
#' Example of the parameter table used throughout \code{serosolver}. This data
#' frame is used to control everything related to the model parameters,
#' including values, names, whether each parameter is fixed or estimated, and
#' uniform prior bounds.
#' @docType data
#' @usage data(example_par_tab)
#' @format A data frame with 21 rows and 8 variables:
#' \describe{
#'     \item{names}{string names of the model parameters}
#'     \item{values}{numeric values of the parameters}
#'     \item{fixed}{binary values indicating if the parameter should be fixed
#'         (1) or estimated (0) during the MCMC procedure}
#'     \item{lower_bound}{lower numeric bound for the parameter during fitting
#'         (lower uniform prior bound)}
#'     \item{upper_bound}{upper numeric bound for the parameter during fitting
#'         (upper uniform prior bound)}
#'     \item{lower_start}{can be used to set the lower allowable random
#'         starting value for the MCMC}
#'     \item{upper_start}{can be used to set the upper allowable random
#'         starting value for the MCMC}
#'     \item{par_type}{used for bookkeeping during MCMC: set to 0 for model
#'         options, 1 for normal model parameters, 2 for attack rate terms
#'         (phi), or 3 for measurement offset terms (rho)}
#'     \item{biomarker_group}{gives the biomarker group each parameter
#'         corresponds to, matching entries in the antibody_data object}
#' }
#' @family example_data
"example_par_tab"

#' Example antigenic map
#'
#' Example of the 2-dimensional antigenic map used throughout
#' \code{serosolver}. This data frame specifies the antigenic coordinates for
#' each pathogen/variant, with the assumption that only one representative
#' variant circulates in each time period.
#' @docType data
#' @usage data(example_antigenic_map)
#' @format A data frame with 48 rows and 3 variables:
#' \describe{
#'     \item{x_coord}{x-coordinate for this entry in the 2D map}
#'     \item{y_coord}{y-coordinate for this entry in the 2D map}
#'     \item{inf_times}{circulation time of this entry. By default, this can be
#'         considered as the circulation year, though the logic extends to
#'         quarters, months, etc.}
#' }
#' @family example_data
"example_antigenic_map"

#' Example infection history matrix
#'
#' Example of the infection history matrix for a cohort of 50 individuals,
#' matching \code{\link{example_antibody_data}} and
#' \code{\link{example_antigenic_map}}.
#' @docType data
#' @usage data(example_inf_hist)
#' @format A matrix with 50 rows (individuals) and 48 columns. Each entry is
#'   either 1 (presence of infection) or 0 (absence of infection).
#' @family example_data
"example_inf_hist"

# NOTE(review): the @format line below says 6 variables, but 8 \item entries
# are listed -- confirm the column count against the shipped data object.
#' Example antibody data
#'
#' Example of the antibody data frame generated by
#' \code{\link{simulate_data}}, with matching infection history in
#' \code{\link{example_inf_hist}}.
#' @docType data
#' @usage data(example_antibody_data)
#' @format A data frame with 4800 rows (each corresponding to a unique antibody
#'   measurement) and 6 variables:
#' \describe{
#'     \item{individual}{index starting from 1 for the individual ID. This
#'         should be continuous from 1:n.}
#'     \item{sample_time}{time the serum sample was taken, matching the time
#'         resolution used in the overall model.}
#'     \item{biomarker_id}{ID of the biomarker being measured. Usually this
#'         would correspond to the time period in which an antigen was assumed
#'         to be circulating, e.g., an entry of 2015 would correspond to the
#'         variant assumed to be circulating throughout 2015.}
#'     \item{biomarker_group}{Group ID of the biomarker being measured. This
#'         will usually just be 1, but can be used where there are multiple
#'         biomarker types (not just antigens/targets) per sample. For example,
#'         this might represent antibody levels and avidity.}
#'     \item{measurement}{the antibody measurement for this biomarker ID at
#'         this sampling time.}
#'     \item{repeat_number}{where there are repeat measurements, gives the
#'         repeat number for this serum sample and strain measured.}
#'     \item{population_group}{group index of this individual, for example,
#'         corresponding to a study location.}
#'     \item{birth}{date of birth matching the time resolution of the model.}
#' }
#' @family example_data
"example_antibody_data"


#' Example MCMC chain for kinetics parameters
#'
#' Outputs of a quick MCMC run corresponding to the example data. See
#' \code{\link{example_inf_chain}} for the corresponding infection history
#' chain output.
#' @docType data
#' @usage data(example_theta_chain)
#' @format A data frame with 100 rows (each corresponding to a sample from the
#'   posterior), and each column corresponding to one variable in
#'   \code{\link{example_par_tab}}.
#' @family example_data
"example_theta_chain"

#' Example MCMC chain for infection histories
#'
#' Outputs of a quick MCMC run corresponding to the example data. See
#' \code{\link{example_theta_chain}} for the corresponding theta chain output.
#' Note that this was run with prior version 2.
#' @docType data
#' @usage data(example_inf_chain)
#' @format A data table, with an entry for each combination of saved MCMC
#'   sample (samp_no), individual (i) and time period (j) for which there was
#'   an infection inferred (x).
#' @family example_data
"example_inf_chain"


#' MCMC chains case study 2, real
#'
#' MCMC outputs from case study 2 analysis on real data. This example data
#' object contains 3 MCMC chains thinned down to 100 samples from the posterior
#' for each chain. The structure of the data is as returned from
#' \code{\link{load_mcmc_chains}}.
#' @docType data
#' @usage data(cs2_chains_real)
#' @format A list of length 4, as returned by \code{\link{load_mcmc_chains}}.
#' @family case_study_data
"cs2_chains_real"

#' MCMC chains case study 2, real, for sim
#'
#' MCMC outputs from case study 2 analysis on real data, for the second call of
#' \code{\link{load_mcmc_chains}}. This example data object contains 3 MCMC
#' chains thinned down to 100 samples from the posterior for each chain. The
#' structure of the data is as returned from \code{\link{load_mcmc_chains}}.
#' @docType data
#' @usage data(cs2_chains_real_b)
#' @format A list of length 4, as returned by \code{\link{load_mcmc_chains}}.
#' @family case_study_data
"cs2_chains_real_b"

#' MCMC chains case study 2, sim
#'
#' MCMC outputs from case study 2 analysis on simulated data. This example data
#' object contains 3 MCMC chains thinned down to 100 samples from the posterior
#' for each chain. The structure of the data is as returned from
#' \code{\link{load_mcmc_chains}}.
#' @docType data
#' @usage data(cs2_chains_sim)
#' @format A list of length 4, as returned by \code{\link{load_mcmc_chains}}.
#' @family case_study_data
"cs2_chains_sim"


#' MCMC chains case study 1, real
#'
#' MCMC outputs from case study 1 analysis on real data. This example data
#' object contains 3 MCMC chains thinned down to 100 samples from the posterior
#' for each chain. The structure of the data is as returned from
#' \code{\link{load_mcmc_chains}}.
#' @docType data
#' @usage data(cs1_chains_real)
#' @format A list of length 4, as returned by \code{\link{load_mcmc_chains}}.
#' @family case_study_data
"cs1_chains_real"


#' MCMC chains case study 1, sim
#'
#' MCMC outputs from case study 1 analysis on simulated data. This example data
#' object contains 3 MCMC chains thinned down to 100 samples from the posterior
#' for each chain. The structure of the data is as returned from
#' \code{\link{load_mcmc_chains}}.
#' @docType data
#' @usage data(cs1_chains_sim)
#' @format A list of length 4, as returned by \code{\link{load_mcmc_chains}}.
#' @family case_study_data
"cs1_chains_sim"



#' MCMC chains case study 1, real, for sim
#'
#' MCMC outputs from case study 1 analysis on real data, for the second call of
#' \code{\link{load_mcmc_chains}}. This example data object contains 3 MCMC
#' chains thinned down to 100 samples from the posterior for each chain. The
#' structure of the data is as returned from \code{\link{load_mcmc_chains}}.
#' @docType data
#' @usage data(cs1_chains_real_b)
#' @format A list of length 4, as returned by \code{\link{load_mcmc_chains}}.
#' @family case_study_data
"cs1_chains_real_b"
seroanalytics/serosolver documentation built on Aug. 18, 2024, 12:46 p.m.