Nothing
#' A function to prepare mortality data with stratification (e.g. by product, sex/gender, country, cause of death etc.)
#'
#' Construct a 3-dimensional data array (dim 1: strata, dim 2: ages, dim 3: years) from raw data frames/arrays suitable for fitting various Bayesian Stochastic Mortality Models.
#' @param data_df_array data (normally death counts or exposures) to be provided either as a **data frame** or a **3-dimensional array**. If providing data frame, it should be structured as (column 1: ages, column 2: years, column 3: death/expo data, column 4: strata); if providing array, it should be as (dim 1: strata, dim 2: ages, dim 3: years). \cr
#' If only one stratum (AP data), either remove column 4 or ensure it contains only one stratum label. Alternatively, one can also provide a **2-dimensional matrix** (age\eqn{\times }year) but please set \code{data_matrix=TRUE}.
#' @param strat_name a vector of strings indicating names of each stratum.
#' @param ages a numeric vector indicating which ages to use.
#' @param years a numeric vector indicating which years to use.
#' @param data_matrix a logical value indicating if a 2-dimensional matrix (age\eqn{\times }year) is supplied.
#' @param round a logical value indicating whether data entries should be rounded.
#' @return A list with components:
#' \describe{
#' \item{\code{data}}{A 3-dimensional data array (dim 1: strata, dim 2: ages, dim 3: years).}
#' \item{\code{strat_name}}{A vector of strings describing the names of each stratum.}
#' \item{\code{ages}}{A numeric vector describing the ages used.}
#' \item{\code{years}}{A numeric vector describing the years used.}
#' \item{\code{n_strat}}{A numeric value describing the number of strata.}
#' \item{\code{n_ages}}{A numeric value describing the number of ages.}
#' \item{\code{n_years}}{A numeric value describing the number of years.}
#' }
#' @keywords utilities
#' @concept data preprocessing
#' @concept data formatting
#' @concept mortality data
#' @concept stratification
#' @importFrom dplyr filter
#' @importFrom magrittr `%>%`
#' @importFrom rlang .data
#' @export
#' @examples
#' ##################
#' ##if p>1 (more than 1 strata)
#' ##################
#'
#' #Input: data.frame
#' data("data_summarised")
#' head(data_summarised)
#'
#' #prepare death data
#' death<-preparedata_fn(data_summarised[,c("Age","Year","Claim","Product")],
#' strat_name = c("ACI","DB","SCI"),ages=35:65)
#' #prepare exposure data
#' expo<-preparedata_fn(data_summarised[,c("Age","Year","Exposure","Product")],
#' strat_name = c("ACI","DB","SCI"),ages=35:65)
#'
#' #visualise data
#' str(death);str(expo)
#'
#' #works also if data.frame only contains only 1 stratum
#' death<-preparedata_fn(data_summarised[,
#' c("Age","Year","Claim","Product")][data_summarised$Product=="ACI",],ages=35:65)
#'
#' expo<-preparedata_fn(data_summarised[,
#' c("Age","Year","Exposure","Product")][data_summarised$Product=="ACI",],ages=35:65)
#'
#' str(death);str(expo)
#'
#' #Input: 3D data array
#' data("dxt_array_product");data("Ext_array_product")
#' death<-preparedata_fn(dxt_array_product,ages=35:65)
#' expo<-preparedata_fn(Ext_array_product,ages=35:65)
#'
#' str(death);str(expo)
#'
#' ##################
#' ##if p=1 (only 1 stratum)
#' ##################
#'
#' #specifying only one of the strats from the data.frame
#' death<-preparedata_fn(data_summarised[,c("Age","Year","Claim","Product")],
#' strat_name = "ACI",ages=35:65)
#' expo<-preparedata_fn(data_summarised[,c("Age","Year","Exposure","Product")],
#' strat_name = "ACI",ages=35:65)
#' str(death);str(expo)
#'
#' #if data.frame only contains 1 strat (4 columns)
#' death<-preparedata_fn(data_summarised[,c("Age","Year","Claim","Product")]
#' [data_summarised$Product=="ACI",],ages=35:65)
#' expo<-preparedata_fn(data_summarised[,c("Age","Year","Exposure","Product")]
#' [data_summarised$Product=="ACI",],ages=35:65)
#' str(death);str(expo)
#'
#' #if data.frame only contains 1 strat (3 columns)
#' death<-preparedata_fn(data_summarised[,c("Age","Year","Claim")]
#' [data_summarised$Product=="ACI",],ages=35:65)
#' expo<-preparedata_fn(data_summarised[,c("Age","Year","Exposure")]
#' [data_summarised$Product=="ACI",],ages=35:65)
#' str(death);str(expo)
#'
#' #Input: 3D data array
#' death<-preparedata_fn(dxt_array_product,strat_name="ACI",ages=35:65)
#' expo<-preparedata_fn(Ext_array_product,strat_name="ACI",ages=35:65)
#' str(death);str(expo)
#'
#' #Input: 2D matrix
#' death<-preparedata_fn(dxt_array_product["ACI",,],data_matrix=TRUE,ages=35:65)
#' expo<-preparedata_fn(Ext_array_product["ACI",,],data_matrix=TRUE,ages=35:65)
#' str(death);str(expo)
preparedata_fn<-function(data_df_array,data_matrix=FALSE,strat_name=NULL,
ages=NULL,years=NULL,round=TRUE){
if (data_matrix){
original_dimname<-dimnames(data_df_array)
temp<-data_df_array
p<-1
A<-dim(temp)[1]
T<-dim(temp)[2]
if (is.null(strat_name)){strat_name<-paste0("strat_",1)}
if (!is.null(original_dimname)){
#data_df_array<-provideDimnames(data_df_array,base=list(strat_name,original_dimname[[1]],original_dimname[[2]]))
if (!is.null(ages)){
A<-length(ages)
ages_name<-as.character(ages)
#data_df_array<-data_df_array[,as.character(ages),,drop=FALSE]
temp<-temp[ages_name,]
}else{
ages<-as.numeric(original_dimname[[1]])
ages_name<-original_dimname[[1]]
}
if (!is.null(years)){
T<-length(years)
years_name<-as.character(years)
#data_df_array<-data_df_array[,,as.character(years),drop=FALSE]
temp<-temp[,years_name]
}else{
years<-as.numeric(original_dimname[[2]])
years_name<-original_dimname[[2]]
}
}
else {
if (is.null(strat_name)){
strat_name<-paste0("strat_",1)
}
if (is.null(ages)){
ages<-1:A
ages_name<-as.character(ages)
} else {
if (length(ages)<A){
min_age<-min(ages)
#data_df_array<-data_df_array[,ages-min_age+1,,drop=FALSE]
temp<-temp[ages-min_age+1,]
ages_name<-as.character(ages)
A<-length(ages)
} else if (length(ages)==A){
ages_name<-as.character(ages)}
else {stop("No such ages in the data.")}
}
if (is.null(years)){
years<-1:T
years_name<-as.character(years)
} else {
if (length(years)<T){
min_year<-min(years)
#data_df_array<-data_df_array[,,years-min_year+1,drop=FALSE]
temp<-temp[,years-min_year+1]
years_name<-as.character(years)
T<-length(years)
} else if (length(years)==T){
years_name<-as.character(years)}
else {stop("No such years in the data.")}
}
}
data_df_array<-array(dim=c(1,A,T))
data_df_array[1,,]<-temp
data_df_array<-provideDimnames(data_df_array,base=list(strat_name,ages_name,years_name))
} else {
dim_data<-dim(data_df_array)
#data supplied in cube/3D-array
if(length(dim_data)==3){
p<-dim_data[1]
A<-dim_data[2]
T<-dim_data[3]
original_dimname<-dimnames(data_df_array)
temp<-data_df_array
if (!is.null(original_dimname)){
if (!is.null(strat_name)){
data_df_array<-data_df_array[strat_name,,,drop=FALSE]
p<-length(strat_name)
}else{strat_name<-original_dimname[[1]]}
if (!is.null(ages)){
data_df_array<-data_df_array[,as.character(ages),,drop=FALSE]
A<-length(ages)
}else{ages<-as.numeric(original_dimname[[2]])}
if (!is.null(years)){
data_df_array<-data_df_array[,,as.character(years),drop=FALSE]
T<-length(years)
}else{years<-as.numeric(original_dimname[[3]])}
}
else {
if (is.null(strat_name)){
strat_name<-paste0("strat_",1:p)
}
if (is.null(ages)){
ages<-1:A
ages_name<-as.character(ages)
} else {
if (length(ages)<A){
min_age<-min(ages)
data_df_array<-data_df_array[,ages-min_age+1,,drop=FALSE]
ages_name<-as.character(ages)
A<-length(ages)
} else if (length(ages)==A){
ages_name<-as.character(ages)}
else {stop("No such ages in the data.")}
}
if (is.null(years)){
years<-1:T
years_name<-as.character(years)
} else {
if (length(years)<T){
min_year<-min(years)
data_df_array<-data_df_array[,,years-min_year+1,drop=FALSE]
years_name<-as.character(years)
T<-length(years)
} else if (length(years)==T){
years_name<-as.character(years)}
else {stop("No such years in the data.")}
}
data_df_array<-provideDimnames(data_df_array,base=list(strat_name,ages_name,years_name))
}
} #data supplied in 2D-data frame
else if (length(dim_data)==2){
if (all(class(data_df_array)!="data.frame")) stop ("Please provide a data.frame object.")
if (ncol(data_df_array)==4){
#2D-data frame with 4 columns (APP data)
names(data_df_array)<-c("Age","Year","Data","Strat")
p<-length(unique(data_df_array$Strat))
Strat_name<-unique(data_df_array$Strat)
if(is.null(ages)){
ages <- unique(data_df_array$Age)
}
if(any(is.na(ages))){stop("ages contain NA!")}
if(is.null(years)){
years <- unique(data_df_array$Year)
}
if(any(is.na(years))){stop("years contain NA!")}
if(is.null(strat_name)){
strat_name <- unique(data_df_array$Strat)
}
if(any(is.na(strat_name))){stop("strata contain NA!")}
data_df_array<-as.data.frame(data_df_array)
data_df_array <- magrittr::`%>%`(
data_df_array,
dplyr::filter(.data$Age %in% ages,
.data$Year %in% years,
.data$Strat %in% strat_name)
)
for (i in 1:p){
assign(paste0("data_",Strat_name[i]),data_df_array[data_df_array$Strat==Strat_name[i],])
assign(paste0(Strat_name[i],"_length"),dim(get(paste0("data_",Strat_name[i])))[1])
}
#min_age<-min(data_df_array$Age);max_age<-max(data_df_array$Age)
#A<-max_age-min_age+1
#min_year<-min(data_df_array$Year);max_year<-max(data_df_array$Year)
#T<-max_year-min_year+1
A<-length(ages);T<-length(years)
data_df_array<-array(dim=c(p,A,T))
for (i in 1:p){
#assign(paste0("data_df_array_",Strat_name[i]),matrix(NA,nrow=A,ncol=T))
temp_temp_data<-matrix(NA,nrow=A,ncol=T)
temp_data<-get(paste0("data_",Strat_name[i]))
for (j in 1:get(paste0(Strat_name[i],"_length"))){
#index_i<-temp_data$Age[j]-min_age+1
#index_j<-temp_data$Year[j]-min_year+1
#assign(paste0("data_df_array_",Strat_name[i],"[",index_i,index_j,"]"),temp_data[j])
index_i<-which(temp_data$Age[j]==ages)
index_j<-which(temp_data$Year[j]==years)
temp_temp_data[index_i,index_j]<-temp_data$Data[j]
#rownames(temp_temp_data)<-as.character((1:A)+min_age-1)
#colnames(temp_temp_data)<-as.character((1:T)+min_year-1)
#assign(paste0("data_df_array_",Strat_name[i]),temp_temp_data)
}
data_df_array[i,,]<-temp_temp_data
}
#ages_name<-as.character((1:A)+min_age-1)
#years_name<-as.character((1:T)+min_year-1)
ages_name<-as.character(ages)
years_name<-as.character(years)
data_df_array<-provideDimnames(data_df_array,base=list(Strat_name,ages_name,years_name))
if (!is.null(strat_name)){
p<-length(strat_name)
if (p==1){
temp<-data_df_array[strat_name,,]
data_df_array<-array(dim=c(1,A,T))
data_df_array[1,,]<-temp
data_df_array<-provideDimnames(data_df_array,base=list(strat_name,ages_name,years_name))
}
else {
data_df_array<-data_df_array[strat_name,,]
}
} else{
strat_name<-Strat_name
}
if (!is.null(ages)){
if(p==1){
temp<-data_df_array[,as.character(ages),]
A<-length(ages)
data_df_array<-array(dim=c(1,A,T))
data_df_array[1,,]<-temp
data_df_array<-provideDimnames(data_df_array,base=list(strat_name,as.character(ages),years_name))
}else{
data_df_array<-data_df_array[,as.character(ages),]
A<-length(ages)}
} else{
ages<-as.numeric(ages_name)
}
if (!is.null(years)){
if (p==1){
temp<-data_df_array[,,as.character(years)]
T<-length(years)
data_df_array<-array(dim=c(1,A,T))
data_df_array[1,,]<-temp
data_df_array<-provideDimnames(data_df_array,base=list(strat_name,as.character(ages),as.character(years)))
}else{
data_df_array<-data_df_array[,,as.character(years)]
T<-length(years)}
} else{
years<-as.numeric(years_name)
}
} else if (ncol(data_df_array)==3) {
#2D-data frame with 3 columns (AP data)
names(data_df_array)<-c("Age","Year","Data")
p<-1
if (is.null(strat_name)){Strat_name<-paste0("strat_",1)}
data_df_array$Strat<-Strat_name
if(is.null(ages)){
ages <- unique(data_df_array$Age)
}
if(any(is.na(ages))){stop("ages contain NA!")}
if(is.null(years)){
years <- unique(data_df_array$Year)
}
if(any(is.na(years))){stop("years contain NA!")}
if(is.null(strat_name)){
strat_name <- unique(data_df_array$Strat)
}
if(any(is.na(strat_name))){stop("strata contain NA!")}
data_df_array<-as.data.frame(data_df_array)
data_df_array <- magrittr::`%>%`(
data_df_array,
dplyr::filter(.data$Age %in% ages,
.data$Year %in% years,
.data$Strat %in% strat_name)
)
for (i in 1:p){
assign(paste0("data_",Strat_name[i]),data_df_array[data_df_array$Strat==Strat_name[i],])
assign(paste0(Strat_name[i],"_length"),dim(get(paste0("data_",Strat_name[i])))[1])
}
min_age<-min(data_df_array$Age);max_age<-max(data_df_array$Age)
A<-max_age-min_age+1
min_year<-min(data_df_array$Year);max_year<-max(data_df_array$Year)
T<-max_year-min_year+1
data_df_array<-array(dim=c(p,A,T))
for (i in 1:p){
#assign(paste0("data_df_array_",Strat_name[i]),matrix(NA,nrow=A,ncol=T))
temp_temp_data<-matrix(NA,nrow=A,ncol=T)
temp_data<-get(paste0("data_",Strat_name[i]))
for (j in 1:get(paste0(Strat_name[i],"_length"))){
index_i<-temp_data$Age[j]-min_age+1
index_j<-temp_data$Year[j]-min_year+1
#assign(paste0("data_df_array_",Strat_name[i],"[",index_i,index_j,"]"),temp_data[j])
temp_temp_data[index_i,index_j]<-temp_data$Data[j]
#rownames(temp_temp_data)<-as.character((1:A)+min_age-1)
#colnames(temp_temp_data)<-as.character((1:T)+min_year-1)
#assign(paste0("data_df_array_",Strat_name[i]),temp_temp_data)
}
data_df_array[i,,]<-temp_temp_data
}
ages_name<-as.character((1:A)+min_age-1)
years_name<-as.character((1:T)+min_year-1)
data_df_array<-provideDimnames(data_df_array,base=list(Strat_name,ages_name,years_name))
if (!is.null(strat_name)){
p<-length(strat_name)
if (p==1){
temp<-data_df_array[strat_name,,]
data_df_array<-array(dim=c(1,A,T))
data_df_array[1,,]<-temp
data_df_array<-provideDimnames(data_df_array,base=list(strat_name,ages_name,years_name))
}
else {
data_df_array<-data_df_array[strat_name,,]
}
} else{
strat_name<-Strat_name
}
if (!is.null(ages)){
if(p==1){
temp<-data_df_array[,as.character(ages),]
A<-length(ages)
data_df_array<-array(dim=c(1,A,T))
data_df_array[1,,]<-temp
data_df_array<-provideDimnames(data_df_array,base=list(strat_name,as.character(ages),years_name))
}else{
data_df_array<-data_df_array[,as.character(ages),]
A<-length(ages)}
} else{
ages<-as.numeric(ages_name)
}
if (!is.null(years)){
if (p==1){
temp<-data_df_array[,,as.character(years)]
T<-length(years)
data_df_array<-array(dim=c(1,A,T))
data_df_array[1,,]<-temp
data_df_array<-provideDimnames(data_df_array,base=list(strat_name,as.character(ages),as.character(years)))
}else{
data_df_array<-data_df_array[,,as.character(years)]
T<-length(years)}
} else{
years<-as.numeric(years_name)
}
} else {
stop("Data supplied incorrectly.")
}
} else{
stop("Data supplied incorrectly.")
}}
if(round) data_df_array<-round(data_df_array)
if(any(is.na(data_df_array))){warning("Data contains NA!")}
list(data=data_df_array,strat_name=strat_name,ages=ages,years=years,n_strat=p,n_ages=A,n_years=T)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.