#' Feature Generation
#'
#' @param V Total expected number of claims.
#' @param LoB.dist Categorical distribution for the allocation of the claims to the four lines of business.
#' @param inflation Grow parameters (per LOB) for the numbers of claims in the 12 accident years.
#' @param seed Random seed.
#' @keywords internal
Feature.Generation <- function(V = 1000000, LoB.dist = c(0.25, 0.30, 0.20, 0.25),
inflation = c(0.01, 0.01, 0.01, 0.01), seed = 100) {
### Weights in LoB.dist have to be nonnegative
if (any((LoB.dist) < 0))
stop("The weights determining the distribution amongst the lines of business cannot be negative.",
call. = FALSE)
# We will store the number of claims for each combination of accident year and line of business in V.dist
V.dist <- array(NA, c(13, 4))
# Determine the number of claims per line of business
maybe_set_seed(seed)
V.dist[1, ] <- stats::rmultinom(n = 1, size = V, prob = LoB.dist)
# Determine the number of claims per accident year (for all lines of business)
V.dist[2, ] <- 0
maybe_set_seed(seed + 1)
V.dist[-c(1, 2), ] <- stats::rnorm(44, mean = rep(inflation, each = 11), sd = rep(abs(inflation), each = 11))
V.dist[-c(1, 2), ] <- apply(V.dist[-c(1, 2), ], 2, cumsum)
V.dist[-1, ] <- exp(V.dist[-1, ])
V.dist[-1, ] <- t(t(V.dist[-1, ]) / colSums(V.dist[-1, ]))
V.dist[-1, ] <- t(t(V.dist[-1, ]) * V.dist[1, ])
maybe_set_seed(seed + 2)
V.dist[-1, ] <- stats::rpois(n = 48, lambda = V.dist[-1, ])
V.dist[1, ] <- colSums(V.dist[-1, ])
# Create the array where we will store the observations
features <- as.data.frame(array(NA, c(sum(V.dist[1, ]), 7)))
colnames(features) <- c("ClNr", "LoB", "cc", "AY", "AQ", "age", "inj_part")
# Store line of business
features[, 2] <- rep(1:4, times = V.dist[1, ])
# Store accident year
features[, 4] <- rep(which(V.dist[-1, ] > 0) %% 12, times = V.dist[-1, ][which(V.dist[-1, ] > 0)])
features[, 4][which(features[, 4] == 0)] <- 12
features[, 4] <- 1993 + features[, 4]
# Add artificial observations that prevent data sets below from being empty
features[nrow(features) + 1:2, ] <- c(-1, -1, 1, 1, NA, NA, 1994, 1994, NA, NA, NA, NA, NA, NA)
features[nrow(features) + 1:2, ] <- c(-1, -1, 3, 3, NA, NA, 1994, 1994, NA, NA, NA, NA, NA, NA)
# Feature generation for LOB 1 and 2
features_12 <- generate_features_lob(
features = features,
lobs = 1:2,
Sigma = .lob_12$Covariances$Covariance,
param_cc = .lob_12$Parameters$cc,
translator_cc = .lob_12$Translators$cc,
param_age = .lob_12$Parameters$age,
param_inj_part = .lob_12$Parameters$inj_part,
translator_inj_part = .lob_12$Translators$inj_part,
f.cc = f.cc_12,
f.age1 = f.age1_12,
f.age2 = f.age2_12,
f.inj_part = f.inj_part_12,
seed = seed + 3
)
features[which(features$LoB <= 2), c(3, 5, 6, 7)] <- features_12
# Feature generation for LOB 3 and 4
features_34 <- generate_features_lob(
features = features,
lobs = 3:4,
Sigma = .lob_34$Covariances$Covariance,
param_cc = .lob_34$Parameters$cc,
translator_cc = .lob_34$Translators$cc,
param_age = .lob_34$Parameters$age,
param_inj_part = .lob_34$Parameters$inj_part,
translator_inj_part = .lob_34$Translators$inj_part,
f.cc = f.cc_34,
f.age1 = f.age1_34,
f.age2 = f.age2_34,
f.inj_part = f.inj_part_34,
seed = seed + 4
)
features[which(features$LoB > 2), c(3, 5, 6, 7)] <- features_34
# Order the data: first random order then order according to the accident year AY
maybe_set_seed(seed + 5)
order1 <- sample(1:nrow(features), nrow(features))
features <- features[order1, ]
order2 <- order(features$AY)
features <- features[order2, ]
# Remove the artificial observations
features <- features[-which(features$ClNr == -1), ]
# Convert LoB, cc and inj_part to factors
features[nrow(features) + 1, ] <- c(NA, 1, 15, 1994, 4, 37, 30) ### add an additional observation for the case that features is empty
features$LoB <- factor(features$LoB)
features$cc <- factor(features$cc)
features$inj_part <- factor(features$inj_part)
# Number the claims from 1 to nrow(features)
features[, 1] <- 1:nrow(features)
# Adjust the rownames
rownames(features) <- paste("", 1:nrow(features), sep = "")
features <- features[-nrow(features), ] ### delete the additional observation
features
}
generate_features_lob <- function(features,
lobs,
# parameters
Sigma, param_cc, translator_cc, param_age, param_inj_part,
translator_inj_part,
# functions
f.cc, f.age1, f.age2, f.inj_part,
seed) {
# Generate observations from a multivariate normal distribution
# maybe_set_seed(seed + 3)
maybe_set_seed(seed)
features_new <- MASS::mvrnorm(n = nrow(features[which(features$LoB %in% lobs), ]), mu = rep(0, 4), Sigma = Sigma)
# Transform marginals such that they have a uniform distribution on [0,1]
features_new <- stats::pnorm(features_new, 0, 1)
# Transform marginals such that they have the appropriate distribution
# Claim code
features_new[, 1] <- ceiling(f.cc(features_new[, 1], alpha = param_cc[1], beta = param_cc[2], const = param_cc[3]))
features_new[, 1] <- translator_cc[features_new[, 1]]
# Accident quarter (AQ)
features_new[, 2] <- ceiling(features_new[, 2] * 4)
# Age of the injured
features_new[which(features_new[, 3] <= param_age[5]), 3] <- ceiling(f.age1(features_new[which(features_new[, 3] <= param_age[5]), 3], alpha = param_age[1], beta = param_age[2], const = param_age[4]))
features_new[which(features_new[, 3] <= 1), 3] <- ceiling(f.age2(features_new[which(features_new[, 3] <= 1), 3], alpha = param_age[1], beta = param_age[2], gamma = param_age[3], const = param_age[4]))
# Injured part
features_new[, 4] <- ceiling(f.inj_part(features_new[, 4], alpha = param_inj_part[1], beta = param_inj_part[2], const = param_inj_part[3]))
features_new[, 4] <- translator_inj_part[features_new[, 4]]
features_new
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.