#' Get offset from model object
#'
#' @noRd
#'
#' @param model Must be of class glm
#'
#' @return Character string with offset term
#'
#' @keywords internal
get_offset <- function(model) {
if( !inherits(model, "lm") ) {
stop("Input must be of class (g)lm", call. = FALSE)
}
nm1 <- names(attributes(model$terms)$dataClasses)
n_offsets <- sum(lengths(regmatches(nm1, gregexpr("offset", nm1))))
if ( n_offsets > 1 ) {
stop("Length of offset-terms must be equal to 1", call. = FALSE)
}
if( '(offset)' %in% nm1 ) {
deparse(as.list(model$call)$offset)
} else {
out <- sub("offset\\((.*)\\)$", "\\1", grep('offset', nm1, value = TRUE))
if ( identical(out, character(0)) ) {
out <- NULL
}
out
}
}
#' Remove offset term from formula
#'
#' @noRd
#'
#' @param formula Formula of class formula
#'
#' @keywords internal
remove_offset_formula <- function(formula) {
if( !inherits(formula, "formula") ) {
stop("Input must be of class formula", call. = FALSE)
}
proc <- function(x) {
if (length(x) == 1) return(x)
if (x[[1]] == as.name("offset")) return(x[[1]])
replace(x, -1, lapply(x[-1], proc))
}
update(proc(formula), . ~ . - offset)
}
#' Remove risk factor from formula
#'
#' @noRd
#'
#' @param fm Formula of class formula
#' @param remove_term Risk factor to remove
#'
#' @keywords internal
update_formula_remove <- function(fm, remove_term){
if( !inherits(fm, "formula") ) {
stop("Input must be of class formula.", call. = FALSE)
}
if ( !is.character(remove_term) ){
stop("Column must be a character.", call. = FALSE)
}
fm_new <- update(fm, paste("~ . -", remove_term))
if ( identical(fm, fm_new ) ) {
warning("Column '", remove_term, "' must be in model call.\n")
}
fm_new
}
#' Create new offset-term and new formula
#'
#' @param offset_term String obtained from get_offset()
#' @param fm_no_offset Obtained from remove_offset_formula()
#' @param add_term Name of restricted risk factor to add
#'
#' @keywords internal
update_formula_add <- function(offset_term, fm_no_offset, add_term){
if( !inherits(fm_no_offset, "formula") ) {
stop("Input must be of class formula", call. = FALSE)
}
if (is.null(offset_term)){
new_offset <- paste0("log(", add_term, ")")
}
if (!is.null(offset_term)){
new_offset <- paste0("log(", add_term, ")", " + ", offset_term)
}
new_offset_term <- paste0("offset(", new_offset, ")")
new_fm <- update(fm_no_offset, paste("~ . +", new_offset_term))
list(new_fm, new_offset)
}
#' @keywords internal
cut_borders_df <- function(df, col){
if (!col %in% names(df)) stop("Column name must be available in data",
call. = FALSE)
df_vec <- df[[col]]
if( anyNA(df_vec) ){
message("NAs detected in column ", col)
}
suppressWarnings({
s1 <- substr(df_vec, start = 1, stop = 1)
nchr <- if ( is.factor(df_vec) ){ nchar(levels(df_vec)[df_vec]) } else {
nchar(df_vec)
}
e1 <- substr(df_vec, start = nchr, stop = nchr)
m <- substr(df_vec, start = 2, stop = nchr - 1)
df$start_oc <- ifelse(s1 == "(", "open", ifelse(s1 == "[", "closed", NA))
df$end_oc <- ifelse(e1 == ")", "open", ifelse(e1 == "]", "closed", NA))
df$start_ <- as.numeric(gsub("^(.*?),.*", "\\1", m))
df$end_ <- as.numeric(sub('.*,', '', m))
})
df$avg_ <- rowMeans(df[, c('start_', 'end_')], na.rm = TRUE)
return(df)
}
#' @keywords internal
cut_borders_model <- function(model, x_cut){
if ( inherits(model, "glm") ){
rf <- rating_factors1(model)
}
if ( inherits(model, "smooth") ){
rf <- rating_factors1(model$model_out)
}
if ( inherits(model, "restricted")){
rf <- rating_factors1(model$model_out)
}
rf_xcut <- rf[rf$risk_factor == x_cut, ]
cut_borders_df(rf_xcut, "level")
}
#' @noRd
#'
#' @importFrom stats lm
#'
#' @keywords internal
fit_polynomial <- function(borders_model, x_org, degree = NULL, breaks = NULL){
if ( is.null(breaks) ){
breaks <- seq(min(borders_model$start_), max(borders_model$end_),
length.out = nrow(borders_model))
}
# Take halfway points of breaks to fit polynomial
breaks_min <- breaks[-length(breaks)]
breaks_max <- breaks[-1]
breaks_mid <- (breaks + c(breaks[-1], NA)) / 2
breaks_mid <- breaks_mid[!is.na(breaks_mid)]
unique_borders <- unique(c(breaks_min, breaks_max))
levels_borders <- levels(cut(breaks_min, breaks = unique_borders,
include.lowest = TRUE, dig.lab = 9))
lm_poly <- lm(estimate ~ poly(avg_, degree = degree), data = borders_model)
new_poly_df <- data.frame(avg_ = breaks_mid)
poly_line <- data.frame(avg_ = breaks)
poly_line$yhat <- as.numeric(predict(lm_poly, poly_line))
new_poly_df$yhat <- as.numeric(predict(lm_poly, new_poly_df))
new_poly_df$breaks_min <- breaks_min
new_poly_df$breaks_max <- breaks_max
new_poly_df$cuts <- levels_borders
new_poly_df$risk_factor <- paste0(x_org, "_smooth")
colnames(new_poly_df)[1] <- x_org
# new_colname_cat <- paste0(x_org, "_cat_new")
new_colname_cat <- paste0(x_org, "_smooth")
colnames(new_poly_df)[5] <- new_colname_cat
new_poly_df <- cut_borders_df(new_poly_df, new_colname_cat)
colnames(poly_line)[1] <- x_org
new_rf <- new_poly_df[, c("risk_factor", new_colname_cat, "yhat")]
colnames(new_rf)[2] <- "level"
list(new_poly_df = new_poly_df, poly_line = poly_line, new_rf = new_rf)
}
#' Join new data to nearest data point
#'
#' @noRd
#'
#' @param dat data.frame with model data
#' @param reference data.frame with polynomial fit
#' @param x character string to join on
#'
#' @importFrom data.table data.table
#' @importFrom data.table setnames
#' @importFrom data.table setkeyv
#' @importFrom data.table foverlaps
#' @importFrom data.table setcolorder
#'
#' @keywords internal
join_to_nearest <- function(dat, reference, x){
ref <- data.table::data.table(reference)
dat <- data.table::data.table(dat)
# due to NSE notes in R CMD check
end_oc = lookup_start = lookup_start2 = start_oc = idkey = NULL
ref[start_oc == "open", breaks_min := breaks_min + 1e-6][
end_oc == "open", breaks_max := breaks_max - 1e-6][
, c(x) := NULL]
data.table::setnames(dat, x, "lookup_start")
dat[, lookup_start2 := lookup_start]
data.table::setkeyv(ref, c("breaks_min", "breaks_max"))
data.table::setkeyv(dat, c("lookup_start", "lookup_start2"))
if( anyNA( dat[["lookup_start"]]) ){
message("NAs detected in column ", x)
dat[, idkey := seq_along(lookup_start)]
dat2 <- dat[!is.na(lookup_start2)]
fov <- data.table::foverlaps(dat2, ref, type = "within",
mult = "all",
nomatch = 0L)
keycols <- names(dat)
data.table::setkeyv(fov, keycols)
data.table::setkeyv(dat, keycols)
fov <- fov[dat][, idkey := NULL]
}
if( !anyNA( dat[["lookup_start"]]) ){
fov <- data.table::foverlaps(dat, ref, type = "within",
mult = "all",
nomatch = 0L)
}
fov[start_oc == "open", breaks_min := breaks_min - 1e-6][
end_oc == "open", breaks_max := breaks_max + 1e-6][
, lookup_start2 := NULL]
data.table::setnames(fov, "lookup_start", x)
data.table::setcolorder(fov, x)
as.data.frame(fov)
}
#' Join restricted data to model data
#'
#' @noRd
#'
#' @param model_data data.frame with original model data
#' @param restrictions_df data.frame with two columns, column 1 must include
#' the levels of the risk factor, and column 2 must include the new restricted
#' coefficients.
#'
#' @importFrom dplyr left_join
#'
#' @keywords internal
add_restrictions_df <- function(model_data, restrictions_df){
rcol1 <- names(restrictions_df)[1]
rcol2 <- names(restrictions_df)[2]
if ( !rcol1 %in% names(model_data) ) {
stop("Can't find column '", rcol1, "' in model call.", call. = FALSE )
}
if ( ncol(restrictions_df) != 2){
stop("Number of columns must be equal to 2.", call. = FALSE)
}
if ( length(unique(restrictions_df[[1]])) != nrow(restrictions_df) ) {
stop(rcol1, " in restricted data must have unique values.", call. = FALSE)
}
if ( rcol2 %in% names(model_data) ) {
stop("Column '", rcol2,
"' in restricted data must be different from existing columns.",
call. = FALSE)
}
if ( is.factor(model_data[[rcol1]]) ){
restrictions_df[[rcol1]] <- as.factor(restrictions_df[[rcol1]])
}
model_df_restrictions <- dplyr::left_join(model_data, restrictions_df,
by = rcol1)
if ( anyNA(model_df_restrictions[[rcol2]]) ) {
warning("Can't match all existing factor levels to new levels.\n")
}
model_df_restrictions
}
#' Create one data.frame from multiple restriction data.frames
#'
#' @noRd
#'
#' @param restricted_df data.frame with restrictions
#'
#' @keywords internal
restrict_df <- function(restricted_df){
restricted_df$risk_factor <- colnames(restricted_df)[2]
colnames(restricted_df)[2] <- "yhat"
colnames(restricted_df)[1] <- "level"
restricted_df$level <- as.character(restricted_df$level)
restricted_df
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.