R/utils_model_design.R

Defines functions add_response add_independent_unit add_effect_counts get_protein_matrix add_protein_design get_random_design get_fixed_design get_empty_design get_full_design get_design_matrices get_candidate_formula parse_formula

Documented in add_effect_counts add_independent_unit add_protein_design add_response get_candidate_formula get_design_matrices get_empty_design get_fixed_design get_full_design get_protein_matrix get_random_design parse_formula

#' Get design matrices and response from formula and input data
#' @param model_formula formula
#' @param model_data data.table
#' @return list
#' @keywords internal
parse_formula = function(model_formula, model_data) {
    formula_chr = as.character(model_formula)
    has_random = any(grepl("|", model_formula, fixed = TRUE))
    has_fixed = !all(grepl("|", setdiff(attr(terms(model_formula),
                                             "term.labels"), c("..", "log(..)")), fixed = TRUE))
    has_fixed = has_fixed | attr(terms(model_formula), "intercept") == 1
    candidate_formula = get_candidate_formula(formula_chr)
    design_matrices = get_design_matrices(candidate_formula, has_random, has_fixed, model_data)
    design_matrices
}


#' Get updated formula for use in lmer
#' @param formula_chr as.character(formula)
#' @keywords internal
get_candidate_formula = function(formula_chr) {
    if (grepl("log", formula_chr[3])) {
        formula_chr[3] = gsub("+ log(..) ", "", formula_chr[3], fixed = TRUE)
        if (grepl("log", formula_chr[3])) {
            if (!grepl("-1", formula_chr[3]) | grepl("0", formula_chr[3])) {
                formula_chr[3] = gsub("log(..)", "1", formula_chr[3], fixed = TRUE)
            } else {
                formula_chr[3] = gsub("log(..)", "", formula_chr[3], fixed = TRUE)
            }
        }
    } else {
        formula_chr[3] = gsub("+ .. ", "", formula_chr[3], fixed = TRUE)
        if (grepl("..", formula_chr[3])) {
            if (!grepl("-1", formula_chr[3]) | grepl("0", formula_chr[3])) {
                formula_chr[3] = gsub("..", "1", formula_chr[3], fixed = TRUE)
            } else {
                formula_chr[3] = gsub("..", "", formula_chr[3], fixed = TRUE)
            }
        }
    }
    formula_chr = paste(formula_chr[c(2, 1, 3)], collapse = " ")
    as.formula(formula_chr)
}


#' Get design matrices
#' @param candidate_formula formula
#' @param has_random if TRUE, there are random effects
#' @param has_fixed, if TRUE, there are fixed effects
#' @param model_data data.table
#' @keywords internal
get_design_matrices = function(candidate_formula, has_random,
                               has_fixed, model_data) {
    if (has_random & has_fixed) {
        design_matrices = get_full_design(candidate_formula, model_data)
    } else if (!has_random & !has_fixed) {
        design_matrix = get_empty_design(model_data)
    } else if (!has_random & has_fixed) {
        design_matrices = get_fixed_design(candidate_formula, model_data)
    } else {
        design_matrices = get_random_design(candidate_formula, model_data)
    }
    design_matrices = add_protein_design(design_matrices, model_data)
    design_matrices = add_effect_counts(design_matrices, candidate_formula,
                                        model_data)
    design_matrices = add_independent_unit(design_matrices, model_data)
    design_matrices = add_response(design_matrices, candidate_formula,
                                   model_data)
    design_matrices
}


#' Design matrices for full model
#' @inheritParams get_design_matrices
#' @keywords internal
get_full_design = function(candidate_formula, model_data) {
    lmer_model = lme4::lmer(candidate_formula, data = model_data)
    fixed_design = lme4::getME(lmer_model, "X")
    random_design = lme4::getME(lmer_model, "Z")
    list(fixed = fixed_design,
         random = as.matrix(random_design))
}


#' Design matrices for model with just proteins
#' @inheritParams get_design_matrices
#' @keywords internal
get_empty_design = function(model_data) {
    list(fixed = NULL,
         random = NULL)
}

#' Get design matrices for model with no random effects
#' @inheritParams get_design_matrices
#' @keywords internal
get_fixed_design = function(candidate_formula, model_data) {
    fixed_design = model.matrix(candidate_formula, data = model_data)
    list(fixed = fixed_design,
         random = NULL)
}


#' Get design matrices for model with no fixed effects
#' @inheritParams get_design_matrices
#' @keywords internal
get_random_design = function(candidate_formula, model_data) {
    lmer_model = lme4::lmer(candidate_formula, data = model_data)
    random_design = lme4::getME(lmer_model, "Z")
    list(fixed = NULL,
         random = as.matrix(random_design))
}


#' Add design matrix for proteins
#' @param design_matrices list of design matrices
#' @inheritParams get_design_matrices
#' @keywords internal
add_protein_design = function(design_matrices, model_data) {
    protein_design = get_protein_matrix(model_data)
    design_matrices[["protein"]] = protein_design
    design_matrices
}


#' Create design matrix for proteins
#' @inheritParams get_design_matrices
#' @keywords internal
get_protein_matrix = function(model_data) {
    protein_cols = setdiff(colnames(model_data),
                           c("intensity", "y", "isoDistr", "iso_prob",
                             "precursor_scan", "charge_in_scan", "charge",
                             "rep", "sequence", "iso_id", "sequence_in_rep"))
    protein_formula = paste("intensity ~ -1 +",
                            paste(protein_cols, collapse = " + "))
    model.matrix(as.formula(protein_formula), data = model_data)
}


#' Add counts of levels of random effects to list of design matrices
#' @inheritParams add_protein_design
#' @inheritParams get_design_matrices
#' @import data.table
#' @keywords internal
add_effect_counts = function(design_matrices, candidate_formula, model_data) {
    if (!is.null(design_matrices[["random"]])) {
        column_names = colnames(design_matrices$random)
        all_effects = strsplit(as.character(candidate_formula)[3],
                               " + ", fixed = TRUE)[[1]]
        random_effects = all_effects[grepl("|", all_effects, fixed = TRUE)]
        random_effects = gsub("[\\(\\)1 \\|]", "", random_effects)
        random_effects


        random_effects_order = sapply(random_effects, function(x) {
            min(which(column_names %in% unique(model_data[[x]])))
        })
        random_effects = random_effects[order(random_effects_order)]
        random_effects_counts = model_data[, lapply(.SD, data.table::uniqueN),
                                           .SDcols = random_effects]
        random_effects_counts = unlist(random_effects_counts, use.names = TRUE)
    } else {
        random_effects_counts = integer()
    }
    design_matrices[["counts"]] = random_effects_counts
    design_matrices
}


#' Add vector with independent unit to list of design matrices
#' @inheritParams add_protein_design
#' @keywords internal
add_independent_unit = function(design_matrices, model_data) {
    # TODO: make it general?
    if (is.null(design_matrices[["random"]])) {
        unit = character(0)
    } else {
        random_effects_names = names(design_matrices[["counts"]])
        counts = sapply(model_data[, random_effects_names, with = FALSE],
                        data.table::uniqueN)
        independent_unit = random_effects_names[which.min(counts)]
        unit = as.character(model_data[[independent_unit]])
    }
    design_matrices[["independent_unit"]] = unit
    design_matrices
}


#' Add response to list of design matrices
#' @inheritParams add_protein_design
#' @param model_formula formula
#' @keywords internal
add_response = function(design_matrices, model_formula, model_data) {
    response_chr = as.character(model_formula)[2]
    response = eval(str2expression(response_chr), envir = model_data)
    design_matrices[["response"]] = response
    design_matrices
}
mstaniak/SharedPeptidesIso documentation built on Dec. 21, 2021, 10:11 p.m.