#' Runs CAPE
#'
#' This function takes in a data object and genotype object that
#' have been formatted for cape, as well as a string identifying
#' a parameter file. It runs cape on the data using the parameters
#' specified in the file.
#'
#' This function assumes you already have all required libraries
#' and functions loaded.
#'
#' @param pheno_obj the cape object holding the phenotype data returned by
#' @param geno_obj the genotype object
#' @param results_file the name of the saved data_obj RDS file. The base
#' name is used as the base name for all saved RDS files.
#' @param p_or_q A threshold indicating the maximum adjusted p value
#' considered significant. Or, if FDR p value correction is used, the
#' the maximum q value considered significant.
#' @param n_cores The number of CPUs to use if run_parallel is set to TRUE
#' @param initialize_only, If TRUE, cape will not be run. Instead an initialized
#' data object will be returned. This data object will contain normalized and mean-centered
#' trait values, and eigentraits, and will have covariates specified. However, the
#' singlescan, pairscan, etc. will not be run.
#' @param verbose Whether progress should be printed to the screen
#' @param run_parallel boolean, if TRUE runs certain parts of the code as parallel blocks
#' @param param_file yaml full path to the parameter file
#' @param yaml_params yaml string containing the parameters. Either the param_file or
#' yaml_params can be null.
#' @param results_path path that results should be written to.
#' @param plot_pdf boolean, TRUE by default. If FALSE no pdf will be generated by the analysis.
#'
#' @return This function invisibly returns the data object with all final
#' data included. In addition, data saved to the data_obj$results_path directory
#'
#' @importFrom utils read.table
#'
#' @examples
#' \dontrun{
#' final_data_obj <- run_cape(pheno_obj, geno_obj)
#' }
#'
#' @export
run_cape <- function(pheno_obj, geno_obj,
results_file = "cross.RDS", p_or_q = 0.05, n_cores = 4,
initialize_only = FALSE, verbose = TRUE, run_parallel = FALSE,
param_file = NULL, yaml_params = NULL, results_path = NULL, plot_pdf = TRUE){
# Instantiate the Cape R6 object
data_obj <- Cape$new(
parameter_file = param_file,
yaml_parameters = yaml_params,
results_path = results_path,
pheno = pheno_obj$pheno,
chromosome = pheno_obj$chromosome,
marker_num = pheno_obj$marker_num,
marker_location = pheno_obj$marker_location,
geno_names = pheno_obj$geno_names,
geno = geno_obj,
plot_pdf = plot_pdf
)
results_base_name <- gsub(".RDS", "", results_file)
# since this is the main data_obj, we can't allow it to return FALSE,
#check for the file first
prior_data_obj <- data_obj$read_rds(results_file)
if (isFALSE(prior_data_obj)) {
data_obj <- compare_markers(data_obj, geno_obj)
} else {
# things can get pretty confusing if these values don't match between
#the parameter file and the old data_obj
prior_data_obj$save_results <- data_obj$save_results
prior_data_obj$use_saved_results <- data_obj$use_saved_results
data_obj <- prior_data_obj
}
#===============================================================
# figure out how to synchronize get_eigentraits and
# the calculation of the kinship matrix. They both may
# remove individuals. Maybe trim the kinship object to match
# the eigentraits
#===============================================================
#if we want to use a kinship correction
if(as.logical(data_obj$use_kinship)){
kin_file_name <- paste0(results_base_name, "_kinship.RDS")
kin_obj <- data_obj$read_rds(kin_file_name)
if(isFALSE(kin_obj)){
#if there isn't a kinship object already, we need to make one
kin_obj <- kinship(data_obj, geno_obj, type = "overall",
pop = data_obj$pop)
data_obj$save_rds(kin_obj, kin_file_name)
}
#===============================================================
# We need a complete genotype matrix to calculate the kinship
# adjusted genotypes later on.
# Check for missing values in the genotype matrix.
# If there are missing values, impute them.
# Write out the imputed matrix, or read this in if it already
# exists.
#===============================================================
#we need to impute the missing values
imp_data_file <- paste0(results_base_name, "_data_imputed.RDS")
imp_geno_file <- paste0(results_base_name, "_geno_imputed.RDS")
# check if there is already a saved genotype object
geno <- data_obj$read_rds(imp_geno_file)
if (isFALSE(geno)) { #if the imputation hasn't been done already
geno <- get_geno(data_obj, geno_obj)
missing_vals <- which(is.na(geno))
if (length(missing_vals) > 0) { #if there are missing values, impute them
message("There are missing values in geno_obj. Running impute_missing_geno...\n")
geno_imp <- impute_missing_geno(data_obj, geno_obj = geno_obj, k = 10,
ind_missing_thresh = 0, marker_missing_thresh = 0, prioritize = "fewer",
max_region_size = NULL, min_region_size = NULL, run_parallel = run_parallel,
verbose = verbose, n_cores = n_cores)
# update and save the data_obj
data_obj <- geno_imp$data_obj
data_obj$save_rds(data_obj, imp_data_file)
# update and save the geno_obj
geno_obj <- geno_imp$geno_obj
data_obj$save_rds(geno_obj, imp_geno_file)
# recalculate the kinship matrix with the updated objects
#kin_obj <- kinship(data_obj, geno_obj, type = "overall", pop = data_obj$pop)
#data_obj$save_rds(kin_obj, kin_file_name)
} #end case for when there are missing values but no imputed genotypes
} else { #if the imputation has been done, then it must have been done for the data_obj too
#data_obj <- geno_imp$data_obj
geno_obj <- geno
}
}
if(verbose){cat("Removing unused markers...\n")}
combined_data_obj <- delete_underscore(data_obj, geno_obj)
data_obj <- combined_data_obj$data_obj
geno_obj <- combined_data_obj$geno_obj
data_obj <- remove_unused_markers(data_obj, geno_obj, verbose = verbose)
#because the genotype object can be changed by the above step,
#save the final version. (or change the above step so it doesn't
final_geno_file <- paste0(results_base_name, "_geno.RDS")
data_obj$save_rds(geno_obj, final_geno_file)
#str(data_obj$geno_names)
#str(dimnames(geno_obj))
if(!is.null(data_obj$covariates)){
if(verbose){cat("Running pheno2covar...\n")}
data_obj <- pheno2covar(data_obj, data_obj$covariates)
}
if(!is.null(data_obj$marker_covariates)){
if(verbose){cat("Running marker2covar...\n")}
data_obj <- marker2covar(data_obj, geno_obj, markers = data_obj$marker_covariates)
}
data_obj <- select_pheno(data_obj, pheno_which = data_obj$traits)
if(length(grep("eig", data_obj$scan_what, ignore.case = TRUE)) > 0){
data_obj <- get_eigentraits(
data_obj,
scale_pheno = as.logical(data_obj$traits_scaled),
normalize_pheno = as.logical(data_obj$traits_normalized)
)
if(data_obj$save_results){
if (data_obj$plot_pdf) {
if(verbose){cat("Plotting svd.pdf...\n")}
data_obj$plotSVD("svd.pdf")
}
if(verbose){cat("Plotting svd.jpg...\n")}
data_obj$plotSVD("svd.jpg")
}
# TODO update select_eigentraits
if(verbose){cat("Selecting eigentraits...\n")}
data_obj <- select_eigentraits(data_obj, traits_which = data_obj$eig_which)
}
if(verbose){cat("Saving data_obj into rds file...\n")}
data_obj$save_rds(data_obj, results_file)
if(initialize_only){
return(data_obj)
}
#===============================================================
# run singlescan
#===============================================================
if(verbose){cat("Running Singlescan...\n")}
singlescan_results_file <- paste0(results_base_name, "_singlescan.RDS")
singlescan_obj <- data_obj$read_rds(singlescan_results_file)
if(!data_obj$use_kinship) {
kin_obj <- NULL
}
if (isFALSE(singlescan_obj)) {
singlescan_obj <- singlescan(
data_obj, geno_obj, kin_obj = kin_obj, n_perm = data_obj$singlescan_perm,
alpha = data_obj$alpha, verbose = verbose, run_parallel = run_parallel,
n_cores = n_cores, model_family = "gaussian", overwrite_alert = FALSE
)
data_obj$save_rds(singlescan_obj, singlescan_results_file)
if(data_obj$save_results){
if(verbose){cat("Plotting singlescan standardized results...\n")}
for(ph in 1:ncol(singlescan_obj$singlescan_effects)){
filename <- paste0("Singlescan_", colnames(singlescan_obj$singlescan_effects)[ph], "_Standardized.jpg")
data_obj$plotSinglescan(filename, singlescan_obj, width = 20, height = 6,
units = "in", res = 300, standardized = TRUE, allele_labels = NULL,
alpha = data_obj$alpha, include_covars = TRUE, line_type = "l", pch = 16, cex = 0.5,
lwd = 3, traits = colnames(singlescan_obj$singlescan_effects)[ph])
}
}
if(data_obj$save_results){
if(verbose){cat("Plotting singlescan effects results...\n")}
for(ph in 1:ncol(singlescan_obj$singlescan_effects)){
filename <- paste0("Singlescan_", colnames(singlescan_obj$singlescan_effects)[ph], "_Effects.jpg")
data_obj$plotSinglescan(filename, singlescan_obj, width = 20, height = 6, units = "in", res = 300,
standardized = FALSE, allele_labels = NULL, alpha = data_obj$alpha, include_covars = TRUE,
line_type = "l", pch = 16, cex = 0.5, lwd = 3, traits = colnames(singlescan_obj$singlescan_effects)[ph])
}
}
}
#===============================================================
# run pairscan
#===============================================================
if(verbose){cat("Running pairscan...\n")}
pairscan_file <- paste0(results_base_name, "_pairscan.RDS")
pairscan_obj <- data_obj$read_rds(pairscan_file)
if (isFALSE(pairscan_obj) | is.null(data_obj$geno_for_pairscan)) {
marker_selection_method <- data_obj$marker_selection_method
num_alleles_in_pairscan <- data_obj$num_alleles_in_pairscan
peak_density <- data_obj$peak_density
max_pair_cor <- data_obj$max_pair_cor
min_per_genotype <- data_obj$min_per_genotype
pairscan_null_size <- data_obj$pairscan_null_size
scan_what <- data_obj$scan_what
if(marker_selection_method == "top_effects"){
data_obj <- select_markers_for_pairscan(data_obj, singlescan_obj, geno_obj,
num_alleles = num_alleles_in_pairscan, peak_density = peak_density,
verbose = verbose, plot_peaks = FALSE)
}
if(marker_selection_method == "from_list"){
if(is.null(data_obj$snp_file)){stop("Please specify a marker list in the parameter file.\n")}
snp_file <- file.path(results_path, data_obj$snp_file)
if(!file.exists(snp_file)){stop("Can't fine the specified marker list.\n")}
specific_markers <- as.matrix(read.table(snp_file, sep = "\t", stringsAsFactors = FALSE))
data_obj <- select_markers_for_pairscan(data_obj, singlescan_obj, geno_obj,
specific_markers = specific_markers[,1], verbose = verbose, plot_peaks = FALSE)
}
# if(marker_selection_method == "by_gene"){
# gene_list_mat <- read.table("gene_list.txt", sep = "\t", stringsAsFactors = FALSE)
# gene_list <- gene_list_mat[,1]
# data_obj <- select_markers_for_pairscan_by_gene(data_obj, geno_obj, gene_list = gene_list,
# bp_buffer = data_obj$bp_buffer, organism = data_obj$organism)
# } else {
# gene_list <- NULL
# }
data_obj$save_rds(data_obj, results_file)
pairscan_obj <- pairscan(data_obj, geno_obj, scan_what = scan_what,
pairscan_null_size = pairscan_null_size, min_per_genotype = min_per_genotype,
max_pair_cor = max_pair_cor, verbose = verbose, num_pairs_limit = Inf,
overwrite_alert = FALSE, run_parallel = run_parallel, n_cores = n_cores,
kin_obj = kin_obj)
data_obj$save_rds(pairscan_obj, pairscan_file)
if(data_obj$save_results){
if (data_obj$plot_pdf) {
if(verbose){cat("Plotting pairscan_regression.pdf...\n")}
data_obj$plotPairscan("Pairscan_Regression.pdf", pairscan_obj,
phenotype = NULL, show_marker_labels = TRUE, show_alleles = FALSE)
}
if(verbose){cat("Plotting pairscan_regression.jpg...\n")}
data_obj$plotPairscan("Pairscan_Regression.jpg", pairscan_obj,
phenotype = NULL, show_marker_labels = TRUE, show_alleles = FALSE)
}
data_obj$save_rds(data_obj, results_file)
}
#===============================================================
# run reparameterization
#===============================================================
if(verbose){cat("Running reparameterization...\n")}
if(!data_obj$use_saved_results || is.null(data_obj$var_to_var_influences)){
data_obj <- error_prop(data_obj, pairscan_obj, perm = FALSE, verbose = verbose,
n_cores = n_cores, run_parallel = run_parallel)
data_obj$save_rds(data_obj, results_file)
}
if(!data_obj$use_saved_results || is.null(data_obj$var_to_var_influences_perm)){
data_obj <- error_prop(data_obj, pairscan_obj, perm = TRUE, verbose = verbose,
n_cores = n_cores, run_parallel = run_parallel)
data_obj$save_rds(data_obj, results_file)
}
if(!data_obj$use_saved_results || is.null(data_obj$var_to_var_p_val)){
data_obj <- calc_p(data_obj, pval_correction = data_obj$pval_correction)
}
#if(length(grep("e", data_obj$scan_what, ignore_case = TRUE)) > 0){
# transform_to_phenospace <- TRUE
#}else{
# transform_to_phenospace <- FALSE
#}
if(!data_obj$use_saved_results || is.null(data_obj$max_var_to_pheno_influence)){
data_obj <- direct_influence(data_obj, pairscan_obj,
transform_to_phenospace = data_obj$transform_to_phenospace, verbose = verbose,
pval_correction = data_obj$pval_correction, save_permutations = TRUE,
n_cores = n_cores)
data_obj$save_rds(data_obj, results_file)
}
if(data_obj$save_results){
if(verbose){cat("Saving Variant_influences results...\n")}
data_obj$writeVariantInfluences("Variant_Influences.csv", p_or_q = max(c(p_or_q, 0.2)))
if(verbose){cat("Saving Variant_Interactions results...\n")}
data_obj$writeVariantInfluences("Variant_Influences_Interactions.csv",
include_main_effects = FALSE, p_or_q = max(c(p_or_q, 0.2)))
if (data_obj$plot_pdf) {
if(verbose){cat("Plotting Variant_influences.pdf...\n")}
data_obj$plotVariantInfluences("variant_influences.pdf", width = 10, height = 7,
p_or_q = p_or_q, standardize = FALSE, not_tested_col = "lightgray",
covar_width = NULL, pheno_width = NULL)
}
if(verbose){cat("Plotting Variant_influences.jpg...\n")}
data_obj$plotVariantInfluences("variant_influences.jpg", width = 10, height = 7,
p_or_q = p_or_q, standardize = FALSE, not_tested_col = "lightgray",
covar_width = NULL, pheno_width = NULL)
}
if(!data_obj$use_saved_results || is.null(data_obj$full_net)){
data_obj <- get_network(data_obj, geno_obj, p_or_q = p_or_q,
collapse_linked_markers = FALSE)
}
if(!data_obj$use_saved_results || is.null(data_obj$collapsed_net)){
data_obj <- get_network(data_obj, geno_obj, p_or_q = p_or_q, threshold_power = 1,
collapse_linked_markers = TRUE, plot_linkage_blocks = FALSE)
}
data_obj$save_rds(data_obj, results_file)
if(data_obj$save_results){
if (data_obj$plot_pdf) {
if(verbose){cat("Plotting Network_Circular.pdf...\n")}
data_obj$plotNetwork("Network_Circular.pdf", label_gap = 10, label_cex = 1.5, show_alleles = FALSE)
}
if(verbose){cat("Plotting Network_Circular.jpg...\n")}
data_obj$plotNetwork("Network_Circular.jpg", label_gap = 10, label_cex = 1.5, show_alleles = FALSE)
if(dim(geno_obj)[2] == 8){
if (data_obj$plot_pdf) {
if(verbose){cat("Plotting Network_Circular_DO.pdf...\n")}
data_obj$plotNetwork("Network_Circular_DO.pdf", label_gap = 10, label_cex = 1.5, show_alleles = TRUE)
}
if(verbose){cat("Plotting Network_Circular_DO.jpg...\n")}
data_obj$plotNetwork("Network_Circular_DO.jpg", label_gap = 10, label_cex = 1.5, show_alleles = TRUE)
}
if (data_obj$plot_pdf) {
data_obj$plotFullNetwork("Network_View.pdf", zoom = 1.2, node_radius = 0.3,
label_nodes = TRUE, label_offset = 0.4, label_cex = 0.5, bg_col = "lightgray",
arrow_length = 0.1, layout_matrix = "layout_with_kk", legend_position = "topright",
edge_lwd = 1, legend_radius = 2, legend_cex = 0.7, xshift = -1)
}
data_obj$plotFullNetwork("Network_View.jpg", zoom = 1.2, node_radius = 0.3,
label_nodes = TRUE, label_offset = 0.4, label_cex = 0.5, bg_col = "lightgray",
arrow_length = 0.1, layout_matrix = "layout_with_kk", legend_position = "topright",
edge_lwd = 1, legend_radius = 2, legend_cex = 0.7, xshift = -1)
}
data_obj$save_rds(data_obj, results_file)
invisible(data_obj)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.