## DO NOT MODIFY THIS BLOCK (unless you know what you're doing)
library(knitr)
library(rprojroot)

# Knit relative to the project root rather than this document's directory.
# find_root() is given several alternative criteria, combined with `|`.
opts_knit$set(
  root.dir = find_root(
    has_file(".Rprofile") | is_rstudio_project | is_r_package | is_git_root
  )
)

# Default chunk options: show the chunk source and keep messages.
opts_chunk$set(echo = TRUE, message = TRUE)
## LOAD PACKAGES HERE
# Attach order is kept as in the original (NMproject first, then dplyr).
library(NMproject)
library(dplyr)
Read THEOPP.csv from SourceData
# Read the source dataset. Cells containing "." are read as NA.
# `na.strings` is spelled out in full instead of relying on partial
# argument matching (`na =`).
d <- read.csv("SourceData/THEOPP.csv", na.strings = ".")

# Display the first 20 rows as a table.
kable(head(d, 20))
Modify WT column so it's filled in for obs rows.
# Within each ID, replace WT with its non-missing value(s) so that rows where
# WT was NA are filled in.
# ungroup() afterwards: otherwise `d` stays grouped by ID and any later
# mutate() on it (for example a cut() over WT) would silently be evaluated
# per subject instead of over the whole dataset.
d <- d %>%
  group_by(ID) %>%
  mutate(WT = na.omit(WT)) %>%
  ungroup()
Save the dataset into DerivedData
# Write the prepared dataset out under DerivedData via NMproject's helper.
write_derived_data(d, "DerivedData/data")
Additional step - create bootstrap datasets. Why do this here, before we've even created the run? By creating them now, we make it easy to run bootstraps (and other advanced techniques like bootstrap cross-validation) for model evaluation and model selection throughout the entirety of model development. The same bootstrapped datasets will be used whenever we bootstrap, guaranteeing that our results will be comparable. Here we use the rsample
package (part of the tidyverse
) to create our bootstrapped datasets and store them in bootsplit_data.csv.RData
. Later scripts will use this for bootstrapping and cross-validation.
## Define STRATA (no spaces)
## ungroup() first: if `d` is still grouped by ID from an earlier group_by(),
## cut() would bin WT separately within each subject rather than across all
## subjects, and the weight component of STRATA would carry no information.
## On an already-ungrouped `d` this is a no-op.
d <- d %>%
  ungroup() %>%
  mutate(
    IDGRP = ID <= 6,
    WT_C = cut(WT, breaks = 2, labels = FALSE),
    STRATA = paste(IDGRP, WT_C, sep = "_")
  )

## One row per ID/STRATA combination; this is the table that gets resampled.
d_id <- d %>% distinct(ID, STRATA)

set.seed(123)
## create large set of resamples (to enable simulation to grow without ruining seed)
bootsplits <- rsample::bootstraps(d_id, 100, strata = "STRATA")

dir.create("DerivedData", showWarnings = FALSE)
## Written with saveRDS(), so it must be read back with readRDS() even though
## the file name ends in .RData. The path is kept unchanged for later scripts.
bootsplits %>% saveRDS("DerivedData/bootsplit_data.csv.RData")
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.