#| include: false
# Send every figure generated while knitting this README to man/figures/,
# prefixed with "README-".
knitr::opts_chunk$set(
  fig.path = "man/figures/README-"
)
midfieldr is an R package that provides tools and methods for studying undergraduate student-level records from the MIDFIELD database.
#| echo: false
#| out.width: "15%"
# Embed the package logo; the chunk source is hidden from the rendered README.
knitr::include_graphics(
  "man/figures/logo.png"
)
midfieldr provides these functions for manipulating student-level data:
add_completion_status()
Determine completion status for every student
add_data_sufficiency()
Determine data sufficiency for every student
add_timely_term()
Calculate a timely completion term for every student
filter_cip()
Filter CIP data to match search strings
prep_fye_mice()
Prepare FYE data for multiple imputation
select_required()
Select required midfieldr variables
Additional functions for processing intermediate results:
order_multiway()
Order categorical variables of multiway data
R packages in examples and vignettes
In this example, we gather all students ever enrolled in Engineering and summarize their graduation status (in any major), grouping by race/ethnicity and sex. If you are writing your own script to follow along, we use these packages in this example:
# Packages used throughout the example (attach order preserved).
library(midfieldr)
library(midfielddata)
library(data.table)
Load the practice data. Reduce initial dimensions of data tables.
# Load the three practice data tables into the workspace
data(list = c("student", "term", "degree"))

# Keep only the variables midfieldr functions require
student <- select_required(student)
term <- select_required(term)
degree <- select_required(degree)

# Show one of the reduced tables
term
Filter for data sufficiency.
# Start the working data frame from the ID and program-code columns of term
DT <- term[, .(mcid, cip6)]

# Add the timely-term and data-sufficiency variables, in that order
DT <- DT |>
  add_timely_term(term) |>
  add_data_sufficiency(term)

# Retain only the observations flagged "include"
DT <- DT[data_sufficiency == "include"]
DT
Filter for degree-seeking students ever enrolled in Engineering.
# Inner join on student IDs: rows of DT without a match in student are dropped
cols_we_want <- student[, .(mcid)]
DT <- cols_we_want[DT, on = "mcid", nomatch = NULL]

# Keep programs whose 6-digit CIP code starts with 14 (engineering)
DT <- DT[cip6 %like% "^14"]

# One row per student: the first instance within each mcid
DT <- DT[, .SD[1L], by = "mcid"]
DT
Determine completion status.
# Append the completion status variable, using the degree table as the source
DT <- add_completion_status(DT, degree)

# Show the result
DT
Aggregate observations by groupings.
# Left join to add race/ethnicity and sex variables (omit unknowns).
# Join on the three-column subset rather than the full student table, so only
# race and sex are added and no other student columns can collide with
# columns already present in DT.
cols_we_want <- student[, .(mcid, race, sex)]
DT <- cols_we_want[DT, on = c("mcid")]
DT <- DT[!(race %ilike% "unknown" | sex %ilike% "unknown")]

# Create a variable combining race/ethnicity and sex
# (`:=` adds the column to DT by reference)
DT[, people := paste(race, sex)]

# Aggregate observations by groupings: count rows per status/people pair
DT_display <- DT[, .N, by = c("completion_status", "people")]
setorderv(DT_display, c("completion_status", "people"))
DT_display
Reshape and display results.
# Spread completion status across columns: one row per people group,
# with empty cells filled by 0
DT_display <- dcast(
  DT_display,
  people ~ completion_status,
  value.var = "N",
  fill = 0
)

# Display labels, keyed by the current column names. The "NA" column holds
# the counts for rows whose completion status is missing.
display_names <- c(
  "people" = "People",
  "timely" = "Timely completion",
  "late"   = "Late completion",
  "NA"     = "Did not complete"
)

# Order the columns, sort rows by people, then apply the display labels
setcolorder(DT_display, c("people", "timely", "late"))
setkeyv(DT_display, "people")
setnames(
  DT_display,
  old = names(display_names),
  new = unname(display_names)
)
#| echo: false
library(gt)

# Render the summary as a gt table: caption, small font, gray stylized theme,
# and a tinted fill on every column label.
DT_display |>
  gt() |>
  tab_caption(
    caption = "Table 1: Completion status of engineering undergraduates in the practice data"
  ) |>
  tab_options(table.font.size = "small") |>
  opt_stylize(style = 1, color = "gray") |>
  tab_style(
    style = list(
      cell_fill(color = "#c7eae5")
    ),
    locations = cells_column_labels(
      columns = everything()
    )
  )
"Timely completion" is the count of graduates completing their programs in no more than 6 years; "Late completion" is the count of those graduating in more than 6 years; "Did not complete" is the count of non-graduates.
Install from CRAN with:
#| eval: false
# Released version from CRAN (shown only; not evaluated when knitting)
install.packages(
  "midfieldr"
)
Install latest development version from GitHub with:
#| eval: false
# Development version: install pak first, then use it to install from GitHub
# (shown only; not evaluated when knitting)
install.packages("pak")
pak::pkg_install(
  "MIDFIELDR/midfieldr"
)
midfieldr interacts with practice data provided in the midfielddata data package. midfielddata is too large for CRAN so we deploy it to a drat package repository. Install midfielddata using:
#| eval: false
# Practice data package, installed from source from the project's drat
# repository (shown only; not evaluated when knitting)
install.packages(
  "midfielddata",
  repos = "https://MIDFIELDR.github.io/drat/",
  type = "source"
)
The installed size of midfielddata is about 24 MB, so the installation takes some time.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.