# Add LENA information to the database
library("dplyr")
library("L2TDatabase")
library("stringr")
source("inst/paths.R")
# Find all csvs in folder that contains the TP1 LENA csv
csvs <- list.files(
dirname(paths$tp1_lenas),
pattern = "_hourly.csv",
full.names = TRUE)
load_lena_csv <- . %>% readr::read_csv() %>% rename(ShortResearchID = Subject)
lena_csvs <- csvs %>% lapply(load_lena_csv)
lena_csvs
# Download/backup db beforehand
cnf_file <- file.path(getwd(), "inst/l2t_db.cnf")
l2t <- l2t_connect(cnf_file, "backend")
l2t_dl <- l2t_backup(l2t, "inst/backup")
# Combine child-study-childstudy tbls
df_cds <- tbl(l2t, "ChildStudy") %>%
left_join(tbl(l2t, "Study")) %>%
left_join(tbl(l2t, "Child")) %>%
select(ShortResearchID, Study, ChildStudyID, Birthdate) %>%
collect()
# Calculate chronological ages, default to NA if error encountered
chr_age <- failwith(NA, chrono_age)
df_lena_hours <- lena_csvs %>%
bind_rows() %>%
left_join(df_cds) %>%
rename(LENA_Date = LENADate) %>%
mutate(LENA_Age = unlist(Map(chr_age, LENA_Date, Birthdate)))
df_lena_hours %>% filter(is.na(ChildStudyID))
df_lena_hours %>% filter(is.na(LENA_Age))
# Keep just the columns in the LENA_Admin table. Since there are several hours
# per admin, use `distinct` to get one row per admin
df_remote_admins <- l2t_dl$LENA_Admin %>% readr::type_convert()
df_local_admins <- df_lena_hours %>%
match_columns(df_remote_admins) %>%
distinct()
n_distinct(df_local_admins$ChildStudyID)
nrow(df_local_admins)
# The remote table has a Notes column, whereas the local copy doesn't.
df_remote_admins
df_local_admins
# Remove duplicated data.
df_local_admins_to_add <- find_new_rows_in_table(
data = df_local_admins,
ref_data = df_remote_admins,
required_cols = "ChildStudyID")
df_local_admins_to_add
# Update db
append_rows_to_table(l2t, "LENA_Admin", df_local_admins_to_add)
## Find records that need to be updated
# Redownload the table
df_remote_admins <- tbl(l2t, "LENA_Admin") %>%
collect()
df_remote_admins %>%
count(ChildStudyID) %>%
filter(n != 1)
# Attach the database keys to latest data
df_remote_admins_indices <- df_remote_admins %>%
select(ChildStudyID, LENAID)
df_local <- df_local_admins %>%
inner_join(df_remote_admins_indices) %>%
arrange(LENAID) %>%
mutate(LENA_Date = format(LENA_Date))
# Keep just the columns in the latest data
df_remote_admins <- match_columns(df_remote_admins, df_local) %>%
filter(ChildStudyID %in% df_local$ChildStudyID) %>%
arrange(LENAID)
# Preview changes with daff
library("daff")
daff <- diff_data(df_remote_admins, df_local, context = 0)
stamp <- format(Sys.time(), "%Y-%m-%d_%H-%M")
render_diff(daff)
anti_join(df_local, df_remote_admins)
anti_join(df_remote_admins, df_local)
# Here we would update the remote table with any changes if needed
# Or see them itemized in a long data-frame
create_diff_table(df_local, df_remote_admins, "LENAID")
overwrite_rows_in_table(l2t, "LENA_Admin", rows = df_local, preview = TRUE)
# overwrite_rows_in_table(l2t, "LENA_Admin", rows = df_local, preview = FALSE)
# Add local hourly data to remote admin table
df_remote_admins <- collect("LENA_Admin" %from% l2t) %>%
readr::type_convert()
df_with_hours <- df_remote_admins %>%
inner_join(df_lena_hours)
# Keep just the columns in the LENA_Hours tbl
df_remote_hours <- collect("LENA_Hours" %from% l2t)
df_with_hours <- match_columns(df_with_hours, df_remote_hours)
# Make sure the local Hour and the remote Hour columns have the same formatting
# for the anti_join to work. Assume either Y-M-D H:M or Y-M-D H:M:S, convert to
# date objects, convert back to Y-M-D H:M:S strings.
library("lubridate")
df_remote_hours$Hour <- df_remote_hours$Hour %>%
parse_date_time(orders = c("%Y%m%d %H%M%S", "%Y%m%d %H%M"), tz = "America/Chicago") %>%
format()
df_with_hours$Hour <- df_with_hours$Hour %>%
parse_date_time(orders = c("%Y%m%d %H%M%S", "%Y%m%d %H%M"), tz = "America/Chicago") %>%
format
# Remove duplicated rows
df_new_hours <- df_with_hours %>%
anti_join(df_remote_hours, by = c("LENAID", "Hour")) %>%
arrange(LENAID, Hour)
df_new_hours
# Update the remote table. An error here is a good thing if there are no new
# rows to add
append_rows_to_table(l2t, "LENA_Hours", df_new_hours)
## Find records that need to be updated
# Redownload the table
df_remote_hours <- tbl(l2t, "LENA_Hours") %>%
collect()
# Attach the database keys to latest data
df_remote_hours_indices <- df_remote_hours %>%
select(LENAHourID, LENAID, Hour)
df_local <- df_with_hours %>%
left_join(df_remote_hours_indices) %>%
arrange(LENAID, LENAHourID)
# Keep just the columns in the latest data
df_remote_hours <- match_columns(df_remote_hours, df_local) %>%
arrange(LENAID, LENAHourID)
# Preview changes with daff
daff <- diff_data(df_remote_hours, df_local, context = 0)
render_diff(daff)
anti_join(df_local, df_remote_hours)
anti_join(df_remote_hours, df_local)
# Here we would update the remote table with any changes if needed
# Or see them itemized in a long data-frame
create_diff_table(df_local, df_remote_hours, "LENAHourID")
overwrite_rows_in_table(l2t, "LENA_Hours", rows = df_local, preview = TRUE)
# overwrite_rows_in_table(l2t, "LENA_Admin", rows = df_local, preview = FALSE)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.