#' Calculates ages based on birthdates and test/MRI dates
#'
#' \code{mutate_ages} Creates a dataframe where age and subject timepoint
#' have been added to the data. Both MRI and Test age are calculated,
#' and 'Age' is the mean between these two. If only one of MRI or Test
#' age are present, this one is used for Age.
#'
#' @param data The MOAS or a MOAS generated file.
#'
#' @return a data frame with added/replaced age and timepoint variables.
#' @examples
#' \dontrun{
#' mutate_ages(data)
#'}
#'
#' @importFrom dplyr select select_ group_by ungroup summarise
#' @importFrom dplyr rename filter mutate lag first left_join
#' @importFrom dplyr arrange as_tibble
#' @importFrom magrittr "%>%"
#'
#' @export
mutate_ages = function(data) {
data2 <- mutate_dates(data)
# Get ages in numeric years adjusting for leap years
data2 = data2 %>%
mutate(MRI_Age = ifelse(!is.na(MRI_Date),
as.numeric(MRI_Date-Birth_Date)/365.25,
NA),
Test_Age = ifelse(!is.na(Test_Date),
as.numeric(Test_Date-Birth_Date)/365.25,
NA),
Age = ifelse((is.na(MRI_Age) & is.na(Test_Age)),
Age,
as.numeric(Date - Birth_Date)/365.25)) %>%
dplyr::mutate(Age = ifelse(is.na(Age),
as.numeric(Date - Birth_Date)/365.25,
Age))
data4 %>%
dplyr::arrange(CrossProject_ID, Age) %>%
dplyr::select(-Date) %>%
dplyr::as_tibble()
}
mutate_intervals <- function(data){
data2 <- data %>%
dplyr::select(-dplyr::matches("Interval_LastVisit|Interval_FirstVisit|Subject_Timepoint"))
## Workaround for Novel biomarkers round 4 that is messing up things.
novelBM <- data2 %>%
dplyr::filter(Project_Number == 90) %>%
dplyr::select(CrossProject_ID, Project_Number, Project_Wave) %>%
dplyr::distinct() %>%
dplyr::group_by(CrossProject_ID) %>%
dplyr::mutate(Subject_Timepoint = dplyr::row_number()) %>%
dplyr::ungroup() %>%
dplyr::full_join(data2,by = c("CrossProject_ID", "Project_Number", "Project_Wave")) %>%
dplyr::filter(Project_Number == 90)
data3 = data2 %>%
dplyr::filter(Project_Number != 90)
# Calculate the intervals
data3 %>%
dplyr::bind_rows(novelBM) %>%
dplyr::group_by(CrossProject_ID) %>%
dplyr::mutate(lagAge = dplyr::lag(Age)) %>%
dplyr::mutate(
Interval_FirstVisit = Age-min(Age),
Interval_LastVisit = ifelse(is.na(lagAge), 0, Age-lagAge),
tt = dplyr::lag(ifelse(Interval_FirstVisit == dplyr::lead(Interval_FirstVisit), T, F)),
Interval_LastVisit = ifelse(tt == TRUE, dplyr::lag(Interval_LastVisit), Interval_LastVisit),
Interval_LastVisit = ifelse(is.na(tt),0, Interval_LastVisit),
Interval_MRI_Test = ifelse(!is.na(Test_Date) & !is.na(MRI_Date),
Test_Date-MRI_Date, NA)
)
}
mutate_tp <- function(data){
nec <- c("CrossProject_ID", "Project_Wave", "Project_Name")
nec <- nec[! nec %in% names(data)]
if(length(nec) > 0)
stop(paste0("Necessary columns are missing from the data: ",
paste(nec, collase=", ")), call.=FALSE)
# Calculate subject timepoint
# There might be double/triple scanned
# so standerd row_number might not be enough
data %>%
dplyr::group_by(CrossProject_ID, Project_Wave, Project_Name) %>%
dplyr::mutate(tt = dplyr::row_number(),
tt = ifelse(tt==1, 1, 0)) %>%
dplyr::group_by(CrossProject_ID) %>%
dplyr::mutate(Subject_Timepoint = cumsum(tt)) %>%
dplyr::select(-tt)
}
#' Calculate or infer the mean data for
#' data collected
#'
#' The data in LCBC are usually collected in
#' two sets each time, one testing and one MRI.
#' These two are usually seperated by some days
#' to a month, but some times even more.
#' We also miss some dates of testing for some
#' participants. This causes some difficulties
#' when calculating ages etc.
#'
#' This function will create a new column named code[date],
#' which will contain wither the mean date between
#' MRI and test, given we have dates on both, or either
#' one of test or MRI date we have. If both are missing,
#' the function will add in the mean date for the specific
#' Project and Wave of collection, in order to properly
#' order the data in most likely chronological order.
#'
#'
#' @param data MOAS-like data
#'
#' @return data frame
#' @export
mutate_mean_date <- function(data){
nec <- c("CrossProject_ID", "MRI_Date", "Test_Date", "Project_Wave", "Project_Name")
nec <- nec[! nec %in% names(data)]
if(length(nec) > 0)
stop(paste0("Necessary columns are missing from the data: ",
paste(nec, collase=", ")), call.=FALSE)
# Create a single Date columns. with test-Date if MRI-date is missing
data2 = data %>%
dplyr::mutate(gg = dplyr::row_number()) %>%
dplyr::group_by(gg) %>%
dplyr::mutate(Date = dplyr::case_when(
!is.na(MRI_Date) & !is.na(Test_Date) ~ mean_date(MRI_Date, Test_Date),
!is.na(MRI_Date) & is.na(Test_Date) ~ MRI_Date,
is.na(MRI_Date) & !is.na(Test_Date) ~ Test_Date,
)
) %>%
dplyr::ungroup() %>%
dplyr::select(-gg)
# Calculate mean dates for all projects and project waves. For those participants we are missing Dates, so we may sort the
# data chronologically This is a necessary workaround because of participants having participated across projects, and
# subject timepoints need to be chronological, not by project.
DATES = data2 %>%
dplyr::group_by(Project_Name, Project_Wave) %>%
dplyr::summarise(Dates = mean.Date(Date, na.rm = TRUE) )
# Replace NA-dates in the data with the mean date for that project
for (i in which(is.na(data2$Date))) {
idx = which(DATES$Project_Name %in% data2$Project_Name[i] &
DATES$Project_Wave %in% data2$Project_Wave[i])
data2$Date[i] = DATES$Dates[idx]
}
# Sort the data according to ID then Date
data2 %>%
dplyr::filter(!is.na(CrossProject_ID)) %>%
dplyr::arrange(CrossProject_ID, Date)
}
mean_date <- function(date1, date2){
dts <- cbind(date1,date2)
diff <- rowMeans(dts, na.rm=TRUE)
as.Date(diff, origin = "1970-01-01")
}
## quiets concerns of R CMD check
if(getRversion() >= "2.15.1"){
utils::globalVariables(c("MRI_Date",
"Test_Date",
"Project_Name",
"Project_Wave",
"Date",
"CrossProject_ID",
"Birth_Date",
"MRI_Age",
"Test_Age",
"Age",
"Project_Number",
"full_join",
"bind_rows",
"lagAge",
"Interval_FirstVisit",
"lead",
"tt",
"Interval_LastVisit",
"row_number",
"Subject_Timepoint",
"mutate_dates", "data2",
"data4", "gg"))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.