R/metadata.R

Defines functions .layout .resolve .processKeys .handleNonstandardLayout resolvePlateMeta checkMeta .datesFormat .safe2 .safe1 getPlateMeta metadata

Documented in checkMeta .datesFormat getPlateMeta .layout metadata .processKeys .resolve resolvePlateMeta .safe1 .safe2

#' metadata
#'
#' Downloads and processes a metadata, formated according to a standard
#' template, for a specific plate and path on Google Drive.
#'
#' @name metadata
#' @rdname metadata
#' @param plate Character; Corresponds to the plate to process metadata for.
#' Must correspond to a file name in the path argument.
#' @param path Character; The location of the plate metadata on Google Drive.
#' @return A tibble with the metadata specified in the file.
#' @author Jason Serviss
#' @author Martin Enge
#' @examples
#'
#' \dontrun{metadata("test1", 'Enge_lab/GFP_mouse/Annotation/package_testing')}
#'
NULL
#' @export

metadata <- function(
  plate,
  path = 'data/package_testing/',
  verbose = FALSE,
  local = FALSE,
  save.local = NULL
){
  meta <- getPlateMeta(plate=plate, path=path, verbose=verbose, safe=TRUE, local=local, save.local=save.local)
  checkMeta(meta)
  meta <- resolvePlateMeta(meta)
  return(meta)
}

#' getPlateMeta
#'
#' Downloads and reads the metadata into R.
#'
#' @name getPlateMeta
#' @keywords internal
#' @rdname getPlateMeta
#' @param plate Character; Corresponds to the plate to process metadata for.
#' Must correspond to a file name in the path argument.
#' @param path Character; The location of the plate metadata on Google Drive.
#' @param verbose Logical; Indicates if function should be verbose.
#' @param safe Logical; for internal use.
#' @param local Logical; for internal use. 
#' @return List; Length 3 list with the Wells, Columns, and Plate sheets,
#' respectivley.
#' @author Jason Serviss
#' @author Martin Enge
NULL
#' @importFrom googledrive drive_download
#' @importFrom purrr map
#' @importFrom readxl read_excel
#' @importFrom tidyr spread
#' @importFrom stringr str_detect
#' @importFrom rlang .data

getPlateMeta <- function(
  plate, path = 'data/package_testing/', verbose=TRUE, safe = TRUE, local = FALSE, save.local=NULL
  ){
  plate_id <- plate
  if(safe) {
      plate_id <- .safe1(plate, path)
  }
  #download plate data
  if(local) {
    p <- path
  } else {
    if(!is.null(save.local)) {
      p <- file.path(save.local, paste0(plate, ".xlsx"))
    } else {
      p <- file.path(tempdir(), paste0(plate, ".xlsx"))
    }
    drive_download(plate_id, path = p, overwrite = TRUE, verbose = verbose)
  }
  
  if(safe) .safe2(p)
  
  #read and extract data
  meta <- map(c("Wells", "Columns", "Plate"), function(s) {
    d <- read_excel(path = p, sheet = s)
    if(s == "Plate") d <- spread(d, .data$Key, .data$Value, convert = TRUE)
    d
  })
  
  names(meta) <- rep(plate, 3)
  
  #format dates
  .datesFormat(meta)
}

#' .safe1
#'
#' Checks for file on googledrive before attempting to download.
#'
#' @name .safe1
#' @keywords internal
#' @rdname dot-safe1
#' @param plate Character; Corresponds to the plate to process metadata for.
#' Must correspond to a file name in the path argument.
#' @param path Character; The location of the plate metadata on Google Drive.
#' @author Jason Serviss
NULL
#' @importFrom googledrive drive_ls
#' @importFrom dplyr pull
#' @importFrom rlang .data

.safe1 <- function(plate, path) {
    files <- drive_ls(path = path)
    correctfile <- files$name %in% plate
    if(sum(correctfile) == 0) {
        stop(paste0(plate, " not found on Google Drive."))
    }
    if(sum(correctfile) > 1) {
        stop(paste0("Multiple files called ", plate, " found on Google Drive directory."))
    }
    return(files[correctfile,])
}

#' .safe2
#'
#' Checks that all sheets are present before loading the file.
#'
#' @name .safe2
#' @keywords internal
#' @rdname dot-safe2
#' @param path Character; The location of the plate metadata locally.
#' @author Jason Serviss
NULL
#' @importFrom readxl excel_sheets

.safe2 <- function(path) {
  if(!all(c("Plate", "Columns", "Wells") %in% excel_sheets(path))) {
    s <- c("Plate", "Columns", "Wells")
    missing <- s[which(!s %in% excel_sheets(path))]
    stop(paste0(missing, " is not present in the sheets of plate ", path))
  }
}

#' .datesFormat
#'
#' Date formatting helper to \code{\link{getPlateMeta}}.
#'
#' @name .datesFormat
#' @keywords internal
#' @rdname dot-datesFormat
#' @param meta Output from \code{\link{getPlateMeta}} function.
#' @return List of tibbles with ymd formatted dates.
#' @author Jason Serviss
NULL
#' @importFrom lubridate ymd
#' @importFrom purrr map map2
#' @importFrom stringr str_detect
#' @importFrom dplyr mutate sym
#' @importFrom rlang ":=" "!!"

.datesFormat <- function(meta) {
  dateCols <- map(meta, function(d) {
    colnames(d)[str_detect(colnames(d), "date")]
  })
  
  map2(meta, dateCols, function(x, y) {
    if(length(y) == 0) {x} else {
      mutate(x, !! y := ymd(!! dplyr::sym(y)))
    }
  })
}

#' checkMeta
#'
#' Checks that assumptions concerning the metadata in Google Drive are fufilled.
#'
#' @name checkMeta
#' @keywords internal
#' @rdname checkMeta
#' @param meta List; Length 3 list with the Wells, Columns, and Plate sheets,
#' respectivley.
#' @return Nothing.
#' @author Jason Serviss
NULL
#' @importFrom rlang .data

checkMeta <- function(meta) {
  #check that unique_key exists
  if(!"unique_key" %in% colnames(meta[[3]])) {
    stop("unique_key key is missing from Plate sheet.")
  }
  #check that file name matches unique key
  if(unique(names(meta)) != pull(meta[[3]], .data$unique_key)) {
    stop("File name and unique_key do not match.")
  }
  #check that the wells_in_plate variable is present in the Plate sheet
  if(!"wells_in_plate" %in% colnames(meta[[3]])) {
    stop("wells_in_plate key missing from Plate sheet")
  }
  #check that wells_in_plate is numeric
  if(!is.numeric(pull(meta[[3]], .data$wells_in_plate))) {
    stop("wells_in_plate key must equal 384 or 96 or be the number of samples")
  }
  
  #Only run column and well checks when the layout is 384 or 96
  wells <- pull(meta[[3]], .data$wells_in_plate)
  if(wells %in% c(384, 96)) {
    #check that the Column key in the Columns sheet is present.
    if(!"Column" %in% colnames(meta[[2]])) {
      stop("The Column key in the Columns sheet is missing.")
    }
    #check that the Column key in the Columns sheet is correct.
    wells <- pull(meta[[3]], .data$wells_in_plate)
    if(!identical(unique(.layout(wells)$Column), meta[[2]]$Column)) {
      mess <- paste0(
        "The Column key in the Columns sheet is malformated. ", 
        "The plate format is: ", wells,
        " so Column should contain: ", 
        paste(unique(.layout(wells)$Column), collapse = ", ")
      )
      stop(mess)
    }
    #check that the Well key in the Wells sheet is present.
    if(!"Well" %in% colnames(meta[[1]])) {
      stop("The Well key in the Wells sheet is missing.")
    }
    #check that the Wells key in the Wells sheet is correct.
    if(!identical(.layout(wells)$Well, meta[[1]]$Well)) {
      mess <- paste0(
        "The Well key in the Wells sheet is malformated. ", 
        "The plate format is ", wells,
        " so Well should contain: ", paste(.layout(wells)$Well, collapse = ", ")
      )
      stop(mess)
    }
  }
}

#' resolvePlateMeta
#'
#' Resolves the precedence of the of the keys in the metadata file with the
#' hierarchy Wells > Columns > Plates.
#'
#' @name resolvePlateMeta
#' @keywords internal
#' @rdname resolvePlateMeta
#' @param meta List; Length 3 list with the Wells, Columns, and Plate sheets,
#' respectivley. Typically from the \code{\link{getPlateMeta}} function.
#' @return A tibble with the resolved metadata.
#' @author Jason Serviss
NULL
#' @importFrom dplyr pull bind_cols full_join select matches "%>%"
#' @importFrom purrr map_dfr map_dfc
#' @importFrom tibble is_tibble
#' @importFrom rlang .data

resolvePlateMeta <- function(meta) {
  
  #layout plate
  wells <- pull(meta[[3]], .data$wells_in_plate)
  if(!wells %in% c(384, 96)) {
    ns <- .handleNonstandardLayout(meta, wells)
    return(ns)
  }
  layout <- .layout(wells)

  #Add plate data to layout
  base <- layout
  newmeta <- map_dfr(1:nrow(layout), function(x) meta[[3]])
  if(length(newmeta) != 0) {
    base <- bind_cols(base, newmeta)
  }

  #resolve Plate and Column prescedence
  bind1 <- full_join(base, meta[[2]], by = "Column")
  keys1 <- .processKeys(colnames(bind1))
  add1 <- map_dfc(keys1,  ~.resolve(bind1, .x))
  full1 <- bind1
  if(length(add1) != 0) {
    full1 <- bind_cols(bind1, add1)
  }
  resolved1 <- select(full1, -(dplyr::matches("\\.[x-y]")))

  #resolve Well and combined Plate and Column prescedence
  bind2 <- full_join(resolved1, meta[[1]], by = "Well")
  keys2 <- .processKeys(colnames(bind2))
  add2 <- map_dfc(keys2,  ~.resolve(bind2, .x))
  full2 <- bind2
  if(length(add2) != 0) {
    full2 <- bind_cols(bind2, add2)
  }
  out <- select(full2, -(dplyr::matches("\\.[x-y]")))
  
  return(out)
}

.handleNonstandardLayout <- function(meta, wells) {
  if(length(meta[[2]]) != 0) {
    stop("Column annotation detected in non-standard layout.")
  }
  base <- map_dfr(1:wells, function(x) meta[[3]])
  if(length(meta[[1]]) == 0) {
    return(base)
  }
  bind_cols(meta[[1]], base)
}

#' .processKeys
#'
#' Helper for \code{\link{resolvePlateMeta}}. Extracts duplicated key names with
#' a ".x" or ".y" suffix.
#'
#' @name .processKeys
#' @keywords internal
#' @param data A tibble with potentially dupicated keys in the colnames.
#' @return A character vector of the duplicated keys without their suffix.
#' @author Jason Serviss
NULL
#' @importFrom stringr str_replace

.processKeys <- function(keys) {
  kp <- str_replace(keys, "(.*)\\.[x-y]", "\\1")
  kp[duplicated(kp)]
}

#' .resolve
#'
#' Helper for \code{\link{resolvePlateMeta}}. Resolved key precedence.
#' Expects the greater suffix (i.e. .y > .x) to have greater precedence.
#' Expects only .x and .y suffixes.
#'
#' @name .resolve
#' @keywords internal
#' @param data Tibble; including the keys to be resolved.
#' @param key Character; the key to be resolved. key.x and key.y are expected
#' in colnames(data).
#' @return A character vector of the duplicated keys without their suffix.
#' @author Jason Serviss
NULL
#' @importFrom dplyr select mutate sym "%>%"
#' @importFrom rlang ":=" "!!"
#' @importFrom purrr map2
#' @importFrom tidyr unnest

.resolve <- function(data, key) {
  if(length(key) == 0) return(data)

  key.x <- paste0(key, ".x"); key.y <- paste0(key, ".y")
  key.x.sym <- sym(key.x); key.y.sym <- sym(key.y)

  if(!all(c(key.x, key.y) %in% colnames(data))) {
    stop("Keys missing from the data.")
  }

  data %>%
    select(key.x, key.y) %>%
    mutate(!! key := map2(!! key.y.sym, !! key.x.sym, function(y, x) {
      replace(y, is.na(y), x)
    })) %>%
    select(-key.x, -key.y) %>%
    unnest(cols=key)
}

#' .layout
#'
#' Helper for \code{\link{resolvePlateMeta}}. Sets up the plate layout.
#'
#' @name .layout
#' @keywords internal
#' @param format Numeric; Indicates which plate to setup. Can be 384 or 96.
#' @return A tibble with the plate layout.
#' @author Jason Serviss
NULL
#' @importFrom tibble tibble is_tibble

.layout <- function(format) {
  Row <- NULL; Column <- NULL; Well <- NULL
  
  if(format == 384) {
    r <- LETTERS[1:16]
    c <- c(paste0("0", 1:9), 10:24)
  } else {
    r <- LETTERS[1:8]
    c <- c(paste0("0", 1:9), 10:12)
  }

  tibble(
    Row = rep(r, each = length(c)),
    Column = rep(c, length(r)),
    Well = paste0(Row, Column)
  )
}
EngeLab/EngeMetadata documentation built on March 23, 2021, 8:19 p.m.