R/text.R

Defines functions sanitise.whitespace tokenise.whitespace text2factor my.usubjid

Documented in sanitise.whitespace text2factor tokenise.whitespace

## Normalise space characters in each element of a character vector:
## every run of one or more spaces is collapsed to a single space, then
## a leading space and a trailing space (if present) are removed.
## Only the literal space character is affected; tabs, newlines and
## other whitespace are left as they are.
##
##   tt   vector of strings to clean (coerced to character by gsub/sub)
##
## Returns a character vector of the same length as tt.
#' @export
sanitise.whitespace <- function(tt) {
  ## After collapsing, at most one space can remain at either end, so a
  ## single anchored substitution per end is sufficient.
  collapsed <- gsub(" +", " ", tt)
  left.trimmed <- sub("^ ", "", collapsed)
  sub(" $", "", left.trimmed)
}

## Split every element of tt on runs of whitespace and pool the pieces
## into one flat, unnamed vector, discarding empty tokens (such as the
## one produced by leading whitespace).
##
##   tt   character vector; any names are dropped before splitting
##
## Returns a character vector of tokens (NULL when tt has length zero).
#' @export
tokenise.whitespace <- function(tt) {
  pieces <- strsplit(unname(tt), "\\s+")
  tokens <- unlist(pieces)
  tokens[nzchar(tokens)]
}

## Convert a vector of text values to a factor, treating the empty
## string "" as missing (NA).
##
##   t   character vector, or a factor of text labels
##
## Returns a factor whose levels are the distinct non-empty values of t.
#' @export
text2factor <- function(t) {
  ## ifelse() drops attributes, so a factor passed straight through would
  ## be reduced to its integer codes and yield levels "1", "2", ...;
  ## work on the labels instead.
  if (is.factor(t)) {
    t <- as.character(t)
  }
  as.factor(ifelse(t != "", t, NA))
}

## GSK format, but not strictly CDER/CDISC compliant,
## because USUBJID must be 1:1 with subjects across all datasets
## supporting a single submission.  E.g. VEG105192 is a randomised
## study and VEG107769 is an open-label rollover, so a given subject
## would need to have the same USUBJID in both datasets.  Similarly
## for the ANORO priming + follow-up trials...

## Build subject identifiers of the form "<studyid>.<subjid>", where
## subjid is left-padded with zeros to a width of at least 7 characters.
##
##   studyid   study identifier(s), pasted unchanged
##   subjid    subject identifier(s); numeric values are rendered in
##             fixed (non-scientific) notation with no decimals, anything
##             else is used via as.character()
##
## Returns a character vector; studyid and subjid are recycled by paste().
##
## Every element is padded on its own, so the identifier produced for a
## subject does not depend on which other subjects are in the same call.
## (Padding via format() uses a common width for the whole vector and
## left-justifies character input, which put the zeros on the wrong side.)
my.usubjid <- function(studyid, subjid) {
  padded <- if (is.numeric(subjid)) {
    sprintf("%7.0f", as.numeric(subjid))
  } else {
    sprintf("%7s", as.character(subjid))
  }
  ## sprintf() right-justifies with spaces; turn that padding into zeros.
  paste(studyid, gsub(" ", "0", padded), sep = ".")
}
tobyjohnson/gtx documentation built on Aug. 30, 2019, 8:07 p.m.