getUniqueId <- function(Names, idstaken, idrange=NULL){
#This functioncreates a unique(random) id for a covariate
#It is borrowed from Tom Seinen's package Triton
##ParallelLogger::logInfo("\t\tCreating unique covariate ids")
Names <- as.data.frame(Names)
colnames(Names) <- "name"
uniqueNames <- as.data.frame(unique(Names))
colnames(uniqueNames) <- "name"
## generate the ids
if(is.null(idrange)){idrange <- c(1, 2147482)}
uniqueNames$ids <- as.integer(paste0((as.integer(runif(n = nrow(uniqueNames), min = idrange[1], max = idrange[2]))), 999))
## change the ids if duplicated or taken
while((length(uniqueNames$ids[duplicated(uniqueNames$ids)])>0) || (!all(is.na(match(uniqueNames$ids,idstaken))))) {
dubs <- duplicated(uniqueNames$ids) # duplicated ids
taken <- !is.na(match(uniqueNames$ids,idstaken)) # ids that are already taken
both <- dubs | taken # combine both
uniqueNames$ids[both] <- as.integer(paste0((as.integer(runif(n = length(both[both]), min = 0, max = 2147482))), 999)) # create new id for those
}
Names <- dplyr::left_join(Names, uniqueNames, by = "name")
return(Names$ids)
}
toCovariateData <- function(inputFile, objectWithIds){
inputfile = vroom::vroom(file = inputFile, col_names = FALSE, col_types = "c", trim_ws = TRUE, progress = TRUE, delim = "///" )
rowIds <- objectWithIds$rowId
if (any(stringi::stri_detect_fixed(inputfile$X1, "#SID", max_count = 1)) == FALSE) {
stop("The input file provided does not contain sequence IDs")
}
covariateTidy <- inputfile %>%
dplyr::mutate(rowId = gsub(x = X1, pattern = ".*SID: ", replacement = ""),
Sequences = gsub(x = X1, pattern =".#.*", replacement = "")) %>%
dplyr::select(Sequences, rowId) %>%
tidyr::separate_rows(rowId, sep = " ") %>%
dplyr::mutate(covariateValue = 1) %>%
dplyr::arrange(as.numeric(rowId))
# Making rowId numeric
covariateTidy$rowId <- as.numeric(covariateTidy$rowId)
# Fixing names of sequences
covariateTidy$Sequences <- gsub(x = covariateTidy$Sequences, pattern = "-1", replacement = "=>")
covariateTidy$Sequences <- gsub(x = covariateTidy$Sequences,pattern = "=>$", replacement = "")
# Constructing unique covariate Ids
uniqueSeqs <- unique(covariateTidy$Sequences)
uniqueCovariates <- data.frame(covariateName = uniqueSeqs)
uniqueCovariateIds <- uniqueCovariates %>%
dplyr::mutate(covariateId = as.numeric(getUniqueId(Names = covariateName, idstaken = NULL)))
# Assign the true patient id to rowId
#trueRowIds <- as.data.frame(unique(objectWithIds$rowId))
#colnames(trueRowIds) <- "trueRowIds"
#trueRowIds <- trueRowIds %>%
# dplyr::mutate(row_no = dplyr::row_number(),
# java_row_id = row_no - 1)
trueRowIds <- objectWithIds %>%
select("rowId", "SPMFrowId")
trueIdDf <- data.frame("rowId" = unique(trueRowIds$rowId),
"SPMFrowId" = unique(trueRowIds$SPMFrowId))
#outputRowIds <- as.data.frame(unique(covariateTidy$rowId))
# colnames(outputRowIds) <- "outputRowIds"
#idData <- dplyr::bind_cols(outputRowIds, trueRowIds) #%>%
#dplyr::rename("outputRowIds" = ...1,
# "trueRowIds" = ...2)
# include unique ids in the data
covariateDataFp <- dplyr::left_join(x = covariateTidy, y = uniqueCovariateIds, by = c("Sequences" = "covariateName"))
# include true row ids
covariateDataFp <- dplyr::left_join(x = covariateDataFp, y= trueIdDf, by = c("rowId" = "SPMFrowId"))
# adding 1 to match R's enum
covariateDataFp$rowId <- covariateDataFp$rowId + 1
# Constructing covariateData's object $covariates
covariates <- covariateDataFp %>%
dplyr::select(rowId, covariateId, covariateValue) #%>%
# dplyr::rename("rowId" = "trueRowIds")
# Constructing covariateData's object $covariateRef
covariateRef <- uniqueCovariateIds %>%
dplyr::mutate(analysisId = 999,
conceptId = 0)
# Constructing covariateData's object $analysisRef
analysisRef <- data.frame(analysisId = 999,
analysisName = "FrequentPatterns",
domainId = "FP",
startDay = 0,
endDay = 0,
isBinary = "Y",
missingMeansZero = "Y",
stringsAsFactors = TRUE)
metadata <- list()
metadata$populationSize <- length(unique(covariates$rowId))
metadata$cohortId <- -1
result <- Andromeda::andromeda(covariates = covariates,
covariateRef = covariateRef,
analysisRef = analysisRef)
attr(result, "metaData") <- metadata
class(result) <- "CovariateData"
attr(class(result), "package") <- "FeatureExtraction"
return(result)
}
appendCovariateData<- function(tempCovariateData,covariateData){
##==## appends covariate objects ##==##
if (is.null(covariateData)) {
covariateData <- tempCovariateData
} else {
if (hasData(covariateData$covariates)) {
if (hasData(tempCovariateData$covariates)) {
Andromeda::appendToTable(covariateData$covariates, tempCovariateData$covariates)
}
} else if (hasData(tempCovariateData$covariates)) {
covariateData$covariates <- tempCovariateData$covariates
}
if (hasData(covariateData$covariatesContinuous)) {
if (hasData(tempCovariateData$covariatesContinuous)) {
Andromeda::appendToTable(covariateData$covariatesContinuous, tempCovariateData$covariatesContinuous)
} else if (hasData(tempCovariateData$covariatesContinuous)) {
covariateData$covariatesContinuous <- tempCovariateData$covariatesContinuous
}
}
Andromeda::appendToTable(covariateData$covariateRef, tempCovariateData$covariateRef)
Andromeda::appendToTable(covariateData$analysisRef, tempCovariateData$analysisRef)
for (name in names(attr(tempCovariateData, "metaData"))) {
if (is.null(attr(covariateData, "metaData")[name])) {
attr(covariateData, "metaData")[[name]] <- attr(tempCovariateData, "metaData")[[name]]
} else {
attr(covariateData, "metaData")[[name]] <- list(attr(covariateData, "metaData")[[name]],
attr(tempCovariateData, "metaData")[[name]])
}
}
}
return(covariateData)
}
hasData <- function(data) {
##==## checks if data has data ##==##
return(!is.null(data) && (data %>% count() %>% pull()) > 0)
}
getTextFile <- function(fpObject, fileName){
fpObject %>%
dplyr::mutate(new_seq = stringr::str_replace_all(Seqs, pattern = "_", replacement = " "),
new_seq = stringr::str_replace_all(Seqs, pattern = "=>", replacement = " -1 "),
new_seq = paste(new_seq, "-1", sep = " ")) %>%
dplyr::group_by(new_seq) %>%
dplyr::mutate(new_ids = paste0(new_row_Ids, collapse = " "),
output = paste(new_seq, "#SUP:", new_Count, "#SID:", new_ids, sep = " ")) %>%
dplyr::ungroup() %>%
dplyr::select(output) %>%
dplyr::distinct() %>%
readr::write_table(file = paste0(fileName))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.