pattern <- "a.b"
strings <- c("abb", "a.b")
str_detect(strings, pattern)
str_detect(strings, fixed(pattern))
str_detect(strings, coll(pattern))
# coll() is useful for locale-aware case-insensitive matching
i <- c("I", "\u0130", "i")
i
str_detect(i, fixed("i", TRUE))
str_detect(i, coll("i", TRUE))
str_detect(i, coll("i", TRUE, locale = "tr"))
# Word boundaries
words <- c("These are some words.")
str_count(words, boundary("word"))
str_split(words, " ")[[1]]
str_split(words, boundary("word"))[[1]]
# Regular expression variations
str_extract_all("The Cat in the Hat", "[a-z]+")
str_extract_all("The Cat in the Hat", regex("[a-z]+", TRUE))
str_extract_all("a\nb\nc", "^.")
str_extract_all("a\nb\nc", regex("^.", multiline = TRUE))
str_extract_all("a\nb\nc", "a.")
str_extract_all("a\nb\nc", regex("a.", dotall = TRUE))
# today 9/5/19 ------------------------------------------------------------
library(data.table)
library(stringr)
library(fst)
gb_ddir <- "inst/data/mldata/"
gb_cols <- c(
'CaseNumber','StartTimeGMT',
'ProcedureID', 'Hospital', "Surgeon",
'TuID', 'PartNumber', 'SequenceNum','ToolName', 'ToolType',
'ProcedureCategory', 'ProcedureSubject', 'ProcedureName',
'DurationMin', 'ToolDurationMin'
)
"%_&_%" <- function(a, b){
randomvar_set_in_funenviron <- "hello world"
return(rlang::current_fn())
rlang::curr
rlang::call_name()
# "%_&_%"
# Reduce("&", lapply(a, function(i) stringr::str_detect(b, i)))
}
obj <- do.call("%_&_%", list(a =1, b = 1))
env <- rlang::env_clone(rlang::as_closure())
rlang::env_names(env)
ls(envir = env)
rlang::call_name()
"%_|_%" <- function(a, b)
Reduce("|", lapply(a, function(i) stringr::str_detect(b, i)))
getMorphDT <- function(){
DT <- rbindlist(
lapply(
list.files(gb_ddir, "*_full.fst", TRUE, TRUE),
fst::read_fst,
as.data.table = TRUE,
columns = gb_cols
))
oldnam <- c("ProcedureSubject", "ProcedureName", "ProcedureCategory", "ToolName", "ToolType")
newnam <- c("psub", "pnam", "pcat", "tnam", "ttype", )
setnames(DT, oldnam, newnam)
for(nam in newnam){
set(DT, NULL, nam, stringr::str_to_lower(DT[, get(nam)]))
}
DT[, "corp_pnam" := .(str_split(ProcedureCategory, boundary("word"))) ]
DT[, "corp_psub" := .(str_split(ProcedureSubject, boundary("word"))) ]
DT[, "corp_pcat" := .(str_split(ProcedureName, boundary("word"))) ]
DT[, "corp_tool" := .(str_split(ToolName, boundary("word"))) ]
## pull out reloads into a dedicted column and filter out reduntant rows
##
sterms <- c("stapler", "sureform")
colrs <- c("blue", "green", "white", "gray", "black")
##
##
## IM HERE- START HERE AFTER ARRIVE AT WORK
##
##
## CHECK IF SUREFORM AND STAPLER SHARE A PARTNUMBER AND IF SO STANDARDIZE!!
##
##
##
##
##
setkeyv(DT, c("ProcedureName", "ProcedureSubject", "ProcedureCategory"))
DT[ProcedureCategory == "Urology", .N, .(ProcedureSubject, ProcedureName)][order(ProcedureName)]
cDT <- DT[
DT[, .N, c("ProcedureSubject", "ProcedureID")][, .(n_obs = sum(N), n_procs = .N), "ProcedureSubject"]
]
DT <- cDT[, as.data.table(
sort(table(str_split_fixed(ToolName, " ", 2)[, 1])), keep.rownames = TRUE),
.(ProcedureSubject, n_procs, n_obs)
][ order(ProcedureSubject, -N),
.(ProcedureSubject,
tn = V1,
pct_tot_obs = round(N/tot_obs, 3))][
order(tn, -pct_tot_obs)
]
# pobsDT[,1-sum(.SD[1:5][["pct_tot_obs"]], na.rm = TRUE), ProcedureSubject]
plotDT <- pobsDT[,({
rows_top5 <- .SD[seq(1, min(5, .N))]
as.list(
rbind(
rows_top5,
data.table(tn = "Other",
pct_tot_obs = 1-sum(rows_top5[["pct_tot_obs"]]))
)
)
}), ProcedureSubject]
plotDT[, "tn" := factor(tn, levels = c("Other", sort(unique(tn)[-which(unique(tn) == "Other")]))) ]
plotDT[ProcedureSubject == "Dvp"]
plotDT[, levels(tn)]
ggplot(plotDT) +
geom_bar(aes(ProcedureSubject, pct_tot_obs,fill=tn),stat="identity")
setkeyv(DT, c("ProcedureSubject"))
stapleDT <- DT[
sterms %_|_% ToolName,
list(
ProcedureID, ProcedureSubject, ProcedureName,
PartNumber, SequenceNum,
ToolName,
ToolNameKey = "stapler",
is_sureform = str_detect(ToolName, "sureform"),
is_reload = str_detect(ToolName, "reload"),
colr_reload = str_extract(ToolName, paste0(colrs, collapse = "|")),
size_reload = str_extract(ToolName, "[0-9]+")
)][!str_detect(ToolName, "gui|\\$motor")]
stapleDT[,.(
tot_reloads = sum(is_reload),
tot_less_reloads = .N - sum(is_reload),
med_seqnum = list(unique(SequenceNum))
), ProcedureID][order(tot_less_reloads)]
stapleDT[, .N, .(ProcedureSubject, ProcedureName)][order(-N)]
DT[c("stapl", "sure") %_|_% ToolName, .N, ToolName
][, table(stringr::str_remove_all(ToolName, paste0(c(sterms, " *"), collapse = "|")))]
DT[c("sure", "reload") %_&_% ToolName, .N, ToolName]
## pull out reloads into a dedicted column and filter out reduntant rows
DT[, "tot_reloads" :=
sum(stringr::str_detect(ToolName, "Reload")),
ProcedureID]
cDT <- DT[!stringr::str_detect(ToolName, "Reload|Stapler\\$Motor")]
return(DT[])
}
getFreqTable <- function(search_terms=NULL,
std_toolname=NULL,
stat_groupby=NULL,
.data = NULL){
## if data not given then fetch it
if(is.null(.data)){
DT <- getMorphDT()
}
## main computation done in this function \
## - SELECTION - all terms provided as arg "search_terms" must be detected
##
MAP <- .data[, ({
ind <- search_terms %_&_% ToolName
fq_sel <- as.data.table(table(ToolName[ ind ]), TRUE)
fq_else <- as.data.table(table(ToolName[ !ind ]), TRUE)
names(fq_sel) <- c("tn_sel", "N")
names(fq_else) <- c("tn_not", "N")
data.table(
"tn_select" = list(fq_sel),
"tn_others" = list(fq_else),
"totObs" = .N,
"count_ok" = sum(fq_sel$N, fq_else$N) == .N
)
}),
keyby = stat_groupby]
return(MAP)
}
tnFreq <- getFreqTable()
setkey(DT, ProcedureSubject, ProcedureName)
getToolFreq(
tn_filter_terms = c("large?", "clip", "applier?"),
map_val = "large clip applier",
freq_grp_by = "ProcedureSubject",
.data = DT
)
getToolFreq(
tn_terms = c("needle", "drive"),
map_value = "",
count_grp = "ProcedureSubject",
.data = DT
)
##
## NEEDLE DRIVER TOOLS
##
tn <- c("needle", "drive")
expr <- substitute(paste0(X, paste0(tn, collapse = " "), "r"))
map_entry(c("larg", tn), raw_tn, eval(expr, list(X = "large ")))
map_entry(c("mega", tn), raw_tn, eval(expr, list(X = "mega ")))
map_entry(c("curve", tn), raw_tn, eval(expr, list(X = "curved ")))
map_entry(c("wrist", tn), raw_tn, eval(expr, list(X = "wristed ")))
map_entry(c("^su|^ne", tn), raw_tn, eval(expr, list(X = "")))
map_entry(c("suction", "irrigat"), raw_tn, "suction irrigator")
map_entry(c("ves", "seal", "ext"), raw_tn, "vessel sealer extend")
map_entry("harmonic", raw_tn, "harmonic ace")
map_entry("grasper", raw_tn, "")
map_entry("retract", raw_tn, "")
map_entry("scissor", raw_tn, "")
map_entry(c("atrial", "retract"), raw_tn, "atrial retractor")
map_entry("fenestrated grasper", raw_tn, "fenestrated grasper")
map_entry("endoscope", raw_tn, "endoscope")
DT[ stringr::str_detect(mToolName, "staple.+(?<=reload)"),
mToolName :=
paste0("stapler reload ", stringr::str_extract(mToolName, "blue|green|white|gray|black"))
]
DT[ stringr::str_detect(ToolName, "needle.*(?<=drive)"),
# stringr::str_trim(paste0(stringr::str_extract(ToolName, "mega|large|"), " needle driver"), "both"))
unique(ToolName)
]
DT[ stringr::str_detect(DT$mToolName, "sureform.+(?<=reload)"),
mToolName :=
paste0("sureform reload ", stringr::str_extract(mToolName, "blue|green|white|black"))
]
DT[ stringr::str_detect(mToolName, "stapler( [0-9]+|\\$)"), mToolName := "stapler 30/45"]
###
### FORM THE FINALIZED MAPPING
###
toolMapIndex <- DT[, .N, .("tn_morpheus" = ToolName, "tn_standardized" = mToolName)]
# as.data.table(unlist(stringr::str_split(res[, .N, mapped][, mapped], " ")))[, .N, V1][order(V1)]
return(res[])
}
# misc code below ---------------------------------------------------------
mail
-s "test subject"
-F bfatemi07@gmail.com
-c cc-addr
-b
< /dev/null
SYS_CMD <- "ping localhost"
WORK_DIR <- getwd()
ENV_VARS <- c(VAR1 = "hello", VAR2 = "world")
rstudioapi::terminalExecute(command = SYS_CMD,
workingDir = WORK_DIR,
env = ENV_VARS,
show = TRUE)
rstudioapi::terminalBuffer()
rstudioapi::terminalExecute()
rstudioapi::restartSession()
system(paste0("kill -9 ", Sys.getpid()))
# previous ----------------------------------------------------------------
GLOB_DDIR <- "inst/data/mldata/"
GLOB_COLS <- c(
'CaseNumber','StartTimeGMT',
'ProcedureID', 'Hospital', "Surgeon",
'TuID', 'PartNumber', 'SequenceNum',
'ToolName', 'ToolType',
'ProcedureCategory', 'ProcedureSubject', 'ProcedureName',
'DurationMin', 'ToolDurationMin',
"UsesRemaining", "UsesUsedThisProc", "MaxToolUses"
# "WorkData1", "WorkData2", "WorkData3"
)
# "%_&_%" <- function(a, b)
# which(Reduce("&", lapply(a, function(i) stringr::str_detect(b, i))))
"%_&_%" <- function(a, b)
Reduce("&", lapply(a, function(i) stringr::str_detect(b, i)))
"%_|_%" <- function(a, b)
which(Reduce("|", lapply(a, function(i) stringr::str_detect(b, i))))
# getToolMap <- function(DT, as_copy = FALSE, tn_only = TRUE){
paths <- list.files(GLOB_DDIR, "*_full.fst", TRUE, TRUE)
DT <- data.table::rbindlist(
lapply(
paths, fst::read_fst,
as.data.table = TRUE,
columns = GLOB_COLS
))
DT[, ToolName := stringr::str_to_lower(ToolName)][]
getFreqTable <- function(search_terms=NULL,
std_toolname=NULL,
stat_groupby=NULL,
.data = NULL){
.data <- copy(DT)
search_terms <- c(".")
std_toolname <- NA_character_
stat_groupby <- c("ProcedureSubject")
out <- .data[, ({
# TN <- .SD[["ToolName"]]
ind <- search_terms %_&_% ToolName
fq_sel <- as.data.table(table(ToolName[ ind ]), TRUE)
fq_else <- as.data.table(table(ToolName[ !ind ]), TRUE)
names(fq_sel) <- c("tn_sel", "N")
names(fq_else) <- c("tn_not", "N")
data.table(
"tn_select" = list(fq_sel),
"tn_others" = list(fq_else),
"totObs" = .N,
"count_ok" = sum(fq_sel$N, fq_else$N) == .N
)
}),
keyby = stat_groupby]
return(out)
}
fwrite(getFreqTable()[ProcedureSubject == "Dvp", tn_select][[1]][order(tn_sel)], "data-raw/dvp_tool_freq.csv")
tnFreq <- getFreqTable()
setkey(DT, ProcedureSubject, ProcedureName)
getToolFreq(
tn_filter_terms = c("large?", "clip", "applier?"),
map_val = "large clip applier",
freq_grp_by = "ProcedureSubject",
.data = DT
)
getToolFreq(
tn_terms = c("needle", "drive"),
map_value = "",
count_grp = "ProcedureSubject",
.data = DT
)
##
## NEEDLE DRIVER TOOLS
##
tn <- c("needle", "drive")
expr <- substitute(paste0(X, paste0(tn, collapse = " "), "r"))
map_entry(c("larg", tn), raw_tn, eval(expr, list(X = "large ")))
map_entry(c("mega", tn), raw_tn, eval(expr, list(X = "mega ")))
map_entry(c("curve", tn), raw_tn, eval(expr, list(X = "curved ")))
map_entry(c("wrist", tn), raw_tn, eval(expr, list(X = "wristed ")))
map_entry(c("^su|^ne", tn), raw_tn, eval(expr, list(X = "")))
map_entry(c("suction", "irrigat"), raw_tn, "suction irrigator")
map_entry(c("ves", "seal", "ext"), raw_tn, "vessel sealer extend")
map_entry("harmonic", raw_tn, "harmonic ace")
map_entry("grasper", raw_tn, "")
map_entry("retract", raw_tn, "")
map_entry("scissor", raw_tn, "")
map_entry(c("atrial", "retract"), raw_tn, "atrial retractor")
map_entry("fenestrated grasper", raw_tn, "fenestrated grasper")
map_entry("endoscope", raw_tn, "endoscope")
DT[ stringr::str_detect(mToolName, "staple.+(?<=reload)"),
mToolName :=
paste0("stapler reload ", stringr::str_extract(mToolName, "blue|green|white|gray|black"))
]
DT[ stringr::str_detect(ToolName, "needle.*(?<=drive)"),
# stringr::str_trim(paste0(stringr::str_extract(ToolName, "mega|large|"), " needle driver"), "both"))
unique(ToolName)
]
DT[ stringr::str_detect(DT$mToolName, "sureform.+(?<=reload)"),
mToolName :=
paste0("sureform reload ", stringr::str_extract(mToolName, "blue|green|white|black"))
]
DT[ stringr::str_detect(mToolName, "stapler( [0-9]+|\\$)"), mToolName := "stapler 30/45"]
###
### FORM THE FINALIZED MAPPING
###
toolMapIndex <- DT[, .N, .("tn_morpheus" = ToolName, "tn_standardized" = mToolName)]
# as.data.table(unlist(stringr::str_split(res[, .N, mapped][, mapped], " ")))[, .N, V1][order(V1)]
return(res[])
}
# misc code below ---------------------------------------------------------
mail
-s "test subject"
-F bfatemi07@gmail.com
-c cc-addr
-b
< /dev/null
SYS_CMD <- "ping localhost"
WORK_DIR <- getwd()
ENV_VARS <- c(VAR1 = "hello", VAR2 = "world")
rstudioapi::terminalExecute(command = SYS_CMD,
workingDir = WORK_DIR,
env = ENV_VARS,
show = TRUE)
rstudioapi::terminalBuffer()
rstudioapi::terminalExecute()
rstudioapi::restartSession()
system(paste0("kill -9 ", Sys.getpid()))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.