library(PheCAP)
# Location of the EHR data file; replace with the path to your own data.
ehr_data <- "path_to_ehr_data.txt"
# To try the sample data bundled with the PheCAP package instead,
# uncomment the following line:
# data(ehr_data)
# Bundle the EHR data for phenotyping.
# `validation = 0.4` is the fraction of labels (40%) held out as the test set.
data <- PhecapData(
  data = ehr_data,
  hu_feature = "healthcare_utilization",
  label = "label",
  validation = 0.4
)
data
# Define the surrogates for surrogate-assisted feature extraction (SAFE).
# A typical choice is the main ICD code, the main NLP CUI,
# and the combination of the two.
# A surrogate may also be defined through a lab test in some cases.
# The cutoffs below are the defaults (lower_cutoff = 1, upper_cutoff = 10)
# and are shared by all three surrogates here; adjust them based on
# domain knowledge.
surrogates <- lapply(
  list(
    "main_ICD",
    "main_NLP",
    c("main_ICD", "main_NLP")
  ),
  function(surrogate_variables) {
    PhecapSurrogate(
      variable_names = surrogate_variables,
      lower_cutoff = 1,
      upper_cutoff = 10
    )
  }
)
# Select features with surrogate-assisted feature extraction (SAFE),
# then print the selection.
feature_selected <- phecap_run_feature_extraction(
  data = data,
  surrogates = surrogates
)
feature_selected
# Fit the phenotyping model on the selected features and print it;
# the printout reports the AUC on the training set and on random splits.
model <- phecap_train_phenotyping_model(
  data = data,
  surrogates = surrogates,
  feature_selected = feature_selected
)
model
# Score every patient with the fitted model to get predicted phenotypes.
phenotype <- phecap_predict_phenotype(data = data, model = model)
# Evaluate the model against the validation labels,
# then print the AUC and draw the ROC curves.
validation <- phecap_validate_phenotyping_model(data = data, model = model)
validation
phecap_plot_roc_curves(validation)
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.