Nothing
#' Thyroid disease dataset
#'
#' @description The dataset is to be used with a supervised classification ML model to classify thyroid disease.
#' The dataset was sourced and adapted from the UCI Machine Learning repository \url{https://archive.ics.uci.edu/ml/index.php}.
#' @docType data
#' @keywords thyroid disease
#' @format A data frame with 3772 rows and 28 variables:
#' \describe{
#' \item{ThryroidClass}{binary classification label indicating whether \strong{sick = 1} or \strong{negative = 0} (note: the column name is spelled \code{ThryroidClass} in the data, as used in the examples below)}
#' \item{patient_age}{age of the patient}
#' \item{patient_gender}{flag indicating gender of patient - \strong{1 = Female} and \strong{0 = Male}}
#' \item{presc_thyroxine}{flag to indicate whether thyroxine replacement has been prescribed - \strong{1 = Thyroxine prescribed}}
#' \item{queried_why_on_thyroxine}{flag to indicate query has been actioned}
#' \item{presc_anthyroid_meds}{flag to indicate whether anti-thyroid medicine has been prescribed}
#' \item{sick}{flag to indicate sickness due to thyroxine depletion or over activity}
#' \item{pregnant}{flag to indicate whether the patient is pregnant}
#' \item{thyroid_surgery}{flag to indicate whether the patient has had thyroid surgery}
#' \item{radioactive_iodine_therapyI131}{flag to indicate whether the patient has had radioactive iodine treatment: \url{https://www.nhs.uk/conditions/thyroid-cancer/treatment/}}
#' \item{query_hypothyroid}{flag to indicate under active thyroid query \url{https://www.nhs.uk/conditions/underactive-thyroid-hypothyroidism/}}
#' \item{query_hyperthyroid}{flag to indicate over active thyroid query \url{https://www.nhs.uk/conditions/overactive-thyroid-hyperthyroidism/}}
#' \item{lithium}{Lithium carbonate administered to decrease the level of thyroid hormones}
#' \item{goitre}{flag to indicate swelling of the thyroid gland \url{https://www.nhs.uk/conditions/goitre/}}
#' \item{tumor}{flag to indicate a tumor}
#' \item{hypopituitarism}{flag to indicate diagnosed hypopituitarism (an under active pituitary gland)}
#' \item{psych_condition}{indicates whether a patient has a psychological condition}
#' \item{TSH_measured}{a TSH level lower than normal indicates there is usually more than enough thyroid hormone in the body and may indicate hyperthyroidism}
#' \item{TSH_reading}{the reading result of the TSH blood test}
#' \item{T3_measured}{linked to TSH reading - when free triiodothyronine rises above normal this indicates hyperthyroidism}
#' \item{T3_reading}{the reading result of the T3 blood test looking for above normal levels of free triiodothyronine}
#' \item{T4_measured}{free thyroxine, also known as T4, is used with T3 and TSH tests to diagnose hyperthyroidism}
#' \item{T4_reading}{the reading result of the T4 test}
#' \item{thyrox_util_rate_T4U_measured}{flag indicating the thyroxine utilisation rate \url{https://pubmed.ncbi.nlm.nih.gov/1685967/}}
#' \item{thyrox_util_rate_T4U_reading}{the result of the thyroxine utilisation rate test}
#' \item{FTI_measured}{flag to indicate measurement on the Free Thyroxine Index (FTI) \url{https://endocrinology.testcatalog.org/show/FRTUP}}
#' \item{FTI_reading}{the result of the Free Thyroxine Index (FTI) test}
#' \item{ref_src}{[nominal] indicating the referral source of the patient}
#' }
#' @source Prepared and adapted by Gary Hutson \email{hutsons-hacks@outlook.com}, Dec-2021 and sourced from Garavan Institute and J. Ross Quinlan.
#' @references Thyroid disease records supplied by the Garavan Institute and J. Ross Quinlan.
#' @examples
#' library(dplyr)
#' library(ConfusionTableR)
#' library(parsnip)
#' library(rsample)
#' library(recipes)
#' library(ranger)
#' library(workflows)
#' data("thyroid_disease")
#' td <- thyroid_disease
#' # Create a factor of the class label to use in ML model
#' td$ThryroidClass <- as.factor(td$ThryroidClass)
#' # Check the structure of the data to make sure factor has been created
#' str(td)
#' # Remove missing values, or choose a more advanced imputation option
#' td <- td[complete.cases(td),]
#' # Drop the column for referral source
#' td <- td %>%
#'   dplyr::select(-ref_src)
#' # Analyse class imbalance
#' class_imbalance <- prop.table(table(td$ThryroidClass))
#' class_imbalance
#' # Divide the data into a training/test split
#' set.seed(123)
#' split <- rsample::initial_split(td, prop=3/4)
#' train_data <- rsample::training(split)
#' test_data <- rsample::testing(split)
#' # Create recipe to normalise the predictors and remove zero-variance predictors
#' set.seed(123)
#' td_recipe <-
#'   recipe(ThryroidClass ~ ., data=train_data) %>%
#'   step_normalize(all_predictors()) %>%
#'   step_zv(all_predictors())
#' # Instantiate the model
#' set.seed(123)
#' rf_mod <-
#'   parsnip::rand_forest() %>%
#'   set_engine("ranger") %>%
#'   set_mode("classification")
#' # Create the model workflow
#' td_wf <-
#'   workflow() %>%
#'   workflows::add_model(rf_mod) %>%
#'   workflows::add_recipe(td_recipe)
#' # Fit the workflow to our training data
#' set.seed(123)
#' td_rf_fit <-
#'   td_wf %>%
#'   fit(data = train_data)
#' # Extract the fitted parsnip model from the workflow
#' td_fitted <- td_rf_fit %>%
#'   extract_fit_parsnip()
#' # Predict on the test set to see model performance
#' class_pred <- predict(td_rf_fit, test_data)
#' td_preds <- test_data %>%
#'   bind_cols(class_pred)
#' # Convert both to factors
#' td_preds$.pred_class <- as.factor(td_preds$.pred_class)
#' td_preds$ThryroidClass <- as.factor(td_preds$ThryroidClass)
#' # Evaluate the data with ConfusionTableR
#' cm <- ConfusionTableR::binary_class_cm(td_preds$ThryroidClass ,
#'                                        td_preds$.pred_class,
#'                                        positive="sick")
#' # View confusion matrix
#' cm$confusion_matrix
#' # View record level
#' cm$record_level_cm
"thyroid_disease"
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.