# Global knitr chunk options used when rendering this README.
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.path = "man/figures/README-",
  out.width = "100%"
)
The goal of the `doMIsaul` package is to provide functions to perform unsupervised and semisupervised learning on an incomplete dataset.
You can install the development version from GitHub with:
# Uncomment the next line if devtools is not installed yet.
# install.packages("devtools")
devtools::install_github("LilithF/doMIsaul")
This is a basic example which shows you how to perform unsupervised learning for an incomplete dataset:
# Unsupervised learning example on the incomplete `cancer` dataset.
library(doMIsaul)

data(cancer, package = "survival")
# Shift the status values down by one and drop the first column.
cancer$status <- cancer$status - 1
cancer <- cancer[, -1]

# Fix the seed so the imputations and the resulting partition are reproducible.
set.seed(1243)
res.unsup <- unsupMI(
  data = list(cancer),
  Impute = "MImpute_surv",
  Impute.m = 10,
  cleanup.partition = TRUE,
  return.detail = TRUE
)
# Store the consensus partition alongside the original variables.
cancer$part_unsup <- res.unsup$Consensus

# PCA of the imputed data, coloured by the consensus partition.
# All rows are selected; this replaces the hard-coded 1:228.
plot_MIpca(
  res.unsup$Imputed.data,
  seq_len(nrow(cancer)),
  color.var = cancer$part_unsup,
  pca.varsel = c(
    "age", "sex", "ph.ecog", "ph.karno", "pat.karno", "meal.cal", "wt.loss"
  )
)

# Continuous variables by cluster, leaving out unclassified observations.
plot_boxplot(
  data = cancer,
  partition.name = "part_unsup",
  vars.cont = c("age", "meal.cal", "wt.loss"),
  unclass.name = "Unclassified",
  include.unclass = FALSE
)

# Categorical variables by cluster.
plot_frequency(
  data = cancer,
  partition.name = "part_unsup",
  vars.cat = c("sex", "ph.ecog")
)
This is a basic example which shows you how to perform semisupervised learning for an incomplete dataset with a survival outcome:
## With imputation included
# Fix the seed so the imputations and the resulting partition are reproducible.
set.seed(345)
# X excludes the "part_unsup" column (if present) so that a previously
# computed partition is not used as a clustering variable.
res.semisup <- seMIsupcox(
  X = list(cancer[, setdiff(colnames(cancer), "part_unsup")]),
  Y = cancer[, c("time", "status")],
  Impute = TRUE,
  Impute.m = 10,
  center.init = TRUE,
  nfolds = 10,
  center.init.N = 50,
  cleanup.partition = TRUE,
  return.detail = TRUE
)
# This is an example, a larger value for center.init.N is recommended.

# Store the first consensus partition alongside the original variables.
cancer$part_semisup <- res.semisup$Consensus[[1]]

# PCA of the imputed data, coloured by the consensus partition.
plot_MIpca(
  res.semisup$Imputed.data,
  NULL,
  color.var = cancer$part_semisup,
  pca.varsel = c(
    "age", "sex", "ph.ecog", "ph.karno", "pat.karno", "meal.cal", "wt.loss"
  )
)

# Continuous variables by cluster, including unclassified observations.
plot_boxplot(
  data = cancer,
  partition.name = "part_semisup",
  vars.cont = c("age", "meal.cal", "wt.loss"),
  unclass.name = "Unclassified",
  include.unclass = TRUE
)

# Categorical variables by cluster.
plot_frequency(
  data = cancer,
  partition.name = "part_semisup",
  vars.cat = c("sex", "ph.ecog")
)
You may find more details on the methods implemented in this package in the associated publications:
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.