Nothing
knitr::opts_chunk$set( collapse = TRUE, #dev="png", comment = "#>" ) library(targeted)
library("targeted") data(iris) set.seed(1) dat <- csplit(iris, 2) g0 <- NB(Species ~ ., data=dat[[1]]) g1 <- NB(Species ~ Sepal.Width + Petal.Length, data=dat[[1]]) g2 <- NB(Species ~ Sepal.Width, data=dat[[1]]) pr0 <- predict(g0, newdata=dat[[2]], wide=TRUE) pr1 <- predict(g1, newdata=dat[[2]], wide=TRUE) pr2 <- predict(g2, newdata=dat[[2]], wide=TRUE) table(colnames(pr1)[apply(pr1,1,which.max)], dat[[2]]$Species) table(colnames(pr2)[apply(pr2,1,which.max)], dat[[2]]$Species)
scoring(dat[[2]]$Species, NB1=pr1) scoring(dat[[2]]$Species, pr0=pr0, pr1=pr1, pr2=pr2)
\(A\) ...
## library(ranger) ## m1 <- ranger(Species ~ ., data=dat[[1]], num.threads=1, probability=TRUE) ## pr3 <- predict(m1, data=dat[[2]], num.threads=1)$predictions ## scoring(dat[[2]]$Species, pr3)
rf_pred <- function(x, newdata, ...) predict(x, newdata)$prediction ml_grf <- ml_model$new(~., info="grf::regression_forest", fit=function(x,y) grf::regression_forest(X=x,Y=y,...), pred=rf_pred) x <- ml_grf$clone() x$update(Sepal.Length ~ .) x$estimate(iris) x$predict(droplevels(head(iris))) m <- ml_model$new(Species ~ ., info="grf::probability_forest", fit=function(x,y) grf::probability_forest(X=x,Y=y,...), pred=rf_pred) m$estimate(dat[[1]], num.trees=1000) pr3 <- m$predict(dat[[2]]) scoring(dat[[2]]$Species, pr3)
rf_model <- ml_model$new(Sepal.Length ~ ., fit=function(x,y) grf::regression_forest(X=x,Y=y,...), pred=function(...) predict(...)$predictions) lm_model <- ml_model$new(Sepal.Length ~ ., fit=lm) a <- cv(list(lm=lm_model, rf=rf_model), data=iris, response="Sepal.Length") a
binary
n <- 1e4 x <- sample(1:5, n, replace=TRUE) y <- rbinom(n, 1, expit(x)) d <- data.frame(y=y, x=x) summary(glm(y ~ x, family=binomial, data=d)) counts <- table(x, y) dd <- data.frame(y=rep(0:1,each=5), x=rep(1:5, 2), w=c(counts[,1], counts[,2])) summary(g <- glm(y ~ x, family=binomial, data=dd, weights=w)) m <- ml_model$new(y ~ x, function(formula, data) glm(formula, data=data, family=binomial)) m$estimate(d) m <- ml_model$new(fit=function(data) glm(y~x, data=data, weights=data$w, family=binomial)) m$estimate(dd) m <- ml_model$new(y~x, function(formula,data) { glm(formula, data=data, weights=w, family=binomial) }) m$estimate(dd) m <- ml_model$new(y~x, fit=function(y,x,w,...) { glm.fit(x=cbind(1,x), y=y, weights=w, family=binomial()) }, pred=function(fit, newdata,...) { pr <- fit$family$linkinv(cbind(1, newdata)%*%fit$coefficients) cbind(1-pr, pr) }, specials=list(w=w)) fit <- m$estimate(dd) m$predict(dd)
Categorical response
rf <- function(formula, ...) ml_model$new(formula, info="grf::probability_forest", fit=function(x,y, ...) grf::probability_forest(X=x, Y=y, ...), pred=function(fit, newdata) predict(fit, newdata)$predictions, ...) models <- list(rf=rf(Species ~ .), nb=function(data, ...) NB(Species ~ ., data=data)) a <- cv(models, data=iris) a
rf <- function(formula, ...) ml_model$new(formula, info="grf::probability_forest", fit=function(x,y, ...) grf::probability_forest(X=x, Y=y, ...), pred=function(fit, newdata) predict(fit, newdata)$predictions, ...) args <- expand.list(num.trees=c(50,100,200,300,400,500), mtry=1:3, formula=c(Species ~ ., Species ~ Sepal.Length + Sepal.Width)) models <- lapply(args, function(par) do.call(rf, par)) x <- models[[1]]$clone() x$estimate(iris) #x$.__enclos_env__$private$formula #rtarget x <- ml_model$new(Species ~ ., info="grf::probability_forest", fit=function(x,y, ...) grf::probability_forest(X=x, Y=y, ...), num.trees=100, mtry=1) x$estimate(iris) a <- targeted::cv(models, data=iris) cbind(coef(a), attr(args, "table")[,1:2]) ## library(GPareto) ## dom <- t(nondominated_points(t(coef(a)))) ## plot(coef(a)) ## plotParetoEmp(dom, col="red") ## mp <- rbind(coef(a)[cbind(coef(a, min=TRUE),1:2)]) ## points(mp, pch=16)
a <- ml_model$new(fit=function(data,response="Species",...) { y <- data[,response] x <- data[,-which(names(data)%in%response),drop=FALSE] NB2(x=x,y=y) }, pred=function(fit, newdata) predict(fit, newdata)) a$estimate(iris) b <- ml_model$new(Species ~ ., fit=function(x,y,...) { grf::probability_forest(Y=y,X=x,...) }, pred=function(fit,newdata) predict(fit,newdata)$predictions, num.trees=50) cv(list(NB=a, RF=b), data=iris, response="Species")
plot(1:10)
sessionInfo()
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.