# Nothing
## ----setup, include=FALSE-----------------------------------------------------
# Default chunk options: show the code (echo) but do not run it (eval = FALSE).
# `echo` was previously passed twice, once as `T`; it is now set a single time.
knitr::opts_chunk$set(echo = TRUE, eval = FALSE)
## -----------------------------------------------------------------------------
# library(magrittr)
# library(fastai)
#
# tst_param = function(val, grad = NULL) {
# "Create a tensor with `val` and a gradient of `grad` for testing"
# res = tensor(val) %>% float()
#
# if(is.null(grad)) {
# grad = tensor(val / 10)
# } else {
# grad = tensor(grad)
# }
#
# res$grad = grad %>% float()
# res
# }
## -----------------------------------------------------------------------------
# p = tst_param(1., 0.1)
# p
## -----------------------------------------------------------------------------
# sgd_step(p, 1.)
# p
## -----------------------------------------------------------------------------
# p$grad
## -----------------------------------------------------------------------------
# p = tst_param(1., 0.1)
# weight_decay(p, 1., 0.1)
# p
## -----------------------------------------------------------------------------
# p = tst_param(1., 0.1)
# l2_reg(p, 1., 0.1)
# p$grad
## -----------------------------------------------------------------------------
# params = L(lapply(0:3, function(x) tst_param(x)))
#
# opt = Optimizer(params, sgd_step, lr=0.1)
#
# opt$step()
#
# str(params$items)
## -----------------------------------------------------------------------------
# params = L(lapply(0:3, function(x) tst_param(x)))
#
# opt = Optimizer(params, list(weight_decay, sgd_step), lr=0.1, wd = 0.1)
#
# opt$step()
#
# str(params$items)
## -----------------------------------------------------------------------------
# params = L(lapply(0:3, function(x) tst_param(x)))
#
# opt = Optimizer(params, sgd_step, lr=0.1)
#
# try(params[3]$grad <- NULL,
# TRUE)
#
# params[3]$grad
#
# opt$step()
#
# str(params$items)
## -----------------------------------------------------------------------------
# params = L(lapply(0:3, function(x) tst_param(x)))
#
# opt = Optimizer(list(params[0:1],params[2:3]), sgd_step, lr=0.1)
#
# opt$hypers$items[[1]][[1]] = 0.01
#
# opt$step()
#
# str(params$items)
## -----------------------------------------------------------------------------
# params = L(lapply(0:3, function(x) tst_param(x)))
#
# opt = Optimizer(params, list(weight_decay, sgd_step), lr=0.1, wd = 0.1)
#
# opt$zero_grad()
#
# str(params$items)
## -----------------------------------------------------------------------------
# p = tst_param(c(1,2,3), c(4,5,6))
# state = average_grad(p, mom = 0.9, dampening = FALSE, grad_avg = NULL)
# p$grad
# # tensor([4., 5., 6.])
#
# state = average_grad(p, mom=0.9, dampening = TRUE)
# p$grad*0.1
# # tensor([0.4000, 0.5000, 0.6000])
# p$grad*(0.1*0.9+0.1)
# # tensor([0.7600, 0.9500, 1.1400])
## -----------------------------------------------------------------------------
# p = tst_param(c(1,2,3), c(4,5,6))
# state = average_sqr_grad(p, sqr_mom = 0.99, dampening = FALSE)
#
# p$grad$pow(2)
# # tensor([16., 25., 36.])
#
# p$grad$pow(2) * 1.99
# # tensor([31.8400, 49.7500, 71.6400])
#
# state = average_sqr_grad(p, sqr_mom = 0.99)
# p$grad$pow(2) * 1e-2
# # tensor([0.1600, 0.2500, 0.3600])
# state = average_sqr_grad(p, sqr_mom = 0.99)
#
# p$grad$pow(2)*(0.01*0.99+0.01)
# # tensor([0.3184, 0.4975, 0.7164])
#
# params = L(lapply(0:3, function(x) tst_param(x)))
# opt = Optimizer(params, sgd_step, lr = 0.1)
# opt$freeze_to(1L)
## -----------------------------------------------------------------------------
# params = L(lapply(0:3, function(x) tst_param(x)))
# opt = SGD(params, lr = 0.1)
# opt$step()
# str(params$items)
## -----------------------------------------------------------------------------
# params = L(lapply(0:3, function(x) tst_param(x)))
# opt = SGD(params, lr = 0.1, mom = 0.9)
# opt$step()
# str(params$items)
## -----------------------------------------------------------------------------
# params = L(lapply(0:3, function(x) tst_param(x)))
# #Weight decay
# opt = SGD(params, lr=0.1, mom=0.9, wd=0.1)
# opt$step()
# str(params$items)
## -----------------------------------------------------------------------------
# params = L(lapply(0:3, function(x) tst_param(x)))
# #L2 reg
# opt = SGD(params, lr=0.1, mom=0.9, wd=0.1, decouple_wd=FALSE)
# opt$step()
# str(params$items)
## -----------------------------------------------------------------------------
# params = tst_param(c(1:3), c(0.1,0.2,0.3))
# opt = RMSProp(params, lr=0.1)
# opt$step()
# opt$step()
# step = (-0.1 * 0.1) / (sqrt((0.01*0.99+0.01) * 0.1**2) + 1e-8)
# params; tensor(c(step, 1+step, 2+step))
## -----------------------------------------------------------------------------
# params = tst_param(c(1:3), c(0.1,0.2,0.3))
# opt = RMSProp(params, lr=0.1, mom=0.9)
# opt$step()
# opt$step()
# step = (- 0.1 * (0.1 + 0.9*0.1)) / (sqrt((0.01*0.99+0.01) * 0.1**2) + 1e-8)
# params; tensor(c(step, 1+step, 2+step))
## -----------------------------------------------------------------------------
# params = tst_param(c(1:3), c(0.1,0.2,0.3))
# opt = Adam(params, lr=0.1, wd=0)
# opt$step()
# step = (-0.1 * 0.1) / (sqrt(0.1**2) + 1e-8)
# params; tensor(c(1+step, 2+step, 3+step))
## -----------------------------------------------------------------------------
# opt$step()
# params;tensor(tensor(c(1+2*step, 2+2*step, 3+2*step)))
## -----------------------------------------------------------------------------
# beta = 0.99
# r_inf = 2/(1-beta) - 1
# rs = lapply(5:500, function(s) {r_inf - 2*s*beta**s/(1-beta**s)}) %>% as.numeric()
# v = sqrt(((rs-4) * (rs-2) * r_inf)/((r_inf-4)*(r_inf-2)*rs))
# df_high = data.frame(x = 1:length(v), y = v)
#
# library(highcharter)
# hchart(df_high,'line', hcaes(x,y))
## -----------------------------------------------------------------------------
# params = tst_param(c(1:3), c(0.1,0.2,0.3))
# opt = QHAdam(params, lr=0.1)
# opt$step()
# step = (-0.1 * (((1-0.7) * 0.1) + (0.7 * 0.1)) )/ (
# sqrt(((1-1.0) * 0.1**2) + (1.0 * 0.1**2)) + 1e-8)
# params; tensor(c(1+step, 2+step, 3+step))
# # tensor([0.9000, 1.9000, 2.9000])
# # tensor([0.9000, 1.9000, 2.9000])
# opt$step()
# params; tensor(c(1+2*step, 2+2*step, 3+2*step))
# # tensor([0.8000, 1.8000, 2.8000])
# # tensor([0.8000, 1.8000, 2.8000])
## -----------------------------------------------------------------------------
# params = list(tst_param(c(1:3), c(0.1,0.2,0.3)), tst_param(c(1:3), c(0.01,0.02,0.03)))
# opt = Larc(params, lr=0.1)
# opt$step()
# #First param local lr is 0.02 < lr so it's not clipped
# opt$state[params[[1]]]['local_lr']
## -----------------------------------------------------------------------------
# opt$state[params[[2]]]['local_lr']
## -----------------------------------------------------------------------------
# params = list(tst_param(c(1:3), c(0.1,0.2,0.3)), tst_param(c(1:3), c(0.01,0.02,0.03)))
# opt = Larc(params, lr=0.1, clip = FALSE)
# opt$step()
# #With clip = FALSE nothing is clipped: second param local lr stays 0.2 > lr
# opt$state[params[[1]]]['local_lr']
## -----------------------------------------------------------------------------
# opt$state[params[[2]]]['local_lr']
## -----------------------------------------------------------------------------
# params = tst_param(c(1:3), c(0.1,0.2,0.3))
# opt = Lamb(params, lr=0.1)
# opt$step()
# params
## -----------------------------------------------------------------------------
# params = tst_param(c(1:3), c(0.1,0.2,0.3))
# p = params$data$clone()
# g = tensor(c(0.1,0.2,0.3))
# opt = Lookahead(SGD(params, lr=0.1))
#
# for(i in 1:5) {
# opt$step()
# }
# #first 5 steps are normal SGD steps
# params; p - g * 0.5
# # tensor([0.9500, 1.9000, 2.8500])
# # tensor([0.9500, 1.9000, 2.8500])
#
# #Since k=6, sixth step is a moving average of the 6 SGD steps with the initial weight
# opt$step()
# params; p * 0.5 + (p-g*0.6) * 0.5
# # tensor([0.9700, 1.9400, 2.9100])
# # tensor([0.9700, 1.9400, 2.9100])
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.