Files in Nth-iteration-labs/contextual
Simulation and Analysis of Contextual Multi-Armed Bandit Policies

.Rbuildignore
.gitattributes
.gitignore
.travis.yml
DESCRIPTION
NAMESPACE
NEWS.md R/agent.R R/bandit.R R/bandit_basic_bernoulli.R R/bandit_basic_gaussian.R R/bandit_cmab_bernoulli.R R/bandit_cmab_binary.R R/bandit_cmab_hybrid.R R/bandit_cmab_linear.R R/bandit_cmab_logit.R R/bandit_cmab_precaching.R R/bandit_cmab_wheel.R R/bandit_continuum_function.R R/bandit_offline_bootstrapped_replay.R R/bandit_offline_direct_method.R R/bandit_offline_doubly_robust.R R/bandit_offline_propensity_weighting.R R/bandit_offline_replay_evaluator.R R/bandit_offline_replay_evaluator_lookup.R R/functions_generic.R R/functions_utility.R R/history.R R/plot.R R/policy.R R/policy_cmab_lin_epoch_greedy.R R/policy_cmab_lin_epsilon_greedy.R R/policy_cmab_lin_ts_disjoint.R R/policy_cmab_lin_ucb_disjoint.R R/policy_cmab_lin_ucb_disjoint_optimized.R R/policy_cmab_lin_ucb_general.R R/policy_cmab_lin_ucb_hybrid.R R/policy_cmab_lin_ucb_hybrid_optimized.R R/policy_cmab_logit_ts_bootstrap.R R/policy_cmab_probit_ts.R R/policy_cont_lif.R R/policy_fixed.R R/policy_mab_epsilon_first.R R/policy_mab_epsilon_greedy.R R/policy_mab_exp3.R R/policy_mab_gittins_bl.R R/policy_mab_gradient.R R/policy_mab_softmax.R R/policy_mab_ts.R R/policy_mab_ts_bootstrap.R R/policy_mab_ucb1.R R/policy_mab_ucb2.R R/policy_oracle.R R/policy_random.R R/simulator.R README.md
_pkgdown.yml
appveyor.yml
codecov.yml
contextual.Rproj
cran-comments.md
demo/00Index
demo/alternative_parallel_backends/azure/cluster.json
demo/alternative_parallel_backends/azure/credentials-sample.json
demo/alternative_parallel_backends/azure/demo_azure.R demo/alternative_parallel_backends/azure/simulator_azure.R demo/alternative_parallel_backends/azure/test_azure_connection.R demo/alternative_parallel_backends/redis/demo_redis.R demo/alternative_parallel_backends/redis/simulator_redis.R demo/alternative_parallel_backends/rmpi/demo_rmpi.R demo/alternative_parallel_backends/rmpi/simulator_rmpi.R demo/alternative_parallel_backends/rmpi/test_rmpi_connection.R demo/demo_bandit_algorithms_for_website_optimization.R demo/demo_cmab_policy_comparison_linear_bandit.R demo/demo_cmab_policy_comparison_weight_bandit.R demo/demo_epsilon_greedy_policy.R demo/demo_epsilon_greedy_to_epoch_greedy_policy.R demo/demo_lif_bandit.R demo/demo_mab_policy_comparison.R demo/demo_offline_cmab_alpha_linucb_direct_method.R demo/demo_offline_cmab_alpha_linucb_replay.R demo/demo_simpsons_paradox_propensity.R demo/demo_sine_bandit.R demo/demo_subsubclass.R demo/demo_sutton_barto.R demo/evaluations_on_public_datasets/demo_carskit_depaul.R demo/evaluations_on_public_datasets/demo_movielens_100k.R demo/evaluations_on_public_datasets/demo_movielens_10m.R demo/offline_bandit_evaluations/demo_offline_bootstrap_replay.R demo/offline_bandit_evaluations/demo_offline_direct_method.R demo/offline_bandit_evaluations/demo_offline_doubly_robust.R demo/offline_bandit_evaluations/demo_offline_propensity_score.R demo/replication_eckles_kaptein_2014/demo_bootstrap_fig_2.R demo/replication_eckles_kaptein_2014/demo_bootstrap_fig_3.R demo/replication_kruijswijk_2018/1_basic_synthetic_evaluation.R demo/replication_kruijswijk_2018/2a_main_synthetic_evaluation.R demo/replication_kruijswijk_2018/2b_dependent_observations_plot_bar.R demo/replication_kruijswijk_2018/3_offline_bootstrapped_persuasion.R demo/replication_kruijswijk_2018/bandit_bernoulli.R demo/replication_kruijswijk_2018/bandit_bootstrapped_replay.R demo/replication_kruijswijk_2018/bandit_replay.R
demo/replication_kruijswijk_2018/beta_binom_hier_model.stan
demo/replication_kruijswijk_2018/policy_pooled_egreedy.R demo/replication_kruijswijk_2018/policy_pooled_thompson.R demo/replication_kruijswijk_2018/policy_pooled_ucb.R demo/replication_kruijswijk_2019/README.md demo/replication_kruijswijk_2019/bandit_continuum_function_bimodal.R demo/replication_kruijswijk_2019/bandit_continuum_function_unimodal.R demo/replication_kruijswijk_2019/bandit_continuum_offon.R demo/replication_kruijswijk_2019/bandit_continuum_offon_kern.R demo/replication_kruijswijk_2019/demo_lif_bandit.R demo/replication_kruijswijk_2019/demo_tbl_bandit.R demo/replication_kruijswijk_2019/policy_cont_lif_randstart.R demo/replication_kruijswijk_2019/policy_tbl.R demo/replication_li_2010/1_import_yahoo_to_monetdb.R demo/replication_li_2010/2_run_simulation.R demo/replication_li_2010/3_plotter.R demo/replication_li_2010/4_plotter.R demo/replication_li_2010/alternative_db_scripts/1_import_yahoo_data_to_monetdb_lite.R demo/replication_li_2010/alternative_db_scripts/2_run_the_simulation_on_monetdb.R demo/replication_li_2010/alternative_db_scripts/2_run_the_simulation_on_monetdb_lite.R demo/replication_li_2010/alternative_db_scripts/yahoo_to_mysql.R demo/replication_li_2010/alternative_db_scripts/yahoo_to_postgresql.R demo/replication_li_2010/alternative_db_scripts/yahoo_to_sqlite.R demo/replication_li_2010/demo_yahoo_classes/yahoo_bandit.R demo/replication_li_2010/demo_yahoo_classes/yahoo_policy_epsilon_greedy.R demo/replication_li_2010/demo_yahoo_classes/yahoo_policy_epsilon_greedy_seg.R demo/replication_li_2010/demo_yahoo_classes/yahoo_policy_linucb_disjoint.R demo/replication_li_2010/demo_yahoo_classes/yahoo_policy_linucb_hybrid.R demo/replication_li_2010/demo_yahoo_classes/yahoo_policy_random.R demo/replication_li_2010/demo_yahoo_classes/yahoo_policy_ucb1_alpha.R demo/replication_li_2010/demo_yahoo_classes/yahoo_policy_ucb1_alpha_seg.R demo/replication_li_2010/demo_yahoo_exploration/exploration.R demo/replication_li_2010/demo_yahoo_exploration/plots.R demo/replication_van_emden_2018/section_2_3.R demo/replication_van_emden_2018/section_3_2_1.R demo/replication_van_emden_2018/section_3_2_2.R demo/replication_van_emden_2018/section_4_2_plot.R demo/replication_van_emden_2018/section_5_2.R demo/replication_van_emden_2018/section_5_3.R demo/replication_van_emden_2018/section_5_4.R demo/replication_van_emden_2018/section_6.R demo/replication_van_emden_2018/section_7.R demo/replication_van_emden_2018/section_8.R
docs/LICENSE-text.html
docs/LICENSE.html
docs/README.html
docs/articles/_only_pkgdown_/faq.html
docs/articles/arxiv_2018/fig/all_cmab_phases_Part1.pdf docs/articles/arxiv_2018/fig/all_cmab_phases_Part2.pdf docs/articles/arxiv_2018/fig/all_cmab_phases_Part3.pdf docs/articles/arxiv_2018/fig/all_cmab_phases_Part4.pdf docs/articles/arxiv_2018/fig/all_cmab_phases_Part5.pdf docs/articles/arxiv_2018/fig/all_cmab_phases_Part6.pdf docs/articles/arxiv_2018/fig/all_cmab_phases_Part7.pdf docs/articles/arxiv_2018/fig/all_cmab_phases_Part8.pdf docs/articles/arxiv_2018/fig/cmab_chart.pdf docs/articles/arxiv_2018/fig/contextual_class.pdf docs/articles/arxiv_2018/fig/contextual_sequence.pdf docs/articles/arxiv_2018/fig/offline_bandit.pdf docs/articles/arxiv_2018/fig/section_2_3.pdf docs/articles/arxiv_2018/fig/section_3_2_1.pdf docs/articles/arxiv_2018/fig/section_3_2_2.pdf docs/articles/arxiv_2018/fig/section_4_2_plot.pdf docs/articles/arxiv_2018/fig/section_5_2.pdf docs/articles/arxiv_2018/fig/section_5_3.pdf docs/articles/arxiv_2018/fig/section_5_4.pdf docs/articles/arxiv_2018/fig/section_5_5.pdf docs/articles/arxiv_2018/fig/section_8_bar.pdf docs/articles/arxiv_2018/fig/section_8_plot.pdf
docs/articles/arxiv_2018/jss.aux
docs/articles/arxiv_2018/jss.bbl
docs/articles/arxiv_2018/jss.bst
docs/articles/arxiv_2018/jss.cls
docs/articles/arxiv_2018/jss.out
docs/articles/arxiv_2018/jss.pdf
docs/articles/arxiv_2018/jss.synctex.gz
docs/articles/arxiv_2018/jsslogo.jpg
docs/articles/bandit_algorithms_for_website_optimization.html
docs/articles/basic_epsilon_greedy.jpeg
docs/articles/basic_epsilon_greedy.jpg
docs/articles/basic_epsilon_greedy.png
docs/articles/carskit_depaul.jpeg
docs/articles/carskit_depaul.jpg
docs/articles/carskit_depaul.png
docs/articles/cmabs.html
docs/articles/cmabs.jpeg
docs/articles/cmabs.jpg
docs/articles/cmabs.png
docs/articles/cmabsoffline.html
docs/articles/compare.png
docs/articles/contextual-fig-1.jpg
docs/articles/contextual-fig-1.png
docs/articles/contextual-fig-2.jpg
docs/articles/contextual-fig-2.png
docs/articles/eckles_kaptein.html
docs/articles/eckles_kaptein_1.jpg
docs/articles/eckles_kaptein_1.png
docs/articles/eg_average_reward.jpeg
docs/articles/eg_average_reward.jpg
docs/articles/eg_average_reward.png
docs/articles/eg_cumulative_reward.jpeg
docs/articles/eg_cumulative_reward.jpg
docs/articles/eg_cumulative_reward.png
docs/articles/eg_incorrect.jpeg
docs/articles/eg_incorrect.jpg
docs/articles/eg_incorrect.png
docs/articles/eg_optimal_action.jpeg
docs/articles/eg_optimal_action.jpg
docs/articles/eg_optimal_action.png
docs/articles/epsilongreedy.html
docs/articles/index.html
docs/articles/introduction.html
docs/articles/linucboffline.jpeg
docs/articles/linucboffline.jpg
docs/articles/linucboffline.png
docs/articles/mabs.html
docs/articles/mabs.jpeg
docs/articles/mabs.jpg
docs/articles/mabs.png
docs/articles/ml10m.html
docs/articles/ml10m.jpg
docs/articles/ml10m.png
docs/articles/offline_depaul_movies.html
docs/articles/only_pkgdown/faq.html
docs/articles/replication-fig-1.jpg
docs/articles/replication-fig-1.png
docs/articles/replication-fig-2.jpg
docs/articles/replication-fig-2.png
docs/articles/replication.html
docs/articles/simpsons.html
docs/articles/softmax_average_reward.jpeg
docs/articles/softmax_average_reward.jpg
docs/articles/softmax_average_reward.png
docs/articles/softmax_cumulative_reward.jpeg
docs/articles/softmax_cumulative_reward.jpg
docs/articles/softmax_cumulative_reward.png
docs/articles/softmax_optimal_action.jpeg
docs/articles/softmax_optimal_action.jpg
docs/articles/softmax_optimal_action.png
docs/articles/sutton_barto.html
docs/articles/sutton_eg_1.jpeg
docs/articles/sutton_eg_1.jpg
docs/articles/sutton_eg_1.png
docs/articles/sutton_eg_2.jpeg
docs/articles/sutton_eg_2.jpg
docs/articles/sutton_eg_2.png
docs/articles/sutton_gradient.jpeg
docs/articles/sutton_gradient.jpg
docs/articles/sutton_gradient.png
docs/articles/sutton_optimistic.jpeg
docs/articles/sutton_optimistic.jpg
docs/articles/sutton_optimistic.png
docs/articles/sutton_ucb.jpeg
docs/articles/sutton_ucb.jpg
docs/articles/sutton_ucb.png
docs/articles/sutton_violin.jpeg
docs/articles/sutton_violin.jpg
docs/articles/sutton_violin.png
docs/articles/ucb_average_reward.jpeg
docs/articles/ucb_average_reward.jpg
docs/articles/ucb_average_reward.png
docs/articles/ucb_cumulative_reward.jpeg
docs/articles/ucb_cumulative_reward.jpg
docs/articles/ucb_cumulative_reward.png
docs/articles/ucb_optimal_action.jpeg
docs/articles/ucb_optimal_action.jpg
docs/articles/ucb_optimal_action.png
docs/articles/website_optimization.html
docs/authors.html
docs/docsearch.css
docs/docsearch.js
docs/docsearch.json
docs/favicon.ico
docs/index.html
docs/jquery.sticky-kit.min.js
docs/link.svg
docs/news/index.html
docs/pkgdown.css
docs/pkgdown.js
docs/pkgdown.yml
docs/reference/Agent.html
docs/reference/Bandit.html
docs/reference/BasicBernoulliBandit.html
docs/reference/BasicGaussianBandit.html
docs/reference/BootstrapTSPolicy.html
docs/reference/ContextualBasicBandit.html
docs/reference/ContextualBernoulliBandit.html
docs/reference/ContextualBernoulliPrecachingBandit.html
docs/reference/ContextualBinaryBandit.html
docs/reference/ContextualEpochGreedyPolicy.html
docs/reference/ContextualEpsilonGreedy.html
docs/reference/ContextualEpsilonGreedyPolicy.html
docs/reference/ContextualHybridBandit.html
docs/reference/ContextualLinTSPolicy.html
docs/reference/ContextualLinearBandit.html
docs/reference/ContextualLogitBTSPolicy.html
docs/reference/ContextualLogitBandit.html
docs/reference/ContextualPrecachingBandit.html
docs/reference/ContextualTSProbitPolicy.html
docs/reference/ContextualThompsonSamplingPolicy.html
docs/reference/ContextualWheelBandit.html
docs/reference/ContinuumBandit.html
docs/reference/EpsilonFirstPolicy-1.png
docs/reference/EpsilonFirstPolicy-2.png
docs/reference/EpsilonFirstPolicy.html
docs/reference/EpsilonGreedyPolicy-1.png
docs/reference/EpsilonGreedyPolicy-2.png
docs/reference/EpsilonGreedyPolicy.html
docs/reference/Exp3Policy-1.png
docs/reference/Exp3Policy-2.png
docs/reference/Exp3Policy.html
docs/reference/FixedPolicy.html
docs/reference/GittinsBrezziLaiPolicy.html
docs/reference/GlmUCBPolicy.html
docs/reference/GradientPolicy-1.png
docs/reference/GradientPolicy-2.png
docs/reference/GradientPolicy.html
docs/reference/History.html
docs/reference/LifPolicy.html
docs/reference/LinUCBDisjointOptimizedPolicy.html
docs/reference/LinUCBDisjointPolicy.html
docs/reference/LinUCBGeneralPolicy.html
docs/reference/LinUCBHybridOptimizedPolicy.html
docs/reference/LinUCBHybridPolicy.html
docs/reference/OfflineBootstrappedReplayBandit.html
docs/reference/OfflineDirectMethodBandit.html
docs/reference/OfflineDoublyRobustBandit.html
docs/reference/OfflineLookupReplayEvaluatorBandit-TODO-colon-Needs-to-be-documented-more-fully..html
docs/reference/OfflineLookupReplayEvaluatorBandit.html
docs/reference/OfflinePolicyEvaluatorBandit.html
docs/reference/OfflinePropensityWeightingBandit.html
docs/reference/OfflineReplayEvaluatorBandit.html
docs/reference/OraclePolicy.html
docs/reference/Plot.html
docs/reference/Policy.html
docs/reference/RandomPolicy-1.png
docs/reference/RandomPolicy.html
docs/reference/Simulator.html
docs/reference/SoftmaxPolicy-1.png
docs/reference/SoftmaxPolicy-2.png
docs/reference/SoftmaxPolicy.html
docs/reference/ThompsonSamplingPolicy-1.png
docs/reference/ThompsonSamplingPolicy.html
docs/reference/UCB1Policy-1.png
docs/reference/UCB1Policy-2.png
docs/reference/UCB1Policy.html
docs/reference/UCB2Policy-1.png
docs/reference/UCB2Policy-2.png
docs/reference/UCB2Policy.html
docs/reference/ci_boot.html
docs/reference/clip.html
docs/reference/clipr.html
docs/reference/data_table_factors_to_numeric.html
docs/reference/dec-set.html
docs/reference/figures/1simulator.jpeg
docs/reference/figures/2agent.jpeg
docs/reference/figures/3abandit.jpeg
docs/reference/figures/3bpolicy.jpeg
docs/reference/figures/3cbandit.jpeg
docs/reference/figures/3dpolicy.jpeg
docs/reference/figures/algoepsilonfirst.jpg
docs/reference/figures/cmab_all.jpeg
docs/reference/figures/cmab_all_large.jpg
docs/reference/figures/cmab_all_medium.jpg
docs/reference/formatted_difftime.html
docs/reference/get_arm_context.html
docs/reference/get_full_context.html
docs/reference/inc-set.html
docs/reference/ind.html
docs/reference/index.html
docs/reference/inv.html
docs/reference/inv_logit.html
docs/reference/invgamma-1.png
docs/reference/invgamma.html
docs/reference/invlogit.html
docs/reference/is_rstudio.html
docs/reference/max_in.html
docs/reference/mvrnorm.html
docs/reference/one_hot.html
docs/reference/ones_in_zeroes.html
docs/reference/plot.history.html
docs/reference/print.history.html
docs/reference/prob_winner.html
docs/reference/sample_one_of.html
docs/reference/set_external.html
docs/reference/sherman_morrisson.html
docs/reference/sim_post.html
docs/reference/sum_of.html
docs/reference/summary.history.html
docs/reference/value_remaining-1.png
docs/reference/value_remaining.html
docs/reference/var_welford.html
docs/reference/which_max_list.html
docs/reference/which_max_tied.html
man/Agent.Rd man/Bandit.Rd man/BasicBernoulliBandit.Rd man/BasicGaussianBandit.Rd man/BootstrapTSPolicy.Rd man/ContextualBernoulliBandit.Rd man/ContextualBinaryBandit.Rd man/ContextualEpochGreedyPolicy.Rd man/ContextualEpsilonGreedyPolicy.Rd man/ContextualHybridBandit.Rd man/ContextualLinTSPolicy.Rd man/ContextualLinearBandit.Rd man/ContextualLogitBTSPolicy.Rd man/ContextualLogitBandit.Rd man/ContextualPrecachingBandit.Rd man/ContextualTSProbitPolicy.Rd man/ContextualWheelBandit.Rd man/ContinuumBandit.Rd man/EpsilonFirstPolicy.Rd man/EpsilonGreedyPolicy.Rd man/Exp3Policy.Rd man/FixedPolicy.Rd man/GittinsBrezziLaiPolicy.Rd man/GradientPolicy.Rd man/History.Rd man/LifPolicy.Rd man/LinUCBDisjointOptimizedPolicy.Rd man/LinUCBDisjointPolicy.Rd man/LinUCBGeneralPolicy.Rd man/LinUCBHybridOptimizedPolicy.Rd man/LinUCBHybridPolicy.Rd man/OfflineBootstrappedReplayBandit.Rd man/OfflineDirectMethodBandit.Rd man/OfflineDoublyRobustBandit.Rd man/OfflineLookupReplayEvaluatorBandit.Rd man/OfflinePropensityWeightingBandit.Rd man/OfflineReplayEvaluatorBandit.Rd man/OraclePolicy.Rd man/Plot.Rd man/Policy.Rd man/RandomPolicy.Rd man/Simulator.Rd man/SoftmaxPolicy.Rd man/ThompsonSamplingPolicy.Rd man/UCB1Policy.Rd man/UCB2Policy.Rd man/clipr.Rd man/data_table_factors_to_numeric.Rd man/dec-set.Rd
man/figures/1simulator.jpeg
man/figures/2agent.jpeg
man/figures/3abandit.jpeg
man/figures/3bpolicy.jpeg
man/figures/3cbandit.jpeg
man/figures/3dpolicy.jpeg
man/figures/algoepsilonfirst.jpg
man/figures/cmab_all.jpeg
man/figures/cmab_all_large.jpg
man/figures/cmab_all_medium.jpg
man/formatted_difftime.Rd man/get_arm_context.Rd man/get_full_context.Rd man/get_global_seed.Rd man/inc-set.Rd man/ind.Rd man/inv.Rd man/invgamma.Rd man/invlogit.Rd man/is_rstudio.Rd man/mvrnorm.Rd man/one_hot.Rd man/ones_in_zeroes.Rd man/plot.history.Rd man/print.history.Rd man/prob_winner.Rd man/sample_one_of.Rd man/set_external.Rd man/set_global_seed.Rd man/sherman_morrisson.Rd man/sim_post.Rd man/sum_of.Rd man/summary.history.Rd man/value_remaining.Rd man/var_welford.Rd man/which_max_list.Rd man/which_max_tied.Rd
tests/figs/deps.txt
tests/figs/plot/arm-plot.svg
tests/figs/plot/arms-color.svg
tests/figs/plot/arms-lims.svg
tests/figs/plot/average-regret-plot.svg
tests/figs/plot/average-reward-plot.svg
tests/figs/plot/basic-cumulative-plot.svg
tests/figs/plot/color-and-lty-stepping.svg
tests/figs/plot/cumulative-sd-plot.svg
tests/figs/plot/cumulative-traces-plot.svg
tests/figs/plot/legend-title-and-labels-plot.svg
tests/figs/plot/limits-plot.svg
tests/figs/plot/lwd-pot.svg
tests/figs/plot/only-sd-plot.svg
tests/figs/plot/plot-inc-var-no-color.svg
tests/figs/plot/traces-alpha-and-max-plot.svg
tests/figs/plot/traces-plot-smooth.svg
tests/figs/plot/ylim-plot.svg
tests/testthat.R
tests/testthat/history_context_test.ref
tests/testthat/history_context_theta_test.ref
tests/testthat/history_test.ref
tests/testthat/history_theta_test.ref
tests/testthat/setup_tests.R tests/testthat/teardown_tests.R tests/testthat/test_agent.R tests/testthat/test_bandits.R tests/testthat/test_history.R tests/testthat/test_plot.R tests/testthat/test_policies.R tests/testthat/test_policy.R tests/testthat/test_utility_functions.R
vignettes/1.png
vignettes/Rplot.png
vignettes/basic_epsilon_greedy.png
vignettes/carskit_depaul.png
vignettes/cmabs.R vignettes/cmabs.Rmd
vignettes/cmabs.png
vignettes/cmabsoffline.R vignettes/cmabsoffline.Rmd
vignettes/compare.png
vignettes/contextual-fig-1.png
vignettes/contextual-fig-2.png
vignettes/eckles_kaptein.R vignettes/eckles_kaptein.Rmd
vignettes/eckles_kaptein_0.png
vignettes/eckles_kaptein_1.png
vignettes/eg_average_reward.png
vignettes/eg_cumulative_reward.png
vignettes/eg_incorrect.png
vignettes/eg_optimal_action.png
vignettes/epsilongreedy.R vignettes/epsilongreedy.Rmd vignettes/introduction.R vignettes/introduction.Rmd
vignettes/linucboffline.png
vignettes/mabs.Rmd
vignettes/mabs.png
vignettes/ml10m.R vignettes/ml10m.Rmd
vignettes/ml10m.png
vignettes/offline_depaul_movies.R vignettes/offline_depaul_movies.Rmd vignettes/only_pkgdown/faq.Rmd
vignettes/only_pkgdown/faq.html
vignettes/replication-fig-1.png
vignettes/replication-fig-2.png
vignettes/replication.R vignettes/replication.Rmd vignettes/simpsons.R vignettes/simpsons.Rmd
vignettes/softmax_average_reward.png
vignettes/softmax_cumulative_reward.png
vignettes/softmax_optimal_action.png
vignettes/sutton_barto.R vignettes/sutton_barto.Rmd
vignettes/sutton_eg_1.png
vignettes/sutton_eg_2.png
vignettes/sutton_gradient.png
vignettes/sutton_optimistic.png
vignettes/sutton_ucb.png
vignettes/sutton_violin.png
vignettes/ucb_average_reward.png
vignettes/ucb_cumulative_reward.png
vignettes/ucb_optimal_action.png
vignettes/website_optimization.R vignettes/website_optimization.Rmd
Nth-iteration-labs/contextual documentation built on Dec. 28, 2019, 8:17 p.m.