knitr::opts_chunk$set(echo = TRUE)

Libraries

library(tidyverse)
library(reshape2)
library(cowplot)
library(MASS)

RBF kernel

rbf_kernel <- function(a,b,sigma){
  k <- exp(-norm(a-b, type = "2")^2/(2*sigma**2))
  return(k)
}

Covariance matrix

# univariate case
# cov_matrix_univ <- function(x, sigma){
#   n <- length(x)
#   cov <- matrix(rep(0, n*n), nrow = n)
#   for(i in 1:n){
#     for(j in 1:n){
#       cov[i,j] <- rbf_kernel(x[i],x[j],sigma)
#     }
#   }
#   return(cov)
# }

# # multivariate case
# cov_matrix <- function(X, sigma){
#   n_rows <- dim(X)[1]
#   n_cols <- dim(X)[2]
#   cov <- matrix(rep(0, n_rows*n_cols), nrow = n_rows)
#   for(i in 1:n_rows){
#     for(j in 1:n_cols){
#       cov[i,j] <- rbf_kernel(X[,i],X[,j],sigma)
#     }
#   }
#   return(cov)
# }

# mtwo matrix input case
cov_matrix_1 <- function(X1,X2,sigma){
  if(is.null(dim(X1))){
    X1 <- t(as.matrix(X1))
  }
  if(is.null(dim(X2))){
    X2 <- t(as.matrix(X2))
  }
  n1 <- dim(X1)[2]
  n2 <- dim(X2)[2]
  cov <- matrix(rep(0, n1*n2), nrow = n1)
  print(dim(cov))
  for(i in 1:n1){
    for(j in 1:n2){
      cov[i,j] <- rbf_kernel(X1[,i],X2[,j],sigma)
    }
  }
  return(cov)
}

Example of an rbf kernel

x <- seq(-20,20, by = 1)
n <- length(x)
sigma <- cov_matrix_1(x,x,4)
df <- as.data.frame(cbind(as.vector(sigma), rep(x,n), rep(x,each=n)))
colnames(df) <- c("value","x","y")
df %>% 
  ggplot(aes(x=x,y=y))+
  geom_raster(aes(fill=value))

Generate 5 realizations of the process

n_samples <- 7
n <- length(x)
samples <- matrix(rep(0,n*n_samples), nrow = n, ncol = n_samples)
for(i in 1:7){
  samples[,i] <- mvrnorm(1, rep(0,n),sigma)
}
df <- as.data.frame(cbind(x,samples))
df_long <- melt(df,id = "x")

p1 <- df_long %>% 
  ggplot(aes(x,value,colour=variable))+
  geom_line()+
  geom_hline(yintercept = 0)+
  geom_hline(yintercept = 2)+
  geom_hline(yintercept = -2)+
  theme_minimal()
p1

Condition on known data points

x_train <- c(-4,10)
y_train <- c(1.5,-1)
# sigma_xx <- cov_matrix_1(x_train,x_train,3)
# sigma_xtxt <- cov_matrix_1(x,x,3)
# sigma_xxt <- cov_matrix_1(x_train,x,3)
# 
# sigma_new <- sigma_xtxt-t(sigma_xxt) %*% solve(sigma_xx) %*% sigma_xxt
# mu_new <- t(sigma_xxt) %*% solve(sigma_xx) %*% y_train

Gaussian posterior function

# Given observations x1, y1, calculate the posterior mean and covariance 
# for y2 based on the input x2.
gp_posterior <- function(x1,y1,x2,sigma){
  sigma_11 <- cov_matrix_1(x1,x1,sigma)
  sigma_12 <- cov_matrix_1(x1,x2,sigma)
  sigma_22 <- cov_matrix_1(x2,x2,sigma)
  dim(sigma_11)
  dim(sigma_12)
  dim(sigma_22)
  sigma2 <- sigma_22 - t(sigma_12) %*% solve(sigma_11) %*% sigma_12
  mu2 <- t(sigma_12) %*% solve(sigma_11) %*% y1
  return(list(mean=mu2,cov=sigma2))
}

Plot

mu_new <- gp_posterior(x_train,y_train,x,6)$mean
cov_new <- gp_posterior(x_train,y_train,x,6)$cov

for(i in 1:7){
  samples[,i] <- mvrnorm(1,mu_new,cov_new)
}
df <- as.data.frame(cbind(x,samples))
df_long <- melt(df,id = "x")

p2 <- df_long %>% 
  ggplot(aes(x,value, colour=variable))+
  geom_line()+
  geom_hline(yintercept = 0)+
  geom_hline(yintercept = 2)+
  geom_hline(yintercept = -2)+
  theme_minimal()
p2

Prior vs posterior

plot_grid(p1,p2)


andreabecsek/panda documentation built on Jan. 2, 2020, 1:56 p.m.