# Calculates prior and likelihood used by Bayesian decoder
#
# Returns a list with fields
# prior: prior probability of each stimulus, length equal nstim.bins
# likelihood: 3D array: nstim.bins x ncells x nresponse.bins with a probability
# of a cell giving particular response for a given stimulus bin
create.discrete.bayes = function(training.df, nstim.bins=20*20, value.var=response_bin, stim.var=bin.xy) {
stim.var = enquo(stim.var)
value.var.name = quo_name(enquo(value.var))
# to matrix representation
response = reshape2::acast(training.df, cell_id ~ time_bin, value.var=value.var.name)
setorder(training.df, time_bin)
stimulus = training.df %>%
dplyr::select(time_bin, !! stim.var) %>%
dplyr::arrange(time_bin) %>%
dplyr::distinct(time_bin, .keep_all=TRUE) # keep one stimulus value per time bin
stimulus = stimulus[[2]]
occupancy.hist = hist(stimulus, breaks=0:nstim.bins, plot=FALSE)
Moccupancy = occupancy.hist$counts
visited = which(Moccupancy > 0)
nresponse.bins = max(training.df[[value.var.name]])
Moccupancy[visited] = Moccupancy[visited] + nresponse.bins
ncells = nrow(response)
M = array(0, dim=c(nstim.bins, ncells, nresponse.bins))
# Make unseen bins probability > 0 by adding 1 to each response_bin count
M[visited,,] = 1
for (time_bin_i in 1:length(stimulus)) {
for (cell_i in 1:ncells) {
response_bin = response[cell_i, time_bin_i];
M[stimulus[time_bin_i], cell_i, response_bin] = M[stimulus[time_bin_i], cell_i, response_bin] + 1
}
}
Mlikelihood = M / Moccupancy
colnames(Mlikelihood) = rownames(response)
return(list(prior=occupancy.hist$density,
likelihood=Mlikelihood))
}
create.mfr.bayes = function(training.df, nstim.bins=20*20, value.var=nevents, stim.var=bin.xy) {
stim.var = enquo(stim.var)
stim.var.name = quo_name(stim.var)
value.var = enquo(value.var)
value.var.name = quo_name(value.var)
training.df = dplyr::rename(training.df, s = !!stim.var)
sample.cell.df = training.df[, head(.SD, 1), by = time_bin]
occupancy.hist = hist(sample.cell.df$s, breaks=0:nstim.bins, plot=FALSE)
cell.ids = unique(training.df$cell_id)
full.xy.df = data.frame(s = rep(1:nstim.bins, length(cell.ids)),
nevents=NA,
cell_id = rep(cell.ids, each=nstim.bins))
# Add one event to each visited bin to avoid 0 probability of the bin
one.event.df = select(training.df, cell_id, s) %>%
distinct() %>%
mutate(nevents=1)
model.df = bind_rows(select(training.df, cell_id, s, !!value.var), full.xy.df, one.event.df)
mfr.matrix = reshape2::acast(model.df, cell_id ~ s, value.var=value.var.name, fun.aggregate=function(x) {mean(x, na.rm=TRUE)})
sd.matrix = reshape2::acast(model.df, cell_id ~ s, value.var=value.var.name, fun.aggregate=function(x) {sd(x, na.rm=TRUE)})
M = abind::abind(mfr.matrix, sd.matrix, along=3)
return(list(occupancy.counts=occupancy.hist$counts,
prior=occupancy.hist$density,
likelihood=M))
}
smooth.likelihoods = function(model.bayes, sigma=0.8) {
nstim = dim(model.bayes$likelihood)[1]
ncells = dim(model.bayes$likelihood)[2]
nresponses = dim(model.bayes$likelihood)[3]
likelihood.smooth = array(0, dim=dim(model.bayes$likelihood))
for (cell_i in 1:ncells) {
for (r in 1:nresponses) {
M = matrix(model.bayes$likelihood[, cell_i, r], ncol=xybins, nrow=xybins, byrow = TRUE)
M1 = gauss2dsmooth(M, lambda=sigma, nx=8, ny=8)
M1[which(is.na(M))] = NA
likelihood.smooth[, cell_i, r] = as.vector(t(M1))
}
# normalize to prob values
cell.norm = rowSums(likelihood.smooth[, cell_i, ])
likelihood.smooth[, cell_i, ] = likelihood.smooth[, cell_i,] / cell.norm
}
dimnames(likelihood.smooth) = dimnames(model.bayes$likelihood)
model.bayes$likelihood = likelihood.smooth
return(model.bayes)
}
# # Bayes rule (will assume P(s) uniform):
# # max_s [P(s|r)] ~ max_s P(r|s) * P(s)
# #
# # Assuming independent activity of the cells:
# # P(r|s) = II_i P(r_i|s)
# # pv - data frame with activation
# bayes.max.s = function(model.bayes, pv) {
# model.df = model.bayes$likelihood
#
# if (length(pv) == 0) {
# print('Error, pv empty')
# }
# #setkey(pv, cell_id, bin.response)
# max.s = -1
# max.s.prob = -1.0
# for (s in model.df[,unique(bin.xy)]) {
# #cells.probs = model.df[bin.xy==s,]
# #if (nrow(cells.probs) > 0) {
# # if a cell not present on the day, it will be excluded from probability calculation
# #probs = pv[model.df[bin.xy==s,], prob.r, on=c('cell_id', 'bin.response')]
# probs = model.df[bin.xy==s,][pv, prob.r, on=c('cell_id', 'bin.response')]
#
# if (length(probs) > 0) {
# # P(r|s) = II_i P(r_i|s)
# s.prob = cumprod(probs) %>% last
# s.prob = s.prob * model.bayes$prior[bin.xy==s, prob]
#
# if (s.prob > max.s.prob) {
# max.s.prob = s.prob
# max.s = s
# }
# }
# }
# return(list(s=max.s, prob=max.s.prob))
# }
#
#
# eval.testdata = function(test.df, model.bayes) {
# test.timestamps = test.df[,unique(time_bin)]
# ntimestamps = length(test.timestamps)
# error.norms = rep(0, ntimestamps)
# expected.s = data.frame(bin.x=error.norms, bin.y=error.norms)
# actual.s = data.frame(x=error.norms, y=error.norms)
#
# for (i in 1:length(test.timestamps)) {
# test.timestamp = test.timestamps[i]
# pv = test.df[time_bin==test.timestamp, .(cell_id, bin.response=response_bin)]
# expected.s.row = test.df[time_bin==test.timestamp, .(bin.x, bin.y)] %>% first
# expected.s[i,] = expected.s.row
# actual.s.res = bayes.max.s(model.bayes, pv)
# actual.s.bin = actual.s.res$s %>% from_1dim()
# actual.s[i, ] = actual.s.bin
#
# error.norms[i] = norm2(actual.s.bin$x - expected.s.row$bin.x, actual.s.bin$y - expected.s.row$bin.y)
# }
#
# results.df = cbind(expected.s, actual.s) %>%
# dplyr::rename(actual.x=x, actual.y=y)
# results.df$error = error.norms
# return(results.df)
# }
bin.distance.error = function(nstim.x, nstim.y) {
function(expected.xy, actual.xy) {
actual.s.bin = from_1dim(actual.xy, nstim.y)
expected.s.bin = from_1dim(expected.xy, nstim.y)
dist = norm2(actual.s.bin$x - expected.s.bin$x, actual.s.bin$y - expected.s.bin$y)
return(dist)
}
}
eval.testdata2 = function(test.df,
model.bayes,
predict.fun,
error.fun,
stim.var,
nstim.bins,
value.var) {
stim.var = enquo(stim.var)
stim.var.name = quo_name(stim.var)
value.var.name = quo_name(enquo(value.var))
actual.stim.var.name = paste0('actual.', stim.var.name)
test.response = reshape2::acast(test.df, cell_id ~ time_bin, value.var=value.var.name)
results.df = test.df %>%
mutate(prior = model.bayes$prior[test.df[[stim.var.name]]]) %>%
select(time_bin, !!stim.var, prior) %>%
arrange(time_bin) %>%
distinct()
results.df$error = 0
results.df[[actual.stim.var.name]] = 0
density.sum = matrix(0, nrow=nstim.bins,ncol=nstim.bins)
stimulus.counts = rep(0, nstim.bins)
for (i in 1:ncol(test.response)) {
pv = test.response[, i]
test.timestamp = as.integer(colnames(test.response)[i])
expected.s = results.df[[stim.var.name]][i]
actual.s.res = predict.fun(model.bayes$prior, model.bayes$likelihood, pv)
results.df[[actual.stim.var.name]][i] = actual.s.res$s
density = actual.s.res$density
density[is.na(density)] = 0
density.prob = density / sum(density)
density.sum[expected.s, ] = density.sum[expected.s, ] + density
stimulus.counts[expected.s] = stimulus.counts[expected.s] + 1
results.df$error[i] = error.fun(expected.s, actual.s.res$s)
}
stimulus.counts[which(stimulus.counts == 0)] = 1
return(list(df=results.df, density=density.sum / stimulus.counts))
}
random.prior.classifier = function(prior, likelihood, pv) {
r = runif(1)
prior[which(is.na(prior))] = 0
s = which(cumsum(prior) > r) %>% first
list(s=s, prob=prior[s], density=rep(1 / length(prior), length(prior)))
}
# Find index of the first timestamp with the value equal timestamps[ind].
# Finds leftmost index if inc==-1, and rightmost if inc==1.
# Requires the timestamps to be sorted.
find.first.timestamp = function(timestamps, ind, inc=1) {
while ((ind + inc > 0) &&
(ind + inc <= length(timestamps)) &&
(timestamps[ind] == timestamps[ind+inc])) {
ind = ind + inc
}
return(ind)
}
# Filter the two data frames keeping only rows with bin.xy sampled more than
# min.samples times.
filter.sampled = function(training.df, test.df, min.samples=20) {
bin.samples = training.df[, .(nsamples=length(unique(time_bin))), by=bin.xy]
train.filtered = bin.samples[training.df, on='bin.xy'][nsamples >= min.samples,]
test.filtered = bin.samples[test.df, on='bin.xy'][nsamples >= min.samples,]
list(train=training.df,
test=test.filtered)
}
# Train and evaluate the decoder
eval.decoder = function(binned.traces,
nstim.bins,
error.fun,
train.fun=create.discrete.bayes,
predict.fun=bayesmax,
stim.var=bin.xy,
value.var=response_bin,
training.split.fraction=0.8,
cv=TRUE,
min.samples=20,
filter.test.traces.fun=NULL) {
stim.var = enquo(stim.var)
value.var = enquo(value.var)
setorder(binned.traces, time_bin, cell_id)
ncv = ifelse(cv, (1.0 / (1.0 - training.split.fraction)) %>% floor %>% as.integer, 1)
nrows.training = floor(training.split.fraction * nrow(binned.traces))
nrows.testing = nrow(binned.traces) - nrows.training
result.df = data.frame()
binned.traces = data.table(binned.traces)
test.index.start = nrow(binned.traces)
for (i in 1:ncv) {
test.index.end = find.first.timestamp(binned.traces$time_bin, test.index.start, 1)
test.index.start = find.first.timestamp(binned.traces$time_bin, max(1, test.index.start - nrows.testing), -1)
test.indecies = test.index.start:test.index.end
filtered.dfs = filter.sampled(binned.traces[-test.indecies,],
binned.traces[test.indecies,],
min.samples=min.samples)
training.df = filtered.dfs$train
test.df = filtered.dfs$test
if (!is.null(filter.test.traces.fun)) {
test.df = filter.test.traces.fun(test.df)
}
if (nrow(test.df) == 0) {
warning('0 rows for testing after filtering with min samples')
} else {
model.bayes = train.fun(training.df,
nstim.bins=nstim.bins,
value.var=!!value.var,
stim.var=!!stim.var)
#smooth.model.bayes = smooth.likelihoods(model.bayes)
system.time(eval.res <- eval.testdata2(test.df,
model.bayes,
predict.fun=predict.fun,
stim.var=!!stim.var,
value.var=!!value.var,
nstim.bins=nstim.bins,
error.fun=error.fun))
partial.df = eval.res$df
random.classifier.res <- eval.testdata2(test.df,
model.bayes,
predict.fun=random.prior.classifier,
stim.var=!!stim.var,
value.var=!!value.var,
nstim.bins=nstim.bins,
error.fun=error.fun)
partial.df$random_error = random.classifier.res$df$error
partial.df$cv=i
result.df = bind_rows(result.df, partial.df)
}
}
return(result.df)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.