Nothing
## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
)
## ----setup--------------------------------------------------------------------
library(FlexRL)
## -----------------------------------------------------------------------------
df2016 = read.csv("exA.csv", row.names = 1)
df2020 = read.csv("exB.csv", row.names = 1)
## -----------------------------------------------------------------------------
head(df2016)
## -----------------------------------------------------------------------------
head(df2020)
## -----------------------------------------------------------------------------
PIVs_config = list(
ANASCI = list(stable = TRUE),
SESSO = list(stable = TRUE),
STACIV = list(stable = TRUE),
STUDIO = list(stable = TRUE),
IREG = list(stable = TRUE)
)
PIVs = names(PIVs_config)
PIVs_stable = sapply(PIVs_config, function(x) x$stable)
## -----------------------------------------------------------------------------
for(i in 1:length(PIVs)){
intersect_support_piv = intersect( unique(df2016[,PIVs[i]]), unique(df2020[,PIVs[i]]) )
df2016 = df2016[df2016[,PIVs[i]] %in% c(NA,intersect_support_piv),]
df2020 = df2020[df2020[,PIVs[i]] %in% c(NA,intersect_support_piv),]
}
rownames(df2016) = 1:nrow(df2016)
rownames(df2020) = 1:nrow(df2020)
## -----------------------------------------------------------------------------
links = intersect(df2016$ID, df2020$ID)
Nlinks = length(links)
TrueDelta = data.frame( matrix(0, nrow=0, ncol=2) )
for (i in 1:Nlinks)
{
id = links[i]
id16 = which(df2016$ID == id)
id20 = which(df2020$ID == id)
TrueDelta = rbind(TrueDelta, cbind(rownames(df2016[id16,]),rownames(df2020[id20,])))
}
true_pairs = do.call(paste, c(TrueDelta, list(sep="_")))
## -----------------------------------------------------------------------------
nrow(df2016)
## -----------------------------------------------------------------------------
nrow(df2020)
## -----------------------------------------------------------------------------
Nlinks
## -----------------------------------------------------------------------------
colSums(df2016[TrueDelta[,1],PIVs] == df2020[TrueDelta[,2],PIVs]) / Nlinks
## -----------------------------------------------------------------------------
colSums(is.na(df2016[TrueDelta[,1],PIVs])) / Nlinks
## -----------------------------------------------------------------------------
colSums(is.na(df2020[TrueDelta[,2],PIVs])) / Nlinks
## -----------------------------------------------------------------------------
df2016$source = "df2016"
df2020$source = "df2020"
if(nrow(df2020)>nrow(df2016)){
encodedA = df2016
encodedB = df2020
cat("df2020 is the largest file, denoted encodedB")
}else{
encodedB = df2016
encodedA = df2020
cat("df2016 is the largest file, denoted encodedB")
}
levels_PIVs = lapply(PIVs, function(x) levels(factor(as.character(c(encodedA[,x], encodedB[,x])))))
for(i in 1:length(PIVs))
{
encodedA[,PIVs[i]] = as.numeric(factor(as.character(encodedA[,PIVs[i]]), levels=levels_PIVs[[i]]))
encodedB[,PIVs[i]] = as.numeric(factor(as.character(encodedB[,PIVs[i]]), levels=levels_PIVs[[i]]))
}
nvalues = sapply(levels_PIVs, length)
names(nvalues) = PIVs
encodedA[,PIVs][ is.na(encodedA[,PIVs]) ] = 0
encodedB[,PIVs][ is.na(encodedB[,PIVs]) ] = 0
## -----------------------------------------------------------------------------
nvalues
## -----------------------------------------------------------------------------
data = list( A = encodedA,
B = encodedB,
Nvalues = nvalues,
PIVs_config = PIVs_config,
controlOnMistakes = c(TRUE, TRUE, FALSE, FALSE, FALSE),
sameMistakes = TRUE,
phiMistakesAFixed = FALSE,
phiMistakesBFixed = FALSE,
phiForMistakesA = c(NA, NA, NA, NA, NA),
phiForMistakesB = c(NA, NA, NA, NA, NA)
)
fit = FlexRL::stEM( data = data,
StEMIter = 50,
StEMBurnin = 30,
GibbsIter = 100,
GibbsBurnin = 70,
musicOn = TRUE,
newDirectory = NULL,
saveInfoIter = FALSE
)
## -----------------------------------------------------------------------------
DeltaResult = fit$Delta
colnames(DeltaResult) = c("idx20","idx16","probaLink")
DeltaResult = DeltaResult[DeltaResult$probaLink>0.5,]
DeltaResult
## -----------------------------------------------------------------------------
results = data.frame( Results=matrix(NA, nrow=6, ncol=1) )
rownames(results) = c("tp","fp","fn","f1score","fdr","sens.")
if(nrow(DeltaResult)>1){
linked_pairs = do.call(paste, c(DeltaResult[,c("idx16","idx20")], list(sep="_")))
truepositive = length( intersect(linked_pairs, true_pairs) )
falsepositive = length( setdiff(linked_pairs, true_pairs) )
falsenegative = length( setdiff(true_pairs, linked_pairs) )
precision = truepositive / (truepositive + falsepositive)
fdr = 1 - precision
sensitivity = truepositive / (truepositive + falsenegative)
f1score = 2 * (precision * sensitivity) / (precision + sensitivity)
results[,"FlexRL"] = c(truepositive,falsepositive,falsenegative,f1score,fdr,sensitivity)
}
results
## -----------------------------------------------------------------------------
colSums(encodedA[DeltaResult[,"idx20"],PIVs] == encodedB[DeltaResult[,"idx16"],PIVs]) / nrow(DeltaResult)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.