# R/analyzeGibbs.R In blink: Record Linkage for Empirically Motivated Priors

```#' Function to compute a record's Maximal Matching Set (MMS) based on a single linkage structure
#'
#' @param lambda The linkage structure
#' @param record A vector of records
#' @return Computes a records MMS
#' @export

#' @examples
#' lambda <- matrix(c(1,1,2,2,3,3),ncol=3)
#' record <- c(1,10,3,5,20,2)
#' mms(lambda=lambda, record=record)

mms <- function(lambda, record){
lambda. <- drop(lambda)
return(which(lambda.==lambda.[record]))
}

#' Function to compute a record's MPMMS based on a Gibbs sampler. Note: It returns a list of the MPMMS (\$mpmms) and its probability (\$prob)
#'
#' @import stats
#' @import utils
#' @param lam.gs The gibbs sampler
#' @param record A specific record
#' @return Returns a list of the MPMSS and the associated probabilities.
#' @export

#' @examples
#' lam.gs <- matrix(c(1,1,2,2,3,3,5,6,4,3,4,5,3,2,4,1,2,3,4,2),ncol=20,  nrow=4)
#' record <- c(1,3,1,3,1,3,1,3,1,3,1,3,1,3,1,3,1,3,1,3)
#' mpmms(lam.gs=lam.gs, record=record)

mpmms <- function(lam.gs,record){
G <- dim(lam.gs)[1]
mms.out <- NULL
num.mms <- 0
for(g in 1:G){
lambda.g <- drop(lam.gs[g,])
mms.g <- which(lambda.g==lambda.g[record])
mms.g.str <- paste(mms.g,collapse="-")
if(is.null(mms.out[as.character(mms.g.str)]) ||
is.na(mms.out[as.character(mms.g.str)])){
num.mms <- num.mms+1
mms.out <- append(mms.out,1)
names(mms.out)[num.mms] <- mms.g.str
}else{
mms.out[as.character(mms.g.str)] <-
mms.out[as.character(mms.g.str)]+1
}
}
mms.out <- mms.out/G
mpmms.info <- mms.out[which.max(mms.out)]
mpmms.out <- as.numeric(strsplit(names(mpmms.info),
split="-",fixed=TRUE)[[1]])
prob.out <- as.numeric(mpmms.info)
return(list(mpmms=mpmms.out,prob=prob.out))
}

#' Function that returns the shared MPMMS
#' (except with an easier condition to code than JASA paper).
#'  Function to make a list of vectors of estimated links by "P(MPMMS)>0.5" method
#' Note: The default settings return only MPMMSs with multiple members.
#'
#' @import stats
#' @import utils
#' @param lam.gs The estimated linkage structure with a default of 10 iterations
#' @param include.singles Do not include the singleton records
#' @param show.as.multiple Always return MPMMSs that have more than one member
#' @return Returns the shared MPMMS
#' @export

#' @examples
#' lam.gs <- matrix(c(1,1,2,2,3,3,5,6,4,3,4,5,3,2,4,1,2,3,4,2),ncol=20,  nrow=4)

links <- function(lam.gs =lam.gs ,include.singles=FALSE,show.as.multiple=FALSE){
N <- dim(lam.gs)[2]
for(r in 1:N){
mpmms.list.r <- mpmms(lam.gs,r)
mpmms.r <- mpmms.list.r\$mpmms
if(length(mpmms.r)>1 || include.singles){
prob.r <- mpmms.list.r\$prob
if(prob.r>0.5){
if(r==min(mpmms.r) || show.as.multiple){
}
}else{
}
}
}
}

#' Function to take links list that may contain 3-way, 4-way, etc.
#' and reduce it to pairwise only (e.g., a 3-way link 12-45-78 is
#' changed to 2-way links: 12-45, 12-78, 45-78
#'
#' @import stats
#' @import utils
#' @param .links A vector of records that are linked to one another
#' @return Returns two ways links of records
#' @export

#' @examples
#' id <- c(1,2,3,4,5,1,7,8,9,10,11,12,13,14,15,16,17,18,19,20)
#' lam.gs <- matrix(c(1,1,2,2,3,3,5,6,4,3,4,5,3,2,4,1,2,3,4,2),ncol=20, nrow=4)

# more efficient to use combn
}
}
}
}

#' This function takes a set of pairwise links and identifies correct, incorrect,
#' and missing links
#' (correct = estimated and true, incorrect = estimated but not true,
#' missing = true but not estimated)
# TODO: consider using %in% to remove for loops?
#' @import stats
#' @import utils
#' @param est.links.pair The number of estimated links
#' @param true.links.pair The number of true links
#' @param counts.only State whether or not counts only is true or false
#' @return Gives a vector of the estimated and true links, estimated but not true links, and the true but not estimated links
#' @export

#' @examples
#' id <- c(1,2,3,4,5,1,7,8,9,10,11,12,13,14,15,16,17,18,19,20)
#' lam.gs <- matrix(c(1,1,2,2,3,3,5,6,4,3,4,5,3,2,4,1,2,3,4,2),ncol=20, nrow=4)

correct.out <- list()
incorrect.out <- list()
missing.out <- list()
found <- FALSE
found <- TRUE
}
}
if(!found){
}
}
missing <- TRUE
missing <- FALSE
}
}
if(missing){
}
}
if(counts.only){
return(list(correct=length(correct.out),incorrect=length(incorrect.out),
missing=length(missing.out)))
}else{
return(list(correct=correct.out,incorrect=incorrect.out,
missing=missing.out))
}
}

#' Check whether 2 records which are estimated to be linked have the same IDs
#'
#' @param recpair A record pair
#' @param identity_vector A vector of the unique ids
#' @return Whether or not two records which are estimated to be linked have the same unique ids
#' @export

#' @examples
#' id <- c(1,2,3,4,5,1,7,8,9,10,11,12,13,14,15,16,17,18,19,20)
#' lam.gs <- matrix(c(1,1,2,2,3,3,5,6,4,3,4,5,3,2,4,1,2,3,4,2),ncol=20, nrow=4)
#' recpair <- est.links.pair[[1]]
#' check_IDs(recpair=recpair,identity_vector=id)

check_IDs <- function(recpair=recpair, identity_vector=identity_vector) {
rec1 <- recpair[1]
rec2 <- recpair[2]
return(identity_vector[rec1] == identity_vector[rec2])
}
```

## Try the blink package in your browser

Any scripts or data that you put into this service are public.

blink documentation built on Aug. 7, 2017, 9:03 a.m.