#' Function to compute a record's Maximal Matching Set (MMS) based on a single linkage structure
#'
#' @param lambda The linkage structure
#' @param record A vector of records
#' @return Computes a records MMS
#' @export
#' @examples
#' lambda <- matrix(c(1,1,2,2,3,3),ncol=3)
#' record <- c(1,10,3,5,20,2)
#' mms(lambda=lambda, record=record)
mms <- function(lambda, record){
lambda. <- drop(lambda)
return(which(lambda.==lambda.[record]))
}
#' Function to compute a record's MPMMS based on a Gibbs sampler. Note: It returns a list of the MPMMS ($mpmms) and its probability ($prob)
#'
#' @import stats
#' @import utils
#' @param lam.gs The gibbs sampler
#' @param record A specific record
#' @return Returns a list of the MPMSS and the associated probabilities.
#' @export
#' @examples
#' lam.gs <- matrix(c(1,1,2,2,3,3,5,6,4,3,4,5,3,2,4,1,2,3,4,2),ncol=20, nrow=4)
#' record <- c(1,3,1,3,1,3,1,3,1,3,1,3,1,3,1,3,1,3,1,3)
#' mpmms(lam.gs=lam.gs, record=record)
mpmms <- function(lam.gs,record){
G <- dim(lam.gs)[1]
mms.out <- NULL
num.mms <- 0
for(g in 1:G){
lambda.g <- drop(lam.gs[g,])
mms.g <- which(lambda.g==lambda.g[record])
mms.g.str <- paste(mms.g,collapse="-")
if(is.null(mms.out[as.character(mms.g.str)]) ||
is.na(mms.out[as.character(mms.g.str)])){
num.mms <- num.mms+1
mms.out <- append(mms.out,1)
names(mms.out)[num.mms] <- mms.g.str
}else{
mms.out[as.character(mms.g.str)] <-
mms.out[as.character(mms.g.str)]+1
}
}
mms.out <- mms.out/G
mpmms.info <- mms.out[which.max(mms.out)]
mpmms.out <- as.numeric(strsplit(names(mpmms.info),
split="-",fixed=TRUE)[[1]])
prob.out <- as.numeric(mpmms.info)
return(list(mpmms=mpmms.out,prob=prob.out))
}
#' Function that returns the shared MPMMS
#' (except with an easier condition to code than JASA paper).
#' Function to make a list of vectors of estimated links by "P(MPMMS)>0.5" method
#' Note: The default settings return only MPMMSs with multiple members.
#'
#' @import stats
#' @import utils
#' @param lam.gs The estimated linkage structure with a default of 10 iterations
#' @param include.singles Do not include the singleton records
#' @param show.as.multiple Always return MPMMSs that have more than one member
#' @return Returns the shared MPMMS
#' @export
#' @examples
#' lam.gs <- matrix(c(1,1,2,2,3,3,5,6,4,3,4,5,3,2,4,1,2,3,4,2),ncol=20, nrow=4)
#' links(lam.gs)
links <- function(lam.gs =lam.gs ,include.singles=FALSE,show.as.multiple=FALSE){
N <- dim(lam.gs)[2]
links.out <- list()
for(r in 1:N){
mpmms.list.r <- mpmms(lam.gs,r)
mpmms.r <- mpmms.list.r$mpmms
if(length(mpmms.r)>1 || include.singles){
prob.r <- mpmms.list.r$prob
if(prob.r>0.5){
if(r==min(mpmms.r) || show.as.multiple){
links.out[[length(links.out)+1]] <- mpmms.r
}
}else{
if(include.singles){links.out[[length(links.out)+1]] <- r}
}
}
}
return(links.out)
}
#' Function to take links list that may contain 3-way, 4-way, etc.
#' and reduce it to pairwise only (e.g., a 3-way link 12-45-78 is
#' changed to 2-way links: 12-45, 12-78, 45-78
#'
#' @import stats
#' @import utils
#' @param .links A vector of records that are linked to one another
#' @return Returns two ways links of records
#' @export
#' @examples
#' id <- c(1,2,3,4,5,1,7,8,9,10,11,12,13,14,15,16,17,18,19,20)
#' lam.gs <- matrix(c(1,1,2,2,3,3,5,6,4,3,4,5,3,2,4,1,2,3,4,2),ncol=20, nrow=4)
#' est.links <- links(lam.gs)
#' est.links.pair <- pairwise(est.links)
pairwise <- function(.links){
links.out <- list()
# more efficient to use combn
for(l in 1:length(.links)){
link.l <- .links[[l]]
for(r1 in 1:(length(link.l)-1)){
for(r2 in (r1+1):length(link.l)){
links.out[[length(links.out)+1]] <- c(link.l[r1],link.l[r2])
}
}
}
return(links.out)
}
#' This function takes a set of pairwise links and identifies correct, incorrect,
#' and missing links
#' (correct = estimated and true, incorrect = estimated but not true,
#' missing = true but not estimated)
# TODO: consider using %in% to remove for loops?
#' @import stats
#' @import utils
#' @param est.links.pair The number of estimated links
#' @param true.links.pair The number of true links
#' @param counts.only State whether or not counts only is true or false
#' @return Gives a vector of the estimated and true links, estimated but not true links, and the true but not estimated links
#' @export
#' @examples
#' id <- c(1,2,3,4,5,1,7,8,9,10,11,12,13,14,15,16,17,18,19,20)
#' lam.gs <- matrix(c(1,1,2,2,3,3,5,6,4,3,4,5,3,2,4,1,2,3,4,2),ncol=20, nrow=4)
#' est.links <- links(lam.gs)
#' true.links <- links(matrix(id,nrow=1))
#' est.links.pair <- pairwise(est.links)
#' links.compare(est.links.pair, true.links=id)
links.compare <- function(est.links.pair,true.links.pair,counts.only=TRUE){
correct.out <- list()
incorrect.out <- list()
missing.out <- list()
for(l in 1:length(est.links.pair)){
link.l <- est.links.pair[[l]]
found <- FALSE
for(t in 1:length(true.links.pair)){
if(all(link.l==true.links.pair[[t]])){
correct.out[[length(correct.out)+1]] <- link.l
found <- TRUE
}
}
if(!found){
incorrect.out[[length(incorrect.out)+1]] <- link.l
}
}
for(m in 1:length(true.links.pair)){
link.m <- true.links.pair[[m]]
missing <- TRUE
for(e in 1:length(est.links.pair)){
if(all(link.m==est.links.pair[[e]])){
missing <- FALSE
}
}
if(missing){
missing.out[[length(missing.out)+1]] <- link.m
}
}
if(counts.only){
return(list(correct=length(correct.out),incorrect=length(incorrect.out),
missing=length(missing.out)))
}else{
return(list(correct=correct.out,incorrect=incorrect.out,
missing=missing.out))
}
}
#' Check whether 2 records which are estimated to be linked have the same IDs
#'
#' @param recpair A record pair
#' @param identity_vector A vector of the unique ids
#' @return Whether or not two records which are estimated to be linked have the same unique ids
#' @export
#' @examples
#' id <- c(1,2,3,4,5,1,7,8,9,10,11,12,13,14,15,16,17,18,19,20)
#' rec1 <- 6
#' rec2 <- 1
#' check_IDs(recpair=c(rec1,rec2),identity_vector=id)
check_IDs <- function(recpair, identity_vector) {
rec1 <- recpair[1]
rec2 <- recpair[2]
return(identity_vector[rec1] == identity_vector[rec2])
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.