# R/match_and_merge.R

#' Assign a shared code to rows of a table that match each other
#'
#' Adds a \code{unique_code} column to \code{table}. Every row starts with its
#' own row number as code. Each rule is then applied in order: rows matched by
#' the rule have their code raised to the highest code among the rows they
#' match. Exact rules run before fuzzy rules.
#'
#' @param table A data frame. An existing \code{unique_code} column is
#'   overwritten.
#' @param rowIdName Name of the column that identifies each row. It is only
#'   forwarded to \code{private_multiCruce} for the fuzzy rules.
#' @param configExactMatch List of character vectors. Each vector names the
#'   columns that must all be equal, and non-missing, for rows to match.
#' @param configAproxMatch List of fuzzy rules. Each rule is a list whose
#'   first, second and third elements are forwarded to
#'   \code{private_multiCruce} as the join columns, the fuzzy variable(s) and
#'   \code{precisionAlta} respectively.
#' @return \code{table} with the additional \code{unique_code} column and the
#'   same number of rows as the input.
#' @export
#'
mergeDuplicated <- function(table, rowIdName, configExactMatch = NULL,
                            configAproxMatch = NULL) {
  # seq_len() gives an empty vector for a zero-row table, where seq.int(0)
  # would give c(1, 0) and make the assignment fail.
  table$unique_code <- seq_len(nrow(table))

  for (key_cols in configExactMatch) {
    print(paste('Match using: ', paste(key_cols, collapse = ', '), sep = ''))
    keys <- as.data.frame(table)[, key_cols, drop = FALSE]
    # Rows with a missing key value never take part in an exact match.
    has_key <- complete.cases(keys)
    if (!any(has_key)) {
      next
    }
    # One label per distinct key combination; the separator is a control
    # character so that values from different columns cannot run together.
    group <- do.call(
      paste,
      c(unname(as.list(keys[has_key, , drop = FALSE])), sep = '\037')
    )
    # Every row takes the highest code of its key group. This replaces the
    # former self-join + aggregate, whose formula grouped the row id by code
    # instead of the code by row id.
    table$unique_code[has_key] <- ave(table$unique_code[has_key], group,
                                      FUN = max)
  }

  for (rule in configAproxMatch) {
    print(paste('Match using: ', paste(rule[[1]], collapse = ', '),
                ' (Fuzzy ', paste(rule[[2]], collapse = ', '), ')', sep = ''))
    # NOTE(review): private_multiCruce is defined outside this block; as in
    # the previous implementation, its result is assumed to carry the columns
    # unique_code.x and unique_code.y -- confirm against the helper.
    candidates <- private_multiCruce(table, table, rowIdName, rowIdName,
                                     rule[[1]], rule[[2]],
                                     precisionAlta = rule[[3]],
                                     invertir = TRUE)
    if (is.null(candidates) || nrow(candidates) == 0) {
      next
    }
    remap <- data.frame(
      unique_code = candidates$unique_code.x,
      newidUC = pmax(candidates$unique_code.x, candidates$unique_code.y)
    )
    remap <- remap[complete.cases(remap), , drop = FALSE]
    # Keep a single target per code (the highest one) so that a code matched
    # to several partners cannot multiply the rows of `table`.
    remap <- remap[order(remap$unique_code, -remap$newidUC), , drop = FALSE]
    remap <- remap[!duplicated(remap$unique_code), , drop = FALSE]
    hit <- match(table$unique_code, remap$unique_code)
    found <- !is.na(hit)
    table$unique_code[found] <- remap$newidUC[hit[found]]
  }

  table
}


#' Find pairs of rows in two tables that match each other
#'
#' Applies the exact rules first and the fuzzy rules afterwards. After each
#' rule, the rows whose id has already been paired are removed from both
#' tables before the next rule runs.
#'
#' @param table_1 First data frame.
#' @param table_2 Second data frame.
#' @param IdName_1 Name of the id column of \code{table_1}.
#' @param IdName_2 Name of the id column of \code{table_2}.
#' @param configExactMatch List of character vectors. Each vector names the
#'   columns, present in both tables, that must all be equal and non-missing
#'   for two rows to be paired.
#' @param configAproxMatch List of fuzzy rules. Each rule is a list whose
#'   first, second and third elements are forwarded to
#'   \code{private_multiCruce} as the join columns, the fuzzy variable(s) and
#'   \code{precisionAlta} respectively.
#' @return A data frame with the columns \code{IdName_1} and \code{IdName_2},
#'   one row per pair found. It has zero rows when nothing matches or when no
#'   rule is supplied.
#' @export
#'
matchDuplicated <- function(table_1, table_2, IdName_1, IdName_2,
                            configExactMatch = NULL, configAproxMatch = NULL) {
  id_cols <- c(IdName_1, IdName_2)

  # Start from an empty two-column frame rather than NULL so the function
  # still returns a well-formed result when no rule is supplied (calling
  # complete.cases() on NULL is an error).
  return_table <- cbind(as.data.frame(table_1)[0, IdName_1, drop = FALSE],
                        as.data.frame(table_2)[0, IdName_2, drop = FALSE])

  # Drops the rows of `tbl` whose id is already among `paired_ids`.
  # `[[` is used so the id column is a plain vector for tibbles as well.
  drop_paired <- function(tbl, id_name, paired_ids) {
    tbl[!(tbl[[id_name]] %in% paired_ids), , drop = FALSE]
  }

  for (key_cols in configExactMatch) {
    print(paste('Match using: ', paste(key_cols, collapse = ', '), sep = ''))
    matched <- merge(table_1, table_2, by = key_cols)
    # merge() pairs NA with NA; such rows are not real matches.
    matched <- matched[complete.cases(matched[, key_cols, drop = FALSE]), ,
                       drop = FALSE]
    return_table <- rbind(return_table, matched[, id_cols, drop = FALSE])
    table_1 <- drop_paired(table_1, IdName_1, return_table[[IdName_1]])
    table_2 <- drop_paired(table_2, IdName_2, return_table[[IdName_2]])
  }

  for (rule in configAproxMatch) {
    print(paste('Match using: ', paste(rule[[1]], collapse = ', '),
                ' (Fuzzy por ', paste(rule[[2]], collapse = ', '), ')',
                sep = ''))
    # NOTE(review): private_multiCruce is defined outside this block; its
    # result is assumed to contain both id columns, as the previous
    # implementation relied on -- confirm against the helper.
    matched <- private_multiCruce(table_1, table_2, IdName_1, IdName_2,
                                  rule[[1]], rule[[2]],
                                  precisionAlta = rule[[3]], invertir = TRUE)
    matched <- unique(matched[, id_cols, drop = FALSE])
    return_table <- rbind(return_table, matched)
    table_1 <- drop_paired(table_1, IdName_1, return_table[[IdName_1]])
    table_2 <- drop_paired(table_2, IdName_2, return_table[[IdName_2]])
  }

  # Discard pairs in which either id is missing.
  return_table[complete.cases(return_table), , drop = FALSE]
}
# albertferre/match-mergeR documentation built on May 10, 2019, 8:51 a.m.