#' Combine Rows Into One
#'
#' Combine rows into one which are actually sole record.
#'
#' @author Yiying Wang, \email{wangy@@aetna.com}
#' @param df You can assign a data.frame yourself. If null, you will manually
#' select a spreadsheet and extract the data.
#' @param output.format xlsx or xls. Default xlsx.
#'
#' @return Nothing
#' @export
#' @importFrom gWidgets2 gfile gwindow gvbox ggroup gframe gradio gbutton gaction
#' @importFrom gWidgets2 addHandlerChanged
#' @importFrom compiler cmpfun
#' @importFrom readr read_csv
#' @importFrom stringi stri_conv
#' @importFrom stringr str_detect
#' @importFrom XLConnect loadWorkbook createSheet writeWorksheet saveWorkbook readWorksheetFromFile getSheets
#' @importFrom openxlsx read.xlsx write.xlsx createStyle
#'
#' @examples
#' \dontrun{
#' bindRows()
#' }
bindRows <- function(df = NULL, output.format = 'xlsx'){
# Sys.setlocale("LC_CTYPE","Chs") # Very important!
# sapply(c("dplyr","openxlsx","gWidgets2","gWidgets2RGtk2","compiler"),
# require,character.only=TRUE)
# options(guiToolkit="RGtk2")
# addRtoolsPath()
#----------read files--------------------
if (is.null(df)){
if (Sys.info()['sysname'] == "Windows"){
raw.file <- invisible(choose.files(
paste0(getOption("init.dir"), "*.*"), multi=FALSE,
caption="Select the raw data file...",
filters=rbind(matrix(c("Excel files (*.xls?)", "*.xls?;*.xls",
"csv files (*.csv)", "*.csv"),
byrow=TRUE, nrow=2),
Filters["All",])))
}else{
raw.file <- gfile("Select the raw data file...",type='open',
initial.dir=getOption("init.dir"),
filter=list('xls* files'=list(
patterns=c('*.xls?','*.xls')),
'csv files'=list(patterns=c('.csv'))),
multi=FALSE)
raw.file <- stri_conv(raw.file, "CP936", "UTF-8")
}
if (!file.exists(raw.file)) stop("No file designated!")
if (str_detect(raw.file, "[Xx][Ll][Ss]$|[Xx][Ll][Ss][XxMmBb]$")) {
sheets <- enc2native(getSheets(loadWorkbook(raw.file)))
dims <- as.data.frame(sapply(sheets, cmpfun(function(x) {
d <- try(dim(readWorksheetFromFile(raw.file,x)),
silent=TRUE)
if (is.null(d)) d <- c(0,0) else d
})))
dims <- sapply(names(dims), function(x) {
paste(c(x, paste(dims[,x], collapse=" ")), collapse=" ")
})
funSelSht <- function(file=raw.file, sheets=sheets, dims=dims){
window <- gwindow("Select the sheet", width=200, height=200)
box <- gvbox(cont=window)
addHandlerChanged(window, handler=function(...){
gtkMainQuit()
})
gg1 <- ggroup(cont=box)
gg2 <- ggroup(cont=box, horizontal = TRUE)
box1 <- gvbox(cont=gg1)
frm1 <- gframe("Sheet Name (nRow nCol):", cont=box1)
chkmap <- gradio(items=dims, selected=1, index=TRUE, cont=frm1)
box21 <- gvbox(cont=gg2)
box22 <- gvbox(cont=gg2)
actOK <- gaction(" OK ", "OK",
handler=function(h,...){
dsheet <<- enc2native(svalue(chkmap))
output <<- TRUE
dispose(window)
})
buttonOK <- gbutton(action=actOK, cont=box21)
actCancel <- gaction("Cancel", "Cancel",
handler=function(h,...){
dsheet <<- NULL
output <<- FALSE
dispose(window)
})
buttonCancel <- gbutton(action=actCancel, cont=box22)
gtkMain()
return(list(output, dsheet))
}
sheet.sel <- funSelSht(raw.file, sheets, dims)
if (!sheet.sel[[1]]) stop("You cancelled actions.")
sheet.sel <- which(sheets==names(dims)[dims==sheet.sel[[2]]])
dta <- readWorksheetFromFile(raw.file, sheet.sel)
}
if (str_detect(raw.file, "\\.[Cc][Ss][Vv]$"))
dta <- read_csv(raw.file)
}else{
dta <- df
}
#ID var----------
vars <- names(dta)
var <- .funSelVar(vars)
if (!var[[1]]) stop("You did not select any identifier!")
else var <- var[[2]]
#---------Combine rows-----------------
dt <- split(dta, dta[,var])
sn <- as.matrix(sapply(dt, function(x) nrow(x)))
snNoBind <- row.names(sn)[sn[, 1]==1]
snBind <- row.names(sn)[sn[, 1]>1]
.funBindMode <- function(col.name, dat){
d <- dat[!is.na(dat[, col.name]), col.name]
if (length(d)==0){
return(FALSE)
}else{
coerceNA <- try(as.numeric(d[! str_detect(enc2native(
d, "[\u62D2\u5F03\u9634\u9633\\<\\>\\+\\-]"))]), silent=TRUE)
if (length(! str_detect(d, enc2native("[\u62D2\u5F03\u9634\u9633\\<\\>\\+\\-]")))==0){
p <- 0
}else{
p <- sum(is.na(coerceNA)) / length(! str_detect(
d, enc2native("[\u62D2\u5F03\u9634\u9633\\<\\>\\+\\-]")))
}
output <- (p>=0.5 || str_detect(tolower(col.name), enc2native("\u603B\u7ED3|\u5C0F\u7ED3?"))) &&
! str_detect(tolower(col.name), enc2native(paste0(
"name|gender|sex|company|\u59D3\u540D|\u5355\u4F4D|\u90E8\u95E8|\u6027\u522B|",
"department|\u8BC1$|\u53F7$|\u5EA6$|\u8054\u7CFB|\u4E2D\u5FC3$|\u7C7B\u578B$|provider|",
"^\u5C3F|\u5EA6$|\u7EA7^|\u91CF$|\u6570$|\u503C$"))
)
return(output)
}
}
bind.all <- sapply(vars, cmpfun(.funBindMode), dat=dta)
funBindRows <- function(x, bind.all){
if (is.null(dim(x))){
v <- x
}else{
v <- do.call('paste', c(as.data.frame(t(x)), sep="%&%"))
v <- str_replace_all(v, "^%&%|NA%&%|%&%NA|%&%$","")
v[bind.all] <- str_replace_all(v[bind.all], "%&%", ";")
v[!bind.all] <- str_replace_all(v[!bind.all], "^(.*?)%&%.*$", "\\1")
v[str_detect(v, "^$|^NA$")] <- NA
}
return(v)
}
after.bind <- as.data.frame(t(sapply(dt[snBind], cmpfun(funBindRows),
bind.all=bind.all)))
if (ncol(after.bind)>0) {
names(after.bind) <- vars
output <- rbind(do.call('rbind', dt[snNoBind]), after.bind)
}else{
output <- dt[[snNoBind]]
}
raw.path <- str_replace_all(raw.file[1], "^(.+\\\\)[^\\]+\\.[Xx][Ll][Ss].{0,1}$",
"\\1")
if (! str_detect(raw.path, ".+\\\\$")) raw.path <- paste0(raw.path,"\\\\")
if (output.format=='csv'){
write.csv(output,paste0(raw.path,"bind_rows.csv"), na="")
}else if (output.format=='xlsx'){
write.xlsx(output,file=paste0(raw.path,"bind_rows.xlsx"),
sheetName="Sheet1",
headerStyle=createStyle(
fgFill="#E8E8E8",
fontName='Arial Narrow')
)
}
return(paste0("The cleaned dataset 'bind_rows.", output.format,
"' is in the folder ", raw.path))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.