R/wide-to-long.R

Defines functions wide_to_long

Documented in wide_to_long

#'  arrange wide format to long format
#'
#'
#' @param data  A data frame
#' @param categories see example
#' @param single_id see example
#' @param sep separator
#'
#' @examples
#' # example 1
#' n <- 10
#' data <- data.frame(sample = paste0(rep("s",n),1:n),
#'                   w4__abc =rnorm(n,0,1),
#'                  w8__abc =rnorm(n,1,1),
#'                  bl__abc =rnorm(n,3,1),
#'                   w4__mc =rnorm(n,0,1),
#'                   bl__mc =rnorm(n,9,2),
#'                   bl__aaa =rnorm(n,0,1),
#'                   w8__aaa =rnorm(n,0,1),
#'                   w8__mc =rnorm(n,0,1),
#'                   sex =rnorm(n,0,1),
#'                   age =rnorm(n,0,1)
#'                   )
#' wide_to_long(data=data,
#'             categories=c("w4","w8","bl"),
#'             single_id="sample")
#' # example 2
#' n <- 10
#' data <- data.frame(sample = paste0(rep("s",n),1:n),
#'                   sex =rnorm(n,0,1),
#'                   age =rnorm(n,0,1)
#'                   )
#' wide_to_long(data=data,
#'             categories=c("w4","w8","bl"),
#'             single_id="sample")
#' # example 3
#' n <- 10
#' data <- data.frame(sample = paste0(rep("s",n),1:n),
#'                   w4_abc =rnorm(n,0,1),
#'                   w8_abc =rnorm(n,1,1),
#'                   bl_abc =rnorm(n,3,1),
#'                   w4_mc =rnorm(n,0,1),
#'                   bl_mc =rnorm(n,9,2),
#'                   bl_aaa =rnorm(n,0,1),
#'                   w8_aaa =rnorm(n,0,1),
#'                   w8_mc =rnorm(n,0,1)
#'                   )
#' wide_to_long(data=data,
#'             categories=c("w4","w8","bl"),
#'             single_id="sample",sep="_")





wide_to_long <- function(data,categories,single_id,sep = "__")
{
    # this function will use perl-style regex expression in r
    # basic pattern: "^(week4|week8|baseline",sep,".*)"
    pattern <- paste0("^(",paste0(categories,collapse = "|"),")",sep,"(.*)");
    data_name <- names(data);
    # step 1 divsingle_ide variables names into two parts,
    depend_time <- grep(pattern, data_name, value=TRUE,perl=TRUE);
    independ_time <-  setdiff(data_name,depend_time)
    #browser()
    dat_frame <- data[c(single_id)]
    if(length(depend_time))
    {
        denpend <- unique(sub(pattern,'\\2', depend_time))
        sam <- as.character(data[single_id][,1])
        # step 2 merge data by each variable
        dat_frame <- data.frame(sample =rep(sam,each=length(categories)),categories=rep(categories,length(sam)))
        colnames(dat_frame) <- c(single_id,"categories");
        for(den in denpend)
        {
            #browser()
            tmp_variable <- grep(paste0("^(",paste0(categories,collapse = "|"),")",sep,den),depend_time, perl=TRUE,value=TRUE);
            tmp_data <- data[c(single_id,tmp_variable)];
            names(tmp_data) <- c(single_id,sub(paste0('^(.*)',sep,'.*'),'\\1', tmp_variable))
            tmp_id <- sub(paste0('^(.*)',sep,'.*'),'\\1', tmp_variable);
            tmp_long <- reshape2::melt(tmp_data,
                                       id.vars=c(single_id),
                                       measure.vars=c(tmp_id),
                                       variable.name="categories",
                                       value.name=den);
            names(tmp_long) <- c(single_id,"categories",den)
            dat_frame <- base::merge(tmp_long,dat_frame,by=c(single_id,"categories"),all=TRUE)
        }
        tmp_data <- data[c(independ_time)]

        dat_frame <- merge(tmp_data,dat_frame,by=c(single_id));
        dat_frame <- dat_frame[c(c(single_id,"categories"), setdiff(names(dat_frame),c(single_id,"categories")))];
        return (dat_frame)
    }
    tmp_data <- data[c(independ_time)]
    return(tmp_data)
}
ShouyeLiu/metaboliteUtility documentation built on May 6, 2019, 9:07 a.m.