R/util.R

Defines functions DATE cc len tty.width write.xls getSDIR getSampleFromFileName markerStringToPredicate markerStringToSelectRE splitByFov getAllCombos logParams initializeProject projectFileName addNegatives removeNegativeMarkers getMarkerConfig

Documented in getAllCombos getMarkerConfig getSampleFromFileName initializeProject logParams markerStringToPredicate projectFileName splitByFov

DATE <- function() {gsub("-","",Sys.Date())}

cc <- function(...) {paste(...,sep='_')}

len <- function(x) {length(x)}

tty.width <- function() {
   con=pipe("~/bin/getTTYWidth")
   dat=readLines(con,n=1)
   close(con)
   width=as.numeric(dat)
   if(len(width)>0 && width>80) {
     return(width)
   } else {
     return(80)
   }
}

write.xls <- function(dd,filename,row.names=T,col.names=NA,na="NA",append=F) {
  if (!is.data.frame(dd)) {
    dd <- data.frame(dd,check.names=F)
  }
  if(!row.names) {
    col.names=T
  }
  write.table(dd,file=filename,sep="\t",quote=FALSE,
              col.names=col.names,row.names=row.names,na=na,append=append)
}

getSDIR<-function(){
    args=commandArgs(trailing=F)
    TAG="--file="
    path_idx=grep(TAG,args)
    SDIR=dirname(substr(args[path_idx],nchar(TAG)+1,nchar(args[path_idx])))
    if(len(SDIR)==0) {
        return(getwd())
    } else {
        return(SDIR)
    }
}




#' Extract sample name from file name
#' 
#' Assuming sample name does not include underscores,
#' return everything preceding the first underscore 
#' 
#' @param  filePath  file, which may or may not include full path
getSampleFromFileName <- function(filePath){
  gsub("_.*","",basename(filePath))
}

#' Generate string for selecting markers based on positive
#' or negative value in data
#' 
#' 
markerStringToPredicate<-function(ss) {
    mm=strsplit(ss,",")[[1]]
    mm_neg=gsub("-","",mm[grepl("-$",mm)])
    mm_pos=mm[!grepl("-$",mm)]
    if(len(mm_pos) > 0 & len(mm_neg) > 0) {
        paste(
            paste(mm_pos,"== 1"),
            paste(mm_neg,"== 0"),
            sep=" & ",collapse=" & "
        )
    } else if(len(mm_pos) == 0 & len(mm_neg) > 0){
        paste(mm_neg," == 0", collapse=" & ")
    } else {
        paste(
            paste(mm_pos,"== 1"),
            sep=" & ",collapse=" & "
        )
    }
}

markerStringToSelectRE<-function(ss){
    mm=strsplit(gsub("-","",ss),",")[[1]]
    paste0("(",paste(mm,collapse="|"),")")
}

#' Split data in *.rda file by FOV (SPOT)
#' 
#' Take in *.rda file containing data for multiple
#' FOV and output one *.rda file for each one. Input
#' tibble must have FOV column named "SPOT". Output
#' file will be named "FOV_[$SPOT].rda"
#' 
#' @param dat      input tibble/data.frame
#' @param outDir   output directory
#' @export
splitByFov <- function(dat,outDir){
    for(i in levels(as.factor(dat$SPOT))){
        fname <- file.path(outDir,paste0("FOV_",i))
        fov <- filter(dat, SPOT==i)
        saveRDS(fov, paste0(fname,".rda"))
    }
}

#' Get all possible combinations of markers
#'
#' Given a vector of single markers, return a list
#' of all possible combinations of those markers,
#' including both positive and negative variations
#' 
#' @param markers  vector of single markers
#' @return  list of all combinations
#' @export
getAllCombos <- function(markers){
    all.combos = list()
    ## get all combinations of markers
    for(x in 1:length(markers)){
        ## get all combos of x
        combos = combn(markers,x,simplify=FALSE)
        all.combos = c(all.combos, combos)
    }

    for(c in 1:length(all.combos)){
        combo = all.combos[[c]]
        if(length(combo) < length(markers)){
            all.combos[[c]] <- c(combo,paste0(setdiff(markers,combo),"-"))
        }
        all.combos[[c]] <- paste0(all.combos[[c]],collapse=",")
    }
    return(all.combos)
}


#' Log project parameters
#' 
#' Log all parameters set using either command line arguments
#' or manifest file
#' 
#' @param project_params  list of project parameters and values 
#'                        generated by projectParams()
#' @param action          action being carried out for the project (e.g., 
#'                        "generating counts" or "making pie charts")
#' @export
logParams <- function(project_params,action){
    x <- project_params
    write(date(),file=x$log)
    write(paste0("\n",action," with params:\n"),file=x$log,append=TRUE)
    for(n in names(x)){
        #val <- paste(n," = ",paste(x[[n]],collapse=","))
        if(is.list(x[[n]])){
            #val <- paste(names(x[[n]]), paste(x[[n]],collapse="+"), sep=":")
            tmp <- x[[n]]
            val <- c()
            for(v in names(tmp)){
                val <- c(val, paste(v,paste(tmp[[v]],collapse="+"),sep=":"))
            }
            val <- paste(val, collapse=";;")
        } else {
            val <- paste(x[[n]],collapse=",")
        }
        val <- paste0(n, "\t", val)
        write(val, file=x$log, append=TRUE)
    }
    write("\n#######################################################################\n",file=x$log,append=TRUE)
}

#' Build list of project parameters and values by reading
#' project manifest
#' 
#' Given a tab-delimited file of key-value pairs, read
#' them into list. File may contain comments both on top and in columns
#' beyond the second, but will be ignored here
#'
#' TO DO: Add to description list of required and optional keys in file
#' 
#' @param    file    manifest file to be read
#' @param    type    type of project being run ["counts"|"pie_charts"|"spatial_plots"];
#'                   default="counts"
#' @return   a list of all project parameters
#' @export
initializeProject <- function(file,type="counts"){
    ## set defaults for all projects
    pp <- list( log=NULL, 
                debug=FALSE,
                pad=20)

    countsPP <- list( markers=NULL,
                      data_dir=NULL,
                      fov=NULL,
                      alt_bases=c(),
                      run_counts=TRUE,
                      run_frac_total=TRUE,
                      run_medians=TRUE,
                      counts_rda_file=NULL,
                      counts_xlsx_file=NULL,
                      write_xlsx_file=TRUE,   
                      save_rds_file=TRUE )

    piePP <- list( pie_charts=TRUE, 
                   counts_rda_file=NULL,
                   custom_colors=FALSE,
                   pdf_pie_charts_by_sample=NULL,
                   other_threshold=0,
                   draw_legend=TRUE,
                   cell_type_markers=NULL )

    spatialPP <- list( data_file=NULL,
                       cell_types_file=NULL,
                       cell_type_name=NULL,
                       annotations_dir=NULL,
                       func_marker=NULL,
                       write_csv_files=TRUE,
                       sample_color=NULL,
                       sample_colors_d=NULL,
                       sort_by_marker="CD3",
                       fov_bb=list(X0=1,X1=5363,Y0=-3343,Y1=1),
                       plot_bb=list(X0=-499,X1=5363,Y0=-3343,Y1=500)
                      )

    if(type == "counts"){
        pp <- c(pp, countsPP)
    } else if(type == "pie_charts"){
        pp <- c(pp, piePP)
    } else if(type == "spatial_plots"){
        pp <- c(pp, spatialPP)
    } else {
        flog.warn("Unrecognized project type '%s'. Not setting ANY defaults. Reading strictly from manifest.",type) 
    }

    ## process manifest
    pp <- tryCatch({
        pp <- read_yaml(file)
        flog.debug("Read YAML file. WARNING: No defaults were set. Relying completely on manifest file for ALL settings.")
        pp
        #warning("No defaults were set. Relying completely on manifest file for ALL settings.")
      }, error = function(e){
        flog.debug("File not in YAML format. Reading manifest in tab-delimited format") 
        print(e)
        pp
    }, finally = {
        pp
    })

    ## set log file 
    if(is.null(pp$log)){
        if(!is.null(pp$data_dir)){
            df <- file.path(pp$data_dir,dir(pp$data_dir)[grep("\\.rda$",dir(pp$data_dir))]) 
            pp$log <- projectFileName(pp$markers,df,pp$pad,"log")
        } else if(!is.null(pp$data_file)){
            pp$log <- gsub("\\.rda",".log",pp$data_file)
        } else {
            pp$log <- projectFileName(pp$markers,c("_"),pp$pad,"log")
        }
    }

    ## set up logger
    flog.threshold(DEBUG)
    if(!pp$debug){ flog.threshold(INFO) }
    flog.appender(appender.file(pp$log))

    return(pp)
}

#' Generate name of output file for marker stats based on 
#' input file names
#' 
#' Use name of marker file and either a single data file name
#' or the directory of data files to generate output file name
#'
#' @param markerFile    File containing list of marker names
#' @param dataFiles     vector of data file(s)
#' @param pad           amount that will be trimmed from FOV
#' @param type          file type ("rda","txt","xlsx"); essentially
#'                      the file extension
#' @return  file name to be used for saving counts
#' @export
projectFileName <- function(markerFile,dataFiles,pad,type){
    if(is.null(markerFile) || is.null(dataFiles)){
        outFile <- paste0("halo_",format(Sys.Date(), format="%Y%m%d"))
    } else if(length(dataFiles)==1) {
        outFile <- paste("markerTable",
            gsub("_MegaTableV2.rda","",basename(dataFiles[1])),
            file_path_sans_ext(basename(markerFile)),sep="_")
        if(pad > 0){
            outFile <- paste0(outFile, "__PAD_",pad)
        }
    } else {
        outFile <- paste("markerTable",
            basename(dirname(dataFiles[1])),
            substr(digest(sort(dataFiles)),1,8),"__",
            file_path_sans_ext(basename(markerFile)),sep="_")
    }
    if(pad > 0){
        outFile <- paste0(outFile,"__PAD_",pad)
    }
    outFile <- paste0(outFile,".",type)
    return(outFile)
}


addNegatives <- function(cellType, allMarkers){
    tmp <- gsub("-", "", unlist(strsplit(cellType, ",")))
    if(length(allMarkers[-which(allMarkers %in% tmp)]) > 0){
        negs <- paste0(allMarkers[-which(allMarkers %in% tmp)],"-",collapse=",")
        cellType <- paste(cellType,negs,sep=",")
    } 
    return(cellType)
}

removeNegativeMarkers <- function(marker){
    tmp <- unlist(strsplit(marker,","))
    if(any(grepl("-",tmp))){
        tmp <- tmp[-grep("-",tmp)]
    }
    if(length(tmp) > 0){
        return(paste(tmp,collapse=","))
    } else {
        return("Negative")
    }
}

#' Parse marker config 
#' 
#' Given a marker configuration file in yaml format, parse 
#' data into a tibble
#'
#' @param configFile   configuration file in yaml format
#' @return a tibble containing all configuration information
#' @export
getMarkerConfig <- function(configFile, plotConfigFile){

    config <- read_yaml(configFile)
    plotCfg <- read_yaml(plotConfigFile)
    cellIDs <- NULL

    if("cell_identity_markers" %in% names(config)){
        cellIDs <- config$cell_identity_markers
    }

    allMarkers <- data.frame(MarkerSet=NA,MarkerSetAlias=NA,Population=NA,CellType=NA,Label=NA,Color="gray")

    sets <- config$marker_sets

    for(nms in names(sets)){
        ms <- sets[[nms]]
        remPop <- FALSE   ## remove population markers from labels
        remNeg <- FALSE   ## remove negative markers from labels
        functional <- c() 

        msConfig <- names(ms)
 
        ## determine how to set up cell type labels
        if("cell_type_labels" %in% msConfig){
            if("remove_negative_markers" %in% names(ms$cell_type_labels)){
                remNeg <- ms$cell_type_labels$remove_negative_markers
            }
            if("remove_population_markers" %in% names(ms$cell_type_labels)){
                remPop <- ms$cell_type_labels$remove_population_markers
            }
        }

        ## make functional markers mutually exclusive by adding negatives
        ## for markers that are part of the entire set but not part of
        ## each individual marker combo 
        if("functional" %in% msConfig){
            if(!("functional_negatives" %in% msConfig) || ms$functional_negatives){ 
                if(length(ms$functional) == 1){
                    functional <- c(ms$functional, paste0(ms$functional,"-"))
                } else {
                    allFunc <- unique(gsub("-","",unlist(strsplit(ms$functional,","))))
                    for(f in seq(ms$functional)){
                        functional[f] <- addNegatives(ms$functional[f],allFunc)
                    }
                    funcNeg <- paste0(paste0(allFunc,"-"),collapse=",")
                    functional <- c(functional, funcNeg)
                }  
            } else {
                functional <- ms$functional
            }
            if("keep_population_markers" %in% msConfig && ms$keep_population_markers){
                functional <- c(functional,ms$populations)
            }
        }

        ## if exists, get alias for marker set to be used in summary plot titles
        msAlias <- nms
        if("marker_set_alias" %in% msConfig){
            msAlias <- ms$marker_set_alias
        }

        ## generate flattened tibble of all marker information needed for plotting
        for(population in ms$populations){
            pm <- population
            names(pm) <- nms
            if("identity_negatives" %in% msConfig){
                if(ms$identity_negatives == "BOTH"){
                    pm <- c(pm, addNegatives(pm, cellIDs))
                    names(pm)[length(pm)] <- paste0(nms,"_with_negatives")
                } else if(ms$identity_negatives){
                    pm <- addNegatives(pm, cellIDs)
                    names(pm) <- nms 
                }
            }
            for(x in seq(pm)){
                p <- pm[x]
                clr <- "gray"
                if(length(functional) > 0){
                    for(f in functional){
                        clr <- "gray"
                        if(!f %in% ms$populations){
                            m <- paste(p,f,sep=",")
                        } else {
                            m <- p
                        }
                        lbl <- m
                        if(remPop & !f %in% ms$populations){
                            lbl <- f
                        }
                        if(remNeg){
                            lbl <- removeNegativeMarkers(lbl)
                        }
                        if("cell_type_labels" %in% msConfig){
                            if("special_labels" %in% names(ms$cell_type_labels)){
                                if(lbl %in% names(ms$cell_type_labels$special_labels)){
                                    lbl <- ms$cell_type_labels$special_labels[[lbl]]
                                }
                            }
                        }
                        if(is.null(lbl) | nchar(lbl) == 0){
                            lbl <- "Negative"
                        }
                        if(lbl %in% names(plotCfg$plot_options$marker_colors)){
                            clr <- plotCfg$plot_options$marker_colors[[lbl]]
                        }
                        allMarkers <- suppressWarnings(bind_rows(allMarkers, list(MarkerSet=names(pm)[x], 
                                       MarkerSetAlias=msAlias, Population=population, CellType=m, Label=lbl, Color=clr)))
                    }
                } else {
                    lbl <- p
                    if(remNeg){
                       lbl <- removeNegativeMarkers(lbl)
                    }
                    if(lbl %in% names(plotCfg$plot_options$marker_colors)){
                       clr <- plotCfg$plot_options$marker_colors[[lbl]]
                    }
                    if(is.null(lbl) | nchar(lbl) == 0){
                        lbl <- "Negative"
                    }
                    allMarkers <- suppressWarnings(bind_rows(allMarkers, list(MarkerSet=names(pm)[x], 
                                   MarkerSetAlias=msAlias, Population=population, CellType=p, Label=lbl, Color=clr)))
                }
            }
        }
    }

    return(as.tibble(allMarkers %>% filter(complete.cases(.))))
    
}
caitlinjones/halo documentation built on May 7, 2019, 8 a.m.