barzinePhdR: Code developed while and for my PhD thesis at the EMBL-EBI

Documented in colSelect colSelect2 createDFhXg crudeStrip cutoff df.cutoff df.cutoff2 df.fill df.Strip df.StripLine excludeRows filtre.dataset strip

# Filtering on selection ------------------------------------------------------------
#' Selection on a data.frame to keep or filter out specified columns only
#'
#' @param DF data.frame to be filtered
#' @param IN column names to keep
#' @param OUT column names to filter out
#' @param ... other paramaters (for base::grep)
#' @param type pick the object to return;
#'             'DF' sends back the filtered dataframe
#'             'names' sends back the names (with the correct order) of the columns that are kept
#'
#' @return a dataframe or an ordered vector of the kept column names
#' @export
#'
colSelect<-function(DF,IN,OUT,...,type='DF'){

  colNis<-function(DF,pattern,...){
    return(colnames(DF)[grep(pattern,colnames(DF),...)])
  }

  colNisnot<-function(DF,pattern,...){
    return(colnames(DF)[-grep(pattern,colnames(DF),...)])
  }

  index<-rep(FALSE,ncol(DF))
  res<-unique(base::Reduce(c,Map(function(x) {grep(x,colnames(DF),...)} ,IN)))
  index[res]<-TRUE
  toRemove<-base::Reduce(c,Map(function(x) {grep(x,colnames(DF),...)},OUT))
  index[toRemove]<-FALSE
  switch(type,
         'names'={return(colnames(DF)[index])},
         'DF'   ={   new.DF<-DF[,index]
         return(new.DF)
         },
         #  'index'= #default (for the switch,function default: DF)
         {return(index)}
  )

}

#' Selection on a data.frame to keep the specified columns only
#'
#' @param DF data.frame to filter
#' @param IN names of the columns to be kept
#' @param ... other parameters (for base::grep)
#'
#' @return a filtered dataframe
#' @export
#'
colSelect2<-function(DF,IN,...){
  res<-base::Reduce(cbind, lapply(IN,function(x) DF[,grep(x,colnames(DF),...)]))
  res<-res[,unique(colnames(res))]
}

#' Keep only the specified rows of a dataframe
#'
#' @param DF data.frame to be filtered
#' @param filter vector of the index names of the rows to keep
#'
#' @return data.frame where the rows have been specified
#' @export
#'
filtre.dataset<-function(DF,filter){
  return(subDF<-DF[rownames(DF) %in% filter,]) #assignation eliminates the NA (?)
}


# Filtering based on exclusion -------------------------------
#' Create a data.frame for which specific rows have been excluded
#'
#' @param DF data.frame
#' @param rowlist names of the rows to be excluded
#'
#' @return filtered data.frame
#' @export
#'
excludeRows<-function(DF,rowlist){
  DF[!(rownames(DF) %in% rowlist),]
}

# Filtering based on numeric threshold ---------------------------------------
#' Test cell values in a data.frame for equality or higher value than
#' a specified cutoff
#'
#' @param DF numeric data.frame
#' @param CUT expression cutoff for the test
#'
#' @return a boolean data.frame where
#'         TRUE means that the value in the original data.frame is higher than CUT.
#' @export
#'
cutoff<-function(DF,CUT){
  apply(DF,2,function(x) x>=CUT)
}


#' Create a data.frame for which the values under a given threshold are set to 0
#'
#' @param DF numeric data.frame
#' @param CUT The values have to be higher than CUT to be kept, otherwise value is set to 0
#' @param clean if TRUE, the rows where no value are above cut are filtere out
#'
#' @return a data.frame which can have a smaller number of rows if clean is TRUE
#' @export
#'
df.cutoff<-function(DF,CUT,clean=TRUE){
  df1<-as.data.frame(apply(DF,c(1,2),function (x){
    if (x>=CUT){
      return(x)
    }else{
      return(0)}}))
  colnames(df1)<-colnames(DF)
  rownames(df1)<-rownames(DF)
  if (clean) {
    df1<-df1[rowSums(df1)!= 0, ]
  }
  return(DF[rownames(df1),])
}

#' Create a data.frame for which the values under a given threshold are set to 0
#'
#' @param DF numeric data.frame
#' @param cut the threshold
#' @param STRIP boolean; remove the rows filled with 0 when TRUE
#'
#' @return a numeric data.frame where all (value > cut|| value == 0)
#' @export
#'
df.cutoff2<-function(DF,cut,STRIP=FALSE){
  DFbool<-cutoff(DF,cut)
  DFbool<-boolDF2numeric(DFbool)
  res<-DFbool*DF
  if(STRIP){
    res<-stats::na.omit(res[rowSums(res)==0,])
  }
  return(res)
}

#' Strip the rows of a data.frame that are failling the test.
#'
#' @param DF data.frame to strip
#' @param test  'gt' (greater than), 'ge' (greater than or equal),
#'              'lt' (lesser than), 'le' (lesser than or equal)
#' @param threshold numeric value for the test
#' @param verbose when TRUE, shows the number of rows and columns before and after stripping
#'
#' @return a stripped data.frame
#' @export
#'
strip<-function(DF,test='gt',threshold=0,verbose=FALSE){
  if(verbose) {
    print(paste("Dimension of the dataset (before strip)",nrow(DF),"-",ncol(DF)))
  }
  switch(test,
         'gt'={
           DF1<-DF[rowSums(DF > threshold) > 0,]
         },
         'ge'={
           DF1<-DF[rowSums(DF %>=% threshold) >0,]
         },
         'lt'={
           DF1<-DF[rowSums(DF < threshold) > 0,]
         },
         'le'={
           DF1<-DF[rowSums(DF %<=% threshold) >0,]}
  )
  if(verbose) {
    print(paste("Dimension of the dataset (after strip)",nrow(DF1),"-",ncol(DF1),"(",test,"-",threshold,")"))
  }
  return(DF1)
}

#' Filter the content of a numeric data.frame based on a threshold
#'
#' @param DF the numeric data.frame
#' @param val the value used as a threshold
#' @param margin 1: for rows, 2 for columns, c(1:2) apply to both column and row.
#' @param case 'gt' for strictly greater than;
#'             'ge' for greater than or equal to;
#'             'lt' for strictly lesser than;
#'             'le' for lesser than or equal to;
#'             'eq' for equal to
#'
#' @return Data.frame which exclusively contains rows that comply at least once to the given criterion
#' @export
#'
df.Strip<-function(DF,val=0,margin=1,case='gt'){
  if(case=='gt')
    return(na.omit(DF[apply(DF,margin,sum)>val,]))

  if(case=='lt')
    return(na.omit(DF[apply(DF,margin,sum)<val,]))

  if(case=='ge')
    return(na.omit(DF[apply(DF,margin,sum)>=val,]))

  if(case=='le')
    return(na.omit(DF[apply(DF,margin,sum)<=val,]))

  if(case=='eq')
    return(na.omit(DF[apply(DF,margin,sum)==val,]))

  print("Error: not a correct case. Only possible cases: 'gt','ge','lt','le' or 'eq'")

}


#' Compare cell values to a threshold and fill cells
#' that failed the test with NA
#'
#' @param DF a numeric data.frame
#' @param val thereshold for the arithmetical comparison
#' @param case "ge","gt","le","lt","eq" (for df.Strip)
#'
#' @return a numeric data.frame
#' @export
#'
df.StripLine<-function(DF, val=0, case="gt"){
  df1<-df.Strip(DF, val=val, margin=c(1,2),case=case)
  df1[df1 == 0] <- NA
  df1<-stats::na.omit(df1[stats::complete.cases(df1),])
  return(df1)
}


#' Strip rows which don't have any value above the given threshold
#'
#' @param DF numeric data.frame
#' @param threshold threshold of detection
#'
#' @return a numeric data.frame which can have a smaller number of rows
#'         than the input data.frame
#' @export
#'
crudeStrip<-function(DF,threshold){
  DF<-apply(DF,c(1,2),function(x) {
    if(x<threshold){
      return(0)
    }else{
      return(x)
    }})
  DF<-strip(DF)
  return(as.data.frame(DF))
}

# Other filtering methods -------------------

#' Allow to select one tissue only from a data.frame
#'
#' @param DF a data.frame where the tissue are annotated as a part of the column names
#' @param Tissue character string
#'
#' @return a data.frame
#' @export
#'
createDFhXg<-function(DF,Tissue){
  newDF<-DF[,grep(Tissue,colnames(DF))]
  colnames(newDF)<-cleanNames(colnames(newDF),Tissue)
  return(newDF)
}

# Addition ----------------------------------
#' Create a data.frame with new rows or columns for the inputed data.frame
#' that are filled with a constant
#'
#' @param DF data.frame
#' @param NameList the header of the new rows or columns
#' @param value what the new cells in the data.frame should be filled with
#' @param type 'row' to create rows otherwise creates columns
#' @param check boolean
#' @param verbose print the different steps
#'
#' @return a completed data.frame (with the content of Namelist) and filled with the given value
#' @export
#'
df.fill<-function(DF,NameList,value=0,type='row',check=TRUE,verbose=TRUE){
  if(check){
    if (type=='row'){
      NameList<-base::setdiff(NameList,rownames(DF))
      print('Duplicates in new row names removed')
      if(length(NameList)==0) {
        if(verbose) print('Nothing to do')
        return(DF)
      }
    }else{
      NameList<-base::setdiff(NameList,colnames(DF))
      print('Duplicates in new column names removed')
      if(verbose) print('Nothing to do')
      return(DF)
    }
  }
  if(type=='row'){
    tDF<-t(as.matrix(DF))
    addDF<-base::list()
    addDF<-data.frame(lapply(NameList,function(x) addDF[[x]]<-rep(value,nrow(tDF))))
    rownames(addDF)<-rownames(tDF)
    colnames(addDF)<-NameList
    taddDF<-t(as.matrix(addDF))
    return(rbind.data.frame(DF,taddDF))
  }else{#every other choice creates columns
    addDF<-base::list()
    addDF<-data.frame(lapply(NameList,function(x) addDF[[x]]<-rep(value,nrow(DF))))
    rownames(addDF)<-rownames(DF)
    colnames(addDF)<-NameList
    return(cbind.data.frame(DF,addDF))
  }
}

barzine/barzinePhdR documentation built on Nov. 23, 2024, 8:54 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

barzine/barzinePhdR
Code developed while and for my PhD thesis at the EMBL-EBI

R/selection-filtering.R
In barzine/barzinePhdR: Code developed while and for my PhD thesis at the EMBL-EBI

Defines functions df.fill createDFhXg crudeStrip df.StripLine df.Strip strip df.cutoff2 df.cutoff cutoff excludeRows filtre.dataset colSelect2 colSelect

Documented in colSelect colSelect2 createDFhXg crudeStrip cutoff df.cutoff df.cutoff2 df.fill df.Strip df.StripLine excludeRows filtre.dataset strip

R Package Documentation

Browse R Packages

We want your feedback!

barzine/barzinePhdR Code developed while and for my PhD thesis at the EMBL-EBI

R/selection-filtering.R In barzine/barzinePhdR: Code developed while and for my PhD thesis at the EMBL-EBI

Defines functions df.fill createDFhXg crudeStrip df.StripLine df.Strip strip df.cutoff2 df.cutoff cutoff excludeRows filtre.dataset colSelect2 colSelect

Documented in colSelect colSelect2 createDFhXg crudeStrip cutoff df.cutoff df.cutoff2 df.fill df.Strip df.StripLine excludeRows filtre.dataset strip

R Package Documentation

Browse R Packages

We want your feedback!

barzine/barzinePhdR
Code developed while and for my PhD thesis at the EMBL-EBI

R/selection-filtering.R
In barzine/barzinePhdR: Code developed while and for my PhD thesis at the EMBL-EBI