#' Reads an input pulldown file and organizes it for visualization
#'
#' This function is intended to be how users import data from a pulldown.
#' The selected read columns are reshaped into a long data frame. Read columns
#' whose names contain one of the `Cleaved_Type_Indicators` are paired up as
#' cleaved/uncleaved values and passed to `full_standardize()`; any other
#' selected read column is kept as-is, with its raw value stored in `signal`.
#'
#' @param file The input .csv, .xls, or .xlsx file. It can be a full path to a file or a
#'             relative path from the current working directory. A data frame
#'             that has already been read in is also accepted and used directly.
#'
#' @param standardization_method The method by which the cleaved and uncleaved read
#'                               counts are combined. Valid choices are 'additive' or
#'                               'multiplicative'. The default is additive.
#'
#' @param scale Should we rescale the signal result so that the maximum signal
#'              is between 100 and 1000.
#'
#' @param trim_proportion In the rescaling, what proportion of the large values should be
#'                        removed to get to a background rate.
#'
#' @param read_indicator An argument that identifies what columns are responses
#'                       to be plotted. This could be either a vector of integers
#'                       or a character string (or vector of strings) that is at the beginning of all of the
#'                       column names of the response data.
#'
#' @param protein_column A character string indicating which column denotes the protein.
#'
#' @param position_column A character string indicating which column corresponds to the position within a protein.
#'
#' @param Cleaved_Type_Indicators A character vector of length two. The first element is the
#'                                text that marks a read column as 'cleaved', the second marks it
#'                                as 'uncleaved'. The matching text is removed from the column
#'                                name to obtain the `Group` label.
#'
#' @return A data frame in long orientation. Rows that come from cleaved/uncleaved
#'         read columns are whatever `full_standardize()` returns for them. Rows
#'         that come from any other selected read column have the columns `index`,
#'         `protein_ID`, `position`, `Group` (the original column name), `cleaved`
#'         and `uncleaved` (both `NA`), and `signal` (the raw value).
#'
#' @export
import_pulldown <- function( file, standardization_method = 'additive',
                             scale=TRUE, trim_proportion=0.25,
                             read_indicator = 'X',
                             protein_column = 'protein_ID', position_column = 'Peptide.start',
                             Cleaved_Type_Indicators = c('Cl_','Un_') ){

  # Both indicators are indexed below; a shorter vector would silently yield NA patterns.
  stopifnot( is.character(Cleaved_Type_Indicators),
             length(Cleaved_Type_Indicators) == 2 )

  # Read in the data: from either an Excel (.xls/.xlsx) or .csv file
  if( is.data.frame(file) ){
    df <- file
  }else if( is.character(file) && length(file) == 1 ){
    is_excel <- grepl('\\.xlsx?$', file, ignore.case = TRUE)
    if( str_detect(file, fixed('.csv')) && !is_excel ){
      df <- read.csv(file)
    }else{
      # read_excel() detects .xls vs .xlsx itself; read_xls() cannot read .xlsx
      df <- readxl::read_excel(file)
    }
  }else{
    # Without this, `df` below would resolve to stats::df and fail obscurely.
    stop("`file` must be a single file path or a data frame.", call. = FALSE)
  }

  # and get rid of the extraneous columns.
  # `!!` injects the *value* of the argument, so a data column that happens to be
  # called e.g. "protein_column" can never be selected by accident.
  if( is.character( read_indicator )){
    df <- df %>%     # specify data columns by name
      select( !!protein_column, !!position_column, starts_with(read_indicator) )
  }else{
    df <- df %>%     # specify data columns by location
      select( !!protein_column, !!position_column, !!read_indicator )
  }

  # make the protein and position names consistent
  df <- df %>%
    rename( protein_ID = !!protein_column, position = !!position_column )

  # Turn this into a long dataframe
  df <- df %>%
    arrange( protein_ID, position ) %>%   # No guarantee user hasn't sent in mixed up rows
    mutate( index = seq_len(n()) ) %>%    # seq_len() is safe for zero-row input, 1:n() is not
    gather(key='Type', value='Value', -protein_ID, -position, -index) %>%  # convert to a _long_ orientation
    drop_na() %>%                         # Get rid of missing values
    mutate( protein_ID = factor(protein_ID),
            protein_ID = fct_reorder(protein_ID, index) )

  # Which observations are either cleaved or uncleaved. This SHOULD be all of the observations
  # but if there is something that doesn't seem like cleaved/uncleaved then we'll just keep it.
  # A logical mask is used instead of which()/negative indexing because
  # df[-integer(0), ] selects NO rows, which would discard everything when nothing matches.
  is_cleave_type <- str_detect( df$Type, fixed(Cleaved_Type_Indicators[1]) ) |
                    str_detect( df$Type, fixed(Cleaved_Type_Indicators[2]) )

  # Cleaved/uncleaved observations: pair them up per Group and standardize.
  # Skipped entirely when there are none (bind_rows() ignores NULL).
  df1 <- NULL
  if( any(is_cleave_type) ){
    df1 <- df[is_cleave_type, ] %>%
      mutate( Cleave = ifelse( str_detect(Type, fixed(Cleaved_Type_Indicators[1])), 'cleaved', 'uncleaved'),
              Group  = str_remove(Type,  fixed(Cleaved_Type_Indicators[1])),
              Group  = str_remove(Group, fixed(Cleaved_Type_Indicators[2])) ) %>%
      select( index, protein_ID, position, Group, Cleave, Value) %>%
      group_by( index, protein_ID, position, Group ) %>%
      spread(key=Cleave, value=Value) %>%   # The slow part of this function is the spread command...
      group_by(Group) %>%
      full_standardize( type = standardization_method, scale=scale, trim_proportion = trim_proportion )
    # Previously:
    #   mutate( signal = PepSeq::standardize(cleaved, uncleaved,
    #                                        type = standardization_method,
    #                                        scale = scale ) )
  }

  # Everything else: keep the raw value as the signal, labelled by its column name.
  df2 <- df[!is_cleave_type, ] %>%
    mutate( cleaved = NA, uncleaved = NA ) %>%
    rename( signal = Value, Group = Type ) %>%
    select( index, protein_ID, position, Group, cleaved, uncleaved, signal)

  out <- bind_rows( df1, df2 )
  return(out)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.