#' hit_for_cell
#'
#' Function to get subcellular location based on the database from protein Atlas, and coordinates
#' of each protein on the plot cell (img2) according to their location
#'
#' @param HIT Usually, the output from hitlist function; but just has to be a data frame with the
#' columns id, treatment and category
#' @param organism The organism on which you performed your experiment. For now, only 'HUMAN' and 'MOUSE' are available.
#' @param cell_coordinate Logical to randomly assign or not coordinate on the cell png corresponding to
#' the mapped cellular location. Default is TRUE.
#'
#' @return Dataframe (format for hit_plotcell)
#'
#' @export
#'
#' @seealso \code{\link{hitplot_cell}}
hit_for_cell <- function(HIT, organism = c("HUMAN", "MOUSE"), cell_coordinate = TRUE){
organism <- match.arg(organism)
if(organism == "HUMAN"){
loca_hit <- IMPRINTS.CETSA.app::pr_atlas[which(!is.na(match(IMPRINTS.CETSA.app::pr_atlas$Uniprot, unique(HIT$id)))),]
}
else if(organism == "MOUSE"){
loca_hit <- IMPRINTS.CETSA.app::pr_atlas_mouse[which(!is.na(match(IMPRINTS.CETSA.app::pr_atlas_mouse$Uniprot, unique(HIT$id)))),]
}
else{
stop("Please, choose a valid organism name : 'HUMAN' or 'MOUSE'")
}
message("Assign proteins to their main location from protein Atlas")
loca_hit <- loca_hit[,c("Uniprot", "Gene", "Subcellular.main.location", "Subcellular.additional.location")]
colnames(loca_hit) <- c("id", "gene.name", "main.location", "additional.location")
HIT <- dplyr::left_join(HIT, loca_hit, by = "id", relationship = "many-to-many")
for(i in loca_hit$Uniprot){
idx <- which(!is.na(match(HIT$id, i)))
HIT[idx, c("gene.name", "main.location", "additional.location")] <-
loca_hit[which(loca_hit$Uniprot == i), c("Gene", "Subcellular.main.location", "Subcellular.additional.location")]
}
HIT <- HIT[,-grep("^additional.location$", colnames(HIT))] #for now, not take additional locations
HIT$main.location.cell <- rep(NA, nrow(HIT))
for (i in 1:nrow(HIT)){
ORG <- strsplit(HIT$main.location[i], ", ")[[1]]
corres <- c()
for(k in 1:length(ORG)){
corres <- append(corres, orgatlas_match[ORG[k],2])
}
HIT$main.location.cell[i] <- paste(corres, collapse = ", ")
}
HIT$nb_location <- as.numeric(
unlist(lapply(as.list(HIT$main.location.cell), function(x) {ifelse(x != "NA", length(strsplit(x, ", ")[[1]]), NA)}))
)
nwl <- length(unique(HIT$id))
HIT <- HIT[which(!is.na(HIT$nb_location)),] #remove prot without location
nwl <- nwl - length(unique(HIT$id))
message(paste(nwl, "proteins were removed because no subcellular location has been found"))
if(nrow(HIT) == 0){
message("No proteins have been found in the Protein Atlas database")
return(HIT)
}
#duplicate row when more than 1 location
message("Duplicating rows when more than one location and separating them")
#uncount : Based on a given column's value (x), repeat each row x times, thereby elongating the df.
HIT <- HIT %>%
tidyr::uncount(nb_location, .remove = FALSE)
#then separate them
mem <- c()
for(i in 1:nrow(HIT)){
if(HIT$nb_location[i] > 1){
if(!(i %in% mem)){
l <- HIT$nb_location[i]
for(k in 1:l){
HIT[(i+k-1), "main.location"] <- strsplit(HIT$main.location[(i+k-1)], ", ")[[1]][k]
HIT[(i+k-1), "main.location.cell"] <- strsplit(HIT$main.location.cell[(i+k-1)], ", ")[[1]][k]
}
mem <- i:(i + l - 1)
}
}
}
if(cell_coordinate){
message("Assign cooordinates to proteins according to their subcellular locations")
idx_malo <- grep("main.location.cell", colnames(HIT))
HIT[, c("x", "y")] <- t(apply(HIT, 1, function(x) point_in_orga(x[idx_malo])))
}
HIT$main.location.cell <- as.factor(HIT$main.location.cell)
HIT$nb_location <- as.factor(HIT$nb_location)
HIT$category <- as.factor(HIT$category)
message("Your data has been mapped succesfully !")
return(HIT)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.