Nothing
#' orthMappingCpx
#' @title Protein Complex Ortholog Mapping
#' @param datInput A list of reference complexes (e.g., CORUM complexes).
#' Each element holds the members of one complex, and the members must be
#' given as UniProt accession identifiers. The names of the list are used as
#' complex identifiers; an unnamed list is labelled by position.
#' @param input_species Name of the input species (e.g., "mouse","fly").
#' See \code{\link[orthogene]{map_species}} to return a full list of
#' available species.
#' @param output_species Name of the output species (e.g., "human").
#' See \code{\link[orthogene]{map_species}} to return a full list of
#' available species.
#' @param input_taxid A numeric value that specifies the NCBI
#' taxonomy identifier (TaxId) for input organism (e.g., 10090).
#' @param output_taxid A numeric value that specifies the NCBI
#' taxonomy identifier (TaxId) for output organism.
#' @return A list of complexes whose members are converted to UniProt
#' accessions of \code{output_species}. Complexes left with fewer than three
#' mapped members are dropped, and the remaining complexes are passed
#' through \code{EliminateCpxRedundance}. If no member can be mapped at any
#' stage, an empty list is returned with a warning.
#' @author Matineh Rahmatbakhsh, \email{matinerb.94@gmail.com}
#' @description This function uses \code{\link[orthogene]{convert_orthologs}}
#' function to support ortholog mapping of protein complexes between any pair
#' of 700+ species. Accessions are translated to primary gene names using the
#' reviewed proteome of the input organism, the gene names are mapped to
#' their orthologs, and the orthologs are translated back to accessions using
#' the reviewed proteome of the output organism.
#' @export
orthMappingCpx <-
  function(datInput,
           input_species,
           output_species,
           input_taxid,
           output_taxid) {
    if (!is.list(datInput)) {
      stop("Reference complexes must be list")
    }

    # Bind the column names used through non-standard evaluation below so
    # that R CMD check does not report them as undefined global variables.
    id <- NULL
    accession <- NULL
    gene_primary <- NULL
    input_gene <- NULL
    conv_gene <- NULL

    # Shared exit for every stage at which nothing is left to map. Without
    # it the grouping step at the end fails on a zero-row table.
    empty_result <- function(reason) {
      warning(reason, "; returning an empty list", call. = FALSE)
      list()
    }

    # Fetch accession / primary gene name pairs for the reviewed proteome of
    # one organism and make sure a table actually came back.
    fetch_proteome <- function(taxid) {
      proteome <- protti::fetch_uniprot_proteome(
        taxid,
        columns = c("accession", "gene_primary"),
        reviewed = TRUE
      )
      if (!is.data.frame(proteome)) {
        stop("Could not retrieve the UniProt proteome for taxonomy id ",
          taxid,
          call. = FALSE
        )
      }
      proteome
    }

    # Reshape the list into a long table with one row per (complex, member).
    cpx_ids <- names(datInput)
    if (is.null(cpx_ids)) {
      cpx_ids <- as.character(seq_along(datInput))
    }
    members <- lapply(datInput, as.character)
    df_cpx <- data.frame(
      id = rep(cpx_ids, lengths(members)),
      accession = as.character(unlist(members, use.names = FALSE)),
      stringsAsFactors = FALSE
    )
    df_cpx <- df_cpx[!is.na(df_cpx$accession), , drop = FALSE]
    df_cpx <- dplyr::arrange(df_cpx, id)
    if (nrow(df_cpx) == 0L) {
      return(empty_result("No complex members were supplied"))
    }

    # Translate input accessions to primary gene names.
    prot_input <- fetch_proteome(input_taxid)
    df_cpx_g <-
      dplyr::inner_join(df_cpx, prot_input, by = "accession")
    if (nrow(df_cpx_g) == 0L) {
      return(empty_result(
        "No complex member was found in the input proteome"
      ))
    }

    # Map the gene names to their orthologs. The converted gene names come
    # back as row names, so they are moved into a regular column first.
    orth_Df <- orthogene::convert_orthologs(
      unique(df_cpx_g$gene_primary),
      input_species = input_species,
      output_species = output_species
    )
    orth_Df <-
      orth_Df %>%
      tibble::rownames_to_column("conv_gene") %>%
      dplyr::rename(gene_primary = input_gene) %>%
      dplyr::select(conv_gene, gene_primary)

    # Swap every input gene name for its ortholog, keeping the complex id.
    df_cpx_g2 <-
      dplyr::inner_join(df_cpx_g, orth_Df, by = "gene_primary") %>%
      dplyr::select(id, conv_gene) %>%
      dplyr::rename(gene_primary = conv_gene)
    if (nrow(df_cpx_g2) == 0L) {
      return(empty_result("No ortholog was found for the complex members"))
    }

    # Translate the ortholog gene names to accessions of the output organism.
    prot_output <- fetch_proteome(output_taxid)
    fout <-
      dplyr::inner_join(df_cpx_g2, prot_output, by = "gene_primary") %>%
      dplyr::select(id, accession)
    if (nrow(fout) == 0L) {
      return(empty_result(
        "No ortholog was found in the output proteome"
      ))
    }

    # Convert back to a list of complexes with unique members, keeping only
    # complexes that retain at least three mapped members.
    fout_cpx_list <- lapply(split(fout$accession, fout$id), unique)
    fout_cpx_list <- fout_cpx_list[lengths(fout_cpx_list) >= 3]

    # Remove redundancy among the mapped complexes.
    EliminateCpxRedundance(fout_cpx_list)
  }
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.