#' doWormMineAnalysis
#'
#' This function determines the lethality status in C elegans of a given human gene.
#'
#' Note: queries the WormMine API using Ruby see http://im-dev.wormbase.org/tools/wormmine/api.do?subtab=ruby
#'
#' @param con A \code{SQLiteConnection} object
#' @param genes A vector of ENSEMBL gene ids
#' @param alt_mart A Mart object created using biomaRt::useMart. This should specify \code{hsapiens_gene_ensembl}
#' as the dataset. See \code{\link{getOrthologs}} for more details.
#' @return a data frame
#' @export
doWormMineAnalysis <- function(con, genes, alt_mart=NULL) {
#get the worm orthologs
worm_orthologs <- getOrthologs(con, genes, 'celegans', alt_mart)
#group the worm orthologs for those genes with orthologs
worm_orthologs_grouped <- worm_orthologs %>%
dplyr::filter(nchar(celegans_homolog_orthology_type) > 4) %>%
dplyr::group_by(ensembl_gene_id, celegans_homolog_orthology_type) %>%
dplyr::summarise(celegans_count_orthologs=n(),
celegans_ortholog_ids=paste(celegans_homolog_ensembl_gene, collapse='|')) %>%
dplyr::ungroup()
#sort out entries with no ortholog
no_worm_orthologs <- worm_orthologs %>%
dplyr::filter(nchar(celegans_homolog_orthology_type) < 4) %>%
dplyr::transmute(ensembl_gene_id,
celegans_homolog_orthology_type='no_worm_ortholog',
celegans_count_orthologs=0,
celegans_ortholog_ids='')
#combine and mark up the human genes with a single worm ortholog
worm_orthologs_combined <- dplyr::bind_rows(worm_orthologs_grouped, no_worm_orthologs) %>%
dplyr::mutate(single_ortholog_filter=(celegans_count_orthologs==1)) %>%
as.data.frame()
#now format gene text for ruby script
worm_gene_ids <- worm_orthologs_combined %>%
dplyr::filter(single_ortholog_filter) %>%
dplyr::transmute(id=celegans_ortholog_ids) %>%
dplyr::distinct()
# list of gene-chunks, each of chunk 500 genes
gene_chunks <- split(worm_gene_ids$id, ceiling(seq_along(worm_gene_ids$id)/500))
message(paste("splitting data into ",length(gene_chunks),"chunks of up to 500 genes"))
#internal function to get the data from Intermine
queryMine <- function(query_genes) {
#run the ruby script
message(sprintf("Get data for %s worm genes from WormMine", length(query_genes)))
worm_genes_txt <- paste(query_genes, collapse=',')
wormmine_cmd <- sprintf('ruby \'%s/ruby/doWormMineAnalysis.rb\' "%s"',system.file(package='CollateralVulnerability2016'), worm_genes_txt)
wormmine_data_txt <- system(wormmine_cmd, intern=TRUE)
#import the data into a data frame
readr::read_tsv(paste(wormmine_data_txt, collapse='\n'),
skip=0,
col_types='cccccccccc',
col_names=c("primaryIdentifier", "symbol", "secondaryIdentifier", "briefDescription", "alleles_symbol",
"alleles_primaryIdentifier", "alleles_phenotypes_identifier", "alleles_phenotypes_name",
"alleles_phenotypes_description", "query"))
}
#get the data from intermine
wormmine_data <- lapply(gene_chunks, queryMine) %>% dplyr::bind_rows() %>% as.data.frame()
message(sprintf('%s records retrieved, now analysing', nrow(wormmine_data)))
#filter the wormmine data
wormmine_data_filtered <- wormmine_data %>%
dplyr::mutate(is_lethal_worm = !grepl('NotObs', query)) %>%
as.data.frame() #already filtered for lethal phenotypes in intermine query
##generate a summary table with one row per allele, highlights whether any phenotype annotations are lethal
wormmine_data_byallele <- wormmine_data_filtered %>%
dplyr::group_by(primaryIdentifier, symbol, alleles_primaryIdentifier, alleles_symbol) %>%
dplyr::summarise(is_lethal_worm=as.logical(max(is_lethal_worm))) %>%
dplyr::ungroup() %>%
as.data.frame()
#now count the number of lethal vs non-lethal alleles
wormmine_data_bygene <- wormmine_data_byallele %>%
dplyr::group_by(primaryIdentifier, symbol) %>%
dplyr::summarise(allele_count_worm=n(),
lethal_count_worm=sum(is_lethal_worm),
lethal_pct_worm=round(lethal_count_worm*100/allele_count_worm,1)) %>%
dplyr::ungroup() %>%
as.data.frame()
#combine output
output_df <- worm_orthologs_combined %>%
dplyr::left_join(wormmine_data_bygene, by=c('celegans_ortholog_ids'='primaryIdentifier')) %>%
as.data.frame()
#put data into sqlite database
DBI::dbWriteTable(con, "wormmine_analysis_martdata_grouped", worm_orthologs_combined, overwrite=TRUE)
DBI::dbWriteTable(con, "wormmine_analysis_wormmine_filtered", wormmine_data_filtered, overwrite=TRUE)
DBI::dbWriteTable(con, "wormmine_analysis_wormmine_byallele", wormmine_data_byallele, overwrite=TRUE)
DBI::dbWriteTable(con, "wormmine_analysis_wormmine_bygene", wormmine_data_bygene, overwrite=TRUE)
DBI::dbWriteTable(con, "wormmine_analysis_output", output_df, overwrite=TRUE)
message('Finished')
return(output_df)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.