add_ | R Documentation |
Functions to add metadata to data.table objects.
add_ancestor(
phenos,
lvl = 2,
hpo = get_hpo(),
keep_descendants = NULL,
remove_descendants = NULL,
force_new = FALSE
)
add_death(
phenos,
keep_deaths = NULL,
all.x = TRUE,
allow.cartesian = TRUE,
agg_by = NULL
)
add_disease_genes(phenos, all.x = TRUE, verbose = TRUE)
add_evidence(
phenos,
evidence_score_threshold = NULL,
evidence_score_threshold_metric = "evidence_score_sum",
all.x = TRUE,
allow.cartesian = FALSE,
agg_by = c("disease_id", "gene_symbol"),
default_score = 1,
...
)
add_gene_frequency(
phenotype_to_genes = load_phenotype_to_genes(),
gene_frequency_threshold = NULL,
all.x = TRUE,
allow.cartesian = FALSE,
verbose = TRUE
)
add_genes(
phenos = NULL,
phenotype_to_genes = load_phenotype_to_genes(),
hpo = get_hpo(),
by = c("hpo_id", "hpo_name", "disease_id", "disease_name", "disease_description"),
gene_col = "gene_symbol",
all.x = FALSE,
allow.cartesian = FALSE
)
add_gpt_annotations(
phenos,
annot = gpt_annot_codify(reset_weights_dict = TRUE)$annot_weighted,
annot_cols = names(annot)[!names(annot) %in% c("hpo_id", "hpo_name")],
gpt_filters = `names<-`(rep(list(NULL), length(annot_cols)), annot_cols),
force_new = FALSE
)
add_hpo_definition(
phenos,
hpo = get_hpo(),
line_length = FALSE,
use_api = FALSE,
verbose = TRUE
)
add_hpo_id(phenos, hpo = get_hpo(), ignore_case = TRUE)
add_hpo_name(phenos, hpo = get_hpo())
add_info_content(phenos, hpo = get_hpo())
add_mondo(phenos, input_col = "disease_id", map_to = "hpo", ...)
add_ndisease(
phenos,
pheno_ndiseases_threshold = NULL,
all.x = TRUE,
allow.cartesian = FALSE,
verbose = TRUE
)
add_omop(
phenos,
input_col = "hpo_id",
all.x = TRUE,
allow.cartesian = FALSE,
force_new = FALSE,
verbose = TRUE
)
add_onset(
phenos,
keep_onsets = NULL,
agg_by = NULL,
all.x = TRUE,
allow.cartesian = FALSE
)
add_ont_lvl(
phenos,
hpo = get_hpo(),
absolute = TRUE,
keep_ont_levels = NULL,
...
)
add_pheno_frequency(
phenos,
pheno_frequency_threshold = NULL,
all.x = TRUE,
allow.cartesian = FALSE
)
add_prevalence(
phenos,
input_col = "disease_id",
drop_na = TRUE,
method = "orphanet"
)
add_severity(
phenos,
hpo = get_hpo(),
all.x = TRUE,
allow.cartesian = FALSE,
severity_threshold = NULL
)
add_tier(
phenos,
all.x = TRUE,
include_disease_characteristics = TRUE,
auto_assign = TRUE,
hpo = get_hpo(),
keep_tiers = NULL,
verbose = TRUE
)
phenos |
A data.table containing HPO IDs and other metadata. |
lvl |
How many levels deep into the ontology to get ancestors from. For example:
|
hpo |
Human Phenotype Ontology object, loaded from get_ontology. |
keep_descendants |
Terms whose descendants should be kept
(including themselves).
Set to NULL (default) to skip this filtering step. |
remove_descendants |
Terms whose descendants should be removed
(including themselves).
Set to NULL (default) to skip this filtering step. |
force_new |
Force a new query to the OARD API instead of using pre-downloaded data. |
keep_deaths |
The age of death associated with each HPO ID to keep. If >1 age of death is associated with the term, only the earliest age is considered. See add_death for details. |
all.x |
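logical; if TRUE, rows from x (phenos) that have no matching row in the annotation table are kept in the output, with NA in the added columns. |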
allow.cartesian |
See the allow.cartesian argument of data.table's merge / [.data.table. |
agg_by |
Column to aggregate age of onset metadata by. |
verbose |
Print messages. |
evidence_score_threshold |
The minimum threshold of mean evidence scores of each gene-phenotype association to keep. |
evidence_score_threshold_metric |
The metric to use for filtering with
|
default_score |
Default evidence score to apply to gene-disease associations that are present in the HPO annotations but don't have evidence scores in the GenCC annotations. |
... |
Arguments passed on to
|
phenotype_to_genes |
Output of load_phenotype_to_genes mapping phenotypes to gene annotations. |
gene_frequency_threshold |
Only keep genes with frequency
above the set threshold. Frequency ranges from 0-100 where 100 is
a gene that occurs 100% of the time in a given phenotype.
Include |
by |
A vector of shared column names in |
gene_col |
Name of the gene column. |
annot |
GPT annotation data. |
annot_cols |
Columns to add. |
gpt_filters |
A named list of filters to apply to the GPT annotations. |
line_length |
The desired number of words per line (an integer). |
use_api |
Get definitions from the HPO API, as opposed to a static local dataset. |
ignore_case |
Ignore case when mapping terms. |
input_col |
Name of the column containing the disease or phenotype IDs. |
map_to |
Mapping outputs to include (from Mondo IDs to another database's IDs). |
pheno_ndiseases_threshold |
Filter phenotypes by the maximum number of diseases they are associated with. |
keep_onsets |
The age of onset associated with each HPO ID to keep. If >1 age of onset is associated with the term, only the earliest age is considered. See add_onset for details. |
absolute |
Make the levels absolute in the sense that they consider
the entire ontology ( |
keep_ont_levels |
Only keep phenotypes at certain absolute ontology levels. See add_ont_lvl for details. |
pheno_frequency_threshold |
Only keep phenotypes with frequency
above the set threshold. Frequency ranges from 0-100 where 100 is
a phenotype that occurs 100% of the time in all associated diseases.
Include |
drop_na |
Whether to drop rows with missing prevalence data. |
method |
One of "orphanet" or "oard". |
severity_threshold |
Only keep phenotypes with a mean
severity score (averaged across multiple associated diseases) below the
set threshold. The severity score ranges from 1-4 where 1 is the MOST severe.
Include |
include_disease_characteristics |
Include |
auto_assign |
Automatically assign HPO IDs to Tiers by conducting regex searches for keywords that appear in the term name, or the names of its descendants or ancestors. |
keep_tiers |
Tiers from hpo_tiers to keep.
Include |
Annotated data.
phenos data.table with extra columns:
"AgeOfDeath": AgeOfDeath HPO IDs of disease phenotypes associated with the target hpo_id phenotype.
"AgeOfDeath_names": AgeOfDeath HPO names of disease phenotypes associated with the target hpo_id phenotype.
"AgeOfDeath_counts": The number of times each term in "AgeOfDeath_names" appears across associated disease phenotypes.
"AgeOfDeath_score_mean": Mean age of death score.
"AgeOfDeath_score_min": Minimum age of death score.
"AgeOfDeath_top": The most common age of death term.
"AgeOfDeath_earliest": The earliest age of death.
"AgeOfDeath_latest": The latest age of death.
phenos data.table with extra columns:
"evidence_score_min": Minimum evidence score.
"evidence_score_max": Maximum evidence score.
"evidence_score_mean": Mean evidence score.
phenos data.table with extra column
A named vector of HPO term descriptions.
phenos data.table with extra column
phenos data.table with extra columns.
phenos data.table with extra columns
phenos data.table with extra columns:
"onset": onset HPO IDs of disease phenotypes associated with the target hpo_id phenotype.
"onset_names": onset HPO names of disease phenotypes associated with the target hpo_id phenotype.
"onset_counts": The number of times each term in "onset_names" appears across associated disease phenotypes.
"onset_score_mean": Mean onset score.
"onset_score_min": Minimum onset score.
"onset_top": The most common onset term.
"onset_earliest": The earliest age of onset.
"onset_latest": The latest age of onset.
phenos data.table with extra column
phenos data.table with extra column
phenos data.table with extra columns
phenos data.table with extra column
add_ancestor()
: add_
Add ancestor
Assign each HPO ID to the higher-order ancestral term that it is part of.
add_death()
: add_
Add age of death
Add age of death for each HPO ID. AgeOfDeath IDs and assigned "AgeOfDeath_score" values:
HP:0005268 "Miscarriage" (AgeOfDeath_score=1)
HP:0003826 "Stillbirth" (AgeOfDeath_score=1)
HP:0034241 "Prenatal death" (AgeOfDeath_score=1)
HP:0003811 "Neonatal death" (AgeOfDeath_score=2)
HP:0001522 "Death in infancy" (AgeOfDeath_score=3)
HP:0003819 "Death in childhood" (AgeOfDeath_score=4)
HP:0011421 "Death in adolescence" (AgeOfDeath_score=5)
HP:0100613 "Death in early adulthood" (AgeOfDeath_score=6)
HP:0033764 "Death in middle age" (AgeOfDeath_score=7)
HP:0033763 "Death in adulthood" (AgeOfDeath_score=7)
HP:0033765 "Death in late adulthood" (AgeOfDeath_score=8)
add_disease_genes()
: add_
Add disease genes
Add genes that overlap between an HPO ID and an associated phenotype.
add_evidence()
: add_
Add evidence
Add the strength of evidence supporting each gene-disease association. Delphi survey evidence classification IDs and assigned "evidence_score" values:
GENCC:100001 "Definitive" (evidence_score=6)
GENCC:100002 "Strong" (evidence_score=5)
GENCC:100003 "Moderate" (evidence_score=4)
GENCC:100009 "Supportive" (evidence_score=3)
GENCC:100004 "Limited" (evidence_score=2)
GENCC:100005 "Disputed Evidence" (evidence_score=1)
GENCC:100006 "Refuted Evidence" (evidence_score=0)
GENCC:100008 "No Known Disease Relationship" (evidence_score=0)
add_gene_frequency()
: add_
Add gene frequency
Add gene-level frequency, i.e. how often mutations in a given gene are associated with a given phenotype. Numeric frequency columns are on a 0-100% scale.
add_genes()
: add_
Add genes
Add genes associated with each phenotype (in the context of a particular disease).
add_gpt_annotations()
: add_
Add GPT annotations
Add annotations generated with a Large Language Model.
add_hpo_definition()
: add_
Get term definition
This function accesses the HPO API to get a description/definition of an
HPO term. If a line_length > 0 is passed to the function, it will add
newlines every nth word. This can be useful when displaying the description
in plots with limited space.
add_hpo_id()
: add_
Add HPO ID column to dataframe
Adds the HPO term ID column "hpo_id".
add_hpo_name()
: add_
Add HPO name column to dataframe
Adds the HPO term name column "hpo_name".
add_info_content()
: add_
Add information content
Add a column containing the information content score for each HPO ID.
add_mondo()
: add_
Add Mondo metadata
Add Mondo metadata (MONDO ID mappings, names, and definitions) for diseases using files from their respective databases: e.g. OMIM, DECIPHER, Orphanet.
add_ndisease()
: add_
Add N diseases
Annotate each HPO term with the total number of diseases it is associated with.
add_omop()
: add_
Add OMOP
Add metadata from MONDO, including:
mondo_id: MONDO term ID.
mondo_name: MONDO term name.
mondo_def: MONDO term definition.
add_onset()
: add_
Add age of onset
Add age of onset for each HPO ID. onset IDs and assigned "onset_score" values:
HP:0011461 "Fetal onset" (onset_score=1)
HP:0030674 "Antenatal onset" (onset_score=2)
HP:0003577 "Congenital onset" (onset_score=3)
HP:0003623 "Neonatal onset" (onset_score=4)
HP:0003593 "Infantile onset" (onset_score=5)
HP:0011463 "Childhood onset" (onset_score=6)
HP:0003621 "Juvenile onset" (onset_score=7)
HP:0011462 "Young adult onset" (onset_score=8)
HP:0003581 "Adult onset" (onset_score=9)
HP:0003596 "Middle age onset" (onset_score=10)
HP:0003584 "Late onset" (onset_score=11)
add_ont_lvl()
: add_
Add ontology level
Add the relative ontology level for each HPO ID.
add_pheno_frequency()
: add_
Add phenotype frequency
Add phenotype-level frequency, i.e. how often a phenotype occurs in a given disease.
add_prevalence()
: add_
Add prevalence
Add a column containing the prevalence score for each disease ("disease_id") or phenotype ("hpo_id").
add_severity()
: add_
Add HPO modifiers
Annotate each HPO with modifier terms, including (but not limited to) progression and severity ratings. In order of increasing severity:
HP:0012825 "Mild" (Severity_score=4)
HP:0012827 "Borderline" (Severity_score=3)
HP:0012828 "Severe" (Severity_score=2)
HP:0012829 "Profound" (Severity_score=1)
add_tier()
: add_
Add severity Tiers
Add severity Tier for each HPO ID, in accordance with the rating system provided by Lazarin et al (2014). In order of increasing severity:
Tier 4 Reduced fertility
Tier 3 Sensory impairment: vision, Immunodeficiency/cancer, Sensory impairment: hearing, Sensory impairment: touch, other (including pain), Mental illness, Dysmorphic features
Tier 2 Shortened life span: premature adulthood, Impaired mobility, Internal physical malformation
Tier 1 Shortened life span: infancy, Shortened life span: childhood/adolescence, Intellectual disability
phenos <- example_phenos()
phenos2 <- add_ancestor(phenos = phenos, lvl=5)
phenos <- example_phenos()
phenos2 <- add_death(phenos = phenos)
## Not run:
phenos <- load_phenotype_to_genes()
phenos2 <- add_severity(phenos = phenos)
## End(Not run)
phenos <- load_phenotype_to_genes()
phenos2 <- add_evidence(phenos = phenos)
phenotype_to_genes <- load_phenotype_to_genes()[seq(1000),]
phenos2 <- add_gene_frequency(phenotype_to_genes = phenotype_to_genes)
phenos <- example_phenos()
phenos2 <- add_genes(phenos = phenos)
phenos <- example_phenos()
phenos2 <- add_gpt_annotations(phenos)
phenos <- example_phenos()
phenos2 <- add_hpo_definition(phenos = phenos)
phenotype_to_genes <- load_phenotype_to_genes()
phenos <- unique(phenotype_to_genes[,c("hpo_id","hpo_name")])
phenos2 <- add_hpo_id(phenos=phenos)
phenos <- example_phenos()
phenos2 <- add_hpo_name(phenos=phenos)
phenos <- example_phenos()
phenos2 <- add_info_content(phenos = phenos)
phenos <- load_phenotype_to_genes(3)[seq(1000)]
phenos2 <- add_mondo(phenos = phenos)
phenos <- example_phenos()
phenos2 <- add_ndisease(phenos = phenos)
phenos <- example_phenos()
phenos2 <- add_omop(phenos = phenos)
phenos <- example_phenos()
phenos2 <- add_onset(phenos = phenos)
phenos <- make_phenos_dataframe(ancestor = "Neurodevelopmental delay")
phenos2 <- add_ont_lvl(phenos = phenos)
phenos <- example_phenos()
phenos2 <- add_pheno_frequency(phenos = phenos)
phenos <- example_phenos()
phenos2 <- add_prevalence(phenos = phenos)
phenos <- example_phenos()
phenos2 <- add_severity(phenos = phenos)
phenos <- example_phenos()
phenos2 <- add_tier(phenos = phenos)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.