biomedical_id_resolver.js
A JavaScript library for batch-resolving biological IDs to their equivalent IDs.
Install
$ npm i biomedical_id_resolver
Usage
const resolve = require('biomedical_id_resolver');
// input should be an object, with semantic type as the key, and array of CURIEs as value
let input = {
"Gene": ["NCBIGene:1017", "NCBIGene:1018", "HGNC:1177"],
"SmallMolecule": ["CHEBI:15377"],
"Disease": ["MONDO:0004976"],
"Cell": ["CL:0002372"]
};
(async () => {
const resolver = new resolve();
console.log(await resolver.resolve(input));
//=> {'NCBIGene:1017': {...}, 'NCBIGene:1018': {...}, 'HGNC:1177': {...}, 'CHEBI:15377': {...}, 'MONDO:0004976': {...}, 'CL:0002372': {...}}
})();
Output Schema
-
Output is a javascript Object
-
The root keys are the CURIEs (e.g. NCBIGene:1017) that were passed in as input
-
The values represent the resolved identifiers
-
Each CURIE will have 4 required fields
-
id: the primary id (selected based on the ranking described in the next section) and label
-
curies: an array, each element represents a resolved id in CURIE format
-
type: the semantic type of the identifier
-
db_ids: original ids from source database, could be curies or non-curies.
-
-
If an ID cannot be resolved by the package, its entry will have an additional field called "flag", with the value "failed"
-
Example Output
{
"NCBIGene:1017": {
"id": {
"label": "cyclin dependent kinase 2",
"identifier": "NCBIGene:1017"
},
"db_ids": {
"NCBIGene": [
"1017"
],
"ENSEMBL": [
"ENSG00000123374"
],
"HGNC": [
"1771"
],
"SYMBOL": [
"CDK2"
],
"UMLS": [
"C1332733",
"C0108855"
],
"name": [
"cyclin dependent kinase 2"
]
},
"type": "Gene",
"curies": [
"NCBIGene:1017",
"ENSEMBL:ENSG00000123374",
"HGNC:1771",
"SYMBOL:CDK2",
"UMLS:C1332733",
"UMLS:C0108855"
]
}
}
Query Using SRI node normalizer
Usage
const resolver = require('biomedical_id_resolver');
// input must be an object, with semantic type as the key, and array of CURIEs as value
let input = {
"Gene": ["NCBIGene:1017", "NCBIGene:1018", "HGNC:1177"],
"SmallMolecule": ["CHEBI:15377"],
"Disease": ["MONDO:0004976"],
"Cell": ["CL:0002372"]
};
// SRI resolver will figure out the semantic type if the input type is 'unknown', 'undefined', or 'NamedThing'
let input = {
"unknown": ["NCBIGene:1017", "MONDO:0004976"],
};
(async () => {
let res = await resolver.resolveSRI(input);
console.log(res);
})();
Example Output
The output contains `id` and `equivalent_identifiers` straight from SRI, as well as the same fields as the base resolver, to keep it backwards compatible. If the semantic type resolved by SRI doesn't agree with the input semantic type, there will be 2 entries in the array for that CURIE.
{
"NCBIGene:1017": [
{
"id": {
"identifier": "NCBIGene:1017",
"label": "CDK2"
},
"equivalent_identifiers": [
{
"identifier": "NCBIGene:1017",
"label": "CDK2"
},
{
"identifier": "ENSEMBL:ENSG00000123374"
},
{
"identifier": "HGNC:1771",
"label": "CDK2"
},
{
"identifier": "OMIM:116953"
},
{
"identifier": "UMLS:C1332733",
"label": "CDK2 gene"
}
],
"type": [
"biolink:Gene",
"biolink:GeneOrGeneProduct",
"biolink:BiologicalEntity",
"biolink:NamedThing",
"biolink:Entity",
"biolink:MacromolecularMachineMixin"
],
"primaryID": "NCBIGene:1017",
"label": "CDK2",
"attributes": {},
"semanticType": "Gene",
"semanticTypes": [
"biolink:Gene",
"biolink:GeneOrGeneProduct",
"biolink:BiologicalEntity",
"biolink:NamedThing",
"biolink:Entity",
"biolink:MacromolecularMachineMixin"
],
"dbIDs": {
"NCBIGene": [
"1017"
],
"ENSEMBL": [
"ENSG00000123374"
],
"HGNC": [
"1771"
],
"OMIM": [
"116953"
],
"UMLS": [
"C1332733"
],
"name": [
"CDK2",
"CDK2 gene"
]
},
"curies": [
"NCBIGene:1017",
"ENSEMBL:ENSG00000123374",
"HGNC:1771",
"OMIM:116953",
"UMLS:C1332733"
]
}
]
}
Available Semantic Types & prefixes
Gene, Transcript, Protein ID resolution is done through MyGene.info API
-
Gene
- NCBIGene
- ENSEMBL
- HGNC
- MGI
- OMIM
- UMLS
- SYMBOL
- UniProtKB
- name
-
Transcript
- ENSEMBL
- SYMBOL
- name
-
Protein
- UniProtKB
- ENSEMBL
- UMLS
- SYMBOL
- name
Variant ID resolution is done through MyVariant.info API
- SequenceVariant
- CLINVAR
- DBSNP
- HGVS
- MYVARIANT_HG19
SmallMolecule, Drug ID resolution is done through MyChem.info API
-
SmallMolecule
- PUBCHEM.COMPOUND
- CHEMBL.COMPOUND
- UNII
- CHEBI
- DRUGBANK
- MESH
- CAS
- HMDB
- KEGG.COMPOUND
- INCHI
- INCHIKEY
- UMLS
- LINCS
- name
-
Drug
- RXCUI
- NDC
- DRUGBANK
- PUBCHEM.COMPOUND
- CHEMBL.COMPOUND
- UNII
- CHEBI
- MESH
- CAS
- HMDB
- KEGG.COMPOUND
- INCHI
- INCHIKEY
- UMLS
- LINCS
- name
Disease, ClinicalFinding ID Resolution is done through MyDisease.info API
-
Disease
- MONDO
- DOID
- OMIM
- ORPHANET
- EFO
- UMLS
- MESH
- MEDDRA
- NCIT
- SNOMEDCT
- HP
- GARD
- name
-
ClinicalFinding
- LOINC
- NCIT
- EFO
- name
Pathway ID Resolution is done through biothings.ncats.io/geneset API
- Pathway
- GO
- REACT
- KEGG
- SMPDB
- PHARMGKB.PATHWAYS
- WIKIPATHWAYS
- BIOCARTA
- name
MolecularActivity ID Resolution is done through BioThings Gene Ontology Molecular Activity API
- MolecularActivity
- GO
- REACT
- RHEA
- MetaCyc
- KEGG.REACTION
- name
CellularComponent ID Resolution is done through BioThings Gene Ontology Cellular Component API
- CellularComponent
- GO
- MetaCyc
- name
BiologicalProcess ID Resolution is done through BioThings Gene Ontology Biological Process API
-
BiologicalProcess
- GO
- REACT
- MetaCyc
- KEGG
- name
AnatomicalEntity ID Resolution is done through BioThings UBERON API
- AnatomicalEntity
- UBERON
- UMLS
- MESH
- NCIT
- name
PhenotypicFeature ID Resolution is done through BioThings HPO API
- PhenotypicFeature
- HP
- EFO
- NCIT
- UMLS
- MEDDRA
- MP
- SNOMEDCT
- MESH
- name
Cell ID Resolution is done through Biothings Cell Ontology API
- Cell
- CL
- NCIT
- MESH
- EFO
- name
Development
- Install Node 12 or later. You can use the package manager of your choice. Tests need to pass in Node 12 and 14.
- Clone this repository.
- Run `npm ci` to install the dependencies.
- Scripts are stored in the `/src` folder.
- Add tests to the `/__tests__` folder.
- Run `npm run release` to bump the version and generate the change log.
- Run `npx depcheck` to check for unused packages in package.json.
CHANGELOG
See CHANGELOG.md