# Source code for ols_client.client

# -*- coding: utf-8 -*-

"""Client classes for the OLS."""

import logging
import time
from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
from urllib.parse import quote

import requests

__all__ = [
    # Base client
    "Client",
    # Concrete
    "EBIClient",
    "TIBClient",
    "ZBMedClient",
    "MonarchClient",
    "FraunhoferClient",
]

logger = logging.getLogger(__name__)


def _iterate_response_terms(response):
    """Iterate over the terms in the given response."""
    yield from response["_embedded"]["terms"]


def _quote(iri):
    # must be double encoded https://www.ebi.ac.uk/ols/docs/api
    iri = quote(iri, safe="")
    iri = quote(iri, safe="")
    return iri


def _help_iterate_labels(term_iterator):
    for term in term_iterator:
        yield term["label"]


class Client:
    """Wraps the functions to query the Ontology Lookup Service such that alternative base URLs can be used."""

    def __init__(self, base_url: str):
        """Initialize the client.

        :param base_url: The URL of the OLS instance. Trailing slashes are removed and
            ``/api`` is appended unless the URL already ends with it.
        """
        base_url = base_url.rstrip("/")
        if not base_url.endswith("/api"):
            base_url = f"{base_url}/api"
        self.base_url = base_url

    def _build_url(self, path: str) -> str:
        """Resolve a relative path or an absolute link into the URL to request."""
        if path.startswith(self.base_url):
            path = path[len(self.base_url) :]
        elif path.startswith(("http://", "https://")):
            # Links returned by the server (e.g., in ``_links``) may use a different
            # scheme or host than the configured base URL, so they are used untouched.
            return path
        return self.base_url + "/" + path.lstrip("/")

    @staticmethod
    def _iter_page(res_json: Dict[str, Any], key: Optional[str]) -> Iterable:
        """Iterate over the embedded items of a single page of results."""
        embedded = res_json.get("_embedded")
        if embedded is None:
            # a page without the ``_embedded`` field is treated as having no results
            return
        yield from embedded[key] if key else embedded

    @staticmethod
    def _get_next_href(res_json: Dict[str, Any]) -> Optional[str]:
        """Get the link to the next page of results, if there is one."""
        links = res_json.get("_links") or {}
        return (links.get("next") or {}).get("href")

    def get_json(
        self,
        path: str,
        params: Optional[Dict[str, Any]] = None,
        raise_for_status: bool = True,
        **kwargs,
    ):
        """Get the response JSON.

        :param path: The path to query following the base URL, e.g., ``/ontologies``.
        :param params: Parameters to pass through to :func:`requests.get`
        :param raise_for_status: If true, raise an exception on an unsuccessful status code
        :param kwargs: Keyword arguments to pass through to :func:`requests.get`
        :returns: The parsed JSON body of the response
        """
        return self.get_response(
            path=path, params=params, raise_for_status=raise_for_status, **kwargs
        ).json()

    def get_response(
        self,
        path: str,
        params: Optional[Dict[str, Any]] = None,
        raise_for_status: bool = True,
        **kwargs,
    ) -> "requests.Response":
        """Send a GET request to the given endpoint.

        :param path: The path to query following the base URL, e.g., ``/ontologies``.
            If this starts with the base URL, it gets stripped. Any other absolute
            ``http(s)`` URL is requested as given.
        :param params: Parameters to pass through to :func:`requests.get`
        :param raise_for_status: If true, call :meth:`requests.Response.raise_for_status`
            on the response before returning it
        :param kwargs: Keyword arguments to pass through to :func:`requests.get`
        :returns: The response from :func:`requests.get`
        """
        if not params:
            params = {}
        url = self._build_url(path)
        res = requests.get(url, params=params, **kwargs)
        if raise_for_status:
            res.raise_for_status()
        return res

    def get_paged(
        self,
        path: str,
        key: Optional[str] = None,
        size: Optional[int] = None,
        sleep: Optional[int] = None,
    ) -> Iterable:
        """Iterate over all results of an endpoint, lazily with paging.

        :param path: The url to query
        :param key: The key to slice from the _embedded field
        :param size: The size of each page. Defaults to 500, which is the maximum allowed by the EBI.
        :param sleep: The amount of time to sleep between pages. Defaults to none.
        :yields: The items embedded in each page of results
        :raises ValueError: if an invalid size is given
        """
        if size is None:
            size = 500
        elif size > 500:
            raise ValueError(f"Maximum size is 500. Given: {size}")

        res_json = self.get_json(path, params={"size": size})
        while True:
            yield from self._iter_page(res_json, key)
            # The link to the following page lives under ``_links`` -> ``next`` -> ``href``
            next_href = self._get_next_href(res_json)
            if not next_href:
                break
            if sleep is not None:
                time.sleep(sleep)
            # the next link already carries the page and size query parameters
            res_json = self.get_json(next_href)

    def get_ontologies(self):
        """Get all ontologies."""
        return self.get_paged("/ontologies", key="ontologies")

    def get_ontology(self, ontology: str):
        """Get the metadata for a given ontology.

        :param ontology: The name of the ontology
        :return: The dictionary representing the JSON from the OLS
        """
        return self.get_json(f"/ontologies/{ontology}")

    def get_term(self, ontology: str, iri: str):
        """Get the data for a given term.

        :param ontology: The name of the ontology
        :param iri: The IRI of a term
        :returns: Results about the term
        """
        return self.get_json(f"/ontologies/{ontology}/terms", params={"iri": iri})

    def search(self, query: str, query_fields: Optional[Iterable[str]] = None, params=None):
        """Search the OLS with the given term.

        :param query: The query to search
        :param query_fields: Fields to query
        :param params: Additional params to pass through to :func:`get_json`
        :returns: A list of search results
        """
        # copy so the caller's dictionary is never mutated
        params = dict(params or {})
        params["q"] = query
        if query_fields:
            params["queryFields"] = ",".join(query_fields)
        return self.get_json("/search", params=params)["response"]["docs"]

    def suggest(self, query: str, ontology: Union[None, str, List[str]] = None):
        """Suggest terms from an optional list of ontologies.

        :param query: The query to suggest
        :param ontology: The ontology or list of ontologies
        :returns: A list of suggestion results

        .. seealso:: https://www.ebi.ac.uk/ols/docs/api#_suggest_term
        """
        params = {"q": query}
        if ontology:
            # a single name is passed as-is, multiple names are comma-separated
            params["ontology"] = ontology if isinstance(ontology, str) else ",".join(ontology)
        return self.get_json("/suggest", params=params)

    def iter_terms(self, ontology: str, size: Optional[int] = None, sleep: Optional[int] = None):
        """Iterate over all terms, lazily with paging.

        :param ontology: The name of the ontology
        :param size: The size of each page. Defaults to 500, which is the maximum allowed by the EBI.
        :param sleep: The amount of time to sleep between pages. Defaults to no sleeping.
        :rtype: iter[dict]
        :yields: Terms in the ontology
        """
        yield from self.get_paged(
            f"/ontologies/{ontology}/terms", key="terms", size=size, sleep=sleep
        )

    def iter_ancestors(
        self,
        ontology: str,
        iri: str,
        size: Optional[int] = None,
        sleep: Optional[int] = None,
    ):
        """Iterate over the ancestors of a given term.

        :param ontology: The name of the ontology
        :param iri: The IRI of a term
        :param size: The size of each page. Defaults to 500, which is the maximum allowed by the EBI.
        :param sleep: The amount of time to sleep between pages. Defaults to no sleeping.
        :rtype: iter[dict]
        :yields: the ancestors of the given term
        """
        yield from self.get_paged(
            f"ontologies/{ontology}/terms/{_quote(iri)}/ancestors",
            key="terms",
            size=size,
            sleep=sleep,
        )

    def iter_hierarchical_ancestors(
        self,
        ontology: str,
        iri: str,
        size: Optional[int] = None,
        sleep: Optional[int] = None,
    ):
        """Iterate over the hierarchical ancestors of a given term.

        :param ontology: The name of the ontology
        :param iri: The IRI of a term
        :param size: The size of each page. Defaults to 500, which is the maximum allowed by the EBI.
        :param sleep: The amount of time to sleep between pages. Defaults to no sleeping.
        :rtype: iter[dict]
        :yields: the hierarchical ancestors of the given term
        """
        yield from self.get_paged(
            f"ontologies/{ontology}/terms/{_quote(iri)}/hierarchicalAncestors",
            key="terms",
            size=size,
            sleep=sleep,
        )

    def iter_ancestors_labels(
        self, ontology: str, iri: str, size: Optional[int] = None, sleep: Optional[int] = None
    ) -> Iterable[str]:
        """Iterate over the labels for the ancestors of a given term.

        :param ontology: The name of the ontology
        :param iri: The IRI of a term
        :param size: The size of each page. Defaults to 500, which is the maximum allowed by the EBI.
        :param sleep: The amount of time to sleep between pages. Defaults to no sleeping.
        :yields: labels of the ancestors of the given term
        """
        yield from _help_iterate_labels(self.iter_ancestors(ontology, iri, size=size, sleep=sleep))

    def iter_labels(
        self, ontology: str, size: Optional[int] = None, sleep: Optional[int] = None
    ) -> Iterable[str]:
        """Iterate over the labels of terms in the ontology.

        Automatically wraps the pager returned by the OLS.

        :param ontology: The name of the ontology
        :param size: The size of each page. Defaults to 500, which is the maximum allowed by the EBI.
        :param sleep: The amount of time to sleep between pages. Defaults to no sleeping.
        :yields: labels of terms in the ontology
        """
        yield from _help_iterate_labels(self.iter_terms(ontology=ontology, size=size, sleep=sleep))

    def iter_hierarchy(
        self, ontology: str, size: Optional[int] = None, sleep: Optional[int] = None
    ) -> Iterable[Tuple[str, str]]:
        """Iterate over parent-child relation labels.

        :param ontology: The name of the ontology
        :param size: The size of each page. Defaults to 500, which is the maximum allowed by the EBI.
        :param sleep: The amount of time to sleep between pages. Defaults to no sleeping.
        :yields: pairs of parent/child labels
        """
        for term in self.iter_terms(ontology=ontology, size=size, sleep=sleep):
            try:
                hierarchy_children_link = term["_links"]["hierarchicalChildren"]["href"]
            except KeyError:  # there's no children for this one
                continue

            # Page through the children so terms with many children are not truncated
            # to the first page returned by the server.
            children = self.get_paged(hierarchy_children_link, key="terms", size=size, sleep=sleep)
            for child_term in children:
                yield term["label"], child_term["label"]  # TODO handle different relation types

    def get_description(self, ontology: str) -> Optional[str]:
        """Get the description of a given ontology.

        :param ontology: The name of the ontology
        :returns: The description of the ontology, or none if its configuration has no description.
        """
        response = self.get_ontology(ontology)
        return response["config"].get("description")
class EBIClient(Client):
    """Client for the first-party instance of the OLS.

    .. seealso:: https://www.ebi.ac.uk/ols4
    """

    def __init__(self):
        """Set up the client with the URL of the EBI's OLS instance."""
        super().__init__("https://www.ebi.ac.uk/ols4")
class TIBClient(Client):
    """Client for the TIB instance of the OLS.

    With its new Terminology Service, TIB – Leibniz Information Centre for Science
    and Technology and University Library provides a single point of access to
    terminology from domains such as architecture, chemistry, computer science,
    mathematics and physics.

    .. seealso:: https://service.tib.eu/ts4tib/
    """

    def __init__(self):
        """Set up the client with the URL of the TIB Terminology Service."""
        super().__init__("https://service.tib.eu/ts4tib")
class ZBMedClient(Client):
    """Client for the ZB Med instance of the OLS.

    .. seealso:: https://semanticlookup.zbmed.de/ols
    """

    def __init__(self):
        """Set up the client with the URL of the ZB Med OLS instance."""
        super().__init__("https://semanticlookup.zbmed.de/ols")
class MonarchClient(Client):
    """Client for the Monarch Initiative instance of the OLS.

    .. seealso:: https://ols.monarchinitiative.org/
    """

    def __init__(self):
        """Set up the client with the URL of the Monarch Initiative OLS instance."""
        super().__init__("https://ols.monarchinitiative.org/")
class FraunhoferClient(Client):
    """Client for the Fraunhofer SCAI instance of the OLS.

    .. warning:: Fraunhofer SCAI resources are typically not maintained, do not rely on this.

    .. seealso:: https://rohan.scai.fraunhofer.de
    """

    def __init__(self):
        """Set up the client with the URL of the Fraunhofer SCAI OLS instance."""
        super().__init__("https://rohan.scai.fraunhofer.de")