Source code for ols_client.client

"""Client classes for the OLS."""

import logging
import time
from collections.abc import Iterable
from typing import Any, TypeAlias, cast
from urllib.parse import quote

import requests

# Names exported by ``from ols_client.client import *``: the generic client
# followed by the preconfigured subclasses defined in this module.
__all__ = [
    # Base client
    "Client",
    # Concrete
    "EBIClient",
    "FraunhoferClient",
    "MonarchClient",
    "TIBClient",
    "ZBMedClient",
]

# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)


def _iterate_response_terms(response: dict[str, Any]) -> Iterable[dict[str, Any]]:
    """Iterate over the terms in the given response."""
    yield from response["_embedded"]["terms"]


def _quote(iri: str) -> str:
    # must be double encoded https://www.ebi.ac.uk/ols/docs/api
    iri = quote(iri, safe="")
    iri = quote(iri, safe="")
    return iri


def _help_iterate_labels(term_iterator: Iterable[dict[str, Any]]) -> Iterable[str]:
    for term in term_iterator:
        yield term["label"]


# Type of the ``timeout`` arguments in this module; ``None`` means the caller
# did not specify a timeout.
TimeoutHint: TypeAlias = float | int | None
# Type of a decoded JSON response body; left as ``Any`` here.
Res: TypeAlias = Any


class Client:
    """A client for an OLS instance.

    It wraps the functions to query the OLS such that alternative base URLs can be used.
    """

    def __init__(self, base_url: str):
        """Initialize the client.

        :param base_url: The URL of the OLS instance. Trailing slashes are removed and
            ``/api`` is appended unless the URL already ends with it.
        """
        base_url = base_url.rstrip("/")
        if not base_url.endswith("/api"):
            base_url = f"{base_url}/api"
        self.base_url = base_url

    def get_json(
        self,
        path: str,
        params: dict[str, Any] | None = None,
        raise_for_status: bool = True,
        timeout: TimeoutHint = None,
        **kwargs: Any,
    ) -> Res:
        """Get the response JSON.

        :param path: The path to query following the base URL, e.g., ``/ontologies``
        :param params: Parameters to pass through to :func:`requests.get`
        :param raise_for_status: If true, raise an exception on an HTTP error status
        :param timeout: The timeout, defaults to 5 seconds if not given
        :param kwargs: Keyword arguments to pass through to :func:`requests.get`
        :returns: The decoded JSON body of the response
        """
        return self.get_response(
            path=path, params=params, raise_for_status=raise_for_status, timeout=timeout, **kwargs
        ).json()

    def get_response(
        self,
        path: str,
        params: dict[str, Any] | None = None,
        raise_for_status: bool = True,
        timeout: TimeoutHint = None,
        **kwargs: Any,
    ) -> requests.Response:
        """Send a GET request to the given endpoint.

        :param path: The path to query following the base URL, e.g., ``/ontologies``. If this
            starts with the base URL, it gets stripped.
        :param params: Parameters to pass through to :func:`requests.get`
        :param raise_for_status: If true, call :meth:`requests.Response.raise_for_status`,
            which raises an exception on an HTTP error status
        :param timeout: The timeout, defaults to 5 seconds if not given
        :param kwargs: Keyword arguments to pass through to :func:`requests.get`
        :returns: The response from :func:`requests.get`
        """
        if not params:
            params = {}
        # Accept absolute URLs under the base URL as well as relative paths.
        if path.startswith(self.base_url):
            path = path[len(self.base_url) :]
        url = self.base_url + "/" + path.lstrip("/")
        res = requests.get(url, params=params, timeout=timeout or 5, **kwargs)
        if raise_for_status:
            res.raise_for_status()
        return res

    @staticmethod
    def _get_embedded(res_json: dict[str, Any], key: str | None) -> Any:
        """Return the ``_embedded`` payload of a page, sliced by ``key`` if one is given."""
        embedded = res_json["_embedded"]
        if key:
            embedded = embedded[key]
        return embedded

    @staticmethod
    def _get_next_href(res_json: dict[str, Any]) -> str | None:
        """Return the URL of the next page, or ``None`` if the page has no ``next`` link."""
        # Links are nested objects keyed by relation name (``_links.<rel>.href``),
        # so the next page lives under ``_links.next.href``.
        links = res_json.get("_links") or {}
        next_link = links.get("next") or {}
        return cast(str | None, next_link.get("href"))

    def get_paged(
        self,
        path: str,
        key: str | None = None,
        size: int | None = None,
        sleep: int | None = None,
        timeout: TimeoutHint = None,
    ) -> Iterable[dict[str, Any]]:
        """Iterate over all results of a paged endpoint, lazily.

        :param path: The url to query
        :param key: The key to slice from the _embedded field
        :param size: The size of each page. Defaults to 500, which is the maximum allowed by
            the EBI.
        :param sleep: The amount of time to sleep between pages. Defaults to none.
        :param timeout: The timeout, defaults to 5 seconds if not given. Applied both to
            initial request and each page
        :yields: The entries of each page, in order
        :raises ValueError: if an invalid size is given. Because this is a generator, the
            error is raised when iteration starts rather than when the method is called.
        """
        if size is None:
            size = 500
        elif size > 500:
            raise ValueError(f"Maximum size is 500. Given: {size}")

        res_json = self.get_json(path, timeout=timeout, params={"size": size})
        yield from self._get_embedded(res_json, key)
        next_href = self._get_next_href(res_json)
        while next_href:
            if sleep is not None:
                time.sleep(sleep)
            # The next link is an absolute URL, so it is requested directly; the same
            # default timeout and status check as the first page are applied.
            res = requests.get(next_href, timeout=timeout or 5)
            res.raise_for_status()
            res_json = res.json()
            yield from self._get_embedded(res_json, key)
            next_href = self._get_next_href(res_json)

    def get_ontologies(self) -> Iterable[dict[str, Any]]:
        """Get all ontologies."""
        return self.get_paged("/ontologies", key="ontologies")

    def get_ontology(self, ontology: str) -> dict[str, Any]:
        """Get the metadata for a given ontology.

        :param ontology: The name of the ontology
        :returns: The dictionary representing the JSON from the OLS
        """
        return cast(dict[str, Any], self.get_json(f"/ontologies/{ontology}"))

    def get_term(self, ontology: str, iri: str) -> dict[str, Any]:
        """Get the data for a given term.

        :param ontology: The name of the ontology
        :param iri: The IRI of a term
        :returns: Results about the term
        """
        return cast(
            dict[str, Any], self.get_json(f"/ontologies/{ontology}/terms", params={"iri": iri})
        )

    def search(
        self,
        query: str,
        query_fields: Iterable[str] | None = None,
        params: dict[str, Any] | None = None,
    ) -> Res:
        """Search the OLS with the given term.

        :param query: The query to search
        :param query_fields: Fields to query
        :param params: Additional params to pass through to :func:`get_json`
        :returns: The ``response`` -> ``docs`` entry of the search result JSON
        """
        # Copy so the caller's dictionary is never mutated.
        params = dict(params or {})
        params["q"] = query
        if query_fields:
            params["queryFields"] = ",".join(query_fields)
        return self.get_json("/search", params=params)["response"]["docs"]

    def suggest(self, query: str, ontology: None | str | list[str] = None) -> Res:
        """Suggest terms from an optional list of ontologies.

        :param query: The query to suggest
        :param ontology: The ontology or list of ontologies
        :returns: The decoded JSON of the suggestion response

        .. seealso:: https://www.ebi.ac.uk/ols/docs/api#_suggest_term
        """
        params = {"q": query}
        if ontology:
            params["ontology"] = ",".join(ontology) if isinstance(ontology, list) else ontology
        return self.get_json("/suggest", params=params)

    def iter_terms(
        self,
        ontology: str,
        size: int | None = None,
        sleep: int | None = None,
        timeout: TimeoutHint = None,
    ) -> Iterable[dict[str, Any]]:
        """Iterate over all terms, lazily with paging.

        :param ontology: The name of the ontology
        :param size: The size of each page. Defaults to 500, which is the maximum allowed by
            the EBI.
        :param sleep: The amount of time to sleep between pages. Defaults to no sleeping.
        :param timeout: The timeout, defaults to 5 seconds if not given, applied to each page
        :yields: Terms in the ontology
        """
        yield from self.get_paged(
            f"/ontologies/{ontology}/terms", key="terms", size=size, sleep=sleep, timeout=timeout
        )

    def iter_ancestors(
        self,
        ontology: str,
        iri: str,
        size: int | None = None,
        sleep: int | None = None,
        timeout: TimeoutHint = None,
    ) -> Iterable[dict[str, Any]]:
        """Iterate over the ancestors of a given term.

        :param ontology: The name of the ontology
        :param iri: The IRI of a term
        :param size: The size of each page. Defaults to 500, which is the maximum allowed by
            the EBI.
        :param sleep: The amount of time to sleep between pages. Defaults to no sleeping.
        :param timeout: The timeout, defaults to 5 seconds if not given, applied to each page
        :yields: the ancestors of the given term
        """
        yield from self.get_paged(
            f"ontologies/{ontology}/terms/{_quote(iri)}/ancestors",
            key="terms",
            size=size,
            sleep=sleep,
            timeout=timeout,
        )

    def iter_hierarchical_ancestors(
        self,
        ontology: str,
        iri: str,
        size: int | None = None,
        sleep: int | None = None,
        timeout: TimeoutHint = None,
    ) -> Iterable[dict[str, Any]]:
        """Iterate over the hierarchical ancestors of a given term.

        :param ontology: The name of the ontology
        :param iri: The IRI of a term
        :param size: The size of each page. Defaults to 500, which is the maximum allowed by
            the EBI.
        :param sleep: The amount of time to sleep between pages. Defaults to no sleeping.
        :param timeout: The timeout, defaults to 5 seconds if not given, applied to each page
        :yields: the hierarchical ancestors of the given term
        """
        yield from self.get_paged(
            f"ontologies/{ontology}/terms/{_quote(iri)}/hierarchicalAncestors",
            key="terms",
            size=size,
            sleep=sleep,
            timeout=timeout,
        )

    def iter_ancestors_labels(
        self,
        ontology: str,
        iri: str,
        size: int | None = None,
        sleep: int | None = None,
        timeout: TimeoutHint = None,
    ) -> Iterable[str]:
        """Iterate over the labels for the ancestors of a given term.

        :param ontology: The name of the ontology
        :param iri: The IRI of a term
        :param size: The size of each page. Defaults to 500, which is the maximum allowed by
            the EBI.
        :param sleep: The amount of time to sleep between pages. Defaults to no sleeping.
        :param timeout: The timeout, defaults to 5 seconds if not given, applied to each page
        :yields: labels of the ancestors of the given term
        """
        yield from _help_iterate_labels(
            self.iter_ancestors(ontology, iri, size=size, sleep=sleep, timeout=timeout)
        )

    def iter_labels(
        self,
        ontology: str,
        size: int | None = None,
        sleep: int | None = None,
        timeout: TimeoutHint = None,
    ) -> Iterable[str]:
        """Iterate over the labels of terms in the ontology.

        This function automatically wraps the pager returned by the OLS.

        :param ontology: The name of the ontology
        :param size: The size of each page. Defaults to 500, which is the maximum allowed by
            the EBI.
        :param sleep: The amount of time to sleep between pages. Defaults to no sleeping.
        :param timeout: The timeout, defaults to 5 seconds if not given, applied to each page
        :yields: labels of terms in the ontology
        """
        yield from _help_iterate_labels(
            self.iter_terms(ontology=ontology, size=size, sleep=sleep, timeout=timeout)
        )

    def iter_hierarchy(
        self,
        ontology: str,
        size: int | None = None,
        sleep: int | None = None,
        timeout: TimeoutHint = None,
    ) -> Iterable[tuple[str, str]]:
        """Iterate over parent-child relation labels.

        Only the first page of each term's hierarchical children is read.

        :param ontology: The name of the ontology
        :param size: The size of each page. Defaults to 500, which is the maximum allowed by
            the EBI.
        :param sleep: The amount of time to sleep between pages. Defaults to no sleeping.
        :param timeout: The timeout, defaults to 5 seconds if not given
        :yields: pairs of parent/child labels
        """
        for term in self.iter_terms(ontology=ontology, size=size, sleep=sleep, timeout=timeout):
            try:
                hierarchy_children_link = term["_links"]["hierarchicalChildren"]["href"]
            except KeyError:  # there's no children for this one
                continue

            # Apply the documented 5 second default and fail loudly on HTTP errors.
            res = requests.get(hierarchy_children_link, timeout=timeout or 5)
            res.raise_for_status()
            response = res.json()

            for child_term in response["_embedded"]["terms"]:
                yield term["label"], child_term["label"]  # TODO handle different relation types

    def get_description(self, ontology: str) -> str | None:
        """Get the description of a given ontology.

        :param ontology: The name of the ontology
        :returns: The ``description`` entry of the ontology's ``config``, or ``None`` if
            there is none.
        """
        response = self.get_ontology(ontology)
        return cast(str | None, response["config"].get("description"))

    def get_embedding(self, ontology: str, iri: str) -> list[float]:
        """Get the text-based embedding for a term.

        :param ontology: The name of the ontology
        :param iri: The IRI of a term
        :returns: The JSON returned by the ``llm_embedding`` endpoint for the term
        """
        return cast(
            list[float],
            self.get_json(f"v2/ontologies/{ontology}/classes/{_quote(iri)}/llm_embedding"),
        )
class EBIClient(Client):
    """A client preconfigured for the first-party instance of the OLS.

    .. seealso:: https://www.ebi.ac.uk/ols4
    """

    def __init__(self) -> None:
        """Point the client at the first-party OLS base URL."""
        super().__init__("https://www.ebi.ac.uk/ols4")
class TIBClient(Client):
    """A client preconfigured for the TIB instance of the OLS.

    With its new Terminology Service, TIB Leibniz Information Centre for Science and
    Technology and University Library provides a single point of access to terminology
    from domains such as architecture, chemistry, computer science, mathematics and
    physics.

    .. seealso:: https://service.tib.eu/ts4tib/
    """

    def __init__(self) -> None:
        """Point the client at the TIB terminology service base URL."""
        super().__init__("https://service.tib.eu/ts4tib")
class ZBMedClient(Client):
    """A client preconfigured for the ZB Med instance of the OLS.

    .. seealso:: https://semanticlookup.zbmed.de/ols
    """

    def __init__(self) -> None:
        """Point the client at the ZB Med base URL."""
        super().__init__("https://semanticlookup.zbmed.de/ols")
class MonarchClient(Client):
    """A client preconfigured for the Monarch Initiative instance of the OLS.

    .. seealso:: https://ols.monarchinitiative.org/
    """

    def __init__(self) -> None:
        """Point the client at the Monarch Initiative base URL."""
        super().__init__("https://ols.monarchinitiative.org/")
class FraunhoferClient(Client):
    """A client preconfigured for the Fraunhofer SCAI instance of the OLS.

    .. warning::

        Fraunhofer SCAI resources are typically not maintained, do not rely on this.

    .. seealso:: https://rohan.scai.fraunhofer.de
    """

    def __init__(self) -> None:
        """Point the client at the Fraunhofer SCAI base URL."""
        super().__init__("https://rohan.scai.fraunhofer.de")