Source code for tidytcells.tr._query
import re
from typing import Dict, FrozenSet, Optional, Type
from tidytcells import _utils
from tidytcells._utils import Parameter
from tidytcells._query_engine import (
QueryEngine,
HomoSapiensTrQueryEngine,
MusMusculusTrQueryEngine,
)
# Registry of the supported species: each key is the species name in the
# cleaned, lower-case form that ``query`` looks up, and each value is the
# query engine class that serves that species.
QUERY_ENGINES: Dict[str, Type[QueryEngine]] = dict(
    homosapiens=HomoSapiensTrQueryEngine,
    musmusculus=MusMusculusTrQueryEngine,
)
[docs]
def query(
    species: Optional[str] = None,
    precision: Optional[str] = None,
    functionality: Optional[str] = None,
    contains_pattern: Optional[str] = None,
) -> FrozenSet[str]:
    """
    Query the list of all known TR genes / alleles.

    .. topic:: Supported species

        - ``"homosapiens"``
        - ``"musmusculus"``

    :param species:
        Species to query (see above for supported species).
        Defaults to ``"homosapiens"``.
    :type species:
        str
    :param precision:
        The level of precision to query.
        ``allele`` will query from the set of all possible alleles.
        ``gene`` will query from the set of all possible genes.
        Defaults to ``allele``.
    :type precision:
        str
    :param functionality:
        Gene/allele functionality to subset by.
        ``"any"`` queries from all possible genes/alleles.
        ``"F"`` queries from functional genes/alleles.
        ``"NF"`` queries from pseudogenes and ORFs.
        ``"P"`` queries from pseudogenes.
        ``"ORF"`` queries from ORFs.
        An allele is considered queriable if its functionality label matches the description.
        A gene is considered queriable if at least one of its alleles' functionality label matches the description.
        Defaults to ``"any"``.
    :type functionality:
        str
    :param contains_pattern:
        An optional regular expression string which will be used to filter the query result.
        If supplied, only genes/alleles which contain the regular expression will be returned.
        Defaults to ``None``.
    :type contains_pattern:
        str

    :return:
        The set of all genes / alleles that satisfy the given constraints.
    :rtype:
        FrozenSet[str]

    :raises ValueError:
        If the (cleaned) ``species`` is not one of the supported species.

    .. topic:: Example usage

        List all known variants for the human TR gene TRBV6-1.

        >>> tt.tr.query(species="homosapiens", contains_pattern="TRBV6-1")
        frozenset({'TRBV6-1*01'})

        List all known *Mus musculus* TRAV genes that have at least one allele which is a non-functional ORF.

        >>> tt.tr.query(species="musmusculus", precision="gene", functionality="ORF", contains_pattern="TRAV")
        frozenset({'TRAV21/DV12', 'TRAV14D-1', 'TRAV13-3', 'TRAV9D-2', 'TRAV5D-4', 'TRAV12D-3', 'TRAV12-1', 'TRAV18', 'TRAV11D'})
    """
    # Resolve defaults and validate every argument up front. ``species`` is
    # optional in the signature so that the documented default is reachable
    # by simply omitting the argument.
    species = (
        Parameter(species, "species")
        .set_default("homosapiens")
        .throw_error_if_not_of_type(str)
        .value
    )
    precision = (
        Parameter(precision, "precision")
        .set_default("allele")
        .throw_error_if_not_one_of("allele", "gene")
        .value
    )
    functionality = (
        Parameter(functionality, "functionality")
        .set_default("any")
        .throw_error_if_not_one_of("any", "F", "NF", "P", "ORF")
        .value
    )
    contains_pattern = (
        Parameter(contains_pattern, "contains_pattern")
        .throw_error_if_not_of_type(str, optional=True)
        .value
    )

    # The registry is keyed by the cleaned, lower-cased species name, so the
    # user-supplied string is passed through the shared helper before lookup.
    species = _utils.clean_and_lowercase(species)
    if species not in QUERY_ENGINES:
        raise ValueError(f"Unsupported species: {species}. No data available.")

    query_engine = QUERY_ENGINES[species]
    result = query_engine.query(precision, functionality)

    if contains_pattern is None:
        return result

    # Compile once rather than handing the pattern string to ``re.search``
    # on every iteration.
    pattern = re.compile(contains_pattern)
    return frozenset(name for name in result if pattern.search(name))