# Source code for tidytcells.ig._get_aa_sequence

from tidytcells._resources import HOMOSAPIENS_IG_AA_SEQUENCES
from tidytcells import _utils
from tidytcells._utils import Parameter
from typing import Dict, Optional


# Lookup table keyed by species name; each value is that species' table of
# amino acid sequence data, itself keyed by allele symbol.
SUPPORTED_SPECIES_AND_THEIR_AA_SEQUENCES = {"homosapiens": HOMOSAPIENS_IG_AA_SEQUENCES}


def get_aa_sequence(
    symbol: Optional[str] = None,
    species: Optional[str] = None,
    gene: Optional[str] = None,
) -> Dict[str, str]:
    """
    Return the amino acid sequences of the regions of a given IG allele.

    .. topic:: Supported species

        - ``"homosapiens"``

    :param symbol:
        Standardized allele symbol.
        The symbol must be specified down to the level of the allele.
        Some alleles, notably those of non-functional genes, will not have resolvable amino acid sequences.
    :type symbol:
        str
    :param species:
        Species to which the IG gene in question belongs (see above for supported species).
        Defaults to ``"homosapiens"``.
    :type species:
        str
    :param gene:
        Alias for `symbol`.
    :type gene:
        str

    :return:
        A dictionary whose keys name the different sequence regions within the allele, and whose values are the amino acid sequences of those regions.
    :rtype:
        Dict[str, str]

    :raises ValueError:
        If the species is not supported, or if no data is stored for the given symbol in that species.

    .. topic:: Example usage

        Get amino acid sequence information about the human V gene IGHV1-18*02.

        >>> tt.ig.get_aa_sequence(gene="IGHV1-18*02", species="homosapiens")
        {'CDR1-IMGT': 'GYTFTSYG', 'CDR2-IMGT': 'ISAYNGNT', 'FR1-IMGT': 'QVQLVQSGAEVKKPGASVKVSCKAS', 'FR2-IMGT': 'ISWVRQAPGQGLEWMGW', 'FR3-IMGT': 'NYAQKLQGRVTMTTDTSTSTAYMELRSLRSDDTA', 'V-REGION': 'QVQLVQSGAEVKKPGASVKVSCKASGYTFTSYGISWVRQAPGQGLEWMGWISAYNGNTNYAQKLQGRVTMTTDTSTSTAYMELRSLRSDDTA'}

        Get amino acid sequence information about the human J gene IGLJ1*01.

        >>> tt.ig.get_aa_sequence(gene="IGLJ1*01", species="homosapiens")
        {'J-REGION': 'YVFGTGTKVTVL'}
    """
    # Validate ``symbol`` first (merging in its ``gene`` alias), then
    # ``species``, so argument errors surface in that order.
    symbol_param = Parameter(symbol, "symbol").resolve_with_alias(gene, "gene")
    symbol = symbol_param.throw_error_if_not_of_type(str).value

    species_param = Parameter(species, "species").set_default("homosapiens")
    species = species_param.throw_error_if_not_of_type(str).value

    # Normalise the species string with the shared helper before using it as
    # a lookup key.
    species = _utils.clean_and_lowercase(species)

    if species not in SUPPORTED_SPECIES_AND_THEIR_AA_SEQUENCES:
        raise ValueError(f"Unsupported species: {species}. No data available.")

    sequences_by_symbol = SUPPORTED_SPECIES_AND_THEIR_AA_SEQUENCES[species]

    if symbol not in sequences_by_symbol:
        raise ValueError(f"No data found for IG gene {symbol} for species {species}.")

    return sequences_by_symbol[symbol]