Source code for tidytcells.aa._standardize
import warnings
from tidytcells._resources import AMINO_ACIDS
from tidytcells._utils import Parameter
[docs]
def standardize(seq: str, on_fail: str = "reject", suppress_warnings: bool = False):
"""
Ensures that a string value looks like a valid amino acid sequence.
:param seq:
String value representing an amino acid sequence.
:type seq:
str
:param on_fail:
Behaviour when standardization fails.
If set to ``"reject"``, returns ``None`` on failure.
If set to ``"keep"``, returns the original input.
Defaults to ``"reject"``.
:type on_fail:
str
:param suppress_warnings:
Disable warnings that are usually emitted when standardisation fails.
Defaults to ``False``.
:type suppress_warnings:
bool
:return:
Capitalised version of ``seq``, if seq is a valid amino acid sequence.
Otherwise follow behaviour set by ``on_fail``.
:rtype:
Union[str, None]
.. topic:: Example usage
Strings that look like amino acid sequences will be accepted, and returned in capitalised form.
>>> tt.aa.standardize("sqllnakyl")
'SQLLNAKYL'
Any strings that contain characters that cannot be recognised as amino acids will be rejected, and the function will return ``None``.
>>> result = tt.aa.standardize("sqll?akyl")
UserWarning: Input sqll?akyl was rejected as it is not a valid amino acid sequence.
>>> print(result)
None
.. topic:: Decision Logic
To provide an easy way to gauge the scope and limitations of standardization, below is a simplified overview of the decision logic employed when attempting to standardize an amino acid sequence.
For more detail, please refer to the `source code <https://github.com/yutanagano/tidytcells>`_.
.. code-block:: none
IF input sequence contains non-amino acid symbols:
set standardization status to failed
ELSE:
set standardization status to successful
IF standardization status is set to successful:
RETURN standardized sequence
ELSE:
IF on_fail is set to "reject":
RETURN None
IF on_fail is set to "keep":
RETURN original sequence
"""
Parameter(seq, "seq").throw_error_if_not_of_type(str)
Parameter(on_fail, "on_fail").throw_error_if_not_one_of("reject", "keep")
Parameter(suppress_warnings, "suppress_warnings").throw_error_if_not_of_type(bool)
original_input = seq
seq = seq.upper()
for char in seq:
if not char in AMINO_ACIDS:
if not suppress_warnings:
warnings.warn(
f"Failed to standardize {original_input}: not a valid amino acid sequence."
)
if on_fail == "reject":
return None
return original_input
return seq
[docs]
def standardise(*args, **kwargs):
"""
Alias for :py:func:`tidytcells.aa.standardize`.
"""
return standardize(*args, **kwargs)