Source code for dasi.models.alignment

"""Alignments.

These classes represent abstract alignments between existing and
potential molecules that could be produced.
"""
import functools
import operator
from collections.abc import Sized
from typing import Dict
from typing import Generator
from typing import List
from typing import Union

from .molecule import MoleculeType
from dasi.exceptions import AlignmentException
from dasi.utils import argsorted
from dasi.utils import Region


class RepresentsMolecule:
    """Mixin for molecular sized alignments or alignment groups."""

    def __init__(self, query_region: Region, atype: str):
        self.query_region = query_region
        self.type = atype
        if atype not in MoleculeType.types:
            raise ValueError("atype '{}' not in MoleculeTypes".format(atype))

    def size_ok(self):
        """Determine if the size of this molecule is 'acceptable' from the
        bounded molecule type.

        :return: whether this passes the size requirement of the molecule type.
        """
        size = len(self.query_region)
        mol_type = MoleculeType.types[self.type]
        if mol_type.min_size is not None and size < mol_type.min_size:
            return False
        if mol_type.max_size is not None and size > mol_type.max_size:
            return False
        return True


[docs]class Alignment(RepresentsMolecule, Sized): """A pair of Regions that 'aligns' two regions of DNA sequences. All regions must always be the same length. A subregion of both regions may be taken. """ __slots__ = [ "query_region", "subject_region", "type", "query_key", "subject_key", "uid", ]
[docs] def __init__( self, query_region: Region, subject_region: Region, atype: str, query_key: str, subject_key: str, meta: dict = None, ): """Makes an alignment between two regions of sequences. Validates the regions are the same length. :param query_region: Query region this alignment aligns to :param subject_region: Subject region this alignment aligns to. :param atype: Type of alignment :param query_key: The record identifier for the query :param subject_key: The record identifier for the subject """ super().__init__(query_region, atype) assert query_region.direction == 1 self.subject_region = subject_region self.validate() self.query_key = query_key self.subject_key = subject_key if meta is None: meta = {} self.meta = meta
def validate(self): if not len(self.query_region) == len(self.subject_region): raise AlignmentException( "Regions must have the same size: {} vs {}. {} vs {}".format( len(self.query_region), len(self.subject_region), self.query_region, self.subject_region, ) ) def is_perfect_subject(self): return len(self.subject_region) == self.subject_region.context_length
[docs] def sub_region(self, qstart: int, qend: int, atype=None) -> "Alignment": """Returns a copy of the alignment between the inclusive start and end relative to the query region. :param qstart: start of the query sub region :param qend: end of the query sub region :param atype: optional type of alignment to return :return: """ query_copy = self.query_region.sub(qstart, qend) i = self.query_region.i(qstart) if i < 0: i = self.query_region.i(qstart + self.query_region.context_length) if i == len(self.subject_region): i = 0 if self.subject_region.direction == -1: b = len(self.subject_region) - i a = b - len(query_copy) if a == b == len(self.subject_region): a = b = 0 subject_copy = self.subject_region[a:b] else: subject_copy = self.subject_region[i : i + len(query_copy)] if atype is None: atype = self.type self.validate() return self.__class__( query_region=query_copy, subject_region=subject_copy, atype=atype, query_key=self.query_key, subject_key=self.subject_key, )
[docs] def copy(self, atype=None) -> "Alignment": """Do shallow copy of this alignment. Query and subject regions between this and the copied alignment will be identical. :param atype: new alignment type :return: """ if atype is None: atype = self.type return self.__class__( self.query_region, self.subject_region, atype, self.query_key, self.subject_key, )
def __len__(self) -> int: return len(self.query_region) def __str__(self) -> str: return "<{} {} {} {}>".format( self.__class__.__name__, self.type, self.query_region, self.subject_region ) @staticmethod def _rhash(region: Region): return (region.a, region.b, region.c, region.context_length, region.cyclic) def eq_hash(self): return ( (self.query_key, self.subject_key, self.type) + self._rhash(self.query_region) + self._rhash(self.subject_region) ) def __eq__(self, other: "Alignment"): return self.eq_hash() == other.eq_hash() def __repr__(self) -> str: return str(self)
[docs]class AlignmentGroupBase(RepresentsMolecule): """A representative Alignment representing a group of alignments.""" __slots__ = ["query_region", "_alignments", "name", "type", "meta"]
[docs] def __init__( self, alignments: List[Alignment], group_type: str, name: str = None, query_region: Region = None, meta: dict = None, ): """ :param alignments: :param group_type: :param name: :param query_region: :param meta: """ super().__init__(query_region, group_type) self._alignments = tuple(alignments) self.name = name if meta is None: meta = {} self.meta = meta
@property def query_key(self) -> str: """Return the query key associated with the query region.""" return self.alignments[0].query_key @property def subject_regions(self) -> List[Region]: """Return the list of subject regions in this alignment group.""" return [a.subject_region for a in self.alignments] @property def subject_keys(self) -> List[str]: """Return the list of subject keys in this alignment group.""" return [a.subject_key for a in self.alignments] @property def alignments(self): return self._alignments
[docs] def sub_region(self, qstart: int, qend: int, atype: str) -> "AlignmentGroupBase": """Produce a new alignment group with sub-regions of the query region and subject regions at the specified new indicies.""" alignments_copy = [] for a in self.alignments: alignments_copy.append(a.sub_region(qstart, qend)) for a in alignments_copy: a.type = atype return self.__class__( alignments=alignments_copy, group_type=atype, name="subregion" )
def __repr__(self) -> str: return "<{} {}>".format(self.__class__.__name__, self.query_region)
[docs]class AlignmentGroup(AlignmentGroupBase): """A representative Alignment representing a group of alignments sharing the same starting and ending position for a query sequence.""" __slots__ = list(AlignmentGroupBase.__slots__)
[docs] def __init__( self, alignments: List[Alignment], group_type: str, name: str = None, meta: dict = None, ): super().__init__( alignments=alignments, group_type=group_type, name=name, query_region=alignments[0].query_region, meta=meta, )
[docs] def reindex_alignments(self, indices: List[int]): """Reindex the alignments list by index. :param indices: list of indices :return: None """ if len(indices) != len(self.alignments): raise ValueError( "Cannot reindex. Length of indices ({}) does not match length of" " alignments ({})".format(len(indices), len(self._groupings)) ) self._alignments = tuple(self._alignments[i] for i in indices)
[docs] def prioritize_alignments(self, indices: List[int]): """Prioritize alignments by pushing groupings at the given indices into the front of the alignments list. :param indices: list of indices to prioritize :return: None """ other_indices = [] for i in range(len(self.alignments)): if i not in indices: other_indices.append(i) new_indices = indices + other_indices self.reindex_alignments(new_indices)
# class RepresentsPCR(AlignmentGroupBase): # # @abstractmethod # def get_templates(self): # pass
[docs]class PCRProductAlignmentGroup(AlignmentGroupBase): """Represents a PCR product alignment from a template alignment and forward and reverse alignments. Represents several situations: :: Situations the PCRProductAlignmentGroup represents: Rev primer with overhang <-------- ------------------ ----> Primers with overhangs <-------- ------------------ --------> Primers 'within' the template <----- ------------------ --------> And so on... """ __slots__ = list(set(AlignmentGroupBase.__slots__ + ["template", "fwd", "rev"]))
[docs] def __init__( self, fwd: Union[None, Alignment], template: Alignment, rev: Union[None, Alignment], query_region: Region, group_type: str, meta: dict = None, ): """Initialize a new PCRProductAlignmentGroup. Represents a PCR product. Query region end points are determined from the first non-None alignment and the last non-None alignment. Produces *one new alignment*, the template alignment, which is the intersection of the provided template alignment and the query_region, which represents the exact region for which PCR primers ought to align in a PCR reaction. :param fwd: the forward primer alignment. Can be 'inside' the template or have an overhang. :param template: template alignment :param rev: the reverse primer alignment. Can be 'within' the template or have an overhang. :param group_type: group type name :param meta: extra meta data """ if fwd is None and rev is None: raise AlignmentException("Must provide either a fwd and/or rev alignments") alignments = [x for x in [fwd, template, rev] if x is not None] a = alignments[0].query_region.a b = alignments[-1].query_region.b query_region = query_region.new(a, b) self.template = template self.fwd = fwd self.rev = rev super().__init__( alignments=alignments, group_type=group_type, query_region=query_region, meta=meta, )
@property def subject_keys(self): raise AlignmentException( "Use subject keys directly, as in `self.fwd.subject_key`" )
# TODO: MultiPCRProductAlignmentGroup is a seriously convoluted class # Being such an important class, this should be very easy to understand. # `alignments` property should never be accessed directly, as # the concept of 'template' is different here (see `get_template`) #
[docs]class MultiPCRProductAlignmentGroup(AlignmentGroupBase): """A PCR Product Alignment with redundant forward primer, reverse primer, and template alignments. Essentially, this represents region of a designed sequence *that could* be generated from a number of PCR reactions. Each PCR reaction is tracked in the `groupings`, which is a list of dictionaries with keys 'fwd', 'rev', 'template' and valued by Alignments. Now, there are alot of ways to produce PCR products. """ __slots__ = list(set(AlignmentGroupBase.__slots__ + ["_groupings"])) EXPECTED_KEYS = "fwd", "rev", "template" TEMPLATE_ACCESSOR = "adjusted_template"
[docs] def __init__( self, groupings: List[Dict[str, Alignment]], query_region: Region, group_type: str, ): """Initializes a new MultiPCRProductAlignmentGroup. This object represents a region of DNA that can be produced from a number of different forward, reverse, and template DNAs, all producing the same sequence. :param groupings: dictionary of groupings with the "fwd", "rev" and "template" keys. :param query_region: query region :param group_type: group type """ for g in groupings: for key in self.EXPECTED_KEYS: if key not in g: raise ValueError("Grouping is missing key '{}'".format(key)) self._groupings = groupings self._templates = [None] * len(self._groupings) alignments = self._get_alignments() super().__init__( alignments=alignments, query_region=query_region, group_type=group_type )
# @property # def groupings(self): # return self._groupings def _get_alignments(self): accumulated = {} for key in self.EXPECTED_KEYS: accumulated.setdefault(key, list()) for g in self._groupings: if g[key]: accumulated[key].append(g[key]) alignments = [] for key in self.EXPECTED_KEYS: alignments += accumulated[key] return alignments
[docs] def reindex_groupings(self, indices: List[int]): """Reindex the groupings list by index. :param indices: list of indices :return: None """ if len(indices) != len(self._groupings): raise ValueError( "Cannot reindex. Length of indices ({}) does not match length of " "groups ({})".format(len(indices), len(self._groupings)) ) self._groupings = tuple(self._groupings[i] for i in indices) self._alignments = tuple(self._get_alignments())
[docs] def prioritize_groupings(self, indices: List[int]): """Prioritize groupings by pushing groupings at the given indices into the front of the grouping list. :param indices: list of indices to prioritize :return: None """ other_indices = [] for i in range(len(self._groupings)): if i not in indices: other_indices.append(i) new_indices = indices + other_indices self.reindex_groupings(new_indices)
# TODO: WHAT IS THIS METHOD???
[docs] def get_template(self, index: int = 0): """Here we take the intersection of the template.query_region and query region. WHY??? Notes this is **not** the 'template' key of the groupings. .. note:: The result of this alignment is used in the primer design. :param index: :return: """ group = self._groupings[index] if self.TEMPLATE_ACCESSOR not in group: template = self._groupings[index]["template"] intersection = template.query_region.intersection(self.query_region) group[self.TEMPLATE_ACCESSOR] = template.sub_region( intersection.a, intersection.b ) return group[self.TEMPLATE_ACCESSOR]
[docs] def iter_templates(self) -> Generator[Alignment, None, None]: """Generator of templates from `get_template`""" for i in range(len(self._groupings)): yield self.get_template(i)
[docs] def get_fwd(self, index: int = 0) -> Alignment: """Get the forward alignment at the specified index. :param index: index of the grouping to access :return: alignment """ group = self._groupings[index] return group["fwd"]
[docs] def get_rev(self, index: int = 0): """Get the reverse alignment at the specified index. :param index: index of the grouping to access :return: alignment """ group = self._groupings[index] return group["rev"]