Source code for jdna.sequence

"""Represent linear or circularized nucleotides."""
import itertools
import re
from collections import defaultdict
from copy import copy
from enum import IntFlag
from typing import Any
from typing import Callable
from typing import Dict
from typing import List
from typing import Sequence as TypingSequence
from typing import Tuple
from typing import Union

import primer3
from Bio import Restriction

from jdna.align import AlignInterface
from jdna.alphabet import AmbiguousDNA
from jdna.alphabet import UnambiguousDNA
from jdna.format import format_sequence
from jdna.io import IOInterface
from jdna.linked_list import DoubleLinkedList
from jdna.linked_list import LinkedListMatch
from jdna.linked_list import Node
from jdna.utils import random_color
from jdna.viewer import SequenceViewer
from jdna.viewer import StringColumn
from jdna.viewer import ViewerAnnotationFlag

# from jdna.align import Align


class SequenceFlags(IntFlag):
    """Integer flags for the direction and strand of a sequence.

    ``TOP`` carries the same value as ``FORWARD`` and ``BOTTOM`` the same
    value as ``REVERSE``.
    """

    FORWARD = 1
    REVERSE = -1
    # The strand constants reuse the direction values defined above.
    TOP = FORWARD
    BOTTOM = REVERSE


class Feature:
    """An annotation for a sequence.

    :param name: feature name
    :type name: basestring
    :param type: feature type (default: ``"misc"``)
    :type type: basestring
    :param strand: strand/direction flag (default: ``SequenceFlags.FORWARD``)
    :type strand: int
    :param color: feature color (default: the value returned by ``random_color()``)
    :type color: basestring
    """

    def __init__(self, name, type=None, strand=None, color=None):
        self.name = name
        self.type = "misc" if type is None else type
        self.strand = SequenceFlags.FORWARD if strand is None else strand
        # random_color() is only consulted when no color was supplied.
        self.color = random_color() if color is None else color

    def reverse(self) -> "Feature":
        """Flip the strand of this feature in place.

        :return: this feature
        :rtype: Feature
        """
        self.strand = -1 * self.strand
        return self

    def __str__(self) -> str:
        # The closing '>' balances the opening '<' of the representation.
        return "<Feature name='{name}' type='{tp}' color='{color}'>".format(
            name=self.name, tp=self.type, color=self.color
        )

    def __repr__(self) -> str:
        return str(self)

    def __copy__(self) -> "Feature":
        """Return a new feature with the same name, type, strand and color."""
        return self.__class__(self.name, self.type, self.strand, self.color)

    def is_multipart(self) -> bool:
        """Return True if the feature has more than one segment.

        NOTE(review): nothing in this class defines ``segments``, so calling
        this raises ``AttributeError`` unless a caller or subclass provides
        it -- confirm the intended source of segments.
        """
        return len(self.segments) > 1

    def _bind(self, nodes):
        """Do nothing; features do not track the nodes they annotate."""
        pass


class BindPos(LinkedListMatch):
    """A position at which a query sequence binds to a template sequence."""

    def __init__(
        self,
        template_bounds: Tuple["Nucleotide", "Nucleotide"],
        query_bounds: Tuple["Nucleotide", "Nucleotide"],
        template: "Sequence",
        query: "Sequence",
        direction: int,
        strand=SequenceFlags.TOP,
    ):
        """Makes a sequence binding position.

        :param template_bounds: start and end nodes of the match on the template
        :type template_bounds: tuple
        :param query_bounds: start and end nodes of the match on the query
        :type query_bounds: tuple
        :param template: the template
        :type template: DoubleLinkedList
        :param query: the query
        :type query: DoubleLinkedList
        :param direction: If SequenceFlags.FORWARD, the binding position indicates binding forward, to the bottom strand
                            of a dsDNA sequence.
        :type direction: int
        :param strand: If SequenceFlags.BOTTOM, then the query is assumed to be the reverse_complement of the original
                        query
        :type strand: int
        """
        super().__init__(template_bounds, query_bounds, template, query)
        self.direction = direction
        self.strand = strand

        reversed_binding = self.direction == SequenceFlags.REVERSE

        # The annealing region is copied out of the query; for a reverse
        # binding the query bounds are passed in swapped order.
        if reversed_binding:
            self.anneal = query.copy_slice(*self.query_bounds[::-1])
        else:
            self.anneal = query.copy_slice(*self.query_bounds)

        # The overhangs are the parts of the query on either side of the
        # annealing region; which bound delimits which side depends on direction.
        if reversed_binding:
            self.five_prime_overhang = query.new_slice(None, self.query_end.prev())
            self.three_prime_overhang = query.new_slice(self.query_start.next(), None)
        else:
            self.five_prime_overhang = query.new_slice(None, self.query_start.prev())
            self.three_prime_overhang = query.new_slice(self.query_end.next(), None)

    @classmethod
    def from_match(
        cls, linked_list_match, template, query, direction, strand=SequenceFlags.TOP
    ):
        """Build a binding position from an existing match.

        :param linked_list_match: the linked list match supplying the template and query bounds
        :type linked_list_match: LinkedListMatch
        :param template: the template
        :param query: the query
        :param direction: binding direction
        :param strand: binding strand
        :return: the new binding position
        :rtype: BindPos
        """
        bounds = (linked_list_match.template_bounds, linked_list_match.query_bounds)
        return cls(*bounds, template, query, direction, strand=strand)

    @property
    def template_anneal(self):
        """Slice of the template between ``start`` and ``end`` (same for either strand)."""
        return Sequence.new_slice(self.start, self.end)

    @property
    def query_anneal(self):
        """Slice of the query that anneals; reverse complemented for non-forward bindings."""
        if self.direction == SequenceFlags.FORWARD:
            return Sequence.new_slice(self.query_start, self.query_end)
        flipped = Sequence.new_slice(self.query_end, self.query_start)
        return flipped.reverse_complement()

    def __repr__(self):
        template = "<{cls} span={span} direction='{direction}' strand='{strand}' 5'='{five}' anneal='{anneal}' 3'='{three}'>"
        return template.format(
            cls=self.__class__.__name__,
            span=self.span,
            direction=self.direction,
            strand=self.strand,
            five=self.five_prime_overhang.__repr__(),
            three=self.three_prime_overhang.__repr__(),
            anneal=self.anneal.__repr__(),
        )


class Nucleotide(Node):
    """Represents a biological nucleotide.

    Serves as a :class:`Node` in the :class:`Sequence` object. Every
    nucleotide carries a set of :class:`Feature` annotations.
    """

    __slots__ = ["data", "__next", "__prev", "_features", "alphabet"]

    def __init__(self, base, alphabet=AmbiguousDNA):
        """Nucleotide constructor.

        :param base: base as a single character string
        :type base: basestring
        :param alphabet: alphabet used by :meth:`equivalent` (default: AmbiguousDNA)
        """
        super().__init__(base)
        self._features = set()
        self.alphabet = alphabet

    @classmethod
    def random(cls):
        """Generate a nucleotide from ``UnambiguousDNA.random()``."""
        return cls(UnambiguousDNA.random())

    @property
    def base(self):
        """The base stored in this node (alias of ``data``)."""
        return self.data

    def equivalent(self, other) -> bool:
        """Compare this base with ``other``'s base using this node's alphabet."""
        return self.alphabet.compare(self.base, other.base)

    def complementary(self, other) -> bool:
        """Return True if this base equals the ``AmbiguousDNA`` lookup of ``other``'s base, ignoring case."""
        return self.base.upper() == AmbiguousDNA[other.base].upper()

    def to_complement(self):
        """Replace this node's base, in place, with its ``AmbiguousDNA`` lookup."""
        self.data = AmbiguousDNA[self.data]

    def set_next(self, nucleotide):
        """Link ``nucleotide`` after this node, splitting and fusing features at the junction."""
        self.cut_next()
        super().set_next(nucleotide)
        Nucleotide.fuse_features(self, nucleotide)

    def set_prev(self, nucleotide):
        """Link ``nucleotide`` before this node, splitting and fusing features at the junction."""
        self.cut_prev()
        super().set_prev(nucleotide)
        Nucleotide.fuse_features(nucleotide, self)

    def cut_prev(self):
        """Cut the link to the previous node, splitting features that span it."""
        return self._cut(cut_prev=True)

    def cut_next(self):
        """Cut the link to the next node, splitting features that span it."""
        return self._cut(cut_prev=False)

    def _cut(self, cut_prev=True):
        # One call to split_features handles every feature on this node.
        # Calling it from inside a loop over ``self.features`` would mutate
        # the set while it is being iterated.
        if self._features:
            self.split_features(split_prev=cut_prev)
        if cut_prev:
            return super().cut_prev()
        return super().cut_next()

    @property
    def features(self):
        """The set of features attached to this nucleotide."""
        return self._features

    def add_feature(self, feature):
        """Attach ``feature`` to this nucleotide and return it."""
        self.features.add(feature)
        return feature

    def remove_feature(self, feature):
        """Detach ``feature``; raises ``KeyError`` if it is not attached."""
        self.features.remove(feature)

    def feature_fwd(self, feature):
        """Walk forward from this node, stopping at the first node lacking ``feature``."""

        def stop(x):
            return feature not in x.features

        return self._propogate(lambda x: x.next(), stop_criteria=stop)

    def feature_rev(self, feature):
        """Walk backward from this node, stopping at the first node lacking ``feature``."""

        def stop(x):
            return feature not in x.features

        return self._propogate(lambda x: x.prev(), stop_criteria=stop)

    def replace_feature(self, old_feature, new_feature):
        """Swap ``old_feature`` for ``new_feature`` on this nucleotide.

        :raises KeyError: if ``old_feature`` is not attached
        """
        # Features live in a set, so replacement is a remove followed by an
        # add; subscript assignment is not supported by sets.
        self.remove_feature(old_feature)
        self.add_feature(new_feature)

    def copy_features_from(self, other):
        """Attach the features of ``other`` to this nucleotide.

        A feature from ``other`` is skipped when this nucleotide already
        carries a feature with the same name, so existing annotations win.
        """
        names = {f.name for f in self.features}
        for f in tuple(other.features):
            if f in self.features or f.name in names:
                continue
            self.add_feature(f)
            names.add(f.name)
        self._remove_overlapping_features()

    def get_feature_span(self, feature):
        """Return the first and last nucleotide of the contiguous run carrying ``feature``.

        The set-based feature storage keeps no per-feature position, so the
        boundary nodes themselves are returned.

        :return: (start node, end node)
        :rtype: tuple
        :raises KeyError: if ``feature`` is not attached to this nucleotide
        """
        if feature not in self.features:
            raise KeyError(feature)
        # list() accepts either a list or a lazy iterable from the walkers.
        start = list(self.feature_rev(feature))[-1]
        end = list(self.feature_fwd(feature))[-1]
        return (start, end)

    def _remove_overlapping_features(self):
        # type: () -> None
        """Keep a single feature per name on this nucleotide.

        Sets are unordered, so which of several same-named features survives
        is unspecified.
        """
        seen_names = set()
        for f in tuple(self.features):
            if f.name in seen_names:
                self.remove_feature(f)
            else:
                seen_names.add(f.name)

    @staticmethod
    def _default_fuse_condition(f1, f2):
        return f1.name == f2.name

    @classmethod
    def fuse_features(cls, n1, n2, fuse_condition=None):
        """Merge matching features across the junction between ``n1`` and ``n2``.

        For every pair of distinct features (one on each node) accepted by
        ``fuse_condition`` (default: equal names), the feature on the ``n2``
        side is replaced by the ``n1`` feature on every node reached by
        ``n2.feature_fwd``.

        :raises Exception: if ``n1`` and ``n2`` are not linked consecutively
        """
        if n1 is None or n2 is None:
            return
        if fuse_condition is None:
            fuse_condition = cls._default_fuse_condition

        if not (n1.next() is n2 and n2.prev() is n1):
            raise Exception("Cannot fuse non-consecutive features")

        # Iterate over snapshots because the feature sets are edited below.
        for f1 in set(n1.features):
            for f2 in set(n2.features):
                if f1 is not f2 and fuse_condition(f1, f2):
                    for n in n2.feature_fwd(f2):
                        n.add_feature(f1)
                        n.remove_feature(f2)

    def split_features(self, split_prev=True):
        """Split every feature spanning the junction next to this node.

        With ``split_prev`` the junction lies between the previous node and
        this one, otherwise between this node and the next. Each spanning
        feature is replaced by one copy on each side of the junction.
        """
        if split_prev:
            x1, x2 = self.prev(), self
        else:
            x1, x2 = self, self.next()
        # If at the end, no splitting is necessary
        if x1 is None or x2 is None:
            return
        # Snapshot: replace_feature edits x1's feature set inside the loop.
        for f in tuple(x1.features):
            if f not in x2.features:
                continue
            # Materialise both fragments so they can be tested for
            # membership and then iterated.
            frag1 = list(x1.feature_rev(f))
            frag2 = list(x2.feature_fwd(f))

            # A fragment that reaches the other side of the junction is
            # treated as a cyclic feature and left untouched.
            if x2 in frag1 or x1 in frag2:
                continue

            f1 = copy(f)
            f2 = copy(f)
            for n in frag1:
                n.replace_feature(f, f1)
            for n in frag2:
                n.replace_feature(f, f2)

    def _clear_features(self):
        self._features = set()

    def copy(self):
        """Copy this node; the copy gets its own set holding the same features."""
        copied = super().copy()
        copied._features = set(self.features)
        return copied


[docs]class Sequence(DoubleLinkedList):
    """Represents a biological sequence as a double linked list.

    Can be annotated with features.
    """

[docs]    class DEFAULTS:
        """Sequence defaults."""

        # Default ``min_bases`` of the anneal methods.
        MIN_ANNEAL_BASES = 13
        # Foreground colors that ``view`` hands to the viewer.
        FOREGROUND_COLORS = ["blue", "red"]
        BACKGROUND_COLORS = None
        # Default alphabet of a new sequence.
        ALPHABET = AmbiguousDNA

    # Direction and strand flags, re-exported on the class.
    FORWARD = SequenceFlags.FORWARD
    REVERSE = SequenceFlags.REVERSE
    TOP = SequenceFlags.TOP
    BOTTOM = SequenceFlags.BOTTOM
    # Node type instantiated by ``new_node``.
    NODE_CLASS = Nucleotide
    # Shared counter from which ``__init__`` and ``__copy__`` draw ``_global_id``.
    counter = itertools.count()

    def __init__(
        self,
        sequence: TypingSequence[Any] = None,
        first: Nucleotide = None,
        name: str = None,
        description: str = "",
        metadata: dict = None,
        cyclic: bool = False,
        alphabet=DEFAULTS.ALPHABET,
    ):
        """Create a sequence.

        :param sequence: sequence string
        :type sequence: basestring
        :param first: optional first Nucleotide to use as the 'head' to this Sequence
        :type first: Nucleotide
        :param name: optional name of the sequence (stored as "" when omitted)
        :type name: basestring
        :param description: optional description of the sequence
        :type description: basestring
        :param metadata: additional sequence metadata (a new dict when omitted)
        :type metadata: dict
        :param cyclic: whether to make the sequence circular
        :type cyclic: bool
        :param alphabet: the base pair alphabet of this sequence which used for complementary and comparisons
                        (default: AmbiguousDNA)
        :type alphabet: jdna.alphabet.Alphabet
        """
        # ``new_node`` reads ``self.alphabet``, so it is assigned before the
        # base constructor runs (presumably the base builds nodes via new_node).
        self.alphabet = alphabet
        super().__init__(data=sequence, first=first, cyclic=cyclic)

        self.name = "" if name is None else name
        self.description = description
        self.metadata = {} if metadata is None else metadata
        self._global_id = next(Sequence.counter)

        self._io = self.IO.instance(self)
        self._align = self.Align.instance(self)

        if cyclic:
            self.cyclic = cyclic

    def new_node(self, data):
        """Create a ``NODE_CLASS`` node holding ``data`` with this sequence's alphabet."""
        node_class = self.NODE_CLASS
        return node_class(data, alphabet=self.alphabet)

    @property
    def io(self):
        """Return the object that ``__init__`` created with ``self.IO.instance(self)``."""
        return self._io

    @property
    def align(self):
        """Return the object that ``__init__`` created with ``self.Align.instance(self)``."""
        return self._align

    @property
    def global_id(self):
        """Return the id drawn from the class-wide ``counter`` for this sequence."""
        return self._global_id

[docs]    @classmethod
    def random(cls, length):
        """Generate a random sequence."""
        seq = ""
        for i in range(length):
            seq += UnambiguousDNA.random().upper()
        if seq == "":
            return cls.empty()
        return cls(sequence=seq)

    @property
    def features_list(self):
        """Return the distinct features found on the nucleotides of this sequence.

        :return: tuple of the distinct features (order is unspecified)
        :rtype: tuple
        """
        per_node = [nucleotide.features for nucleotide in self]
        return tuple(set().union(*per_node))

    @property
    def features(self, with_nodes=False):
        """Return the positions of every feature in this sequence.

        Each feature maps to a list of ``[start, end]`` pairs (inclusive
        indices), one pair per contiguous run of nucleotides carrying the
        feature. On a cyclic sequence, a run ending at the last index is merged
        with a run starting at index 0.

        Attribute access always uses ``with_nodes=False``; the flag only takes
        effect when the underlying function is called directly.

        :param with_nodes: if True, will return a tuple composed of a feature to
                            position dictionary and a feature to
                            start and end node. If False, will just return a feature to
                            position dictionary
        :type with_nodes: bool
        :return: feature positions dictionary OR tuple of feature positions dictionary
                            and feature node dictionary
        :rtype: tuple
        """
        spans = defaultdict(list)
        span_nodes = defaultdict(list)
        total = len(self)

        for position, nucleotide in enumerate(self):
            for feat in nucleotide.features:
                runs = spans[feat]
                if runs and runs[-1][-1] + 1 == position:
                    # Extends the run that ended on the previous index.
                    runs[-1][-1] = position
                    span_nodes[feat][-1][-1] = nucleotide
                else:
                    runs.append([position, position])
                    span_nodes[feat].append([nucleotide, nucleotide])

        # capture features that span the origin
        if self.cyclic:
            for feat in spans:
                runs = spans[feat]
                bounds = span_nodes[feat]
                if len(bounds) <= 1:
                    continue
                if runs[0][0] != 0 or runs[-1][-1] != total - 1:
                    continue
                # The last run wraps into the first: the merged run starts
                # where the last run started.
                bounds[0][0] = bounds[-1][0]
                runs[0][0] = runs[-1][0]
                bounds.pop()
                runs.pop()

        if with_nodes:
            return spans, span_nodes
        return spans

    def feature_nodes(self):
        """Return the feature to ``[start node, end node]`` dictionary.

        ``features`` is a property, so ``self.features`` is already the
        positions dictionary and cannot be called with ``with_nodes=True``.
        The property's underlying function is called directly instead.

        :return: dictionary mapping each feature to a list of [start, end] node pairs
        :rtype: dict
        """
        compute_features = type(self).features.fget
        _positions, nodes = compute_features(self, with_nodes=True)
        return nodes

[docs]    def add_feature(self, start, end, feature):
        """Add a feature to the start and end positions (inclusive)

        :param start: start
        :type start: int
        :param end: end (inclusive)
        :type end: int
        :param feature: the feature to add
        :type feature: Feature
        :return: the added feature
        :rtype: Feature
        """
        feature_nts = list(self.inclusive_range(start, end))
        if end and feature_nts[-1] is not self[end]:
            if not self.cyclic:
                raise IndexError(
                    "Cannot add feature to {} to linear dna with bounds {}".format(
                        (start, end), (0, len(self))
                    )
                )
            else:
                raise IndexError("Cannot add feature to {}".format((start, end)))
        for n in self.inclusive_range(start, end):
            n.add_feature(feature)
        return feature

[docs]    def add_multipart_feature(self, positions, feature):
        """Add a multi-part feature (i.e. a disjointed feature)

        :param positions: list of start and ends as tuples ([(1,100), (110,200)]
        :type positions: list
        :param feature: the feature to add
        :type feature: Feature
        :return: the added feature
        :rtype: Feature
        """
        for i, j in positions:
            self.add_feature(i, j, feature)
        return feature

    # def print_features(self):
    #     raise NotImplementedError()

[docs]    def find_feature_by_name(self, name):
        """Find features by name.

        :param name: feature name
        :type name: basestring
        :return: list of features
        :rtype: list
        """
        found = []
        for feature in self.features:
            if feature.name == name:
                found.append(feature)
        return found

[docs]    def annotate(self, start, end, name, feature_type=None, color=None, strand=None):
        """Annotate a regions.

        :param start: start
        :type start: int
        :param end: end (inclusive)
        :type end: end
        :param name: feature name
        :type name: basestring
        :param feature_type: feature type (default=misc)
        :type feature_type: basestring
        :param color: optional feature color
        :type color: basestring
        :return: new feature
        :rtype: Feature
        """
        return self.add_feature(
            start, end, Feature(name, feature_type, strand=strand, color=color)
        )

    def reverse(self):
        """Reverse the sequence in place.

        Every node is swapped, every distinct feature found on the nodes is
        reversed once, and the former last node becomes the head.

        :return: this sequence
        """
        if self.is_empty():
            return self
        nodes = self.nodes
        touched_features = set()
        for node in nodes:
            node.swap()
            touched_features.update(node.features)
        for feature in touched_features:
            feature.reverse()
        self.head = nodes[-1]
        return self

[docs]    def complement(self):
        """Complement the sequence in place."""
        if self.is_empty():
            return self
        curr = self.head
        visited = set()
        while curr and curr not in visited:
            visited.add(curr)
            curr.to_complement()
            curr = next(curr)
        return self

[docs]    def c(self):
        """Complement the sequence in place.

        Shorthand for :meth:`complement`; returns whatever it returns.
        """
        return self.complement()

[docs]    def reverse_complement(self):
        """Reverse complement the sequence in place."""
        self.reverse()
        self.complement()
        return self

[docs]    def rc(self):
        """Reverse complement the sequence in place.

        Shorthand for :meth:`reverse_complement`; returns whatever it returns.
        """
        return self.reverse_complement()

    def cut(self, i, cut_prev=True):
        """Cut the sequence at ``i`` and return the fragments as sequences.

        The fragments produced by the base class ``cut`` are each wrapped in a
        new :class:`Sequence` built from the fragment's head node.

        :param i: position handed to the base class ``cut``
        :param cut_prev: handed to the base class ``cut``
        :return: list of :class:`Sequence` fragments
        :rtype: list
        """
        pieces = super().cut(i, cut_prev)
        return [Sequence(first=piece.head) for piece in pieces]

    def clear_features(self):
        """Remove all features from every nucleotide of this sequence."""
        for nucleotide in self:
            nucleotide._clear_features()

    def __copy__(self):
        """Copy the sequence together with copies of its features.

        The base class copy receives this sequence's name and a fresh global
        id. Its features are cleared and then re-created from copies of this
        sequence's features at the same positions.
        """
        duplicate = super().__copy__()
        duplicate.name = self.name
        duplicate._global_id = next(self.counter)
        duplicate.clear_features()
        for feature, positions in self.features.items():
            duplicate.add_multipart_feature(positions, copy(feature))
        return duplicate

    # def anneal_to_bottom_strand(self, other, min_bases=10):
    #     for match in self.find_iter(other,
    #                                 min_query_length=min_bases,
    #                                 direction=self.Direction.REVERSE, ):
    #         yield match
    #
    # def anneal_to_top_strand(self, other, min_bases=10):
    #     for match in self.find_iter(other,
    #                                 min_query_length=min_bases,
    #                                 protocol=lambda x, y: x.complementary(y)):
    #         yield match

[docs]    def anneal_forward(self, other, min_bases=DEFAULTS.MIN_ANNEAL_BASES, depth=None):
        """Anneal a sequence in the forward direction."""
        for match in self.find_iter(
            other,
            min_query_length=min_bases,
            direction=self.Direction.REVERSE,
            depth=depth,
        ):
            yield BindPos.from_match(
                match, self, other, direction=self.Direction.FORWARD
            )

[docs]    def anneal_reverse(self, other, min_bases=DEFAULTS.MIN_ANNEAL_BASES, depth=None):
        """Anneal a sequence in the reverse direction."""
        for match in self.find_iter(
            other,
            min_query_length=min_bases,
            direction=(1, -1),
            protocol=lambda x, y: x.complementary(y),
            depth=depth,
        ):
            yield BindPos.from_match(
                match, self, other, direction=self.Direction.REVERSE
            )

[docs]    def anneal(self, ssDNA, min_bases=DEFAULTS.MIN_ANNEAL_BASES, depth=None):
        """Simulate annealing a single stranded piece of DNA to a
        double_stranded template."""
        for match in self.anneal_forward(ssDNA, min_bases=min_bases, depth=depth):
            yield match
        for match in self.anneal_reverse(ssDNA, min_bases=min_bases, depth=depth):
            yield match

[docs]    def dsanneal(self, dsDNA, min_bases=DEFAULTS.MIN_ANNEAL_BASES, depth=None):
        """Simulate annealing a double stranded piece of DNA to a
        double_stranded template."""
        for binding in self.anneal(dsDNA, min_bases=min_bases, depth=depth):
            yield binding
        for binding in self.anneal(
            dsDNA.copy().reverse_complement(), min_bases=min_bases, depth=depth
        ):
            binding.strand = SequenceFlags.BOTTOM
            yield binding

    def format(self, width=75, spacer=""):
        """Format the string form of this sequence with ``format_sequence``.

        :param width: passed to ``format_sequence`` as ``width``
        :param spacer: passed to ``format_sequence`` as ``spacer``
        :return: the result of ``format_sequence``
        """
        return format_sequence(str(self), width=width, spacer=spacer)

    @classmethod
    def _apply_features_to_view(cls, sequence, view):
        """Annotate ``view`` with every feature position of ``sequence``.

        The annotation fill is the viewer flag matching the feature strand, or
        None when the strand is neither forward nor reverse.
        """
        for feature, positions in sequence.features.items():
            # The fill depends only on the feature, not on the position.
            if feature.strand == SequenceFlags.FORWARD:
                fill = ViewerAnnotationFlag.FORWARD
            elif feature.strand == SequenceFlags.REVERSE:
                fill = ViewerAnnotationFlag.REVERSE
            else:
                fill = None
            for span in positions:
                view.annotate(
                    span[0],
                    span[1],
                    label=feature.name,
                    fill=fill,
                    background=feature.color,
                )

    def view_bindings(self, bindings, view=None):
        """Annotate a view of this sequence with the given bindings.

        Each binding is drawn as two rows: the primer (5' overhang, annealing
        region, 3' overhang) and a row of ``|`` characters under the annealing
        region. Reverse bindings are flipped and annotated with ``top=False``.

        :param bindings: iterable of binding positions
        :param view: view to annotate (default: ``self.view(complement=True)``)
        :return: the annotated view
        """
        if view is None:
            view = self.view(complement=True)
        for binding in bindings:
            annealed = binding.anneal
            primer = binding.five_prime_overhang + annealed + binding.three_prime_overhang
            pairing_row = (
                " " * len(binding.five_prime_overhang)
                + "|" * len(annealed)
                + " " * len(binding.three_prime_overhang)
            )
            annotation = StringColumn([str(primer), pairing_row])
            if binding.direction == Sequence.FORWARD:
                view.annotate(binding.span[0], binding.span[1], annotation)
            if binding.direction == Sequence.REVERSE:
                flipped = annotation.flip()[::-1]
                view.annotate(binding.span[0], binding.span[1], flipped, top=False)
        return view

[docs]    def view(
        self,
        indent=10,
        width=85,
        spacer=None,
        complement=False,
        features=True,
        **kwargs
    ):
        """Create a :class:`SequenceViewer` instance from this sequence.
        Printing the view object with annotations and complement will produce
        an output similar to the following:

        .. code::


            > "Unnamed" (550bp)


                                                                        ----------------GFP----------------
                                                                        |<START
                                                                        ----      -----------RFP-----------
            0         CCCAGGACTAGCGACTTTCCGTAACGCGACCTAACACCGGCCGTTCCTTCGAGCCAGGCAAATGTTACGTCACTTCCTTAGATTT
                      GGGTCCTGATCGCTGAAAGGCATTGCGCTGGATTGTGGCCGGCAAGGAAGCTCGGTCCGTTTACAATGCAGTGAAGGAATCTAAA

                      ------GFP------
                      -----------------------------------------RFP-----------------------------------------
            85        TGAACAGCGCCGTACCCCGATATGATATTTAGATATATAGCAGTTACACTTGGGGTTGCTATGGACTTAGATCTGCTGTATGTTT
                      ACTTGTCGCGGCATGGGGCTATACTATAAATCTATATATCGTCAATGTGAACCCCAACGATACCTGAATCTAGACGACATACAAA

                      -----------------------------------------RFP-----------------------------------------
            170       TCTTACCTTCCGCATCAGGGGACAATTCGCCAGTAGAATTCAGTTTGTGCGTGAGAACATAAGATTGAATCCCACGCAGGCACAA
                      AGAATGGAAGGCGTAGTCCCCTGTTAAGCGGTCATCTTAAGTCAAACACGCACTCTTGTATTCTAACTTAGGGTGCGTCCGTGTT

                      ---------------------RFP----------------------
            255       GCAGGGCGGGCAGACTCTATAGGTCCTAAGACCCTGAGACTGCGTCCTCAAGATACAGGTTAACAATCCCCGTATGGAGCCGTTC
                      CGTCCCGCCCGTCTGAGATATCCAGGATTCTGGGACTCTGACGCAGGAGTTCTATGTCCAATTGTTAGGGGCATACCTCGGCAAG

            340       TTAGCATGACCCGACAGGTGGGCTTGGCTCGCGTAAGTTGAGTGTTGCAGATACCTGCTGCTGCGCGGTCTAGGGGGAATCGCCG
                      AATCGTACTGGGCTGTCCACCCGAACCGAGCGCATTCAACTCACAACGTCTATGGACGACGACGCGCCAGATCCCCCTTAGCGGC

            425       ATTTTGACGTAGGATCGGTAATGGGCAGTAAACCCGCAACTATTTTCAGCACCAGATGCAAGTTTCCCTAGAAAGCGTCATGGTT
                      TAAAACTGCATCCTAGCCATTACCCGTCATTTGGGCGTTGATAAAAGTCGTGGTCTACGTTCAAAGGGATCTTTCGCAGTACCAA

            510       TGCAATCTCCTTAGGTCACAGCAAACATAGCAGCCCCTGT
                      ACGTTAGAGGAATCCAGTGTCGTTTGTATCGTCGGGGACA

        :param indent: indent between left column and base pairs view windo
        :type indent: int
        :param width: width of the view window
        :type width: int
        :param spacer: string to intersperse between sequence rows (default is newline)
        :type spacer: basestring
        :param complement: whether to include the complementary strand in the view
        :type complement: bool
        :param features: whether to include annotations/features in the view instance
        :type features: bool
        :return: the viewer object
        :rtype: SequenceViewer
        """
        if indent is None:
            indent = 10

        if width is None:
            width = 85

        seqs = [self]
        colors = self.DEFAULTS.FOREGROUND_COLORS[0]
        if complement:
            seqs.append(self.copy().complement())
            colors = self.DEFAULTS.FOREGROUND_COLORS
        if spacer is None:
            if complement:
                spacer = "\n"
            else:
                spacer = ""
        viewer = SequenceViewer(
            seqs,
            name=self.name,
            description=self.description,
            indent=indent,
            width=width,
            spacer=spacer,
            foreground_colors=colors,
            **kwargs
        )
        viewer.metadata.update(self.metadata)
        if features:
            self._apply_features_to_view(self, viewer)
        return viewer

    def upper(self):
        """Uppercase the data of every node in place.

        :return: this sequence (not a copy)
        """
        for nucleotide in self:
            nucleotide.data = nucleotide.data.upper()
        return self

    def lower(self):
        """Lowercase the data of every node in place.

        :return: this sequence (not a copy)
        """
        for nucleotide in self:
            nucleotide.data = nucleotide.data.lower()
        return self

[docs]    def print(
        self,
        indent=None,
        width=None,
        spacer=None,
        complement=False,
        features=True,
        **kwargs
    ):
        """Create and print a :class:`SequenceViewer` instance from this
        sequence. Printing the view object with annotations and complement will
        produce an output similar to the following:

        .. code::


            > "Unnamed" (550bp)


                                                                        ----------------GFP----------------
                                                                        |<START
                                                                        ----      -----------RFP-----------
            0         CCCAGGACTAGCGACTTTCCGTAACGCGACCTAACACCGGCCGTTCCTTCGAGCCAGGCAAATGTTACGTCACTTCCTTAGATTT
                      GGGTCCTGATCGCTGAAAGGCATTGCGCTGGATTGTGGCCGGCAAGGAAGCTCGGTCCGTTTACAATGCAGTGAAGGAATCTAAA

                      ------GFP------
                      -----------------------------------------RFP-----------------------------------------
            85        TGAACAGCGCCGTACCCCGATATGATATTTAGATATATAGCAGTTACACTTGGGGTTGCTATGGACTTAGATCTGCTGTATGTTT
                      ACTTGTCGCGGCATGGGGCTATACTATAAATCTATATATCGTCAATGTGAACCCCAACGATACCTGAATCTAGACGACATACAAA

                      -----------------------------------------RFP-----------------------------------------
            170       TCTTACCTTCCGCATCAGGGGACAATTCGCCAGTAGAATTCAGTTTGTGCGTGAGAACATAAGATTGAATCCCACGCAGGCACAA
                      AGAATGGAAGGCGTAGTCCCCTGTTAAGCGGTCATCTTAAGTCAAACACGCACTCTTGTATTCTAACTTAGGGTGCGTCCGTGTT

                      ---------------------RFP----------------------
            255       GCAGGGCGGGCAGACTCTATAGGTCCTAAGACCCTGAGACTGCGTCCTCAAGATACAGGTTAACAATCCCCGTATGGAGCCGTTC
                      CGTCCCGCCCGTCTGAGATATCCAGGATTCTGGGACTCTGACGCAGGAGTTCTATGTCCAATTGTTAGGGGCATACCTCGGCAAG

            340       TTAGCATGACCCGACAGGTGGGCTTGGCTCGCGTAAGTTGAGTGTTGCAGATACCTGCTGCTGCGCGGTCTAGGGGGAATCGCCG
                      AATCGTACTGGGCTGTCCACCCGAACCGAGCGCATTCAACTCACAACGTCTATGGACGACGACGCGCCAGATCCCCCTTAGCGGC

            425       ATTTTGACGTAGGATCGGTAATGGGCAGTAAACCCGCAACTATTTTCAGCACCAGATGCAAGTTTCCCTAGAAAGCGTCATGGTT
                      TAAAACTGCATCCTAGCCATTACCCGTCATTTGGGCGTTGATAAAAGTCGTGGTCTACGTTCAAAGGGATCTTTCGCAGTACCAA

            510       TGCAATCTCCTTAGGTCACAGCAAACATAGCAGCCCCTGT
                      ACGTTAGAGGAATCCAGTGTCGTTTGTATCGTCGGGGACA

        :param indent: indent between left column and base pairs view windo
        :type indent: int
        :param width: width of the view window
        :type width: int
        :param spacer: string to intersperse between sequence rows (default is newline)
        :type spacer: basestring
        :param complement: whether to include the complementary strand in the view
        :type complement: bool
        :param include_annotations: whether to include annotations/features in the view instance
        :type include_annotations: bool
        :return: the viewer object
        :rtype: SequenceViewer
        """
        self.view(
            indent=indent,
            width=width,
            spacer=spacer,
            complement=complement,
            features=features,
            **kwargs
        ).print()

[docs]    def tm(self):
        """Calculate the Tm of this sequence using primer3 defaults.

        :return: the tm of the sequence
        :rtype: float
        """
        return primer3.calcTm(str(self).upper())

[docs]    def json(self):
        """Print sequence to a json dictionary."""
        annotations = []
        for feature, positions in self.features.items():
            for start, end in positions:
                annotations.append(
                    {
                        "start": start,
                        "end": end + 1,
                        "name": feature.name,
                        "color": feature.color,
                        "type": feature.type,
                        "strand": feature.strand,
                    }
                )

        return {
            "name": self.name,
            "isCircular": self.cyclic,
            "length": len(self),
            "bases": str(self),
            "annotations": annotations,
        }

[docs]    @classmethod
    def load(cls, data):
        """Load a sequence from a json formatted dictionary."""
        sequence = cls(data["bases"], name=data["name"])
        sequence.cyclic = data["isCircular"]
        sequence.name = data["name"]
        sequence.description = data.get("description", None)
        for a in data["annotations"]:
            sequence.annotate(
                a["start"], a["end"] - 1, a["name"], a["type"], a["color"]
            )
        return sequence

    def _collect_cut_sites(self, enzyme_site, cut1=None, cut2=None):
        """Collect cut positions for every occurrence of a recognition site.

        If ``enzyme_site`` has a ``charac`` attribute, the cut offsets are
        taken from ``charac[0]`` and ``charac[1]`` and the site from
        ``charac[4]``, overriding any ``cut1``/``cut2`` passed in. A plain
        string site is wrapped in a ``Sequence`` before searching.

        :param enzyme_site: object with a ``charac`` attribute, a string, or a
            value accepted directly by ``self.find_iter``
        :param cut1: offset added to the first element of each match span
        :param cut2: offset added (plus one) to the second element of each match span
        :return: flat list holding two positions per match, in match order
        :rtype: list
        """
        site = enzyme_site
        if hasattr(site, "charac"):
            characteristics = site.charac
            cut1 = characteristics[0]
            cut2 = characteristics[1]
            site = characteristics[4]

        if isinstance(site, str):
            site = Sequence(site)

        positions = []
        for hit in self.find_iter(site):
            first = hit.span[0] + cut1
            second = hit.span[1] + cut2 + 1
            positions.extend((first, second))
        return positions

[docs]    def digest(self, enzymes, as_names=False):
        """Supply either a Bio.RestrictionSite or a tuple of (seq, cut1, cut2)

        e.g. ('GTTTAAAC', 4, -4)

        :param enzymes: either a Bio.RestrictionSite or a tuple of (seq, cut1, cut2)
        :type enzymes: list (of tuple|Bio.RestrictionSite)
        :return: list of sequences
        :rtype: list
        """
        cut_sites = []

        if not isinstance(enzymes, list):
            enzymes = [enzymes]
        if as_names:
            enzymes = [getattr(Restriction, name) for name in enzymes]
        for enzyme in enzymes:
            if isinstance(enzyme, tuple):
                cut_sites += self._collect_cut_sites(*enzyme)
            else:
                cut_sites += self._collect_cut_sites(enzyme)
        return self.cut(cut_sites)

    def __repr__(self) -> str:
        """Return a short representation, abbreviating long sequences.

        Sequences whose string form is at most ``max_width`` characters are
        shown in full. Longer ones are shown as their first and last
        ``display`` characters joined by ``"..."``.
        """
        max_width = 30
        replace = "..."
        display = (max_width - len(replace)) // 2
        s = str(self)
        # Abbreviate only when it actually shortens the text: comparing
        # against display * 2 would turn a 27-30 character sequence into a
        # 29 character string with bases missing.
        if len(s) > max_width:
            s = s[:display] + replace + s[-display:]
        return "Sequence('{}')".format(s)


# Attach the IO and alignment helper interfaces to the Sequence class as class
# attributes; each interface is constructed with the Sequence class itself.
Sequence.IO = IOInterface(Sequence)
Sequence.Align = AlignInterface(Sequence)
jdna

Table Of Contents

Source code for jdna.sequence