"""Represent linear or circularized nucleotides."""
import itertools
import re
from collections import defaultdict
from copy import copy
from enum import IntFlag
from typing import Any
from typing import Callable
from typing import Dict
from typing import List
from typing import Sequence as TypingSequence
from typing import Tuple
from typing import Union
import primer3
from Bio import Restriction
from jdna.align import AlignInterface
from jdna.alphabet import AmbiguousDNA
from jdna.alphabet import UnambiguousDNA
from jdna.format import format_sequence
from jdna.io import IOInterface
from jdna.linked_list import DoubleLinkedList
from jdna.linked_list import LinkedListMatch
from jdna.linked_list import Node
from jdna.utils import random_color
from jdna.viewer import SequenceViewer
from jdna.viewer import StringColumn
from jdna.viewer import ViewerAnnotationFlag
# from jdna.align import Align
class SequenceFlags(IntFlag):
    """Integer constants describing sequence direction and strand.

    ``TOP`` shares the value of ``FORWARD`` and ``BOTTOM`` shares the value
    of ``REVERSE``, so each pair compares equal.
    """

    # Direction of a sequence / binding event.
    FORWARD = 1
    REVERSE = -1

    # Strand of a double stranded sequence (same values as the directions).
    TOP = 1
    BOTTOM = -1
class Feature:
    """An annotation for a sequence."""

    def __init__(self, name, type=None, strand=None, color=None):
        """Create a feature.

        :param name: feature name
        :type name: str
        :param type: feature type (default: "misc")
        :type type: str
        :param strand: strand of the feature (default: SequenceFlags.FORWARD)
        :type strand: int
        :param color: feature color (default: a color from ``random_color()``)
        :type color: str
        """
        self.name = name
        self.type = "misc" if type is None else type
        self.strand = SequenceFlags.FORWARD if strand is None else strand
        self.color = random_color() if color is None else color

    def reverse(self) -> "Feature":
        """Flip the strand of this feature in place and return the feature."""
        self.strand = -1 * self.strand
        return self

    def __str__(self) -> str:
        # The original format string never closed the angle bracket.
        return "<Feature name='{name}' type='{tp}' color='{color}'>".format(
            name=self.name, tp=self.type, color=self.color
        )

    def __repr__(self) -> str:
        return str(self)

    def __copy__(self) -> "Feature":
        """Return a new feature with the same name, type, strand and color."""
        return self.__class__(self.name, self.type, self.strand, self.color)

    def is_multipart(self) -> bool:
        """Return True if this feature has more than one segment.

        ``segments`` is not assigned anywhere in this class, so a feature
        without that attribute is reported as not multipart instead of
        raising ``AttributeError``.
        """
        return len(getattr(self, "segments", ())) > 1

    def _bind(self, nodes):
        """Do nothing (placeholder hook; node tracking is not implemented)."""
class BindPos(LinkedListMatch):
    """A match between a template and a query, with direction and strand."""

    def __init__(
        self,
        template_bounds: Tuple["Nucleotide", "Nucleotide"],
        query_bounds: Tuple["Nucleotide", "Nucleotide"],
        template: "Sequence",
        query: "Sequence",
        direction: int,
        strand=SequenceFlags.TOP,
    ):
        """Makes a sequence binding position.

        :param template_bounds: (start, end) nucleotides of the match on the
            template
        :type template_bounds: tuple
        :param query_bounds: (start, end) nucleotides of the match on the query
        :type query_bounds: tuple
        :param template: the template
        :type template: Sequence
        :param query: the query
        :type query: Sequence
        :param direction: If SequenceFlags.FORWARD, the binding position
            indicates binding forward, to the bottom strand of a dsDNA
            sequence.
        :type direction: int
        :param strand: If SequenceFlags.BOTTOM, then the query is assumed to
            be the reverse_complement of the original query
        :type strand: int
        """
        super().__init__(template_bounds, query_bounds, template, query)
        self.direction = direction
        self.strand = strand

        if self.direction == SequenceFlags.REVERSE:
            # For a reverse binding the query bounds are used in swapped order.
            self.anneal = query.copy_slice(*self.query_bounds[::-1])
            self.five_prime_overhang = query.new_slice(None, self.query_end.prev())
            self.three_prime_overhang = query.new_slice(self.query_start.next(), None)
        else:
            self.anneal = query.copy_slice(*self.query_bounds)
            self.five_prime_overhang = query.new_slice(None, self.query_start.prev())
            self.three_prime_overhang = query.new_slice(self.query_end.next(), None)

    @classmethod
    def from_match(
        cls, linked_list_match, template, query, direction, strand=SequenceFlags.TOP
    ):
        """Build a binding position from an existing match.

        :param linked_list_match: the match supplying the template and query
            bounds
        :type linked_list_match: LinkedListMatch
        :param template: the template sequence
        :param query: the query sequence
        :param direction: binding direction
        :type direction: int
        :param strand: strand flag (default: SequenceFlags.TOP)
        :type strand: int
        :return: the new binding position
        :rtype: BindPos
        """
        return cls(
            linked_list_match.template_bounds,
            linked_list_match.query_bounds,
            template,
            query,
            direction,
            strand=strand,
        )

    @property
    def template_anneal(self):
        """Slice of the template between ``self.start`` and ``self.end``.

        The original code branched on ``self.strand`` but both branches were
        identical, so the result does not depend on the strand.
        """
        return Sequence.new_slice(self.start, self.end)

    @property
    def query_anneal(self):
        """Slice of the query covered by the match.

        For a non-forward direction the slice is taken from ``query_end`` to
        ``query_start`` and then reverse complemented.
        """
        if self.direction == SequenceFlags.FORWARD:
            return Sequence.new_slice(self.query_start, self.query_end)
        return Sequence.new_slice(self.query_end, self.query_start).reverse_complement()

    def __repr__(self):
        return "<{cls} span={span} direction='{direction}' strand='{strand}' 5'='{five}' anneal='{anneal}' 3'='{three}'>".format(
            cls=self.__class__.__name__,
            span=self.span,
            direction=self.direction,
            strand=self.strand,
            five=repr(self.five_prime_overhang),
            three=repr(self.three_prime_overhang),
            anneal=repr(self.anneal),
        )
class Nucleotide(Node):
    """Represents a biological nucleotide.

    Serves as a :class:`Node` in the :class:`Sequence` object. Each
    nucleotide carries a *set* of :class:`Feature` objects.
    """

    __slots__ = ["data", "__next", "__prev", "_features", "alphabet"]

    def __init__(self, base, alphabet=AmbiguousDNA):
        """Nucleotide constructor.

        :param base: base as a single character string
        :type base: str
        :param alphabet: alphabet used by :meth:`equivalent`
            (default: AmbiguousDNA)
        """
        super().__init__(base)
        self._features = set()
        self.alphabet = alphabet

    @classmethod
    def random(cls):
        """Generate a nucleotide with a random UnambiguousDNA base."""
        return cls(UnambiguousDNA.random())

    @property
    def base(self):
        """The base stored in this node (``self.data``)."""
        return self.data

    def equivalent(self, other) -> bool:
        """Compare this base to ``other``'s base using ``self.alphabet``."""
        return self.alphabet.compare(self.base, other.base)

    def complementary(self, other) -> bool:
        """Return True if this base is the complement of ``other``'s base.

        The comparison is case-insensitive.
        """
        # NOTE(review): always looks up AmbiguousDNA, not self.alphabet.
        return self.base.upper() == AmbiguousDNA[other.base].upper()

    def to_complement(self):
        """Replace this base with its AmbiguousDNA complement in place."""
        self.data = AmbiguousDNA[self.data]

    def set_next(self, nucleotide):
        """Link ``nucleotide`` after this one, then fuse same-named features."""
        self.cut_next()
        super().set_next(nucleotide)
        Nucleotide.fuse_features(self, nucleotide)

    def set_prev(self, nucleotide):
        """Link ``nucleotide`` before this one, then fuse same-named features."""
        self.cut_prev()
        super().set_prev(nucleotide)
        Nucleotide.fuse_features(nucleotide, self)

    def cut_prev(self):
        """Split features across the previous link, then cut that link."""
        return self._cut(cut_prev=True)

    def cut_next(self):
        """Split features across the next link, then cut that link."""
        return self._cut(cut_prev=False)

    def _cut(self, cut_prev=True):
        # One call handles every feature spanning the link. The original
        # called split_features once per feature while iterating the very
        # set that split_features mutates.
        self.split_features(split_prev=cut_prev)
        if cut_prev:
            return super().cut_prev()
        return super().cut_next()

    @property
    def features(self):
        """The set of features attached to this nucleotide."""
        return self._features

    def add_feature(self, feature):
        """Attach ``feature`` to this nucleotide and return it."""
        self.features.add(feature)
        return feature

    def remove_feature(self, feature):
        """Detach ``feature``; raises ``KeyError`` if it is not attached."""
        self.features.remove(feature)

    def feature_fwd(self, feature):
        """Walk forward from this node until a node lacks ``feature``."""

        def stop(x):
            return feature not in x.features

        return self._propogate(lambda x: x.next(), stop_criteria=stop)

    def feature_rev(self, feature):
        """Walk backward from this node until a node lacks ``feature``."""

        def stop(x):
            return feature not in x.features

        return self._propogate(lambda x: x.prev(), stop_criteria=stop)

    def replace_feature(self, old_feature, new_feature):
        """Swap ``old_feature`` for ``new_feature`` on this nucleotide.

        Features are stored in a set, so the original dict-style item
        assignment could never succeed.

        :raises KeyError: if ``old_feature`` is not attached
        """
        self.remove_feature(old_feature)
        self.add_feature(new_feature)

    def copy_features_from(self, other):
        """Add ``other``'s features, keeping one feature per name."""
        for f in other.features:
            if f not in self.features:
                self.add_feature(f)
        self._remove_overlapping_features()

    def get_feature_span(self, feature):
        """Return the (first, last) nucleotides of the contiguous run of
        ``feature`` that contains this nucleotide.

        The feature set stores no positions, so the bounding nodes are
        returned (the original tried to subscript the set and always raised).

        :raises KeyError: if ``feature`` is not attached to this nucleotide
        """
        if feature not in self.features:
            raise KeyError(feature)
        start = list(self.feature_rev(feature))[-1]
        end = list(self.feature_fwd(feature))[-1]
        return (start, end)

    def _remove_overlapping_features(self):
        # type: () -> None
        """Keep a single feature per feature name on this nucleotide."""
        seen_names = set()
        # Iterate over a snapshot because features are removed as we go.
        for feature in list(self.features):
            if feature.name in seen_names:
                self.remove_feature(feature)
            else:
                seen_names.add(feature.name)

    @staticmethod
    def _default_fuse_condition(f1, f2):
        return f1.name == f2.name

    @classmethod
    def fuse_features(cls, n1, n2, fuse_condition=None):
        """Merge matching features across the link ``n1 -> n2``.

        For every pair of distinct features (one on each node) accepted by
        ``fuse_condition`` (default: equal names), ``n1``'s feature replaces
        ``n2``'s feature along the forward run starting at ``n2``.

        :raises Exception: if ``n1`` and ``n2`` are not linked consecutively
        """
        if n1 is None or n2 is None:
            return
        if fuse_condition is None:
            fuse_condition = cls._default_fuse_condition
        if not (n1.next() is n2 and n2.prev() is n1):
            raise Exception("Cannot fuse non-consecutive features")

        for f1 in set(n1.features):
            for f2 in set(n2.features):
                if f1 is not f2 and fuse_condition(f1, f2):
                    # Materialize the run before editing the nodes' features.
                    for n in list(n2.feature_fwd(f2)):
                        n.add_feature(f1)
                        n.remove_feature(f2)

    def split_features(self, split_prev=True):
        """Split every feature that spans the link about to be cut.

        The feature on each side of the link is replaced by its own copy.
        A feature whose run reaches the other side of the link by going
        around (a cyclic feature) is left as is.

        :param split_prev: split across the previous link if True, else
            across the next link
        :type split_prev: bool
        """
        if split_prev:
            left, right = self.prev(), self
        else:
            left, right = self, self.next()

        # At the end of the chain there is nothing to split.
        if left is None or right is None:
            return

        # Snapshot: replace_feature mutates left.features inside the loop.
        for feature in list(left.features):
            if feature not in right.features:
                continue

            upstream = list(left.feature_rev(feature))
            downstream = list(right.feature_fwd(feature))

            # Cyclic feature: one run already contains the other side.
            if right in upstream or left in downstream:
                continue

            upstream_copy = copy(feature)
            downstream_copy = copy(feature)
            for n in upstream:
                n.replace_feature(feature, upstream_copy)
            for n in downstream:
                n.replace_feature(feature, downstream_copy)

    def _clear_features(self):
        self._features = set()

    def copy(self):
        """Copy the node; the copy gets its own set holding the same features."""
        copied = super().copy()
        copied._features = set(self.features)
        return copied
class Sequence(DoubleLinkedList):
    """Represents a biological sequence as a double linked list.

    Can be annotated with features.
    """

    class DEFAULTS:
        """Sequence defaults."""

        MIN_ANNEAL_BASES = 13
        FOREGROUND_COLORS = ["blue", "red"]
        BACKGROUND_COLORS = None
        ALPHABET = AmbiguousDNA

    FORWARD = SequenceFlags.FORWARD
    REVERSE = SequenceFlags.REVERSE
    TOP = SequenceFlags.TOP
    BOTTOM = SequenceFlags.BOTTOM
    NODE_CLASS = Nucleotide
    # Shared counter that hands out a fresh global id per sequence.
    counter = itertools.count()

    def __init__(
        self,
        sequence: TypingSequence[Any] = None,
        first: Nucleotide = None,
        name: str = None,
        description: str = "",
        metadata: dict = None,
        cyclic: bool = False,
        alphabet=DEFAULTS.ALPHABET,
    ):
        """
        :param sequence: sequence string
        :type sequence: basestring
        :param first: optional first Nucleotide to use as the 'head' to this Sequence
        :type first: Nucleotide
        :param name: optional name of the sequence
        :type name: basestring
        :param description: optional description of the sequence
        :type description: basestring
        :param metadata: additional sequence metadata
        :type metadata: dict
        :param cyclic: whether to make the sequence circular
        :type cyclic: bool
        :param alphabet: the base pair alphabet of this sequence which used for complementary and comparisons
                        (default: AmbiguousDNA)
        :type alphabet: jdna.alphabet.Alphabet
        """
        # Must be set before super().__init__, which may build nodes through
        # new_node() and therefore needs the alphabet.
        self.alphabet = alphabet
        super().__init__(data=sequence, first=first, cyclic=cyclic)
        self.name = "" if name is None else name
        self.description = description
        self.metadata = dict() if metadata is None else metadata
        self._global_id = next(Sequence.counter)
        self._io = self.IO.instance(self)
        self._align = self.Align.instance(self)
        if cyclic:
            self.cyclic = cyclic

    def new_node(self, data):
        """Create a node of ``NODE_CLASS`` using this sequence's alphabet."""
        return self.NODE_CLASS(data, alphabet=self.alphabet)

    @property
    def io(self):
        return self._io

    @property
    def align(self):
        return self._align

    @property
    def global_id(self):
        return self._global_id

    @classmethod
    def random(cls, length):
        """Generate a random sequence of ``length`` upper-case bases.

        A length that produces no bases returns ``cls.empty()``.
        """
        bases = "".join(UnambiguousDNA.random().upper() for _ in range(length))
        if not bases:
            return cls.empty()
        return cls(sequence=bases)

    @property
    def features_list(self):
        """Returns the distinct features contained in the sequence.

        :return: features in this sequence (in no particular order)
        :rtype: tuple
        """
        features_set = set()
        for n in self:
            features_set.update(n.features)
        return tuple(features_set)

    def _collect_features(self, with_nodes=False):
        """Group the features of the sequence into contiguous segments.

        :param with_nodes: if True, also return the start/end nodes of each
            segment
        :type with_nodes: bool
        :return: ``{feature: [[start, end], ...]}`` (inclusive indices), or a
            tuple of that dictionary and ``{feature: [[start_node, end_node], ...]}``
        :rtype: dict | tuple
        """
        feature_pos = defaultdict(list)
        feature_nodes = defaultdict(list)
        length = len(self)

        for index, n in enumerate(self):
            for f in n.features:
                if feature_pos[f] and feature_pos[f][-1][-1] + 1 == index:
                    # Extends the segment that ended on the previous index.
                    feature_pos[f][-1][-1] = index
                    feature_nodes[f][-1][-1] = n
                else:
                    feature_pos[f].append([index, index])
                    feature_nodes[f].append([n, n])

        # capture features that span the origin
        if self.cyclic:
            for k in feature_pos:
                positions = feature_pos[k]
                nodes = feature_nodes[k]
                if len(nodes) > 1:
                    if positions[0][0] == 0 and positions[-1][-1] == length - 1:
                        # Merge the last segment into the first one, so the
                        # merged segment starts where the last one started.
                        nodes[0][0] = nodes[-1][0]
                        positions[0][0] = positions[-1][0]
                        nodes.pop()
                        positions.pop()

        if with_nodes:
            return feature_pos, feature_nodes
        return feature_pos

    @property
    def features(self, with_nodes=False):
        """Return a dictionary of feature positions.

        Property access always uses ``with_nodes=False``; use
        :meth:`feature_nodes` to get the nodes.

        :return: ``{feature: [[start, end], ...]}`` with inclusive indices
        :rtype: dict
        """
        return self._collect_features(with_nodes=with_nodes)

    def feature_nodes(self):
        """Return ``{feature: [[start_node, end_node], ...]}``.

        The original called the ``features`` property like a method, which
        raised ``TypeError``.
        """
        return self._collect_features(with_nodes=True)[-1]

    def add_feature(self, start, end, feature):
        """Add a feature to the start and end positions (inclusive)

        :param start: start
        :type start: int
        :param end: end (inclusive)
        :type end: int
        :param feature: the feature to add
        :type feature: Feature
        :return: the added feature
        :rtype: Feature
        :raises IndexError: if the range does not finish on ``self[end]``
        """
        feature_nts = list(self.inclusive_range(start, end))
        if end and feature_nts[-1] is not self[end]:
            if not self.cyclic:
                raise IndexError(
                    "Cannot add feature to {} to linear dna with bounds {}".format(
                        (start, end), (0, len(self))
                    )
                )
            raise IndexError("Cannot add feature to {}".format((start, end)))
        # Reuse the nodes collected above instead of walking the range again.
        for n in feature_nts:
            n.add_feature(feature)
        return feature

    def add_multipart_feature(self, positions, feature):
        """Add a multi-part feature (i.e. a disjointed feature)

        :param positions: list of start and ends as tuples ([(1,100), (110,200)]
        :type positions: list
        :param feature: the feature to add
        :type feature: Feature
        :return: the added feature
        :rtype: Feature
        """
        for i, j in positions:
            self.add_feature(i, j, feature)
        return feature

    def find_feature_by_name(self, name):
        """Find features by name.

        :param name: feature name
        :type name: basestring
        :return: list of features
        :rtype: list
        """
        return [feature for feature in self.features if feature.name == name]

    def annotate(self, start, end, name, feature_type=None, color=None, strand=None):
        """Annotate a region with a new feature.

        :param start: start
        :type start: int
        :param end: end (inclusive)
        :type end: int
        :param name: feature name
        :type name: basestring
        :param feature_type: feature type (default=misc)
        :type feature_type: basestring
        :param color: optional feature color
        :type color: basestring
        :param strand: optional feature strand
        :type strand: int
        :return: new feature
        :rtype: Feature
        """
        return self.add_feature(
            start, end, Feature(name, feature_type, strand=strand, color=color)
        )

    def reverse(self):
        """Reverse the sequence in place and flip every feature once."""
        features_set = set()
        if self.is_empty():
            return self
        nodes = self.nodes
        for s in nodes:
            s.swap()
            features_set.update(s.features)
        # Features are shared between nodes, so flip each distinct one once.
        for f in features_set:
            f.reverse()
        self.head = nodes[-1]
        return self

    def complement(self):
        """Complement the sequence in place."""
        if self.is_empty():
            return self
        curr = self.head
        # Track visited nodes so that the walk terminates on a cycle.
        visited = set()
        while curr and curr not in visited:
            visited.add(curr)
            curr.to_complement()
            curr = next(curr)
        return self

    def c(self):
        """Complement the sequence in place."""
        return self.complement()

    def reverse_complement(self):
        """Reverse complement the sequence in place."""
        self.reverse()
        self.complement()
        return self

    def rc(self):
        """Reverse complement the sequence in place."""
        return self.reverse_complement()

    def cut(self, i, cut_prev=True):
        """Cut the sequence and return the fragments as new sequences.

        Arguments are forwarded to the parent ``cut``; each fragment is
        re-wrapped as ``Sequence(first=fragment.head)``.
        """
        fragments = super().cut(i, cut_prev)
        return [Sequence(first=f.head) for f in fragments]

    def clear_features(self):
        """Remove all features from every nucleotide."""
        for n in self:
            n._clear_features()

    def __copy__(self):
        """Copy the sequence; features are copied and get a new global id."""
        copied = super().__copy__()
        copied.name = self.name
        copied._global_id = next(self.counter)
        # Drop the features carried over by the node copies and re-apply
        # fresh copies at the same positions.
        copied.clear_features()
        for feature, positions in self.features.items():
            copied.add_multipart_feature(positions, copy(feature))
        return copied

    def anneal_forward(self, other, min_bases=DEFAULTS.MIN_ANNEAL_BASES, depth=None):
        """Anneal a sequence in the forward direction.

        Yields a :class:`BindPos` with direction FORWARD for each match.
        """
        for match in self.find_iter(
            other,
            min_query_length=min_bases,
            direction=self.Direction.REVERSE,
            depth=depth,
        ):
            yield BindPos.from_match(
                match, self, other, direction=self.Direction.FORWARD
            )

    def anneal_reverse(self, other, min_bases=DEFAULTS.MIN_ANNEAL_BASES, depth=None):
        """Anneal a sequence in the reverse direction.

        Matches are found by base complementarity; yields a :class:`BindPos`
        with direction REVERSE for each match.
        """
        for match in self.find_iter(
            other,
            min_query_length=min_bases,
            direction=(1, -1),
            protocol=lambda x, y: x.complementary(y),
            depth=depth,
        ):
            yield BindPos.from_match(
                match, self, other, direction=self.Direction.REVERSE
            )

    def anneal(self, ssDNA, min_bases=DEFAULTS.MIN_ANNEAL_BASES, depth=None):
        """Simulate annealing a single stranded piece of DNA to a
        double_stranded template.

        Yields the forward bindings first, then the reverse bindings.
        """
        yield from self.anneal_forward(ssDNA, min_bases=min_bases, depth=depth)
        yield from self.anneal_reverse(ssDNA, min_bases=min_bases, depth=depth)

    def dsanneal(self, dsDNA, min_bases=DEFAULTS.MIN_ANNEAL_BASES, depth=None):
        """Simulate annealing a double stranded piece of DNA to a
        double_stranded template.

        Bindings of the reverse complement of ``dsDNA`` are flagged with
        ``strand = SequenceFlags.BOTTOM``.
        """
        for binding in self.anneal(dsDNA, min_bases=min_bases, depth=depth):
            yield binding
        for binding in self.anneal(
            dsDNA.copy().reverse_complement(), min_bases=min_bases, depth=depth
        ):
            binding.strand = SequenceFlags.BOTTOM
            yield binding

    def format(self, width=75, spacer=""):
        """Format the sequence string with ``format_sequence``."""
        return format_sequence(str(self), width=width, spacer=spacer)

    @classmethod
    def _apply_features_to_view(cls, sequence, view):
        """Annotate ``view`` with every feature segment of ``sequence``."""
        for feature, positions in sequence.features.items():
            for pos in positions:
                direction = None
                if feature.strand == SequenceFlags.FORWARD:
                    direction = ViewerAnnotationFlag.FORWARD
                elif feature.strand == SequenceFlags.REVERSE:
                    direction = ViewerAnnotationFlag.REVERSE
                view.annotate(
                    pos[0],
                    pos[1],
                    label=feature.name,
                    fill=direction,
                    background=feature.color,
                )

    def view_bindings(self, bindings, view=None):
        """Annotate a view with the given bindings and return the view.

        :param bindings: iterable of :class:`BindPos`
        :param view: view to annotate (default: ``self.view(complement=True)``)
        :return: the annotated view
        """
        if view is None:
            view = self.view(complement=True)
        for b in bindings:
            anneal = b.anneal
            primer_sequence = b.five_prime_overhang + anneal + b.three_prime_overhang
            # Two rows: the primer, and bars under the annealed region.
            annotation = StringColumn(
                [
                    str(primer_sequence),
                    " " * len(b.five_prime_overhang)
                    + "|" * len(anneal)
                    + " " * len(b.three_prime_overhang),
                ]
            )
            if b.direction == Sequence.FORWARD:
                view.annotate(b.span[0], b.span[1], annotation)
            elif b.direction == Sequence.REVERSE:
                view.annotate(b.span[0], b.span[1], annotation.flip()[::-1], top=False)
        return view

    def view(
        self,
        indent=10,
        width=85,
        spacer=None,
        complement=False,
        features=True,
        **kwargs
    ):
        """Create a :class:`SequenceViewer` instance from this sequence.

        Printing the view object with annotations and complement will produce
        an output similar to the following:

        .. code::

            > "Unnamed" (550bp)
            ----------------GFP----------------
            |<START
            ---- -----------RFP-----------
            0 CCCAGGACTAGCGACTTTCCGTAACGCGACCTAACACCGGCCGTTCCTTCGAGCCAGGCAAATGTTACGTCACTTCCTTAGATTT
            GGGTCCTGATCGCTGAAAGGCATTGCGCTGGATTGTGGCCGGCAAGGAAGCTCGGTCCGTTTACAATGCAGTGAAGGAATCTAAA
            ------GFP------
            -----------------------------------------RFP-----------------------------------------
            85 TGAACAGCGCCGTACCCCGATATGATATTTAGATATATAGCAGTTACACTTGGGGTTGCTATGGACTTAGATCTGCTGTATGTTT
            ACTTGTCGCGGCATGGGGCTATACTATAAATCTATATATCGTCAATGTGAACCCCAACGATACCTGAATCTAGACGACATACAAA
            -----------------------------------------RFP-----------------------------------------
            170 TCTTACCTTCCGCATCAGGGGACAATTCGCCAGTAGAATTCAGTTTGTGCGTGAGAACATAAGATTGAATCCCACGCAGGCACAA
            AGAATGGAAGGCGTAGTCCCCTGTTAAGCGGTCATCTTAAGTCAAACACGCACTCTTGTATTCTAACTTAGGGTGCGTCCGTGTT
            ---------------------RFP----------------------
            255 GCAGGGCGGGCAGACTCTATAGGTCCTAAGACCCTGAGACTGCGTCCTCAAGATACAGGTTAACAATCCCCGTATGGAGCCGTTC
            CGTCCCGCCCGTCTGAGATATCCAGGATTCTGGGACTCTGACGCAGGAGTTCTATGTCCAATTGTTAGGGGCATACCTCGGCAAG
            340 TTAGCATGACCCGACAGGTGGGCTTGGCTCGCGTAAGTTGAGTGTTGCAGATACCTGCTGCTGCGCGGTCTAGGGGGAATCGCCG
            AATCGTACTGGGCTGTCCACCCGAACCGAGCGCATTCAACTCACAACGTCTATGGACGACGACGCGCCAGATCCCCCTTAGCGGC
            425 ATTTTGACGTAGGATCGGTAATGGGCAGTAAACCCGCAACTATTTTCAGCACCAGATGCAAGTTTCCCTAGAAAGCGTCATGGTT
            TAAAACTGCATCCTAGCCATTACCCGTCATTTGGGCGTTGATAAAAGTCGTGGTCTACGTTCAAAGGGATCTTTCGCAGTACCAA
            510 TGCAATCTCCTTAGGTCACAGCAAACATAGCAGCCCCTGT
            ACGTTAGAGGAATCCAGTGTCGTTTGTATCGTCGGGGACA

        :param indent: indent between left column and base pairs view window
            (None means 10)
        :type indent: int
        :param width: width of the view window (None means 85)
        :type width: int
        :param spacer: string to intersperse between sequence rows (default is
            a newline when ``complement`` is True, otherwise an empty string)
        :type spacer: basestring
        :param complement: whether to include the complementary strand in the view
        :type complement: bool
        :param features: whether to include annotations/features in the view instance
        :type features: bool
        :param kwargs: extra keyword arguments passed to :class:`SequenceViewer`
        :return: the viewer object
        :rtype: SequenceViewer
        """
        if indent is None:
            indent = 10
        if width is None:
            width = 85

        seqs = [self]
        colors = self.DEFAULTS.FOREGROUND_COLORS[0]
        if complement:
            seqs.append(self.copy().complement())
            colors = self.DEFAULTS.FOREGROUND_COLORS

        if spacer is None:
            spacer = "\n" if complement else ""

        viewer = SequenceViewer(
            seqs,
            name=self.name,
            description=self.description,
            indent=indent,
            width=width,
            spacer=spacer,
            foreground_colors=colors,
            **kwargs
        )
        viewer.metadata.update(self.metadata)
        if features:
            self._apply_features_to_view(self, viewer)
        return viewer

    def upper(self):
        """Upper-case every base in place and return this sequence."""
        for n in self:
            n.data = n.data.upper()
        return self

    def lower(self):
        """Lower-case every base in place and return this sequence."""
        for n in self:
            n.data = n.data.lower()
        return self

    def print(
        self,
        indent=None,
        width=None,
        spacer=None,
        complement=False,
        features=True,
        **kwargs
    ):
        """Create and print a :class:`SequenceViewer` instance from this
        sequence.

        All arguments are forwarded to :meth:`view`, which documents them and
        shows an example of the printed output. Nothing is returned.

        :param indent: indent between left column and base pairs view window
        :type indent: int
        :param width: width of the view window
        :type width: int
        :param spacer: string to intersperse between sequence rows
        :type spacer: basestring
        :param complement: whether to include the complementary strand in the view
        :type complement: bool
        :param features: whether to include annotations/features in the view instance
        :type features: bool
        """
        self.view(
            indent=indent,
            width=width,
            spacer=spacer,
            complement=complement,
            features=features,
            **kwargs
        ).print()

    def tm(self):
        """Calculate the Tm of this sequence using primer3 defaults.

        :return: the tm of the sequence
        :rtype: float
        """
        return primer3.calcTm(str(self).upper())

    def json(self):
        """Return the sequence as a json-style dictionary.

        Annotation ``end`` values are exclusive (inclusive end + 1).
        """
        annotations = []
        for feature, positions in self.features.items():
            for start, end in positions:
                annotations.append(
                    {
                        "start": start,
                        "end": end + 1,
                        "name": feature.name,
                        "color": feature.color,
                        "type": feature.type,
                        "strand": feature.strand,
                    }
                )
        return {
            "name": self.name,
            "isCircular": self.cyclic,
            "length": len(self),
            "bases": str(self),
            "annotations": annotations,
        }

    @classmethod
    def load(cls, data):
        """Load a sequence from a json formatted dictionary.

        Expects the keys written by :meth:`json`. ``description`` and each
        annotation's ``strand`` are optional.
        """
        sequence = cls(data["bases"], name=data["name"])
        sequence.cyclic = data["isCircular"]
        # Fall back to the constructor default rather than None.
        sequence.description = data.get("description", "")
        for a in data["annotations"]:
            # json() stores an exclusive end; annotate() takes an inclusive one.
            # The strand written by json() is restored (it used to be dropped).
            sequence.annotate(
                a["start"],
                a["end"] - 1,
                a["name"],
                a["type"],
                a["color"],
                strand=a.get("strand"),
            )
        return sequence

    def _collect_cut_sites(self, enzyme_site, cut1=None, cut2=None):
        """Return the cut indices for every occurrence of a recognition site.

        :param enzyme_site: a site string, a Sequence, or an object with a
            ``charac`` attribute, from which cut1, cut2 and the site are read
            at indices 0, 1 and 4
        :param cut1: offset added to the start of each match
        :param cut2: offset added to the end of each match (plus one)
        :return: flat list of cut indices, two per match
        :rtype: list
        """
        if hasattr(enzyme_site, "charac"):
            cut1 = enzyme_site.charac[0]
            cut2 = enzyme_site.charac[1]
            enzyme_site = enzyme_site.charac[4]
        if isinstance(enzyme_site, str):
            enzyme_site = Sequence(enzyme_site)
        cut_sites = []
        for match in self.find_iter(enzyme_site):
            cut_sites.append(match.span[0] + cut1)
            cut_sites.append(match.span[1] + cut2 + 1)
        return cut_sites

    def digest(self, enzymes, as_names=False):
        """Supply either a Bio.RestrictionSite or a tuple of (seq, cut1, cut2)
        e.g. ('GTTTAAAC', 4, -4)

        :param enzymes: either a Bio.RestrictionSite or a tuple of (seq, cut1, cut2),
            or a list of those
        :type enzymes: list (of tuple|Bio.RestrictionSite)
        :param as_names: if True, each entry is an attribute name looked up on
            ``Bio.Restriction``
        :type as_names: bool
        :return: list of sequences
        :rtype: list
        """
        cut_sites = []
        if not isinstance(enzymes, list):
            enzymes = [enzymes]
        if as_names:
            enzymes = [getattr(Restriction, name) for name in enzymes]
        for enzyme in enzymes:
            if isinstance(enzyme, tuple):
                cut_sites += self._collect_cut_sites(*enzyme)
            else:
                cut_sites += self._collect_cut_sites(enzyme)
        return self.cut(cut_sites)

    def __repr__(self) -> str:
        max_width = 30
        replace = "..."
        # Characters kept on each side of the ellipsis.
        display = int((max_width - len(replace)) / 2.0)
        s = str(self)
        if len(s) > display * 2:
            s = s[:display] + "..." + s[-display:]
        return "Sequence('{}')".format(s)
# Attach the I/O and alignment interfaces to the class now that Sequence is
# defined; Sequence.__init__ calls ``.instance(self)`` on each of them.
Sequence.IO = IOInterface(Sequence)
Sequence.Align = AlignInterface(Sequence)