Source code for jdna.alphabet
"""
class representing base pairs and their complements
"""
import random
from Bio.Alphabet import IUPAC
from Bio.Seq import Seq
import re
class Alphabet(object):
    """
    A dictionary-like class that retrieves complementary base pairs.

    Both the lower-case and the upper-case form of every character are
    registered, each mapped to the same-case form of its complement.
    """

    __slots__ = ["_chr", "_comp", "__complementary", "__ambiguous"]

    def __init__(self, characters, complementary_characters, ambiguous_characters=None):
        """
        Build the complement lookup table.

        :param characters: characters of the alphabet
        :type characters: str
        :param complementary_characters: complements of ``characters``,
            paired with them by position
        :type complementary_characters: str
        :param ambiguous_characters: mapping from an ambiguous character to
            the regex fragment it stands for; an empty dict is used when None
        :type ambiguous_characters: dict | None
        """
        # NOTE: pairing is purely positional, so both strings are expected to
        # have the same length; zip() does not check this.
        self.__complementary = dict(
            zip(
                characters.lower() + characters.upper(),
                complementary_characters.lower() + complementary_characters.upper(),
            )
        )
        if ambiguous_characters is None:
            ambiguous_characters = {}
        self.__ambiguous = ambiguous_characters

    @classmethod
    def from_biopython_alphabet(
        cls, biopython_alphabet: "IUPAC.Alphabet", ambiguous_characters=None
    ):
        """
        Alternate constructor that derives the complements with Biopython.

        The annotation is quoted so that it is not evaluated when the class
        is defined.

        :param biopython_alphabet: object exposing a ``letters`` string
        :param ambiguous_characters: forwarded unchanged to the constructor
        :return: a new alphabet built from ``letters`` and their complement
        :rtype: Alphabet
        """
        seq = Seq(biopython_alphabet.letters)
        seq_letters = str(seq)
        c_seq_letters = str(seq.complement())
        return cls(seq_letters, c_seq_letters, ambiguous_characters)

    def characters(self):
        """
        Return a view of every character (both cases) known to the alphabet.
        """
        return self.__complementary.keys()

    def random(self):
        """
        Return a random character of the alphabet (either case).
        """
        return random.choice(list(self.__complementary.keys()))

    def compare(self, s1, s2, ignore_case=True):
        """
        Compare two sequences over their whole length.

        Each character of the second sequence that has an entry in the
        ambiguity table is replaced by its regex fragment; every other
        character is matched literally. Only the second sequence is expanded,
        so ambiguous characters in the first sequence are treated as literals.

        :param s1: sequence to test
        :type s1: str
        :param s2: reference sequence, may contain ambiguous characters
        :type s2: str
        :param ignore_case: match case-insensitively when True
        :type ignore_case: bool
        :return: True if all of ``s1`` matches the pattern built from ``s2``
        :rtype: bool
        """
        ambiguous = self.__ambiguous
        # re.escape keeps non-alphabet characters (e.g. "." or "*") from being
        # interpreted as regex syntax.
        pattern = "".join(
            ambiguous[b] if b in ambiguous else re.escape(b) for b in s2
        )
        flags = re.IGNORECASE if ignore_case else 0
        # fullmatch (not match): a sequence that merely starts with s2 must
        # not compare equal to it.
        return re.fullmatch(pattern, s1, flags) is not None

    def complement(self, basestring):
        """
        Return the complement of ``basestring``, character by character.

        :raises KeyError: if a character is not part of the alphabet
        """
        return "".join(self[x] for x in basestring)

    def reverse_complement(self, basestring):
        """
        Return the complement of ``basestring`` in reversed order.

        :raises KeyError: if a character is not part of the alphabet
        """
        return self.complement(basestring)[::-1]

    def __getitem__(self, item):
        """Return the complement of a single character."""
        return self.__complementary[item]

    def __contains__(self, item):
        """Return True if ``item`` is a character of the alphabet."""
        return item in self.__complementary

    # aliases
    rc = reverse_complement
    c = complement
# Upper-case IUPAC ambiguity codes and the regex character class each one
# stands for ("X" and "N" both accept any of the four bases).
_iupacdict = dict(
    M="[AC]",
    R="[AG]",
    W="[AT]",
    S="[CG]",
    Y="[CT]",
    K="[GT]",
    V="[ACG]",
    H="[ACT]",
    D="[AGT]",
    B="[CGT]",
    X="[ACGT]",
    N="[ACGT]",
)

# Case-preserving table: a lower-case code expands to a lower-case character
# class, an upper-case code to an upper-case one.
iupacdict = {}
for k, v in _iupacdict.items():
    iupacdict.update({k.lower(): v.lower(), k.upper(): v.upper()})
# Alphabet over Biopython's IUPAC.ambiguous_dna letters; iupacdict supplies
# the regex expansion used by Alphabet.compare for the ambiguity codes.
AmbiguousDNA = Alphabet.from_biopython_alphabet(
    IUPAC.ambiguous_dna, ambiguous_characters=iupacdict
)
# Alphabet over Biopython's IUPAC.unambiguous_dna letters, with an empty
# ambiguity table.
UnambiguousDNA = Alphabet.from_biopython_alphabet(
    IUPAC.unambiguous_dna, ambiguous_characters={}
)