Source code for dasi.models.molecule

"""classes representing *molecules* and types of *molecules*"""
from copy import copy
from typing import Dict
from typing import List
from typing import Tuple
from typing import Union

from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from numpy import inf

from dasi.constants import Constants
from dasi.utils import Region


# TODO: refactor Molecule and MoleculeTypes, as this is confusing
[docs]class MoleculeType: """Molecule metatype.""" types = {}
[docs] def __init__( self, name: str, design: Union[None, Tuple[bool, bool]], use_direct: bool, cost: float, efficiency=1.0, synthesize=False, min_size: int = None, max_size: int = None, description: str = "", ): """Initializes a new molecule.""" self.name = name self.cost = cost self.use_direct = use_direct self.synthesize = synthesize if name in self.types: raise ValueError("Cannot re-define molecule type '{}'".format(name)) self.types[name] = self self.efficiency = efficiency self.design = design self.int_or_ext = None self.min_size = min_size self.max_size = max_size self.description = description
def __repr__(self): return "<{} name='{}'>".format(self.__class__.__name__, self.name)
[docs]class InternalType(MoleculeType): """Molecule representing physical molecule that can be provided by the lab.""" def __init__( self, name: str, design: Tuple[bool, bool], use_direct: bool, cost: float, efficiency=1.0, synthesize: bool = False, min_size: int = None, max_size: int = None, description: str = "", ): super().__init__( name, use_direct=use_direct, design=design, cost=cost, efficiency=efficiency, synthesize=synthesize, min_size=min_size, max_size=max_size, description=description, ) self.design = design self.int_or_ext = "internal"
[docs]class ExternalType(MoleculeType): """Molecule to be designed.""" def __init__( self, name: str, use_direct: bool, cost: float, efficiency=1.0, synthesize: bool = False, min_size: int = None, max_size: int = None, description: str = "", ): super().__init__( name, use_direct=use_direct, design=None, cost=cost, efficiency=efficiency, synthesize=synthesize, min_size=min_size, max_size=max_size, description=description, ) self.int_or_ext = "external" def __call__(self, design): copied = copy(self) copied.design = design return copied
# TODO: move this to the parameters # TODO: efficiencies and base costs could be parameters # TODO: min_size and max_size could be parameters InternalType( name=Constants.FRAGMENT, design=(False, False), use_direct=True, cost=0.0, efficiency=0.98, description=( "A FRAGMENT is a existing fragment in the lab than can be used " "'as is' and used directly in a reaction." ), ) InternalType( name=Constants.PCR_PRODUCT, design=(True, True), use_direct=False, cost=10.0, efficiency=0.95, min_size=100, description=( "A PCR_PRODUCT is a PCR of DNA generated by designing (and synthesizing) " "two new primers and using an existing template to produce the product " "in a PCR reaction. The cost represents the cost of doing the PCR reaction. " "The cost of the primers is not represented here, but in the flanking " "external GAP costs." ), ) InternalType( name=Constants.PCR_PRODUCT_WITH_PRIMERS, design=(False, False), use_direct=False, cost=10.0, efficiency=0.95, min_size=100, description=( "A PCR_PRODUCT_WITH_PRIMERS is a PCR of DNA generated by using " "two existing primers and using an existing template to produce the product" "in a PCR reaction. The cost represents the cost of doing the PCR reaction." ), ) InternalType( name=Constants.PCR_PRODUCT_WITH_RIGHT_PRIMER, design=(True, False), use_direct=False, cost=10.0, efficiency=0.95, min_size=100, description=( "A PCR_PRODUCT_WITH_RIGHT_PRIMER is a PCR of DNA generated by using " "one existing reverse primer and synthesizing a new forward primer and using " " an existing template to produce the product in a PCR reaction. The cost " "represents the cost of doing the PCR reaction. The cost of the synthesized " "primer is not represented here, but in the external gap cost." ), ) InternalType( name=Constants.PCR_PRODUCT_WITH_LEFT_PRIMER, design=(False, True), use_direct=False, cost=10.0, efficiency=0.95, min_size=100, description=( "A PCR_PRODUCT_WITH_RIGHT_PRIMER is a PCR of DNA generated by using " "one existing forward primer and synthesizing a new reverse primer and using " " an existing template to produce the product in a PCR reaction. The cost " "represents the cost of doing the PCR reaction. The cost of the synthesized " "primer is not represented here, but in the external gap cost." ), ) InternalType( name=Constants.PRIMER_EXTENSION_PRODUCT_WITH_PRIMERS, design=(False, False), use_direct=False, cost=9.0, efficiency=0.95, description=( "A PRIMER_EXTENSION_PRODUCT_WITH_PRIMERS is a product produced by annealing " "two existing primers together in annealing reaction or PCR reaction." ), ) InternalType( name=Constants.PRIMER_EXTENSION_PRODUCT_WITH_LEFT_PRIMER, design=(False, True), use_direct=False, cost=9.0, efficiency=0.95, description=( "A PRIMER_EXTENSION_PRODUCT_WITH_LEFT_PRIMER is a product produced by annealing " "one existing primer with a new designed primer" " together in annealing reaction or PCR reaction." ), ) InternalType( name=Constants.PRIMER_EXTENSION_PRODUCT_WITH_RIGHT_PRIMER, design=(True, False), use_direct=False, cost=9.0, efficiency=0.95, description=( "A PRIMER_EXTENSION_PRODUCT_WITH_RIGHT_PRIMER is a product produced by annealing " "one existing primer with a new designed primer" " together in annealing reaction or PCR reaction." ), ) # TODO: SYNTHESIZED_FRAGMENT description InternalType( name=Constants.SHARED_SYNTHESIZED_FRAGMENT, design=(False, False), use_direct=True, cost=1000.0, efficiency=0.1, description="", min_size=100, synthesize=True, ) ExternalType( name=Constants.OVERLAP, use_direct=False, cost=0.0, efficiency=1.0, synthesize=False, description=( "A OVERLAP is an external edge where two pieces of DNA overlap forming " "a junction. No new pieces of DNA are required and so the cost is 0. " "The efficiency of the junction depends on the cost settings, the particular " "sequence and amount of overlap" ), ) ExternalType( name=Constants.GAP, use_direct=False, cost=0.0, efficiency=1.0, synthesize=True, description=( "A GAP is an external edges where two pieces of DNA do no overlap. The " "gap must be spanned by either a synthesized fragment or by the primers " "on the left and right flanking molecules." ), ) MoleculeType( name=Constants.MISSING, design=None, use_direct=False, cost=inf, efficiency=0.0, synthesize=False, description=("A missing molecule. For tests."), ) MoleculeType( name=Constants.PRIMER, design=None, use_direct=False, cost=inf, efficiency=0.0, synthesize=False, description=( "A PRIMER is a single stranded piece of DNA that can amplify a pcr product " "from a template." ), ) MoleculeType( name=Constants.TEMPLATE, design=None, use_direct=True, cost=0.0, efficiency=0.0, synthesize=False, description=("A TEMPLATE is a piece DNA that can be used to make pcr products."), ) MoleculeType( name=Constants.SHARED_FRAGMENT, design=None, use_direct=True, cost=0.0, efficiency=1.0, synthesize=True, description=( "A SHARED_FRAGMENT is a DNA piece that is shared " "between several designs. This information can be used to reduce costs via " "'coalition design'. " ), ) MoleculeType( name=Constants.PLASMID, design=None, use_direct=False, cost=0.0, efficiency=1.0, synthesize=False, description=("A PLASMID is a designed DNA sequence."), )
[docs]class Molecule: """An instance of a molecule type, with a sequence and which alignments are assigned to it.""" def __init__( self, molecule_type: MoleculeType, alignment_group, sequence: SeqRecord, query_region: Region = None, metadata: Dict = None, ): self.type = molecule_type self.alignment_group = alignment_group assert issubclass(type(sequence), SeqRecord) assert issubclass(type(sequence.seq), Seq) self.sequence = sequence self.metadata = metadata or {} self.query_region = query_region def __repr__(self): return "<{cls} name='{name}' group='{group}'>".format( cls=self.__class__.__name__, name=self.type.name, group=self.alignment_group )
[docs]class Reaction: """An activity that takes in several Molecules and produces other Molecules.""" class Types: Direct = "Use Direct" Synthesize = "Synthesize" Retrieve = "Retrieve Fragment" PCR = "PCR" Assembly = "Assembly" _Valid = [Direct, Synthesize, Retrieve, PCR, Assembly]
[docs] def __init__( self, name: str, inputs: List[Molecule], outputs: List[Molecule], metadata: dict = None, ): if name not in self.Types._Valid: raise ValueError( "Name '{}' must be one of {}".format(name, self.Types._Valid) ) self.name = name self.inputs = inputs #: input molecules to the reaction self.outputs = outputs #: output molecule of the reaction self.metadata = metadata
def __repr__(self): return "<{cls} name='{name}' outputs={products} regions={outputs}>".format( cls=self.__class__.__name__, name=self.name, products=[m.type.name for m in self.outputs], outputs=[m.query_region for m in self.outputs], )