Skip to content
12 changes: 6 additions & 6 deletions docs/notebooks/history.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -186,13 +186,13 @@
"╙── product (Dseqrecord(-18))\n",
" └─╼ LigationSource\n",
" ├─╼ c (Dseqrecord(-7))\n",
" │ └─╼ Source\n",
" │ └─╼ a (Dseqrecord(-18)) ╾ Source, Source\n",
" │ └─╼ RestrictionEnzymeDigestionSource\n",
" │ └─╼ a (Dseqrecord(-18)) ╾ RestrictionEnzymeDigestionSource, RestrictionEnzymeDigestionSource\n",
" ├─╼ d (Dseqrecord(-12))\n",
" │ └─╼ Source\n",
" │ └─╼ RestrictionEnzymeDigestionSource\n",
" │ └─╼ ...\n",
" └─╼ e (Dseqrecord(-7))\n",
" └─╼ Source\n",
" └─╼ RestrictionEnzymeDigestionSource\n",
" └─╼ ...\n"
]
}
Expand Down Expand Up @@ -354,8 +354,8 @@
" └─╼ CreLoxRecombinationSource\n",
" └─╼ integration_product (Dseqrecord(-84))\n",
" └─╼ CreLoxRecombinationSource\n",
" ├─╼ a (Dseqrecord(-45))\n",
" └─╼ b (Dseqrecord(o39))\n"
" ├─╼ genome (Dseqrecord(-45))\n",
" └─╼ plasmid (Dseqrecord(o39))\n"
]
}
],
Expand Down
232 changes: 5 additions & 227 deletions poetry.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ scipy = [
]
seguid = ">=0.0.5"
regex = "^2024.11.6"
opencloning-linkml = "0.4.5"
opencloning-linkml = "^0.4.5"
[tool.poetry.extras]
clipboard = ["pyperclip"]
download = ["pyparsing", "requests"]
Expand Down
5 changes: 3 additions & 2 deletions src/pydna/assembly2.py
Original file line number Diff line number Diff line change
Expand Up @@ -2036,7 +2036,7 @@ def _recast_sources(
"""
for prod in products:
prod.source = source_cls(
**prod.source.model_dump(),
**prod.source.to_unserialized_dict(),
**extra_fields,
)
return products
Expand Down Expand Up @@ -2805,7 +2805,8 @@ def crispr_integration(
# The second element of product.source.input is conventionally the insert/repair fragment
# The other two (first and third) are the two bits of the genome
repair_start = _location_boundaries(product.source.input[0].right_location)[0]
repair_end = _location_boundaries(product.source.input[2].left_location)[1]
# Here we do +1 because the position of the cut marks the boundary (e.g. 0:10, 10:20 if a cut is at pos 10)
repair_end = _location_boundaries(product.source.input[2].left_location)[1] + 1
repair_location = create_location(repair_start, repair_end, len(genome))
some_cuts_inside_repair = []
all_cuts_inside_repair = []
Expand Down
5 changes: 3 additions & 2 deletions src/pydna/genbank.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@
from pydna.readers import read as _read

from Bio import Entrez as _Entrez
from Bio.SeqFeature import SimpleLocation

# from Bio.SeqFeature import SimpleLocation
from typing import Literal as _Literal, Optional as _Optional
import re as _re
import os as _os
Expand Down Expand Up @@ -179,7 +180,7 @@ def nucleotide(
result.source = RepositoryIdSource(
repository_id=item,
repository_name="genbank",
location=SimpleLocation(seq_start, seq_stop, strand),
# location=SimpleLocation(seq_start, seq_stop, strand),
)
return result

Expand Down
124 changes: 124 additions & 0 deletions src/pydna/oligonucleotide_hybridization.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
# -*- coding: utf-8 -*-
"""
This module contains the functions for oligonucleotide hybridization.
"""

from pydna.common_sub_strings import common_sub_strings
from Bio.Seq import reverse_complement
from pydna.primer import Primer
from pydna.dseqrecord import Dseqrecord
from pydna.dseq import Dseq
from pydna.opencloning_models import OligoHybridizationSource, SourceInput


def oligonucleotide_hybridization_overhangs(
    fwd_oligo_seq: str, rvs_oligo_seq: str, minimal_annealing: int
) -> list[int]:
    """
    List the overhangs with which two oligos can hybridize.

    The forward oligo is compared (case-insensitively) with the reverse
    complement of the reverse oligo, using ``minimal_annealing`` as the
    minimal annealing length. For every shared stretch found, the overhang is
    the start of the stretch in the reverse-complemented reverse oligo minus
    its start in the forward oligo.

    A ``ValueError`` is raised when a shared stretch is flanked, on the same
    side, by extra bases of both oligos, since those flanking bases would be
    mismatched in the annealed product.

    see https://github.com/manulera/OpenCloning_backend/issues/302 for notation

    >>> from pydna.oligonucleotide_hybridization import oligonucleotide_hybridization_overhangs
    >>> oligonucleotide_hybridization_overhangs("ATGGC", "GCCAT", 3)
    [0]
    >>> oligonucleotide_hybridization_overhangs("aATGGC", "GCCAT", 5)
    [-1]
    >>> oligonucleotide_hybridization_overhangs("ATGGC", "GCCATa", 5)
    [1]
    >>> oligonucleotide_hybridization_overhangs("ATGGC", "GCCATaaGCCAT", 5)
    [0, 7]

    If the minimal annealing length is longer than the length of the shortest oligo, it returns an empty list.

    >>> oligonucleotide_hybridization_overhangs("ATGGC", "GCCATaaGCCAT", 100)
    []

    If it's possible to anneal for ``minimal_annealing`` length, but with mismatches, it raises an error.

    >>> oligonucleotide_hybridization_overhangs("cATGGC", "GCCATa", 5)
    Traceback (most recent call last):
        ...
    ValueError: The oligonucleotides can anneal with mismatches
    """
    watson = fwd_oligo_seq.lower()
    crick_rc = reverse_complement(rvs_oligo_seq.lower())
    shared_stretches = common_sub_strings(watson, crick_rc, minimal_annealing)

    fwd_len = len(fwd_oligo_seq)
    rvs_len = len(rvs_oligo_seq)

    overhangs = []
    for start_fwd, start_rvs, size in shared_stretches:
        # Both oligos have bases before the shared stretch: those cannot pair.
        unpaired_before = start_fwd != 0 and start_rvs != 0
        # Both oligos have bases after the shared stretch: those cannot pair.
        unpaired_after = start_fwd + size < fwd_len and start_rvs + size < rvs_len
        if unpaired_before or unpaired_after:
            raise ValueError("The oligonucleotides can anneal with mismatches")
        overhangs.append(start_rvs - start_fwd)

    return overhangs


def oligonucleotide_hybridization(
    fwd_primer: Primer, rvs_primer: Primer, minimal_annealing: int
) -> list[Dseqrecord]:
    """
    Anneal two primers and return every resulting double-stranded product.

    One Dseqrecord is produced per overhang reported by
    ``oligonucleotide_hybridization_overhangs``. Each product carries an
    ``OligoHybridizationSource`` that records the overhang and both primers
    as inputs (forward primer first, reverse primer second).

    >>> from pydna.primer import Primer
    >>> from pydna.oligonucleotide_hybridization import oligonucleotide_hybridization
    >>> fwd_primer = Primer("ATGGC")
    >>> rvs_primer = Primer("GCCA")
    >>> oligonucleotide_hybridization(fwd_primer, rvs_primer, 3)[0].seq
    Dseq(-5)
    ATGGC
     ACCG

    Multiple values can be returned:

    >>> rvs_primer2 = Primer("GCCATaaGCCAT")
    >>> oligonucleotide_hybridization(fwd_primer, rvs_primer2, 3)[0].seq
    Dseq(-12)
    ATGGC
    TACCGaaTACCG
    >>> oligonucleotide_hybridization(fwd_primer, rvs_primer2, 3)[1].seq
    Dseq(-12)
           ATGGC
    TACCGaaTACCG

    If no possible overhangs are found, it returns an empty list.

    >>> oligonucleotide_hybridization(fwd_primer, rvs_primer, 100)
    []

    If there are mismatches given the minimal annealing length, it raises an error.

    >>> fwd_primer3 = Primer("cATGGC")
    >>> rvs_primer3 = Primer("GCCATa")
    >>> oligonucleotide_hybridization(fwd_primer3, rvs_primer3, 5)
    Traceback (most recent call last):
        ...
    ValueError: The oligonucleotides can anneal with mismatches
    """
    # NOTE(review): the leading spaces inside the Dseq reprs of the doctests
    # above encode the overhang (-1, 0 and 7) -- confirm with a doctest run.
    watson = str(fwd_primer.seq)
    crick = str(rvs_primer.seq)

    products = []
    for overhang in oligonucleotide_hybridization_overhangs(
        watson, crick, minimal_annealing
    ):
        source = OligoHybridizationSource(
            overhang_crick_3prime=overhang,
            input=[
                SourceInput(sequence=fwd_primer),
                SourceInput(sequence=rvs_primer),
            ],
        )
        # Read the overhang back from the source so the sequence and its
        # provenance record always agree.
        duplex = Dseq(watson, crick, ovhg=source.overhang_crick_3prime)
        products.append(Dseqrecord(duplex, source=source))

    return products
Loading
Loading