Skip to content

Commit

Permalink
added kmer functionality, removed enzymemanager (#83)
Browse files Browse the repository at this point in the history
* added kmer functionality, removed enzymemanager
  • Loading branch information
Koeng101 authored Aug 13, 2024
1 parent 56f2383 commit 0cc4f7c
Show file tree
Hide file tree
Showing 8 changed files with 205 additions and 109 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [Unreleased]
- Added kmer detection for ligation events in cloning and removed enzyme manager [#83](https://github.com/Koeng101/dnadesign/pull/83)
- Added option for linear ligations [#82](https://github.com/Koeng101/dnadesign/pull/82)
- Added minimal python packaging [#81](https://github.com/Koeng101/dnadesign/pull/81)
- Greatly simplified the Ligate function [#77](https://github.com/Koeng101/dnadesign/pull/77)
Expand Down
8 changes: 4 additions & 4 deletions lib/align/megamash/megamash.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,9 @@ import (
"github.com/koeng101/dnadesign/lib/transform"
)

// StandardizedDNA returns the alphabetically lesser strand of a double
// StandardizeDNA returns the alphabetically lesser strand of a double
// stranded DNA molecule.
func StandardizedDNA(sequence string) string {
func StandardizeDNA(sequence string) string {
sequence = strings.ToUpper(sequence)
var deterministicSequence string
reverseComplement := transform.ReverseComplement(sequence)
Expand Down Expand Up @@ -59,7 +59,7 @@ func NewMegamashMap(sequences []fasta.Record, kmerSize uint, kmerMinimalCount in
sequence := fastaRecord.Sequence
sequenceSpecificKmers := make(map[string]bool)
for i := 0; i <= len(sequence)-int(kmerSize); i++ {
kmerString := StandardizedDNA(sequence[i : i+int(kmerSize)])
kmerString := StandardizeDNA(sequence[i : i+int(kmerSize)])
kmerMap[kmerString] = fastaRecord.Identifier
sequenceSpecificKmers[kmerString] = true
}
Expand Down Expand Up @@ -111,7 +111,7 @@ func (m *MegamashMap) Match(sequence string) []Match {
identifierToCounts[identifier] = 0
}
for i := 0; i <= len(sequence)-int(m.KmerSize); i++ {
kmerString := StandardizedDNA(sequence[i : i+int(m.KmerSize)])
kmerString := StandardizeDNA(sequence[i : i+int(m.KmerSize)])
identifier, ok := m.Kmers[kmerString]
if ok {
identifierToCounts[identifier]++
Expand Down
69 changes: 22 additions & 47 deletions lib/clone/clone.go
Original file line number Diff line number Diff line change
Expand Up @@ -90,23 +90,12 @@ type Enzyme struct {
RecognitionSite string
}

// EnzymeManager is a manager for Enzymes. It allows for management of enzymes
// throughout the lifecycle of your program. EnzymeManager is not safe for
// concurrent use.
type EnzymeManager struct {
// enzymeMap is the map of enzymes, keyed by enzyme name, that exist for the
// lifetime of the manager. Not safe for concurrent use.
enzymeMap map[string]Enzyme
}

// NewEnzymeManager creates a new EnzymeManager given some enzymes.
func NewEnzymeManager(enzymes []Enzyme) EnzymeManager {
enzymeMap := make(map[string]Enzyme)
for enzymeIndex := range enzymes {
enzymeMap[enzymes[enzymeIndex].Name] = enzymes[enzymeIndex]
}

return EnzymeManager{
enzymeMap: enzymeMap,
}
var DefaultEnzymes = map[string]Enzyme{
"BsaI": {"BsaI", regexp.MustCompile("GGTCTC"), regexp.MustCompile("GAGACC"), 1, 4, "GGTCTC"},
"BbsI": {"BbsI", regexp.MustCompile("GAAGAC"), regexp.MustCompile("GTCTTC"), 2, 4, "GAAGAC"},
"BtgZI": {"BtgZI", regexp.MustCompile("GCGATG"), regexp.MustCompile("CATCGC"), 10, 4, "GCGATG"},
"PaqCI": {"PaqCI", regexp.MustCompile("CACCTGC"), regexp.MustCompile("GCAGGTG"), 4, 4, "CACCTGC"},
"BsmBI": {"BsmBI", regexp.MustCompile("CGTCTC"), regexp.MustCompile("GAGACG"), 1, 4, "CGTCTC"},
}

/******************************************************************************
Expand All @@ -119,26 +108,17 @@ Base cloning functions begin here.
// enzyme's name. It is a convenience wrapper around CutWithEnzyme that
// allows us to specify the enzyme by name. Set methylated flag to true if
// there is lowercase methylated DNA as part of the sequence.
func (enzymeManager EnzymeManager) CutWithEnzymeByName(part Part, directional bool, name string, methylated bool) ([]Fragment, error) {
func CutWithEnzymeByName(part Part, directional bool, name string, methylated bool) ([]Fragment, error) {
// Get the enzyme from the enzyme map
enzyme, err := enzymeManager.GetEnzymeByName(name)
if err != nil {
enzyme, ok := DefaultEnzymes[name]
if !ok {
// Return an error if no enzyme with that name was found
return []Fragment{}, err
return []Fragment{}, errors.New("enzyme not found")
}
// Cut the sequence with the enzyme
return CutWithEnzyme(part, directional, enzyme, methylated), nil
}

// GetEnzymeByName gets the enzyme by its name. If the enzyme manager does not
// contain an enzyme with the provided name, a zero-value Enzyme and an error
// that includes the requested name are returned.
func (enzymeManager EnzymeManager) GetEnzymeByName(name string) (Enzyme, error) {
// Single map lookup: ok is false when the name was never registered.
if enzyme, ok := enzymeManager.enzymeMap[name]; ok {
return enzyme, nil
}
return Enzyme{}, errors.New("Enzyme " + name + " not found")
}

// CutWithEnzyme cuts a given sequence with an enzyme represented by an Enzyme struct.
// If there are methylated parts of the target DNA, set the "methylated" flag to
// true and lowercase ONLY methylated DNA.
Expand Down Expand Up @@ -285,10 +265,14 @@ func CutWithEnzyme(part Part, directional bool, enzyme Enzyme, methylated bool)
// the first fragment WILL be used in the ligation reaction. This function
// is a massive simplification of the original ligation code which can do more.
// If this does not fulfill your needs, please leave an issue in git.
func Ligate(fragments []Fragment, circular bool) (string, error) {
func Ligate(fragments []Fragment, circular bool) (string, []int, error) {
if len(fragments) == 0 {
return "", errors.New("no fragments to ligate")
return "", []int{}, errors.New("no fragments to ligate")
}
// ligationPattern records, in order, the index of each fragment that was
// ligated. Downstream functions use it to analyze ligation events.
var ligationPattern []int
ligationPattern = append(ligationPattern, 0) // first fragment is the first ligation site

finalFragment := fragments[0]
used := make(map[int]bool)
Expand All @@ -303,13 +287,15 @@ func Ligate(fragments []Fragment, circular bool) (string, error) {
finalFragment.ReverseOverhang = fragment.ReverseOverhang
used[i] = true
matchFound = true
ligationPattern = append(ligationPattern, i)
break
}
if !used[i] && finalFragment.ReverseOverhang == transform.ReverseComplement(fragment.ReverseOverhang) {
finalFragment.Sequence += finalFragment.ReverseOverhang + transform.ReverseComplement(fragment.Sequence)
finalFragment.ReverseOverhang = transform.ReverseComplement(fragment.ForwardOverhang)
used[i] = true
matchFound = true
ligationPattern = append(ligationPattern, i)
break
}
}
Expand All @@ -318,11 +304,11 @@ func Ligate(fragments []Fragment, circular bool) (string, error) {
// attempt circularization
if circular {
if finalFragment.ForwardOverhang != finalFragment.ReverseOverhang {
return "", errors.New("does not circularize")
return "", ligationPattern, errors.New("does not circularize")
}
return finalFragment.ForwardOverhang + finalFragment.Sequence, nil
return finalFragment.ForwardOverhang + finalFragment.Sequence, ligationPattern, nil
}
return finalFragment.ForwardOverhang + finalFragment.Sequence + finalFragment.ReverseOverhang, nil
return finalFragment.ForwardOverhang + finalFragment.Sequence + finalFragment.ReverseOverhang, ligationPattern, nil
}

/******************************************************************************
Expand All @@ -334,22 +320,11 @@ Specific cloning functions begin here.
// GoldenGate simulates a GoldenGate cloning reaction. As of right now, we only
// support BsaI, BbsI, BtgZI, PaqCI, and BsmBI. Set methylated flag to true if there
// is lowercase methylated DNA as part of the sequence.
func GoldenGate(sequences []Part, cuttingEnzyme Enzyme, methylated bool) (string, error) {
func GoldenGate(sequences []Part, cuttingEnzyme Enzyme, methylated bool) (string, []int, error) {
var fragments []Fragment
for _, sequence := range sequences {
newFragments := CutWithEnzyme(sequence, true, cuttingEnzyme, methylated)
fragments = append(fragments, newFragments...)
}
return Ligate(fragments, true)
}

// GetBaseRestrictionEnzymes returns a basic slice of common enzymes used in
// Golden Gate Assembly: BsaI, BbsI, BtgZI, PaqCI, and BsmBI. Every call builds
// a new slice and recompiles both regular expressions of each enzyme.
// Eventually, we want to get the data for this list from
// ftp://ftp.neb.com/pub/rebase
func GetBaseRestrictionEnzymes() []Enzyme {
return []Enzyme{
// Positional fields: name, two compiled patterns (the second is the reverse
// complement of the first), two integers, and the recognition site string.
// NOTE(review): the integers are presumably the cut offset and the overhang
// length — confirm against the Enzyme struct definition.
{"BsaI", regexp.MustCompile("GGTCTC"), regexp.MustCompile("GAGACC"), 1, 4, "GGTCTC"},
{"BbsI", regexp.MustCompile("GAAGAC"), regexp.MustCompile("GTCTTC"), 2, 4, "GAAGAC"},
{"BtgZI", regexp.MustCompile("GCGATG"), regexp.MustCompile("CATCGC"), 10, 4, "GCGATG"},
{"PaqCI", regexp.MustCompile("CACCTGC"), regexp.MustCompile("GCAGGTG"), 4, 4, "CACCTGC"},
{"BsmBI", regexp.MustCompile("CGTCTC"), regexp.MustCompile("GAGACG"), 1, 4, "CGTCTC"},
}
}
Loading

0 comments on commit 0cc4f7c

Please sign in to comment.