Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

linearfold #38

Merged
merged 5 commits into from
Dec 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
894 changes: 5 additions & 889 deletions lib/fold/fold.go

Large diffs are not rendered by default.

317 changes: 43 additions & 274 deletions lib/fold/fold_test.go
Original file line number Diff line number Diff line change
@@ -1,285 +1,54 @@
package fold

import (
"math"
"strings"
"testing"
)

func TestFold(t *testing.T) {
t.Run("FoldCache", func(t *testing.T) {
seq := "ATGGATTTAGATAGAT"
foldContext, err := newFoldingContext(seq, 37.0)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
res, err := Zuker(seq, 37.0)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
seqDg := res.MinimumFreeEnergy()
if err != nil {
t.Fatalf("unexpected error: %v", err)
}

if math.Abs(seqDg-foldContext.unpairedMinimumFreeEnergyW[0][len(seq)-1].energy) > 1 {
t.Errorf("expected values to be within delta of 1, got %v and %v", seqDg, foldContext.unpairedMinimumFreeEnergyW[0][len(seq)-1].energy)
}
})
t.Run("FoldDNA", func(t *testing.T) {
// unafold's estimates for free energy estimates of DNA oligos
unafoldDgs := map[string]float64{
"GGGAGGTCGTTACATCTGGGTAACACCGGTACTGATCCGGTGACCTCCC": -10.94, // three branched structure
"GGGAGGTCGCTCCAGCTGGGAGGAGCGTTGGGGGTATATACCCCCAACACCGGTACTGATCCGGTGACCTCCC": -23.4, // four branched structure
"CGCAGGGAUACCCGCG": -3.8,
"TAGCTCAGCTGGGAGAGCGCCTGCTTTGCACGCAGGAGGT": -6.85,
"GGGGGCATAGCTCAGCTGGGAGAGCGCCTGCTTTGCACGCAGGAGGTCTGCGGTTCGATCCCGCGCGCTCCCACCA": -15.50,
"TGAGACGGAAGGGGATGATTGTCCCCTTCCGTCTCA": -18.10,
"ACCCCCTCCTTCCTTGGATCAAGGGGCTCAA": -3.65,
}

for seq, ufold := range unafoldDgs {
res, err := Zuker(seq, 37.0)
if err != nil {
t.Fatalf("unexpected error for sequence %s: %v", seq, err)
}
d := res.MinimumFreeEnergy()

// accepting a 60% difference
delta := math.Abs(0.6 * math.Min(d, ufold))
if math.Abs(d-ufold) > delta {
t.Errorf("for sequence %s, expected value within delta of %v, got %v and %v", seq, delta, d, ufold)
}
}
})
t.Run("FoldRNA", func(t *testing.T) {
// unafold's estimates for free energy estimates of RNA oligos
// most tests available at https://github.com/jaswindersingh2/SPOT-RNA/blob/master/sample_inputs/batch_seq.fasta
unafoldDgs := map[string]float64{
"ACCCCCUCCUUCCUUGGAUCAAGGGGCUCAA": -9.5,
"AAGGGGUUGGUCGCCUCGACUAAGCGGCUUGGAAUUCC": -10.1,
"UUGGAGUACACAACCUGUACACUCUUUC": -4.3,
"AGGGAAAAUCCC": -3.3,
"GCUUACGAGCAAGUUAAGCAAC": -4.6,
"UGGGAGGUCGUCUAACGGUAGGACGGCGGACUCUGGAUCCGCUGGUGGAGGUUCGAGUCCUCCCCUCCCAGCCA": -32.8,
"GGGCGAUGAGGCCCGCCCAAACUGCCCUGAAAAGGGCUGAUGGCCUCUACUG": -20.7,
"GGGGGCAUAGCUCAGCUGGGAGAGCGCCUGCUUUGCACGCAGGAGGUCUGCGGUUCGAUCCCGCGCGCUCCCACCA": -31.4,
}

for seq, ufold := range unafoldDgs {
res, err := Zuker(seq, 37.0)
if err != nil {
t.Fatalf("unexpected error for sequence %s: %v", seq, err)
}
d := res.MinimumFreeEnergy()

// accepting a 50% difference
delta := math.Abs(0.5 * math.Min(d, ufold))
if math.Abs(d-ufold) > delta {
t.Errorf("for sequence %s, expected value within delta of %v, got %v and %v", seq, delta, d, ufold)
}
}
})
t.Run("DotBracket", func(t *testing.T) {
seq := "GGGAGGTCGTTACATCTGGGTAACACCGGTACTGATCCGGTGACCTCCC"
res, err := Zuker(seq, 37.0)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}

expected := "((((((((.((((......))))..((((.......)))).))))))))"
if res.DotBracket() != expected {
t.Errorf("expected %s, got %s", expected, res.DotBracket())
}
})
t.Run("multibranch", func(t *testing.T) {
seq := "GGGAGGTCGTTACATCTGGGTAACACCGGTACTGATCCGGTGACCTCCC" // three branch

res, err := Zuker(seq, 37.0)
if err != nil {
t.Fatalf("Error: %v", err) // Fatal stops the test immediately
}

found := false
foundIJ := subsequence{7, 41}
for _, s := range res.structs {
if strings.Contains(s.description, "BIFURCATION") {
for _, ij := range s.inner {
if ij == foundIJ {
found = true
}
}
}
}
if !found {
t.Errorf("not found a BIFURCATION with (7, 41) in ij") // Error continues the test
}
})
t.Run("pair", func(t *testing.T) {
seq := "ATGGAATAGTG"
result := pair(seq, 0, 1, 9, 10)
expected := "AT/TG"
if result != expected {
t.Errorf("Expected %s, got %s", expected, result)
}
})
t.Run("stack", func(t *testing.T) {
seq := "GCUCAGCUGGGAGAGC"
temp := 37.0
foldContext, err := newFoldingContext(seq, temp)
if err != nil {
t.Fatalf("Error creating folding context: %v", err)
}

e := stack(1, 2, 14, 13, foldContext)
expectedDelta := -2.1
tolerance := 0.1
if e < expectedDelta-tolerance || e > expectedDelta+tolerance {
t.Errorf("Expected value to be within %.1f +/- %.1f, got %.2f", expectedDelta, tolerance, e)
}
})
t.Run("Bulge", func(t *testing.T) {
// mock bulge of CAT on one side and AG on other
// from pg 429 of SantaLucia, 2004
seq := "ACCCCCATCCTTCCTTGAGTCAAGGGGCTCAA"
foldContext, err := newFoldingContext(seq, 37)
if err != nil {
t.Fatalf("Error creating folding context: %v", err)
}

pairDg, err := Bulge(5, 7, 18, 17, foldContext)
if err != nil {
t.Fatalf("Error in Bulge function: %v", err)
}
expectedDelta := 3.22
tolerance := 0.4
if pairDg < expectedDelta-tolerance || pairDg > expectedDelta+tolerance {
t.Errorf("Expected value to be within %.2f +/- %.1f, got %.2f", expectedDelta, tolerance, pairDg)
}
})
t.Run("hairpin", func(t *testing.T) {
// Test case 1
seq := "ACCCCCTCCTTCCTTGGATCAAGGGGCTCAA"
i, j := 11, 16

foldContext, err := newFoldingContext(seq, 37)
if err != nil {
t.Fatalf("Error creating folding context: %v", err)
}

hairpinDg, err := hairpin(i, j, foldContext)
if err != nil {
t.Fatalf("Error in hairpin function: %v", err)
}
if hairpinDg < 4.3-1.0 || hairpinDg > 4.3+1.0 {
t.Errorf("hairpinDg = %v, want %v +/- %v", hairpinDg, 4.3, 1.0)
}

// Test case 2
seq = "ACCCGCAAGCCCTCCTTCCTTGGATCAAGGGGCTCAA"
i, j = 3, 8

foldContext, err = newFoldingContext(seq, 37)
if err != nil {
t.Fatalf("Error creating folding context: %v", err)
}

hairpinDg, err = hairpin(i, j, foldContext)
if err != nil {
t.Fatalf("Error in hairpin function: %v", err)
}
if hairpinDg < 0.67-0.1 || hairpinDg > 0.67+0.1 {
t.Errorf("hairpinDg = %v, want %v +/- %v", hairpinDg, 0.67, 0.1)
}

// Test case 3
seq = "CUUUGCACG"
i, j = 0, 8

foldContext, err = newFoldingContext(seq, 37)
if err != nil {
t.Fatalf("Error creating folding context: %v", err)
}

hairpinDg, err = hairpin(i, j, foldContext)
if err != nil {
t.Fatalf("Error in hairpin function: %v", err)
}
if hairpinDg < 4.5-0.2 || hairpinDg > 4.5+0.2 {
t.Errorf("hairpinDg = %v, want %v +/- %v", hairpinDg, 4.5, 0.2)
}
})
t.Run("internalLoop", func(t *testing.T) {
seq := "ACCCCCTCCTTCCTTGGATCAAGGGGCTCAA"
i := 6
j := 21

foldContext, err := newFoldingContext(seq, 37)
if err != nil {
t.Fatalf("Error creating folding context: %v", err)
}

dg, err := internalLoop(i, i+4, j, j-4, foldContext)
if err != nil {
t.Fatalf("Error in internalLoop function: %v", err)
}

expectedDg := 3.5
delta := 0.1
if dg < expectedDg-delta || dg > expectedDg+delta {
t.Errorf("internalLoop() = %v, want %v +/- %v", dg, expectedDg, delta)
}
})
t.Run("W", func(t *testing.T) {
// Test case 1
seq := "GCUCAGCUGGGAGAGC"
i, j := 0, 15

foldContext, err := newFoldingContext(seq, 37)
if err != nil {
t.Fatalf("Error creating folding context: %v", err)
}

struc, err := unpairedMinimumFreeEnergyW(i, j, foldContext)
if err != nil {
t.Fatalf("Error in unpairedMinimumFreeEnergyW function: %v", err)
}
if struc.energy < -3.8-0.2 || struc.energy > -3.8+0.2 {
t.Errorf("struc.energy = %v, want %v +/- %v", struc.energy, -3.8, 0.2)
}

// Test case 2
seq = "CCUGCUUUGCACGCAGG"
i, j = 0, 16
"github.com/koeng101/dnadesign/lib/fold/linearfold"
"github.com/koeng101/dnadesign/lib/fold/zuker"
)

foldContext, err = newFoldingContext(seq, 37)
if err != nil {
t.Fatalf("Error creating folding context: %v", err)
}
func TestCONTRAfold(t *testing.T) {
var newFolder SequenceFolder //nolint:gosimple
newFolder = linearfold.NewDefaultCONTRAfoldV2FoldWrapper()
result, score, _ := newFolder.Fold("UGAGUUCUCGAUCUCUAAAAUCG", 37.0)

expectedResult := "......................."
if result != expectedResult {
t.Errorf("Got unexpected result. Expected: %s\nGot: %s", expectedResult, result)
}
expectedScore := -0.22376699999999988
if !(score >= -0.22600466999999988 && score <= -0.22152932999999989) {
t.Errorf("Got unexpected score. Expected: %f +/- 1%%\nGot: %f", expectedScore, score)
}
}

struc, err = unpairedMinimumFreeEnergyW(i, j, foldContext)
if err != nil {
t.Fatalf("Error in unpairedMinimumFreeEnergyW function: %v", err)
}
if struc.energy < -6.4-0.2 || struc.energy > -6.4+0.2 {
t.Errorf("struc.energy = %v, want %v +/- %v", struc.energy, -6.4, 0.2)
}
func TestViennaRNAFold(t *testing.T) {
var newFolder SequenceFolder //nolint:gosimple
newFolder = linearfold.NewDefaultViennaRnaFoldWrapper()
result, score, _ := newFolder.Fold("UCUAGACUUUUCGAUAUCGCGAAAAAAAAU", 37.0)

expectedResult := ".......((((((......))))))....."
if result != expectedResult {
t.Errorf("Got unexpected result. Expected: %s\nGot: %s", expectedResult, result)
}
expectedScore := -3.90
if !(score >= -3.939 && score <= -3.861) {
t.Errorf("Got unexpected score. Expected: %f +/- 1%%\nGot: %f", expectedScore, score)
}
}

// Test case 3
seq = "GCGGUUCGAUCCCGC"
i, j = 0, 14
func TestZuker(t *testing.T) {
var newFolder SequenceFolder //nolint:gosimple
newFolder = zuker.NewZukerFoldWrapper()
result, score, _ := newFolder.Fold("ACCCCCUCCUUCCUUGGAUCAAGGGGCUCAA", 37.0)

foldContext, err = newFoldingContext(seq, 37)
if err != nil {
t.Fatalf("Error creating folding context: %v", err)
}
expectedResult := ".((((.(((......)))....))))"
if result != expectedResult {
t.Errorf("Got unexpected result. Expected: %s\nGot: %s", expectedResult, result)
}

struc, err = unpairedMinimumFreeEnergyW(i, j, foldContext)
if err != nil {
t.Fatalf("Error in unpairedMinimumFreeEnergyW function: %v", err)
}
if struc.energy < -4.2-0.2 || struc.energy > -4.2+0.2 {
t.Errorf("struc.energy = %v, want %v +/- %v", struc.energy, -4.2, 0.2)
}
})
expectedScore := -9.422465
if !(score >= -9.51668965 && score <= -9.32824035) {
t.Errorf("Got unexpected score. Expected: %f +/- 1%%\nGot: %f", expectedScore, score)
}
}
44 changes: 44 additions & 0 deletions lib/fold/linearfold/folder.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
package linearfold

import (
"github.com/koeng101/dnadesign/lib/fold/mfe"
"github.com/koeng101/dnadesign/lib/fold/mfe/energy_params"
)

/*
These interfaces are here to satisfy the `SequenceFolder` interface, which
looks like this:

type SequenceFolder interface {
Fold(sequence string, temp float64) (dotBracket string, score float64, error)
}
*/

type CONTRAfoldV2FoldWrapper struct {
BeamSize int
}

type ViennaRNAFoldWrapper struct {
EnergyParamsSet energy_params.EnergyParamsSet
DanglingEndsModel mfe.DanglingEndsModel
BeamSize int
}

func NewDefaultCONTRAfoldV2FoldWrapper() CONTRAfoldV2FoldWrapper {
return CONTRAfoldV2FoldWrapper{BeamSize: DefaultBeamSize}
}

func NewDefaultViennaRnaFoldWrapper() ViennaRNAFoldWrapper {
return ViennaRNAFoldWrapper{EnergyParamsSet: DefaultEnergyParamsSet, DanglingEndsModel: mfe.DefaultDanglingEndsModel, BeamSize: DefaultBeamSize}
}

func (c CONTRAfoldV2FoldWrapper) Fold(sequence string, temp float64) (dotBracket string, score float64, err error) {
// Assuming CONTRAfoldV2 is adjusted to accept a temperature parameter and return an error
dotBracket, score = CONTRAfoldV2(sequence, c.BeamSize)
return dotBracket, score, nil
}

func (v ViennaRNAFoldWrapper) Fold(sequence string, temp float64) (dotBracket string, score float64, err error) {
dotBracket, score = ViennaRNAFold(sequence, temp, v.EnergyParamsSet, v.DanglingEndsModel, v.BeamSize)
return dotBracket, score, nil
}
Loading
Loading