From f587e72731bdfebe2e452604707a4efe8906672a Mon Sep 17 00:00:00 2001 From: Koeng101 Date: Sat, 22 Jul 2023 16:56:03 -0700 Subject: [PATCH] Improve fragment interface (#316) * Add checks for reverse sequence * Add reverseComplement checker * add comments * Updated to approx even better neb calculator * improve fragment interface * Improve example, showing how to use existing overhangs --------- Co-authored-by: Tim --- synthesis/fragment/example_test.go | 10 ++++++---- synthesis/fragment/fragment.go | 4 ++-- synthesis/fragment/fragment_test.go | 12 ++++++------ 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/synthesis/fragment/example_test.go b/synthesis/fragment/example_test.go index 15f68077..2f726ada 100644 --- a/synthesis/fragment/example_test.go +++ b/synthesis/fragment/example_test.go @@ -7,13 +7,15 @@ import ( ) // This example shows how to use the fragmenter to fragment a gene in -// preparation for synthesis. +// preparation for synthesis. Inputs are the sequence, the minimum fragment +// length, the maximum fragment length, and a list of overhangs that will +// also be used in the assembly reaction. 
func Example_basic() { lacZ := "ATGACCATGATTACGCCAAGCTTGCATGCCTGCAGGTCGACTCTAGAGGATCCCCGGGTACCGAGCTCGAATTCACTGGCCGTCGTTTTACAACGTCGTGACTGGGAAAACCCTGGCGTTACCCAACTTAATCGCCTTGCAGCACATCCCCCTTTCGCCAGCTGGCGTAATAGCGAAGAGGCCCGCACCGATCGCCCTTCCCAACAGTTGCGCAGCCTGAATGGCGAATGGCGCCTGATGCGGTATTTTCTCCTTACGCATCTGTGCGGTATTTCACACCGCATATGGTGCACTCTCAGTACAATCTGCTCTGATGCCGCATAG" - fragments, _, _ := fragment.Fragment(lacZ, 95, 105) + fragments, _, _ := fragment.Fragment(lacZ, 95, 105, []string{"AAAA"}) fmt.Println(fragments) - // Output: [ATGACCATGATTACGCCAAGCTTGCATGCCTGCAGGTCGACTCTAGAGGATCCCCGGGTACCGAGCTCGAATTCACTGGCCGTCGTTTTACAACGTCGTGACTGG CTGGGAAAACCCTGGCGTTACCCAACTTAATCGCCTTGCAGCACATCCCCCTTTCGCCAGCTGGCGTAATAGCGAAGAGGCCCGCACCGATCGCCCTTCCCAACA AACAGTTGCGCAGCCTGAATGGCGAATGGCGCCTGATGCGGTATTTTCTCCTTACGCATCTGTGCG TGCGGTATTTCACACCGCATATGGTGCACTCTCAGTACAATCTGCTCTGATGCCGCATAG] + // Output: [ATGACCATGATTACGCCAAGCTTGCATGCCTGCAGGTCGACTCTAGAGGATCCCCGGGTACCGAGCTCGAATTCACTGGCCGTCGTTTTACAACGTCGTGACTGG CTGGGAAAACCCTGGCGTTACCCAACTTAATCGCCTTGCAGCACATCCCCCTTTCGCCAGCTGGCGTAATAGCGAAGAGGCCCGCACCGATCGCCCTTCCCAAC CAACAGTTGCGCAGCCTGAATGGCGAATGGCGCCTGATGCGGTATTTTCTCCTTACGCATCTGTGC GTGCGGTATTTCACACCGCATATGGTGCACTCTCAGTACAATCTGCTCTGATGCCGCATAG] } // This example shows how to generate a new overhang onto a list of overhangs. 
@@ -29,7 +31,7 @@ func ExampleNextOverhang() { func ExampleFragment() { lacZ := "ATGACCATGATTACGCCAAGCTTGCATGCCTGCAGGTCGACTCTAGAGGATCCCCGGGTACCGAGCTCGAATTCACTGGCCGTCGTTTTACAACGTCGTGACTGGGAAAACCCTGGCGTTACCCAACTTAATCGCCTTGCAGCACATCCCCCTTTCGCCAGCTGGCGTAATAGCGAAGAGGCCCGCACCGATCGCCCTTCCCAACAGTTGCGCAGCCTGAATGGCGAATGGCGCCTGATGCGGTATTTTCTCCTTACGCATCTGTGCGGTATTTCACACCGCATATGGTGCACTCTCAGTACAATCTGCTCTGATGCCGCATAG" - fragments, efficiency, _ := fragment.Fragment(lacZ, 95, 105) + fragments, efficiency, _ := fragment.Fragment(lacZ, 95, 105, []string{}) fmt.Printf("%s : %f", fragments[1], efficiency) // Output: CTGGGAAAACCCTGGCGTTACCCAACTTAATCGCCTTGCAGCACATCCCCCTTTCGCCAGCTGGCGTAATAGCGAAGAGGCCCGCACCGATCGCCCTTCCCAACA : 1.000000 diff --git a/synthesis/fragment/fragment.go b/synthesis/fragment/fragment.go index 4ccc4ef3..23a59b27 100644 --- a/synthesis/fragment/fragment.go +++ b/synthesis/fragment/fragment.go @@ -176,7 +176,7 @@ func optimizeOverhangIteration(sequence string, minFragmentSize int, maxFragment // choosing fragment ends for optimal assembly efficiency. Since fragments will // be inserted into either a vector or primer binding sites, the first 4 and // last 4 base pairs are the initial overhang set. 
-func Fragment(sequence string, minFragmentSize int, maxFragmentSize int) ([]string, float64, error) { +func Fragment(sequence string, minFragmentSize int, maxFragmentSize int, existingOverhangs []string) ([]string, float64, error) { sequence = strings.ToUpper(sequence) - return optimizeOverhangIteration(sequence, minFragmentSize, maxFragmentSize, []string{}, []string{sequence[:4], sequence[len(sequence)-4:]}) + return optimizeOverhangIteration(sequence, minFragmentSize, maxFragmentSize, []string{}, append([]string{sequence[:4], sequence[len(sequence)-4:]}, existingOverhangs...)) } diff --git a/synthesis/fragment/fragment_test.go b/synthesis/fragment/fragment_test.go index 5183500d..1f59a047 100644 --- a/synthesis/fragment/fragment_test.go +++ b/synthesis/fragment/fragment_test.go @@ -7,7 +7,7 @@ import ( func TestFragment(t *testing.T) { gene := "atgaaaaaatttaactggaagaaaatagtcgcgccaattgcaatgctaattattggcttactaggtggtttacttggtgcctttatcctactaacagcagccggggtatcttttaccaatacaacagatactggagtaaaaacggctaagaccgtctacaccaatataacagatacaactaaggctgttaagaaagtacaaaatgccgttgtttctgtcatcaattatcaagaaggttcatcttcagattctctaaatgacctttatggccgtatctttggcggaggggacagttctgattctagccaagaaaattcaaaagattcagatggtctacaggtcgctggtgaaggttctggagtcatctataaaaaagatggcaaagaagcctacatcgtaaccaataaccatgttgtcgatggggctaaaaaacttgaaatcatgctttcggatggttcgaaaattactggtgaacttgttggtaaagacacttactctgacctagcagttgtcaaagtatcttcagataaaataacaactgttgcagaatttgcagactcaaactcccttactgttggtgaaaaagcaattgctatcggtagcccacttggtaccgaatacgccaactcagtaacagaaggaatcgtttctagccttagccgtactataacgatgcaaaacgataatggtgaaactgtatcaacaaacgctatccaaacagatgcagccattaaccctggtaactctggtggtgccctagtcaatattgaaggacaagttatcggtattaattcaagtaaaatttcatcaacgtctgcagtcgctggtagtgctgttgaaggtatggggtttgccattccatcaaacgatgttgttgaaatcatcaatcaattagaaaaagatggtaaagttacacgaccagcactaggaatctcaatagcagatcttaatagcctttctagcagcgcaacttctaaattagatttaccagatgaggtcaaatccggtgttgttgtcggtagtgttcagaaaggtatgccagctgacggtaaacttcaagaatatgatgttatcactgagattgatggtaagaaaatcagctcaaaaactgatattcaaaccaatctttacagccatagt
atcggagatactatcaaggtaaccttctatcgtggtaaagataagaaaactgtagatcttaaattaacaaaatctacagaagacatatctgattaa" - _, _, err := Fragment(gene, 90, 110) + _, _, err := Fragment(gene, 90, 110, []string{}) if err != nil { t.Errorf(err.Error()) } @@ -16,7 +16,7 @@ func TestFragment(t *testing.T) { func TestUnfragmentable(t *testing.T) { // One should not be able to fragment this polyA := "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" - _, _, err := Fragment(polyA, 40, 80) + _, _, err := Fragment(polyA, 40, 80, []string{}) if err == nil { t.Errorf("polyA should fail to fragment") } @@ -25,12 +25,12 @@ func TestUnfragmentable(t *testing.T) { func TestFragmentSizes(t *testing.T) { // This tests if minSize > maxSize lacZ := "ATGACCATGATTACGCCAAGCTTGCATGCCTGCAGGTCGACTCTAGAGGATCCCCGGGTACCGAGCTCGAATTCACTGGCCGTCGTTTTACAACGTCGTGACTGGGAAAACCCTGGCGTTACCCAACTTAATCGCCTTGCAGCACATCCCCCTTTCGCCAGCTGGCGTAATAGCGAAGAGGCCCGCACCGATCGCCCTTCCCAACAGTTGCGCAGCCTGAATGGCGAATGGCGCCTGATGCGGTATTTTCTCCTTACGCATCTGTGCGGTATTTCACACCGCATATGGTGCACTCTCAGTACAATCTGCTCTGATGCCGCATAG" - _, _, err := Fragment(lacZ, 105, 95) + _, _, err := Fragment(lacZ, 105, 95, []string{}) if err == nil { t.Errorf("Fragment should fail when minFragmentSize > maxFragmentSize") } - _, _, err = Fragment(lacZ, 7, 95) + _, _, err = Fragment(lacZ, 7, 95, []string{}) if err == nil { t.Errorf("Fragment should fail when minFragmentSize < 8") } @@ -40,7 +40,7 @@ func TestFragmentSizes(t *testing.T) { func TestSmallFragmentSize(t *testing.T) { // The following should succeed, but will require setting minFragmentSize = 12 lacZ := 
"ATGACCATGATTACGCCAAGCTTGCATGCCTGCAGGTCGACTCTAGAGGATCCCCGGGTACCGAGCTCGAATTCACTGGCCGTCGTTTTACAACGTCGTGACTGGGAAAACCCTGGCGTTACCCAACTTAATCGCCTTGCAGCACATCCCCCTTTCGCCAGCTGGCGTAATAGCGAAGAGGCCCGCACCGATCGCCCTTCCCAACAGTTGCGCAGCCTGAATGGCGAATGGCGCCTGATGCGGTATTTTCTCCTTACGCATCTGTGCGGTATTTCACACCGCATATGGTGCACTCTCAGTACAATCTGCTCTGATGCCGCATAG" - _, _, err := Fragment(lacZ, 12, 30) + _, _, err := Fragment(lacZ, 12, 30, []string{}) if err != nil { t.Errorf("Got error in small fragmentation: %s", err) } @@ -50,7 +50,7 @@ func TestLongFragment(t *testing.T) { // A regression test for a bug that sometimes fragmented a sequence to // be longer than its max length gene := "GGAGGGTCTCAATGCTGGACGATCGCAAATTCAGCGAACAGGAGCTGGTCCGTCGCAACAAATACAAAACGCTGGTCGAGCAAAACAAAGACCCGTACAAGATTACGAACTGGAAACGCAATACCACCCTGCTGAAACTGAATGAGAAATACAAAGACTATAGCAAGGAGGACCTGTTGAACCTGAATCAAGAACTGGTCGTTGTTGCAGGTCGTATCAAACTGTATCGTGAAGCCGGTAAAAAAGCTGCCTTTGTGAACATTGATGATCAAGACTCCTCTATTCAGTTGTACGTGCGCCTGGATGAGATCGGTGATCAGAGCTTCGAGGATTTCCGCAATTTCGACCTGGGTGACATCATTGGTGTTAAAGGTATCATGATGCGCACCGACCACGGCGAGTTGAGCATCCGTTGTAAGGAAGTCGTGCTGCTGAGCAAGGCCCTGCGTCCGCTGCCGGATAAACACGCGGGCATTCAGGATATTGAGGAAAAGTACCGCCGTCGCTATGTGGACCTGATTATGAATCACGACGTGCGCAAGACGTTCCAGGCGCGTACCAAGATCATTCGTACCTTGCAAAACTTTCTGGATAATAAGGGTTACATGGAGGTCGAAACCCCGATCCTGCATAGCCTGAAGGGTGGCGCGAGCGCGAAACCGTTTATTACCCACTACAATGTGCTGAATACGGATGTGTATCTGCGTATCGCGACCGAGCTGCACCTGAAACGCCTGATTGTTGGCGGTTTCGAGGGTGTGTATGAGATCGGTCGCATCTTTCGCAATGAAGGTATGTCCACGCGTCACAATCCGGAATTCACGTCTATCGAACTGTATGTCGCCTATGAGGACATGTTCTTTTTGATGGATCTGACCGAAGAGATTTTTCGCGTTTGTAATGCCGCAGTCAACAGCTCCAGCATCATTGAGTATAACAACGTGAAAATTGACCTGAGCAAGCCGTTTAAGCGCCTGCATATGGTTGACGGTATTAAACAGGTGACCGGCGTCGACTTCTGGCAGGAGATGACGGTCCAACAGGCTCTGGAGCTGGCCAAAAAGCATAAAGTGCACGTTGAAAAACATCAAGAGTCTGTTGGTCACATTATCAATTTGTTCTATGAGGAGTTCGTGGAGTCCACGATTGTTGAGCCGACGTTCGTGTACGGTCACCCGAAGGAAATCTCTCCGCTGGCTAAGAGCAATCCGTCTGACCCGCGTTTCACGGACCGTTTCGAGCTGTTCATTCTGGGTCGTGAGTATGCGAATGCGTTTAGCGAGCTGAATGACCCGATTGACCAGTACGAACGCTTCAAGGCTCAGATTGAGGAGGAAAGCAAGG
GCAACGATGAAGCCAACGACATGGACATTGATTTCATCGAGGCTCTGGAACACGCCATGCCGCCGACCGCGGGTATTGGTATCGGCATTGATCGCTTGGTTATGCTGCTGACGAATAGCGAATCCATCAAAGACGTGCTGTTGTTCCCGCAAATGAAGCCGCGCGAATGAAGAGCTTAGAGACCCGCT" - frags, _, err := Fragment(gene, 79, 94) + frags, _, err := Fragment(gene, 79, 94, []string{}) if err != nil { t.Errorf(err.Error()) }