Skip to content

Commit

Permalink
add Lambda phage tests
Browse files Browse the repository at this point in the history
  • Loading branch information
camilogarciabotero committed Jul 8, 2023
1 parent 45b2359 commit faa4217
Show file tree
Hide file tree
Showing 5 changed files with 711 additions and 4 deletions.
2 changes: 1 addition & 1 deletion src/GeneFinder.jl
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ using BioSequences:
LongSubSeq,
@biore_str,
GeneticCode
using FASTX: FASTA, sequence
using FASTX: FASTA, sequence, FASTAReader
using IterTools: takewhile, iterated
using MarkovChainHammer.Trajectory: generate
using PrecompileTools
Expand Down
17 changes: 15 additions & 2 deletions src/algorithms/findorfs.jl
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,8 @@ function findorfs(sequence::String; alternative_start::Bool = false, min_len::In
end

@testitem "findorfs test" begin
using BioSequences
using BioSequences, FASTX
cd(@__DIR__)

# A random seq to start
seq01 = dna"ATGATGCATGCATGCATGCTAGTAACTAGCTAGCTAGCTAGTAA"
Expand Down Expand Up @@ -94,7 +95,7 @@ end
# Pyrodigal predicts 2 genes from this sequence:
# 1) An alternative start codon (GTG) sequence at 48:347
# 2) A common start codon sequence at 426:590
# On the other hand, the NCBI ORFfinder program predicts 9 ORFS whose length is greater tha 75 nt, from which one has an "outbound" stop
# On the other hand, the NCBI ORFfinder program predicts 9 ORFs whose length is greater than 75 nt, from which one has an "outbound" stop
seq03 = dna"TTCGTCAGTCGTTCTGTTTCATTCAATACGATAGTAATGTATTTTTCGTGCATTTCCGGTGGAATCGTGCCGTCCAGCATAGCCTCCAGATATCCCCTTATAGAGGTCAGAGGGGAACGGAAATCGTGGGATACATTGGCTACAAACTTTTTCTGATCATCCTCGGAACGGGCAATTTCGCTTGCCATATAATTCAGACAGGAAGCCAGATAACCGATTTCATCCTCACTATCGACCTGAAATTCATAATGCATATTACCGGCAGCATACTGCTCTGTGGCATGAGTGATCTTCCTCAGAGGAATATATACGATCTCAGTGAAAAAGATCAGAATGATCAGGGATAGCAGGAACAGGATTGCCAGGGTGATATAGGAAATATTCAGCAGGTTGTTACAGGATTTCTGAATATCATTCATATCAGTATGGATGACTACATAGCCTTTTACCTTGTAGTTGGAGGTAATGGGAGCAAATACAGTAAGTACATCCGAATCAAAATTACCGAAGAAATCACCAACAATGTAATAGGAGCCGCTGGTTACGGTCGAATCAAAATTCTCAATGACAACCACATTCTCCACATCTAAGGGACTATTGGTATCCAGTACCAGTCGTCCGGAGGGATTGATGATGCGAATCTCGGAATTCAGGTAGACCGCCAGGGAGTCCAGCTGCATTTTAACGGTCTCCAAAGTTGTTTCACTGGTGTACAATCCGCCGGCATAGGTTCCGGCGATCAGGGTTGCTTCGGAATAGAGACTTTCTGCCTTTTCCCGGATCAGATGTTCTTTGGTCATATTGGGAACAAAAGTTGTAACAATGATGAAACCAAATACACCAAAAATAAAATATGCGAGTATAAATTTTAGATAAAGTGTTTTTTTCATAACAAATCCTGCTTTTGGTATGACTTAATTACGTACTTCGAATTTATAGCCGATGCCCCAGATGGTGCTGATCTTCCAGTTGGCATGATCCTTGATCTTCTC"
orfs03 = findorfs(seq03, min_len=75)
@test length(orfs03) == 9
Expand All @@ -110,4 +111,16 @@ end
ORF(786:872, '+')
]

# Lambda phage tests
# Compare to https://github.com/jonas-fuchs/viral_orf_finder/blob/master/orf_finder.py
# The Salisbury and Tsourkas (2019) paper used the Lambda phage genome, with 73 CDS and 545 non-CDS ORFs (618 in total), to compare predictions between several gene finder programs
# For a minimal length of 75 nt the following ORFs are predicted:
# orf_finder.py --> 885 (222 complete)
# findorfs (GeneFinder.jl) --> 885
# NCBI ORFfinder --> 375 ORFs
# orfipy --> 375 (`orfipy NC_001416.1.fasta --start ATG --include-stop --min 75`)
NC_001416 = fasta_to_dna("../../test/data/NC_001416.1.fasta")[1]

NC_001416_orfs = findorfs(NC_001416, min_len=75)
@test length(NC_001416_orfs) == 885
end
2 changes: 1 addition & 1 deletion src/helpers.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
Converts a FASTA formatted file (even if it is a multi-fasta) to an array of `LongSequence{DNAAlphabet{4}}` objects.
"""
function fasta_to_dna(input::String)::Vector{LongSequence{DNAAlphabet{4}}{4}}
function fasta_to_dna(input::String)::Vector{LongSequence{DNAAlphabet{4}}}
FASTAReader(open(input)) do reader
return [LongSequence{DNAAlphabet{4}}(sequence(record)) for record in reader]
end
Expand Down
File renamed without changes.
Loading

0 comments on commit faa4217

Please sign in to comment.