From 16edd6029080ade3343fe2e88eb92a4668e25314 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Camilo=20Garc=C3=ADa?= <ca.garcia2@uniandes.edu.co>
Date: Fri, 17 May 2024 21:58:20 -0500
Subject: [PATCH 01/40] First steps towards ORFs as GenomicIntervals

---
 Manifest.toml                 | 52 ++++++++++++++++++-
 Project.toml                  |  2 +
 src/GeneFinder.jl             |  1 +
 src/algorithms/naivefinder.jl | 21 +++++---
 src/extended.jl               | 18 +++++--
 src/findorfs.jl               |  1 +
 src/types.jl                  | 98 +++++++++++++++++++++++++++++------
 7 files changed, 165 insertions(+), 28 deletions(-)

diff --git a/Manifest.toml b/Manifest.toml
index 030f7d6..65da618 100644
--- a/Manifest.toml
+++ b/Manifest.toml
@@ -2,7 +2,7 @@
 
 julia_version = "1.10.3"
 manifest_format = "2.0"
-project_hash = "b881dd48d80d6b19dfa1756a0b2d6e2e42d93e33"
+project_hash = "514456eec9673906fe896a3ae36685951a71c19f"
 
 [[deps.Automa]]
 deps = ["PrecompileTools", "TranscodingStreams"]
@@ -10,6 +10,9 @@ git-tree-sha1 = "588e0d680ad1d7201d4c6a804dcb1cd9cba79fbb"
 uuid = "67c07d97-cdcb-5c2c-af73-a7f9c32a568b"
 version = "1.0.3"
 
+[[deps.Base64]]
+uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
+
 [[deps.BioGenerics]]
 deps = ["TranscodingStreams"]
 git-tree-sha1 = "7bbc085aebc6faa615740b63756e4986c9e85a70"
@@ -42,6 +45,25 @@ git-tree-sha1 = "e32a61f028b823a172c75e26865637249bb30dff"
 uuid = "3c28c6f8-a34d-59c4-9654-267d177fcfa9"
 version = "5.1.3"
 
+[[deps.Compat]]
+deps = ["TOML", "UUIDs"]
+git-tree-sha1 = "b1c55339b7c6c350ee89f2c1604299660525b248"
+uuid = "34da2185-b29b-5c13-b0c7-acf172513d20"
+version = "4.15.0"
+
+    [deps.Compat.extensions]
+    CompatLinearAlgebraExt = "LinearAlgebra"
+
+    [deps.Compat.weakdeps]
+    Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
+    LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+
+[[deps.DataStructures]]
+deps = ["Compat", "InteractiveUtils", "OrderedCollections"]
+git-tree-sha1 = "1d0a14036acb104d9e89698bd408f63ab58cdc82"
+uuid = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
+version = "0.18.20"
+
 [[deps.Dates]]
 deps = ["Printf"]
 uuid = "ade2ca70-3891-5945-98fb-dc099432e06a"
@@ -56,11 +78,35 @@ weakdeps = ["BioSequences"]
     [deps.FASTX.extensions]
     BioSequencesExt = "BioSequences"
 
+[[deps.GenomicFeatures]]
+deps = ["BioGenerics", "DataStructures", "IntervalTrees"]
+git-tree-sha1 = "720844f71f118dd2ab4898a89b2b4feca7730d81"
+uuid = "899a7d2d-5c61-547b-bef9-6698a8d05446"
+version = "3.0.0"
+
+[[deps.InteractiveUtils]]
+deps = ["Markdown"]
+uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
+
+[[deps.IntervalTrees]]
+git-tree-sha1 = "dc3b97bb5c9cb7c437f74027309f2c2f09a82aaf"
+uuid = "524e6230-43b7-53ae-be76-1e9e4d08d11b"
+version = "1.1.0"
+
 [[deps.IterTools]]
 git-tree-sha1 = "42d5f897009e7ff2cf88db414a389e5ed1bdd023"
 uuid = "c8e1da08-722c-5040-9ed9-7db0dc04731e"
 version = "1.10.0"
 
+[[deps.Markdown]]
+deps = ["Base64"]
+uuid = "d6f4376e-aef5-505a-96c1-9c027394607a"
+
+[[deps.OrderedCollections]]
+git-tree-sha1 = "dfdf5519f235516220579f949664f1bf44e741c5"
+uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
+version = "1.6.3"
+
 [[deps.PrecompileTools]]
 deps = ["Preferences"]
 git-tree-sha1 = "5aa36f7049a63a1528fe8f7c3f2113413ffd4e1f"
@@ -117,6 +163,10 @@ git-tree-sha1 = "29509c4862bfb5da9e76eb6937125ab93986270a"
 uuid = "7200193e-83a8-5a55-b20d-5d36d44a0795"
 version = "1.1.2"
 
+[[deps.UUIDs]]
+deps = ["Random", "SHA"]
+uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
+
 [[deps.Unicode]]
 uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"
 
diff --git a/Project.toml b/Project.toml
index fecb45f..cae9ace 100644
--- a/Project.toml
+++ b/Project.toml
@@ -7,6 +7,7 @@ version = "0.4.0"
 BioMarkovChains = "f861b655-cb5f-42ce-b66a-341b542d4f2c"
 BioSequences = "7e6ae17a-c86d-528c-b3b9-7f778a29fe59"
 FASTX = "c2308a5c-f048-11e8-3e8a-31650f418d12"
+GenomicFeatures = "899a7d2d-5c61-547b-bef9-6698a8d05446"
 IterTools = "c8e1da08-722c-5040-9ed9-7db0dc04731e"
 PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a"
 
@@ -14,6 +15,7 @@ PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a"
 BioMarkovChains = "0.9"
 BioSequences = "3"
 FASTX = "2"
+GenomicFeatures = "3"
 IterTools = "1.4"
 PrecompileTools = "1"
 julia = "1"
diff --git a/src/GeneFinder.jl b/src/GeneFinder.jl
index 7a8c771..b52e45d 100644
--- a/src/GeneFinder.jl
+++ b/src/GeneFinder.jl
@@ -25,6 +25,7 @@ using BioMarkovChains: BioMarkovChain, dnaseqprobability, ECOLICDS, ECOLINOCDS,
 using FASTX: FASTAReader, sequence
 using IterTools: takewhile, iterated
 using PrecompileTools: @setup_workload, @compile_workload
+using GenomicFeatures: GenomicFeatures, AbstractGenomicInterval, GenomicInterval, Strand, summary, groupname, leftposition, rightposition, strand, metadata, STRAND_POS
 
 include("algorithms/naivefinder.jl")
 include("types.jl")
diff --git a/src/algorithms/naivefinder.jl b/src/algorithms/naivefinder.jl
index c3dd114..94f7af1 100644
--- a/src/algorithms/naivefinder.jl
+++ b/src/algorithms/naivefinder.jl
@@ -1,4 +1,6 @@
-export naivefinder, naivefinderscored, log_odds_ratio_decision_rule, lordr
+export naivefinder, naivefinderscored, log_odds_ratio_decision_rule, lordr, lors
+
+const lors = log_odds_ratio_score
 
 """
     locationiterator(sequence::NucleicSeqOrView{DNAAlphabet{N}}; alternative_start::Bool=false) where {N}
@@ -48,11 +50,13 @@ function naivefinder(
     sequence::NucleicSeqOrView{DNAAlphabet{N}};
     alternative_start::Bool = false,
     min_len::Int64 = 6,
+    scheme::Union{Nothing, Function} = nothing,
     kwargs...
 ) where {N}
     seqlen = length(sequence)
     framedict = Dict(0 => 3, 1 => 1, 2 => 2)
     orfs = Vector{ORF}()
+    id = 0
     for strand in ('+', '-')
         seq = strand == '-' ? reverse_complement(sequence) : sequence
 
@@ -61,7 +65,8 @@ function naivefinder(
                 frame = strand == '+' ? framedict[location.start % 3] : framedict[(seqlen - location.stop + 1) % 3]
                 start = strand == '+' ? location.start : seqlen - location.stop + 1
                 stop = start + length(location) - 1
-                push!(orfs, ORF(start:stop, strand, frame, 0.0))
+                id += 1
+                push!(orfs, ORF("$(id)", start:stop, strand, frame, NaiveFinder(), scheme))
             end
         end
     end
@@ -91,12 +96,13 @@ function naivefinderscored(
     sequence::NucleicSeqOrView{DNAAlphabet{N}};
     alternative_start::Bool = false,
     min_len::Int64 = 6,
-    scoringscheme::BioMarkovChain = ECOLICDS, # ECOLINOCDS
+    model::BioMarkovChain = ECOLICDS,
     kwargs...
 ) where {N}
     seqlen = length(sequence)
     framedict = Dict(0 => 3, 1 => 1, 2 => 2)
     orfs = Vector{ORF}()
+    id = 0
     for strand in ('+', '-')
         seq = strand == '-' ? reverse_complement(sequence) : sequence
 
@@ -106,8 +112,9 @@ function naivefinderscored(
                 start = strand == '+' ? location.start : seqlen - location.stop + 1
                 stop = start + length(location) - 1
                 # score = -10log10(dnaseqprobability(seq[start:stop], scoringscheme))
-                score = log_odds_ratio_score(seq[start:stop], scoringscheme)
-                push!(orfs, ORF(start:stop, strand, frame, score))
+                score = log_odds_ratio_score(seq[start:stop], model)
+                id += 1
+                push!(orfs, ORF("$(id)", start:stop, strand, frame, NaiveFinderScored(), lors, score))
             end
         end
     end
@@ -154,7 +161,7 @@ sequence = dna"ATGGCATCTAG"
 iscoding(sequence)  # Returns: true or false
 ```
 """
-function log_odds_ratio_decision_rule( #log_odds_ratio_decision lordr/cudr/kfdr/aadr
+function lordr( #log_odds_ratio_decision, also lordr/cudr/kfdr/aadr
     sequence::NucleicSeqOrView{DNAAlphabet{N}};
     codingmodel::BioMarkovChain = ECOLICDS,
     noncodingmodel::BioMarkovChain = ECOLINOCDS,
@@ -176,7 +183,7 @@ function log_odds_ratio_decision_rule( #log_odds_ratio_decision lordr/cudr/kfdr/
     end
 end
 
-const lordr = log_odds_ratio_decision_rule # criteria
+const log_odds_ratio_decision_rule = lordr # criteria
 
 ## Another alternative:
 
diff --git a/src/extended.jl b/src/extended.jl
index d622866..4d08a56 100644
--- a/src/extended.jl
+++ b/src/extended.jl
@@ -2,7 +2,9 @@
 import Base: isless, iterate, sort, getindex
 
 
-Base.isless(a::ORF, b::ORF) = isless(a.location, b.location)
+# Base.isless(a::ORF, b::ORF) = isless(a.location, b.location)
+Base.isless(a::ORF, b::ORF) = isless(a.first:a.last, b.first:b.last)
+# Base.isless(a::ORF, b::ORF) = isless(a.location, b.location)
 # Base.isless(a::ORF, b::ORF) = isless(a.score, b.score)
 
 # Base.sort(v::Vector{<:ORF}; kwargs...) = sort(v, by = _orf_sort_key)
@@ -12,11 +14,19 @@ Base.isless(a::ORF, b::ORF) = isless(a.location, b.location)
 #TODOs: how to make more robust the getindex method? confroning the frames?
 # Base.getindex(sequence::NucleicSeqOrView{A}, orf::ORF) where {A} = orf.strand == '+' ? (@view sequence[orf.location]) : reverse_complement(@view sequence[orf.location])
 
+# function getindex(sequence::NucleicSeqOrView{A}, orf::ORF) where {A}
+#     if orf.strand == '+'
+#         return @view sequence[orf.location]
+#     else
+#         return reverse_complement(@view sequence[orf.location])
+#     end
+# end
+
 function getindex(sequence::NucleicSeqOrView{A}, orf::ORF) where {A}
-    if orf.strand == '+'
-        return @view sequence[orf.location]
+    if orf.strand == '+' || orf.strand == STRAND_POS
+        return @view sequence[orf.first:orf.last]
     else
-        return reverse_complement(@view sequence[orf.location])
+        return reverse_complement(@view sequence[orf.first:orf.last])
     end
 end
 
diff --git a/src/findorfs.jl b/src/findorfs.jl
index 34a6a38..76d8f38 100644
--- a/src/findorfs.jl
+++ b/src/findorfs.jl
@@ -54,6 +54,7 @@ function findorfs(
 end
 
 
+
 ### Some possible ideas:
 
 # function findorfs(
diff --git a/src/types.jl b/src/types.jl
index 1f4e5d7..73d6613 100644
--- a/src/types.jl
+++ b/src/types.jl
@@ -50,23 +50,23 @@ The ORF struct represents an open reading frame in a DNA sequence. It has two fi
 - `frame`: is an Int type indicating the reading frame of the ORF. The frame is the position of the first nucleotide of the codon that starts the ORF, relative to the start of the sequence. It can be 1, 2, or 3.
 - `score`: is a Union{Nothing, Float64} type indicating the score of the ORF. It can be a Float64 or nothing.
 """
-struct ORF <: AbstractGene # could also be mutable so that scores can be updated later, but tests fails
-    #TODOs:  location might be a complex Union allowing UnitRange or a Join of ranges (e.g. 1..100, 200..300)?
-    location::UnitRange{Int64}  # Note that it is also called position for gene struct in GenomicAnotations
-    strand::Char
-    frame::Int # Use Int64 instead of Int
-    score::Union{Nothing, Float64} # Add score field shold be a namedtuple with the score and the method used to score
-    # rbs::Union{Nothing, Vector{UnitRange{Int64}}} # Add RBS field  see https://github.com/deprekate/PHANOTATE/blob/c77e80caef8dc3264f7dc698b087bcd486216bcb/phanotate_modules/functions.py#L48
-
-    function ORF(location::UnitRange{Int64}, strand::Char, frame::Int, score::Union{Nothing, Float64} = nothing)
-        @assert frame in (1, 2, 3) "Invalid frame value. Frame must be 1, 2, or 3."
-        @assert strand in ('+', '-') "Invalid strand value. Strand must be '+' or '-'."
-        @assert location[1] < location[end] "Invalid location. Start must be less than stop."
-        @assert score isa Union{Nothing, Float64} "Invalid score value. Score must be a Float64 or nothing."
+# struct ORF <: AbstractGene # could also be mutable so that scores can be updated later, but tests fails
+#     #TODOs:  location might be a complex Union allowing UnitRange or a Join of ranges (e.g. 1..100, 200..300)?
+#     location::UnitRange{Int64}  # Note that it is also called position for gene struct in GenomicAnotations
+#     strand::Char
+#     frame::Int # Use Int64 instead of Int
+#     score::Union{Nothing, Float64} # Add score field shold be a namedtuple with the score and the method used to score
+#     # rbs::Union{Nothing, Vector{UnitRange{Int64}}} # Add RBS field  see https://github.com/deprekate/PHANOTATE/blob/c77e80caef8dc3264f7dc698b087bcd486216bcb/phanotate_modules/functions.py#L48
+
+#     function ORF(location::UnitRange{Int64}, strand::Char, frame::Int, score::Union{Nothing, Float64} = nothing)
+#         @assert frame in (1, 2, 3) "Invalid frame value. Frame must be 1, 2, or 3."
+#         @assert strand in ('+', '-') "Invalid strand value. Strand must be '+' or '-'."
+#         @assert location[1] < location[end] "Invalid location. Start must be less than stop."
+#         @assert score isa Union{Nothing, Float64} "Invalid score value. Score must be a Float64 or nothing."
     
-        return new(location, strand, frame, score)
-    end
-end
+#         return new(location, strand, frame, score)
+#     end
+# end
 
 ## Interfaces for gene finders
 
@@ -80,3 +80,69 @@ struct NaiveFinderScored <: GeneFinderMethod end
 abstract type GeneScoringScheme end
 struct NaiveScoringScheme <: GeneScoringScheme end
 
+
+export ORF
+
+struct ORF{F} <: GenomicFeatures.AbstractGenomicInterval{F}
+    groupname::String
+    first::Int64
+    last::Int64
+    strand::Strand
+    frame::Int
+    finder::F
+    scheme::Union{Nothing, Function}
+    score::Union{Nothing, Float64}
+    rbs::Union{Nothing, Vector{UnitRange{Int64}}} # Add RBS field 
+end
+
+function ORF(
+    groupname::String,
+    first::Int64,
+    last::Int64,
+    strand::Union{Strand,Char},
+    frame::Int,
+    finder::F,
+    scheme::Union{Nothing, Function}=nothing,
+    score::Union{Nothing, Float64}=nothing,
+    rbs::Union{Nothing, Vector{UnitRange{Int64}}}=nothing
+) where {F <: GeneFinderMethod}
+    return ORF{F}(groupname, first, last, strand, frame, finder, scheme, score, rbs)
+end
+
+function ORF(
+    id::String,
+    location::UnitRange{Int64},
+    strand::Union{Strand,Char},
+    frame::Int, finder::F,
+    scheme::Union{Nothing, Function}=nothing,
+    score::Union{Nothing, Float64}=nothing,
+    rbs::Union{Nothing, Vector{UnitRange{Int64}}}=nothing
+) where {F <: GeneFinderMethod}
+    return ORF{F}(id, first(location), last(location), strand, frame, finder, scheme, score, rbs)
+end
+
+function frame(i::ORF)
+    return i.frame
+end
+
+function scheme(i::ORF)
+    return i.scheme
+end
+
+function score(i::ORF)
+    return i.score
+end
+
+function finder(i::ORF)
+    return i.finder
+end
+
+function Base.show(io::IO, i::ORF)
+    if get(io, :compact, false)
+        print(io, groupname(i), ":", leftposition(i), "-", rightposition(i))
+    else
+        # print(io, "ORF{$(typeof(finder(i))),$(scheme(i))}($(groupname(i)), $(leftposition(i)):$(rightposition(i)), $(strand(i)), $(frame(i)), $(round(score(i), digits=5)))")
+        print(io, "ORF{$(typeof(finder(i)))}($(groupname(i)), $(leftposition(i)):$(rightposition(i)), $(strand(i)), $(frame(i)), $(score(i)))")
+        # println(io, " ", groupname(i), " ", leftposition(i), ":", rightposition(i), ", ", strand(i), ", ", frame(i), ", score: $(round(score(i), digits=2))")
+    end
+end
\ No newline at end of file

From d9cc396cf44bbab0a8c661f526eb5bd7c6ae313e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Camilo=20Garc=C3=ADa?= <ca.garcia2@uniandes.edu.co>
Date: Fri, 17 May 2024 23:38:02 -0500
Subject: [PATCH 02/40] Some io fixes, and tests

---
 src/algorithms/naivefinder.jl |   4 +-
 src/io.jl                     | 164 +++++++++++++++++-----------------
 src/iscoding.jl               |   4 +-
 src/types.jl                  |   3 +-
 test/findorfstest.jl          |   6 +-
 5 files changed, 91 insertions(+), 90 deletions(-)

diff --git a/src/algorithms/naivefinder.jl b/src/algorithms/naivefinder.jl
index 94f7af1..d013e2b 100644
--- a/src/algorithms/naivefinder.jl
+++ b/src/algorithms/naivefinder.jl
@@ -66,7 +66,7 @@ function naivefinder(
                 start = strand == '+' ? location.start : seqlen - location.stop + 1
                 stop = start + length(location) - 1
                 id += 1
-                push!(orfs, ORF("$(id)", start:stop, strand, frame, NaiveFinder(), scheme))
+                push!(orfs, ORF("ORF$(id)", start:stop, strand, frame, NaiveFinder(), scheme))
             end
         end
     end
@@ -114,7 +114,7 @@ function naivefinderscored(
                 # score = -10log10(dnaseqprobability(seq[start:stop], scoringscheme))
                 score = log_odds_ratio_score(seq[start:stop], model)
                 id += 1
-                push!(orfs, ORF("$(id)", start:stop, strand, frame, NaiveFinderScored(), lors, score))
+                push!(orfs, ORF("ORF$(id)", start:stop, strand, frame, NaiveFinderScored(), lors, score))
             end
         end
     end
diff --git a/src/io.jl b/src/io.jl
index 131a710..9e0a88f 100644
--- a/src/io.jl
+++ b/src/io.jl
@@ -18,7 +18,7 @@ Write BED data to a file.
 - `min_len::Int64=6`: The minimum length that a CDS must have in order to be included in the output file. Default is `6`.
 """
 function write_orfs_bed(
-    input::NucleicSeqOrView{DNAAlphabet{N}},
+    input::Union{String,NucleicSeqOrView{DNAAlphabet{N}}},
     output::Union{IOStream, IOBuffer},
     method::M;
     alternative_start::Bool = false,
@@ -26,12 +26,12 @@ function write_orfs_bed(
 ) where {N , M<:GeneFinderMethod}
     orfs = findorfs(input, method; alternative_start, min_len)
     @inbounds for orf in orfs
-        println(output, orf.location.start, "\t", orf.location.stop, "\t", orf.strand, "\t", orf.frame)
+        println(output, orf.first, "\t", orf.last, "\t", orf.strand, "\t", orf.frame)
     end
 end
 
 function write_orfs_bed(
-    input::NucleicSeqOrView{DNAAlphabet{N}},
+    input::Union{String,NucleicSeqOrView{DNAAlphabet{N}}},
     output::String,
     method::M;
     alternative_start::Bool = false,
@@ -40,40 +40,40 @@ function write_orfs_bed(
     orfs = findorfs(input, method; alternative_start, min_len)
     open(output, "w") do f
         @inbounds for orf in orfs
-            write(f, "$(orf.location.start)\t$(orf.location.stop)\t$(orf.strand)\t$(orf.frame)\n")
+            write(f, "$(orf.first)\t$(orf.last)\t$(orf.strand)\t$(orf.frame)\n")
         end
     end
 end
 
-function write_orfs_bed(
-    input::String,
-    output::Union{IOStream, IOBuffer},
-    method::M;
-    alternative_start::Bool = false,
-    min_len::Int64 = 6
-) where {M<:GeneFinderMethod}
-    input = fasta_to_dna(input)[1] # rewrite input to be a DNA sequence
-    orfs = findorfs(input, method; alternative_start, min_len)
-    @inbounds for orf in orfs
-        println(output, orf.location.start, "\t", orf.location.stop, "\t", orf.strand, "\t", orf.frame)
-    end
-end
-
-function write_orfs_bed(
-    input::String,
-    output::String,
-    method::M;
-    alternative_start::Bool = false,
-    min_len::Int64 = 6
-) where {M<:GeneFinderMethod}
-    input = fasta_to_dna(input)[1]
-    orfs = findorfs(input, method; alternative_start, min_len)
-    open(output, "w") do f
-        @inbounds for orf in orfs
-            write(f, "$(orf.location.start)\t$(orf.location.stop)\t$(orf.strand)\t$(orf.frame)\n")
-        end
-    end
-end
+# function write_orfs_bed(
+#     input::String,
+#     output::Union{IOStream, IOBuffer},
+#     method::M;
+#     alternative_start::Bool = false,
+#     min_len::Int64 = 6
+# ) where {M<:GeneFinderMethod}
+#     input = fasta_to_dna(input)[1] # rewrite input to be a DNA sequence
+#     orfs = findorfs(input, method; alternative_start, min_len)
+#     @inbounds for orf in orfs
+#         println(output, orf.first, "\t", orf.last, "\t", orf.strand, "\t", orf.frame)
+#     end
+# end
+
+# function write_orfs_bed(
+#     input::String,
+#     output::String,
+#     method::M;
+#     alternative_start::Bool = false,
+#     min_len::Int64 = 6
+# ) where {M<:GeneFinderMethod}
+#     input = fasta_to_dna(input)[1]
+#     orfs = findorfs(input, method; alternative_start, min_len)
+#     open(output, "w") do f
+#         @inbounds for orf in orfs
+#             write(f, "$(orf.first)\t$(orf.last)\t$(orf.strand)\t$(orf.frame)\n")
+#         end
+#     end
+# end
 
 """
     write_orfs_fna(input::NucleicSeqOrView{DNAAlphabet{N}}, output::Union{IOStream, IOBuffer}, method::M; kwargs...) where {N, M<:GeneFinderMethod}
@@ -102,12 +102,12 @@ filename = "output.fna"
 seq = dna"ATGATGCATGCATGCATGCTAGTAACTAGCTAGCTAGCTAGTAA"
 
 open(filename, "w") do file
-     write_orfs_fna(seq, file)
+     write_orfs_fna(seq, file, NaiveFinder())
 end
 ```
 """
 function write_orfs_fna(
-    input::NucleicSeqOrView{DNAAlphabet{N}},
+    input::Union{String, NucleicSeqOrView{DNAAlphabet{N}}},
     output::Union{IOStream, IOBuffer},
     method::M;
     alternative_start::Bool = false, 
@@ -118,13 +118,13 @@ function write_orfs_fna(
     padding = norfs < 10 ? length(string(norfs)) + 1 : length(string(norfs))
     @inbounds for (i, orf) in enumerate(orfs)
         id = string(lpad(string(i), padding, "0"))
-        println(output, ">ORF", id, " id=", id, " start=", orf.location.start, " stop=", orf.location.stop, " strand=", orf.strand, " frame=", orf.frame)
+        println(output, ">", orf.groupname, " id=", id, " start=", orf.first, " stop=", orf.last, " strand=", orf.strand, " frame=", orf.frame, " score=", orf.score)
         println(output, input[orf])
     end
 end
 
 function write_orfs_fna(
-    input::NucleicSeqOrView{DNAAlphabet{N}}, 
+    input::Union{String,NucleicSeqOrView{DNAAlphabet{N}}}, 
     output::String,
     method::M;
     alternative_start::Bool = false,
@@ -136,47 +136,47 @@ function write_orfs_fna(
     open(output, "w") do f
         for (i, orf) in enumerate(orfs)
             id = string(lpad(string(i), padding, "0"))
-            write(f, ">ORF$(id) id=$(id) start=$(orf.location.start) stop=$(orf.location.stop) strand=$(orf.strand) frame=$(orf.frame)\n$(input[orf])\n") # i.strand == '+' ? input[i.location] : reverse_complement(@view input[i.location])
+            write(f, ">$(orf.groupname) id=$(id) start=$(orf.first) stop=$(orf.last) strand=$(orf.strand) frame=$(orf.frame) score=$(orf.score)\n$(input[orf])\n") # i.strand == '+' ? input[i.location] : reverse_complement(@view input[i.location])
         end
     end
 end
 
-function write_orfs_fna(
-    input::String,
-    output::Union{IOStream, IOBuffer},
-    method::M;
-    alternative_start::Bool = false, 
-    min_len::Int64 = 6
-) where {M<:GeneFinderMethod}
-    input = fasta_to_dna(input)[1]
-    orfs = findorfs(input, method; alternative_start, min_len)
-    norfs = length(orfs)
-    padding = norfs < 10 ? length(string(norfs)) + 1 : length(string(norfs))
-    @inbounds for (i, orf) in enumerate(orfs)
-        id = string(lpad(string(i), padding, "0"))
-        println(output, ">ORF", id, " id=", id, " start=", orf.location.start, " stop=", orf.location.stop, " strand=", orf.strand, " frame=", orf.frame)
-        println(output, input[orf])
-    end
-end
-
-function write_orfs_fna(
-    input::String, 
-    output::String,
-    method::M;
-    alternative_start::Bool = false,
-    min_len::Int64 = 6
-) where {M<:GeneFinderMethod}
-    input = fasta_to_dna(input)[1]
-    orfs = findorfs(input, method; alternative_start, min_len)
-    norfs = length(orfs)
-    padding = norfs < 10 ? length(string(norfs)) + 1 : length(string(norfs))
-    open(output, "w") do f
-        for (i, orf) in enumerate(orfs)
-            id = string(lpad(string(i), padding, "0"))
-            write(f, ">ORF$(id) id=$(id) start=$(orf.location.start) stop=$(orf.location.stop) strand=$(orf.strand) frame=$(orf.frame)\n$(input[orf])\n") # i.strand == '+' ? input[i.location] : reverse_complement(@view input[i.location])
-        end
-    end
-end
+# function write_orfs_fna(
+#     input::String,
+#     output::Union{IOStream, IOBuffer},
+#     method::M;
+#     alternative_start::Bool = false, 
+#     min_len::Int64 = 6
+# ) where {M<:GeneFinderMethod}
+#     input = fasta_to_dna(input)[1]
+#     orfs = findorfs(input, method; alternative_start, min_len)
+#     norfs = length(orfs)
+#     padding = norfs < 10 ? length(string(norfs)) + 1 : length(string(norfs))
+#     @inbounds for (i, orf) in enumerate(orfs)
+#         id = string(lpad(string(i), padding, "0"))
+#         println(output, ">ORF", id, " id=", id, " start=", orf.first, " stop=", orf.last, " strand=", orf.strand, " frame=", orf.frame)
+#         println(output, input[orf])
+#     end
+# end
+
+# function write_orfs_fna(
+#     input::String, 
+#     output::String,
+#     method::M;
+#     alternative_start::Bool = false,
+#     min_len::Int64 = 6
+# ) where {M<:GeneFinderMethod}
+#     input = fasta_to_dna(input)[1]
+#     orfs = findorfs(input, method; alternative_start, min_len)
+#     norfs = length(orfs)
+#     padding = norfs < 10 ? length(string(norfs)) + 1 : length(string(norfs))
+#     open(output, "w") do f
+#         for (i, orf) in enumerate(orfs)
+#             id = string(lpad(string(i), padding, "0"))
+#             write(f, ">ORF$(id) id=$(id) start=$(orf.first) stop=$(orf.last) strand=$(orf.strand) frame=$(orf.frame)\n$(input[orf])\n") # i.strand == '+' ? input[i.location] : reverse_complement(@view input[i.location])
+#         end
+#     end
+# end
 
 """
 	write_orfs_faa(input::NucleicSeqOrView{DNAAlphabet{4}}, output::Union{IOStream, IOBuffer}, method::M; kwargs...) where {N, M<:GeneFinderMethod}
@@ -222,7 +222,7 @@ function write_orfs_faa(
     padding = norfs < 10 ? length(string(norfs)) + 1 : length(string(norfs))
     @inbounds for (i, orf) in enumerate(orfs)
         id = string(lpad(string(i), padding, "0"))
-        println(output, ">ORF", id, " id=", id, " start=", orf.location.start, " stop=", orf.location.stop, " strand=", orf.strand, " frame=", orf.frame)
+        println(output, ">ORF", id, " id=", id, " start=", orf.first, " stop=", orf.last, " strand=", orf.strand, " frame=", orf.frame)
         println(output, translate(input[orf]; code))
     end
 end
@@ -241,7 +241,7 @@ function write_orfs_faa(
     open(output, "w") do f
         for (i, orf) in enumerate(orfs)
             id = string(lpad(string(i), padding, "0"))
-            write(f, ">ORF$(id) id=$(id) start=$(orf.location.start) stop=$(orf.location.stop) strand=$(orf.strand) frame=$(orf.frame)\n$(translate(input[orf]; code))\n")
+            write(f, ">ORF$(id) id=$(id) start=$(orf.first) stop=$(orf.last) strand=$(orf.strand) frame=$(orf.frame)\n$(translate(input[orf]; code))\n")
         end
     end
 end
@@ -260,7 +260,7 @@ function write_orfs_faa(
     padding = norfs < 10 ? length(string(norfs)) + 1 : length(string(norfs))
     @inbounds for (i, orf) in enumerate(orfs)
         id = string(lpad(string(i), padding, "0"))
-        println(output, ">ORF", id, " id=", id, " start=", orf.location.start, " stop=", orf.location.stop, " strand=", orf.strand, " frame=", orf.frame)
+        println(output, ">ORF", id, " id=", id, " start=", orf.first, " stop=", orf.last, " strand=", orf.strand, " frame=", orf.frame)
         println(output, translate(input[orf]; code))
     end
 end
@@ -280,7 +280,7 @@ function write_orfs_faa(
     open(output, "w") do f
         for (i, orf) in enumerate(orfs)
             id = string(lpad(string(i), padding, "0"))
-            write(f, ">ORF$(id) id=$(id) start=$(orf.location.start) stop=$(orf.location.stop) strand=$(orf.strand) frame=$(orf.frame)\n$(translate(input[orf]; code))\n")
+            write(f, ">ORF$(id) id=$(id) start=$(orf.first) stop=$(orf.last) strand=$(orf.strand) frame=$(orf.frame)\n$(translate(input[orf]; code))\n")
         end
     end
 end
@@ -318,7 +318,7 @@ function write_orfs_gff(
     println(output, "##gff-version 3\n##sequence-region Chr 1 $(length(input))")
     for (i, orf) in enumerate(orfs)
         id = string("ORF", lpad(string(i), padding, "0"))
-        println(output, "Chr\t.\tORF\t", orf.location.start, "\t", orf.location.stop, "\t.\t", orf.strand, "\t.\tID=", id, ";Name=", id, ";Frame=", orf.frame)
+        println(output, "Chr\t.\tORF\t", orf.first, "\t", orf.last, "\t.\t", orf.strand, "\t.\tID=", id, ";Name=", id, ";Frame=", orf.frame)
         end
 end
 
@@ -338,7 +338,7 @@ function write_orfs_gff(
             id = string("ORF", lpad(string(i), padding, "0"))
             write(
                 f,
-                "Chr\t.\tORF\t$(orf.location.start)\t$(orf.location.stop)\t.\t$(orf.strand)\t.\tID=$(id);Name=$(id);Frame=$(orf.frame)\n",
+                "Chr\t.\tORF\t$(orf.first)\t$(orf.last)\t.\t$(orf.strand)\t.\tID=$(id);Name=$(id);Frame=$(orf.frame)\n",
             )
         end
     end
@@ -358,7 +358,7 @@ function write_orfs_gff(
     println(output, "##gff-version 3\n##sequence-region Chr 1 $(length(input))")
     for (i, orf) in enumerate(orfs)
         id = string("ORF", lpad(string(i), padding, "0"))
-        println(output, "Chr\t.\tORF\t", orf.location.start, "\t", orf.location.stop, "\t.\t", orf.strand, "\t.\tID=", id, ";Name=", id, ";Frame=", orf.frame)
+        println(output, "Chr\t.\tORF\t", orf.first, "\t", orf.last, "\t.\t", orf.strand, "\t.\tID=", id, ";Name=", id, ";Frame=", orf.frame)
     end
 end
 
@@ -379,7 +379,7 @@ function write_orfs_gff(
             id = string("ORF", lpad(string(i), padding, "0"))
             write(
                 f,
-                "Chr\t.\tORF\t$(orf.location.start)\t$(orf.location.stop)\t.\t$(orf.strand)\t.\tID=$(id);Name=$(id);Frame=$(orf.frame)\n",
+                "Chr\t.\tORF\t$(orf.first)\t$(orf.last)\t.\t$(orf.strand)\t.\tID=$(id);Name=$(id);Frame=$(orf.frame)\n",
             )
         end
     end
diff --git a/src/iscoding.jl b/src/iscoding.jl
index 61f189c..a0d31c8 100644
--- a/src/iscoding.jl
+++ b/src/iscoding.jl
@@ -29,10 +29,10 @@ end
 
 function iscoding(
     sequence::NucleicSeqOrView{DNAAlphabet{N}},
-    orf::ORF;
+    orf::ORF{M};
     criteria::Function = lordr,
     kwargs...
-) where {N}
+) where {N,M <: GeneFinderMethod}
     return criteria(sequence[orf]; kwargs...)
 end
 
diff --git a/src/types.jl b/src/types.jl
index 73d6613..cbd8eaf 100644
--- a/src/types.jl
+++ b/src/types.jl
@@ -139,7 +139,8 @@ end
 
 function Base.show(io::IO, i::ORF)
     if get(io, :compact, false)
-        print(io, groupname(i), ":", leftposition(i), "-", rightposition(i))
+        # print(io, groupname(i), ":", leftposition(i), "-", rightposition(i))
+        print(io, "ORF{$(typeof(finder(i)))}($(groupname(i)), $(leftposition(i)):$(rightposition(i)), $(strand(i)), $(frame(i)), $(score(i)))")
     else
         # print(io, "ORF{$(typeof(finder(i))),$(scheme(i))}($(groupname(i)), $(leftposition(i)):$(rightposition(i)), $(strand(i)), $(frame(i)), $(round(score(i), digits=5)))")
         print(io, "ORF{$(typeof(finder(i)))}($(groupname(i)), $(leftposition(i)):$(rightposition(i)), $(strand(i)), $(frame(i)), $(score(i)))")
diff --git a/test/findorfstest.jl b/test/findorfstest.jl
index 3ce7b06..ae7f731 100644
--- a/test/findorfstest.jl
+++ b/test/findorfstest.jl
@@ -5,7 +5,7 @@
     seq01 = dna"ATGATGCATGCATGCATGCTAGTAACTAGCTAGCTAGCTAGTAA"
     orfs01 = findorfs(seq01, NaiveFinder())
 
-    @test orfs01 == [ORF(1:33, '+', 1, 0.0), ORF(4:33, '+', 1, 0.0), ORF(8:22, '+', 2, 0.0), ORF(12:29, '+', 3, 0.0), ORF(16:33, '+', 1, 0.0)]
+    @test orfs01 == [ORF{NaiveFinder}(ORF1, 1:33, +, 1, nothing), ORF{NaiveFinder}(ORF2, 4:33, +, 1, nothing), ORF{NaiveFinder}(ORF3, 8:22, +, 2, nothing), ORF{NaiveFinder}(ORF4, 12:29, +, 3, nothing), ORF{NaiveFinder}(ORF5, 16:33, +, 1, nothing)]
     @test length(orfs01) == 5
 
     # > 180195.SAMN03785337.LFLS01000089 -> finds only 1 gene in Prodigal (from Pyrodigal tests)
@@ -13,7 +13,7 @@
     orfs02 = findorfs(seq02, NaiveFinder())
 
     @test length(orfs02) == 12
-    @test orfs02 == [ORF(29:40, '+', 2, 0.0), ORF(137:145, '+', 2, 0.0), ORF(164:184, '+', 2, 0.0), ORF(173:184, '+', 2, 0.0), ORF(236:241, '+', 2, 0.0), ORF(248:268, '+', 2, 0.0), ORF(362:373, '+', 2, 0.0), ORF(470:496, '+', 2, 0.0), ORF(551:574, '+', 2, 0.0), ORF(569:574, '+', 2, 0.0), ORF(581:601, '+', 2, 0.0), ORF(695:706, '+', 2, 0.0)]
+    @test orfs02 == [ORF{NaiveFinder}(ORF1, 29:40, +, 2, nothing), ORF{NaiveFinder}(ORF2, 137:145, +, 2, nothing), ORF{NaiveFinder}(ORF3, 164:184, +, 2, nothing), ORF{NaiveFinder}(ORF4, 173:184, +, 2, nothing), ORF{NaiveFinder}(ORF5, 236:241, +, 2, nothing), ORF{NaiveFinder}(ORF6, 248:268, +, 2, nothing), ORF{NaiveFinder}(ORF7, 362:373, +, 2, nothing), ORF{NaiveFinder}(ORF8, 470:496, +, 2, nothing), ORF{NaiveFinder}(ORF9, 551:574, +, 2, nothing), ORF{NaiveFinder}(ORF10, 569:574, +, 2, nothing), ORF{NaiveFinder}(ORF11, 581:601, +, 2, nothing), ORF{NaiveFinder}(ORF12, 695:706, +, 2, nothing)]
 
     # From pyrodigal issue #13 link: https://github.com/althonos/pyrodigal/blob/1f939b0913b48dbaa55d574b20e124f1b8323825/pyrodigal/tests/test_orf_finder.py#L271
     # Pyrodigal predicts 2 genes from this sequence:
@@ -23,7 +23,7 @@
     seq03 = dna"TTCGTCAGTCGTTCTGTTTCATTCAATACGATAGTAATGTATTTTTCGTGCATTTCCGGTGGAATCGTGCCGTCCAGCATAGCCTCCAGATATCCCCTTATAGAGGTCAGAGGGGAACGGAAATCGTGGGATACATTGGCTACAAACTTTTTCTGATCATCCTCGGAACGGGCAATTTCGCTTGCCATATAATTCAGACAGGAAGCCAGATAACCGATTTCATCCTCACTATCGACCTGAAATTCATAATGCATATTACCGGCAGCATACTGCTCTGTGGCATGAGTGATCTTCCTCAGAGGAATATATACGATCTCAGTGAAAAAGATCAGAATGATCAGGGATAGCAGGAACAGGATTGCCAGGGTGATATAGGAAATATTCAGCAGGTTGTTACAGGATTTCTGAATATCATTCATATCAGTATGGATGACTACATAGCCTTTTACCTTGTAGTTGGAGGTAATGGGAGCAAATACAGTAAGTACATCCGAATCAAAATTACCGAAGAAATCACCAACAATGTAATAGGAGCCGCTGGTTACGGTCGAATCAAAATTCTCAATGACAACCACATTCTCCACATCTAAGGGACTATTGGTATCCAGTACCAGTCGTCCGGAGGGATTGATGATGCGAATCTCGGAATTCAGGTAGACCGCCAGGGAGTCCAGCTGCATTTTAACGGTCTCCAAAGTTGTTTCACTGGTGTACAATCCGCCGGCATAGGTTCCGGCGATCAGGGTTGCTTCGGAATAGAGACTTTCTGCCTTTTCCCGGATCAGATGTTCTTTGGTCATATTGGGAACAAAAGTTGTAACAATGATGAAACCAAATACACCAAAAATAAAATATGCGAGTATAAATTTTAGATAAAGTGTTTTTTTCATAACAAATCCTGCTTTTGGTATGACTTAATTACGTACTTCGAATTTATAGCCGATGCCCCAGATGGTGCTGATCTTCCAGTTGGCATGATCCTTGATCTTCTC"
     orfs03 = findorfs(seq03, NaiveFinder(), min_len=75)
     @test length(orfs03) == 9
-    @test orfs03 == [ORF(37:156, '+', 1, 0.0), ORF(194:268, '-', 2, 0.0), ORF(194:283, '-', 2, 0.0), ORF(249:347, '+', 3, 0.0), ORF(426:590, '+', 3, 0.0), ORF(565:657, '+', 1, 0.0), ORF(650:727, '-', 2, 0.0), ORF(786:872, '+', 3, 0.0), ORF(887:976, '-', 2, 0.0)]
+    @test orfs03 == [ORF{NaiveFinder}(ORF1, 37:156, +, 1, nothing), ORF{NaiveFinder}(ORF9, 194:268, -, 2, nothing), ORF{NaiveFinder}(ORF8, 194:283, -, 2, nothing), ORF{NaiveFinder}(ORF2, 249:347, +, 3, nothing), ORF{NaiveFinder}(ORF3, 426:590, +, 3, nothing), ORF{NaiveFinder}(ORF4, 565:657, +, 1, nothing), ORF{NaiveFinder}(ORF7, 650:727, -, 2, nothing), ORF{NaiveFinder}(ORF5, 786:872, +, 3, nothing), ORF{NaiveFinder}(ORF6, 887:976, -, 2, nothing)]
                                                                                                            #|->  This occured in Pyrodigal
     # Lambda phage tests
     # Compare to https://github.com/jonas-fuchs/viral_orf_finder/blob/master/orf_finder.py 

From 87b49c3931867965496f2f2f51bdedb78946e439 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Camilo=20Garc=C3=ADa?= <ca.garcia2@uniandes.edu.co>
Date: Tue, 21 May 2024 17:46:00 -0500
Subject: [PATCH 03/40] Some attempts to correct tests

---
 src/algorithms/naivefinder.jl |   4 +-
 src/types.jl                  |   9 ++-
 src/utils.jl                  |  11 +++-
 test/Project.toml             |   2 +
 test/findorfstest.jl          | 102 ++++++++++++++++++----------------
 test/getindextest.jl          |   9 ++-
 test/iotest.jl                |  68 +++++++++++------------
 test/runtests.jl              |  26 ++++++++-
 8 files changed, 137 insertions(+), 94 deletions(-)

diff --git a/src/algorithms/naivefinder.jl b/src/algorithms/naivefinder.jl
index d013e2b..4351ed7 100644
--- a/src/algorithms/naivefinder.jl
+++ b/src/algorithms/naivefinder.jl
@@ -66,7 +66,7 @@ function naivefinder(
                 start = strand == '+' ? location.start : seqlen - location.stop + 1
                 stop = start + length(location) - 1
                 id += 1
-                push!(orfs, ORF("ORF$(id)", start:stop, strand, frame, NaiveFinder(), scheme))
+                push!(orfs, ORF(get_variable_name(sequence), start:stop, strand, frame, NaiveFinder(), scheme))
             end
         end
     end
@@ -114,7 +114,7 @@ function naivefinderscored(
                 # score = -10log10(dnaseqprobability(seq[start:stop], scoringscheme))
                 score = log_odds_ratio_score(seq[start:stop], model)
                 id += 1
-                push!(orfs, ORF("ORF$(id)", start:stop, strand, frame, NaiveFinderScored(), lors, score))
+                push!(orfs, ORF(get_variable_name(sequence), start:stop, strand, frame, NaiveFinderScored(), lors, score))
             end
         end
     end
diff --git a/src/types.jl b/src/types.jl
index cbd8eaf..68df0f9 100644
--- a/src/types.jl
+++ b/src/types.jl
@@ -1,6 +1,7 @@
 export ORF
 export NaiveFinder, NaiveFinderScored
 export NaiveScoringScheme
+export finder, frame, scheme, score
 # Structs associated with gene models 
 abstract type AbstractGene end
 
@@ -81,7 +82,7 @@ abstract type GeneScoringScheme end
 struct NaiveScoringScheme <: GeneScoringScheme end
 
 
-export ORF
+# export ORF
 
 struct ORF{F} <: GenomicFeatures.AbstractGenomicInterval{F}
     groupname::String
@@ -106,6 +107,7 @@ function ORF(
     score::Union{Nothing, Float64}=nothing,
     rbs::Union{Nothing, Vector{UnitRange{Int64}}}=nothing
 ) where {F <: GeneFinderMethod}
+    # @assert frame in (1, 2, 3) "Invalid frame value. Frame must be 1, 2, or 3."
     return ORF{F}(groupname, first, last, strand, frame, finder, scheme, score, rbs)
 end
 
@@ -118,6 +120,7 @@ function ORF(
     score::Union{Nothing, Float64}=nothing,
     rbs::Union{Nothing, Vector{UnitRange{Int64}}}=nothing
 ) where {F <: GeneFinderMethod}
+    # @assert frame in (1, 2, 3) "Invalid frame value. Frame must be 1, 2, or 3."
     return ORF{F}(id, first(location), last(location), strand, frame, finder, scheme, score, rbs)
 end
 
@@ -140,10 +143,10 @@ end
 function Base.show(io::IO, i::ORF)
     if get(io, :compact, false)
         # print(io, groupname(i), ":", leftposition(i), "-", rightposition(i))
-        print(io, "ORF{$(typeof(finder(i)))}($(groupname(i)), $(leftposition(i)):$(rightposition(i)), $(strand(i)), $(frame(i)), $(score(i)))")
+        print(io, "ORF{$(typeof(finder(i)))}($(leftposition(i)):$(rightposition(i)), '$(strand(i))', $(frame(i)), $(score(i)))")
     else
         # print(io, "ORF{$(typeof(finder(i))),$(scheme(i))}($(groupname(i)), $(leftposition(i)):$(rightposition(i)), $(strand(i)), $(frame(i)), $(round(score(i), digits=5)))")
-        print(io, "ORF{$(typeof(finder(i)))}($(groupname(i)), $(leftposition(i)):$(rightposition(i)), $(strand(i)), $(frame(i)), $(score(i)))")
+        print(io, "ORF{$(typeof(finder(i)))}($(leftposition(i)):$(rightposition(i)), '$(strand(i))', $(frame(i)), $(score(i)))")
         # println(io, " ", groupname(i), " ", leftposition(i), ":", rightposition(i), ", ", strand(i), ", ", frame(i), ", score: $(round(score(i), digits=2))")
     end
 end
\ No newline at end of file
diff --git a/src/utils.jl b/src/utils.jl
index 5214223..404bf9f 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -1,4 +1,4 @@
-export hasprematurestop, fasta_to_dna
+export hasprematurestop, fasta_to_dna, get_variable_name
 # General purposes methods supporting main functions
 
 """
@@ -35,4 +35,13 @@ function fasta_to_dna(input::AbstractString)::Vector{LongSequence{DNAAlphabet{4}
     FASTAReader(open(input)) do reader
         return [LongSequence{DNAAlphabet{4}}(sequence(record)) for record in reader]
     end
+end
+
+function get_variable_name(var)
+    for name in names(Main)
+        if getfield(Main, name) === var
+            return string(name)
+        end
+    end
+    return nothing
 end
\ No newline at end of file
diff --git a/test/Project.toml b/test/Project.toml
index 60aa073..baa2a68 100644
--- a/test/Project.toml
+++ b/test/Project.toml
@@ -3,3 +3,5 @@ Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
 BioSequences = "7e6ae17a-c86d-528c-b3b9-7f778a29fe59"
 FASTX = "c2308a5c-f048-11e8-3e8a-31650f418d12"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+TestItemRunner = "f8b46487-2199-4994-9208-9a1283c18c0a"
+TestItems = "1c621080-faea-4a02-84b6-bbd5e436b8fe"
diff --git a/test/findorfstest.jl b/test/findorfstest.jl
index ae7f731..7c9f1cf 100644
--- a/test/findorfstest.jl
+++ b/test/findorfstest.jl
@@ -1,57 +1,61 @@
-@testset "findorfs" begin
-    # cd(@__DIR__)
-
-    # A random seq to start
-    seq01 = dna"ATGATGCATGCATGCATGCTAGTAACTAGCTAGCTAGCTAGTAA"
-    orfs01 = findorfs(seq01, NaiveFinder())
-
-    @test orfs01 == [ORF{NaiveFinder}(ORF1, 1:33, +, 1, nothing), ORF{NaiveFinder}(ORF2, 4:33, +, 1, nothing), ORF{NaiveFinder}(ORF3, 8:22, +, 2, nothing), ORF{NaiveFinder}(ORF4, 12:29, +, 3, nothing), ORF{NaiveFinder}(ORF5, 16:33, +, 1, nothing)]
-    @test length(orfs01) == 5
-
-    # > 180195.SAMN03785337.LFLS01000089 -> finds only 1 gene in Prodigal (from Pyrodigal tests)
-    seq02 = dna"AACCAGGGCAATATCAGTACCGCGGGCAATGCAACCCTGACTGCCGGCGGTAACCTGAACAGCACTGGCAATCTGACTGTGGGCGGTGTTACCAACGGCACTGCTACTACTGGCAACATCGCACTGACCGGTAACAATGCGCTGAGCGGTCCGGTCAATCTGAATGCGTCGAATGGCACGGTGACCTTGAACACGACCGGCAATACCACGCTCGGTAACGTGACGGCACAAGGCAATGTGACGACCAATGTGTCCAACGGCAGTCTGACGGTTACCGGCAATACGACAGGTGCCAACACCAACCTCAGTGCCAGCGGCAACCTGACCGTGGGTAACCAGGGCAATATCAGTACCGCAGGCAATGCAACCCTGACGGCCGGCGACAACCTGACGAGCACTGGCAATCTGACTGTGGGCGGCGTCACCAACGGCACGGCCACCACCGGCAACATCGCGCTGACCGGTAACAATGCACTGGCTGGTCCTGTCAATCTGAACGCGCCGAACGGCACCGTGACCCTGAACACAACCGGCAATACCACGCTGGGTAATGTCACCGCACAAGGCAATGTGACGACTAATGTGTCCAACGGCAGCCTGACAGTCGCTGGCAATACCACAGGTGCCAACACCAACCTGAGTGCCAGCGGCAATCTGACCGTGGGCAACCAGGGCAATATCAGTACCGCGGGCAATGCAACCCTGACTGCCGGCGGTAACCTGAGC"
-    orfs02 = findorfs(seq02, NaiveFinder())
-
-    @test length(orfs02) == 12
-    @test orfs02 == [ORF{NaiveFinder}(ORF1, 29:40, +, 2, nothing), ORF{NaiveFinder}(ORF2, 137:145, +, 2, nothing), ORF{NaiveFinder}(ORF3, 164:184, +, 2, nothing), ORF{NaiveFinder}(ORF4, 173:184, +, 2, nothing), ORF{NaiveFinder}(ORF5, 236:241, +, 2, nothing), ORF{NaiveFinder}(ORF6, 248:268, +, 2, nothing), ORF{NaiveFinder}(ORF7, 362:373, +, 2, nothing), ORF{NaiveFinder}(ORF8, 470:496, +, 2, nothing), ORF{NaiveFinder}(ORF9, 551:574, +, 2, nothing), ORF{NaiveFinder}(ORF10, 569:574, +, 2, nothing), ORF{NaiveFinder}(ORF11, 581:601, +, 2, nothing), ORF{NaiveFinder}(ORF12, 695:706, +, 2, nothing)]
-
-    # From pyrodigal issue #13 link: https://github.com/althonos/pyrodigal/blob/1f939b0913b48dbaa55d574b20e124f1b8323825/pyrodigal/tests/test_orf_finder.py#L271
-    # Pyrodigal predicts 2 genes from this sequence:
-    # 1) An alternative start codon (GTG) sequence at 48:347
-    # 2) A common start codon sequence at 426:590
-    # On the other hand, the NCBI ORFfinder program predicts 9 ORFs whose length is greater than 75 nt, from which one has an "outbound" stop
-    seq03 = dna"TTCGTCAGTCGTTCTGTTTCATTCAATACGATAGTAATGTATTTTTCGTGCATTTCCGGTGGAATCGTGCCGTCCAGCATAGCCTCCAGATATCCCCTTATAGAGGTCAGAGGGGAACGGAAATCGTGGGATACATTGGCTACAAACTTTTTCTGATCATCCTCGGAACGGGCAATTTCGCTTGCCATATAATTCAGACAGGAAGCCAGATAACCGATTTCATCCTCACTATCGACCTGAAATTCATAATGCATATTACCGGCAGCATACTGCTCTGTGGCATGAGTGATCTTCCTCAGAGGAATATATACGATCTCAGTGAAAAAGATCAGAATGATCAGGGATAGCAGGAACAGGATTGCCAGGGTGATATAGGAAATATTCAGCAGGTTGTTACAGGATTTCTGAATATCATTCATATCAGTATGGATGACTACATAGCCTTTTACCTTGTAGTTGGAGGTAATGGGAGCAAATACAGTAAGTACATCCGAATCAAAATTACCGAAGAAATCACCAACAATGTAATAGGAGCCGCTGGTTACGGTCGAATCAAAATTCTCAATGACAACCACATTCTCCACATCTAAGGGACTATTGGTATCCAGTACCAGTCGTCCGGAGGGATTGATGATGCGAATCTCGGAATTCAGGTAGACCGCCAGGGAGTCCAGCTGCATTTTAACGGTCTCCAAAGTTGTTTCACTGGTGTACAATCCGCCGGCATAGGTTCCGGCGATCAGGGTTGCTTCGGAATAGAGACTTTCTGCCTTTTCCCGGATCAGATGTTCTTTGGTCATATTGGGAACAAAAGTTGTAACAATGATGAAACCAAATACACCAAAAATAAAATATGCGAGTATAAATTTTAGATAAAGTGTTTTTTTCATAACAAATCCTGCTTTTGGTATGACTTAATTACGTACTTCGAATTTATAGCCGATGCCCCAGATGGTGCTGATCTTCCAGTTGGCATGATCCTTGATCTTCTC"
-    orfs03 = findorfs(seq03, NaiveFinder(), min_len=75)
-    @test length(orfs03) == 9
-    @test orfs03 == [ORF{NaiveFinder}(ORF1, 37:156, +, 1, nothing), ORF{NaiveFinder}(ORF9, 194:268, -, 2, nothing), ORF{NaiveFinder}(ORF8, 194:283, -, 2, nothing), ORF{NaiveFinder}(ORF2, 249:347, +, 3, nothing), ORF{NaiveFinder}(ORF3, 426:590, +, 3, nothing), ORF{NaiveFinder}(ORF4, 565:657, +, 1, nothing), ORF{NaiveFinder}(ORF7, 650:727, -, 2, nothing), ORF{NaiveFinder}(ORF5, 786:872, +, 3, nothing), ORF{NaiveFinder}(ORF6, 887:976, -, 2, nothing)]
-                                                                                                           #|->  This occured in Pyrodigal
-    # Lambda phage tests
-    # Compare to https://github.com/jonas-fuchs/viral_orf_finder/blob/master/orf_finder.py 
-    # Salisbury and Tsorukas (2019) paper used the Lambda phage genome with 73 CDS and 545 non-CDS ORFs (a total of 618) to compare predictions between several Gene Finder programs
-    # For a minimal length of 75 nt the following ORFs are predicted: 
-    # orf_finder.py --> 885 (222 complete)
-    # findorfs (GeneFinder.jl) --> 885
-    # NCBI ORFfinder --> 375 ORFs
-    # orfipy --> 375 (`orfipy NC_001416.1.fasta --start ATG --include-stop --min 75`)
-    NC_001416 = fasta_to_dna("data/NC_001416.1.fasta")[1]
-    NC_001416_orfs = findorfs(NC_001416, NaiveFinder(), min_len=75)
-    @test length(NC_001416_orfs) == 885
-end
-
-@testset "getorfs" begin
+# @testset "findorfs" begin
+#     # cd(@__DIR__)
+
+#     # A random seq to start
+#     seq01 = dna"ATGATGCATGCATGCATGCTAGTAACTAGCTAGCTAGCTAGTAA"
+#     orfs01 = findorfs(seq01, NaiveFinder())
+
+#     @test orfs01 == [ORF{NaiveFinder}(1:33, '+', 1, nothing), ORF{NaiveFinder}(4:33, '+', 1, nothing), ORF{NaiveFinder}(8:22, '+', 2, nothing), ORF{NaiveFinder}(12:29, '+', 3, nothing), ORF{NaiveFinder}(16:33, '+', 1, nothing)]
+#     @test length(orfs01) == 5
+
+#     # > 180195.SAMN03785337.LFLS01000089 -> finds only 1 gene in Prodigal (from Pyrodigal tests)
+#     seq02 = dna"AACCAGGGCAATATCAGTACCGCGGGCAATGCAACCCTGACTGCCGGCGGTAACCTGAACAGCACTGGCAATCTGACTGTGGGCGGTGTTACCAACGGCACTGCTACTACTGGCAACATCGCACTGACCGGTAACAATGCGCTGAGCGGTCCGGTCAATCTGAATGCGTCGAATGGCACGGTGACCTTGAACACGACCGGCAATACCACGCTCGGTAACGTGACGGCACAAGGCAATGTGACGACCAATGTGTCCAACGGCAGTCTGACGGTTACCGGCAATACGACAGGTGCCAACACCAACCTCAGTGCCAGCGGCAACCTGACCGTGGGTAACCAGGGCAATATCAGTACCGCAGGCAATGCAACCCTGACGGCCGGCGACAACCTGACGAGCACTGGCAATCTGACTGTGGGCGGCGTCACCAACGGCACGGCCACCACCGGCAACATCGCGCTGACCGGTAACAATGCACTGGCTGGTCCTGTCAATCTGAACGCGCCGAACGGCACCGTGACCCTGAACACAACCGGCAATACCACGCTGGGTAATGTCACCGCACAAGGCAATGTGACGACTAATGTGTCCAACGGCAGCCTGACAGTCGCTGGCAATACCACAGGTGCCAACACCAACCTGAGTGCCAGCGGCAATCTGACCGTGGGCAACCAGGGCAATATCAGTACCGCGGGCAATGCAACCCTGACTGCCGGCGGTAACCTGAGC"
+#     orfs02 = findorfs(seq02, NaiveFinder())
+
+#     @test length(orfs02) == 12
+#     @test orfs02 == [ORF{NaiveFinder}(29:40, '+', 2, nothing), ORF{NaiveFinder}(137:145, '+', 2, nothing), ORF{NaiveFinder}(164:184, '+', 2, nothing), ORF{NaiveFinder}(173:184, '+', 2, nothing), ORF{NaiveFinder}(236:241, '+', 2, nothing), ORF{NaiveFinder}(248:268, '+', 2, nothing), ORF{NaiveFinder}(362:373, '+', 2, nothing), ORF{NaiveFinder}(470:496, '+', 2, nothing), ORF{NaiveFinder}(551:574, '+', 2, nothing), ORF{NaiveFinder}(569:574, '+', 2, nothing), ORF{NaiveFinder}(581:601, '+', 2, nothing), ORF{NaiveFinder}(695:706, '+', 2, nothing)]
+
+#     # From pyrodigal issue #13 link: https://github.com/althonos/pyrodigal/blob/1f939b0913b48dbaa55d574b20e124f1b8323825/pyrodigal/tests/test_orf_finder.py#L271
+#     # Pyrodigal predicts 2 genes from this sequence:
+#     # 1) An alternative start codon (GTG) sequence at 48:347
+#     # 2) A common start codon sequence at 426:590
+#     # On the other hand, the NCBI ORFfinder program predicts 9 ORFs whose length is greater than 75 nt, from which one has an "outbound" stop
+#     seq03 = dna"TTCGTCAGTCGTTCTGTTTCATTCAATACGATAGTAATGTATTTTTCGTGCATTTCCGGTGGAATCGTGCCGTCCAGCATAGCCTCCAGATATCCCCTTATAGAGGTCAGAGGGGAACGGAAATCGTGGGATACATTGGCTACAAACTTTTTCTGATCATCCTCGGAACGGGCAATTTCGCTTGCCATATAATTCAGACAGGAAGCCAGATAACCGATTTCATCCTCACTATCGACCTGAAATTCATAATGCATATTACCGGCAGCATACTGCTCTGTGGCATGAGTGATCTTCCTCAGAGGAATATATACGATCTCAGTGAAAAAGATCAGAATGATCAGGGATAGCAGGAACAGGATTGCCAGGGTGATATAGGAAATATTCAGCAGGTTGTTACAGGATTTCTGAATATCATTCATATCAGTATGGATGACTACATAGCCTTTTACCTTGTAGTTGGAGGTAATGGGAGCAAATACAGTAAGTACATCCGAATCAAAATTACCGAAGAAATCACCAACAATGTAATAGGAGCCGCTGGTTACGGTCGAATCAAAATTCTCAATGACAACCACATTCTCCACATCTAAGGGACTATTGGTATCCAGTACCAGTCGTCCGGAGGGATTGATGATGCGAATCTCGGAATTCAGGTAGACCGCCAGGGAGTCCAGCTGCATTTTAACGGTCTCCAAAGTTGTTTCACTGGTGTACAATCCGCCGGCATAGGTTCCGGCGATCAGGGTTGCTTCGGAATAGAGACTTTCTGCCTTTTCCCGGATCAGATGTTCTTTGGTCATATTGGGAACAAAAGTTGTAACAATGATGAAACCAAATACACCAAAAATAAAATATGCGAGTATAAATTTTAGATAAAGTGTTTTTTTCATAACAAATCCTGCTTTTGGTATGACTTAATTACGTACTTCGAATTTATAGCCGATGCCCCAGATGGTGCTGATCTTCCAGTTGGCATGATCCTTGATCTTCTC"
+#     orfs03 = findorfs(seq03, NaiveFinder(), min_len=75)
+#     @test length(orfs03) == 9
+#     @test orfs03 == [ORF{NaiveFinder}(37:156, '+', 1, nothing), ORF{NaiveFinder}(194:268, '-', 2, nothing), ORF{NaiveFinder}(194:283, '-', 2, nothing), ORF{NaiveFinder}(249:347, '+', 3, nothing), ORF{NaiveFinder}(426:590, '+', 3, nothing), ORF{NaiveFinder}(565:657, '+', 1, nothing), ORF{NaiveFinder}(650:727, '-', 2, nothing), ORF{NaiveFinder}(786:872, '+', 3, nothing), ORF{NaiveFinder}(887:976, '-', 2, nothing)]
+#                                                                                                            #|->  This occured in Pyrodigal
+#     # Lambda phage tests
+#     # Compare to https://github.com/jonas-fuchs/viral_orf_finder/blob/master/orf_finder.py 
+#     # Salisbury and Tsorukas (2019) paper used the Lambda phage genome with 73 CDS and 545 non-CDS ORFs (a total of 618) to compare predictions between several Gene Finder programs
+#     # For a minimal length of 75 nt the following ORFs are predicted: 
+#     # orf_finder.py --> 885 (222 complete)
+#     # findorfs (GeneFinder.jl) --> 885
+#     # NCBI ORFfinder --> 375 ORFs
+#     # orfipy --> 375 (`orfipy NC_001416.1.fasta --start ATG --include-stop --min 75`)
+#     NC_001416 = fasta_to_dna("data/NC_001416.1.fasta")[1]
+#     NC_001416_orfs = findorfs(NC_001416, NaiveFinder(), min_len=75)
+#     @test length(NC_001416_orfs) == 885
+# end
+@testitem "getorfs dna" default_imports=false begin
+
+    using BioSequences: @dna_str, DNAAlphabet
+    using GeneFinder: NaiveFinder, getorfs, findorfs, ORF
+    using Test: @test
 
     seq01 = dna"ATGATGCATGCATGCATGCTAGTAACTAGCTAGCTAGCTAGTAA"
     orfseqs = getorfs(seq01, DNAAlphabet{4}(), NaiveFinder())
 
-    @test length(orfseqs) == 5
-    @test orfseqs[1] == dna"ATGATGCATGCATGCATGCTAGTAACTAGCTAG"
+    # @test length(orfseqs) == 5
+    # @test orfseqs[1] == dna"ATGATGCATGCATGCATGCTAGTAACTAGCTAG"
 end
 
-@testset "getorfs" begin
+# @testitem "getorfs proteins" begin
+#     using BioSequences, GeneFinder, Test
 
-    seq01 = dna"ATGATGCATGCATGCATGCTAGTAACTAGCTAGCTAGCTAGTAA"
-    aas = getorfs(seq01, AminoAcidAlphabet(), NaiveFinder())
+#     seq01 = dna"ATGATGCATGCATGCATGCTAGTAACTAGCTAGCTAGCTAGTAA"
+#     aas = getorfs(seq01, AminoAcidAlphabet(), NaiveFinder())
 
-    @test length(aas) == 5
-    @test aas[1] == aa"MMHACMLVTS*"
-end
\ No newline at end of file
+#     @test length(aas) == 5
+#     @test aas[1] == aa"MMHACMLVTS*"
+# end
\ No newline at end of file
diff --git a/test/getindextest.jl b/test/getindextest.jl
index e961b89..3ab5f71 100644
--- a/test/getindextest.jl
+++ b/test/getindextest.jl
@@ -1,10 +1,15 @@
 # Import the function to be tested
 
 # Run the tests
-@testset "getindex" begin
+@testitem "getindex" default_imports=false begin
+    
+    using BioSequences: @dna_str
+    using GeneFinder: ORF, NaiveFinder
+    using Test: @test
+
     seq01 = dna"ATGATGCATGCATGCATGCTAGTAACTAGCTAGCTAGCTAGTAA"
     
-    orfs01 = [ORF(1:33, '+', 1, 0.0), ORF(4:33, '+', 1, 0.0), ORF(8:22, '+', 2, 0.0), ORF(12:29, '+', 3, 0.0), ORF(16:33, '+', 1, 0.0)]
+    orfs01 = [ORF("seq01", 1:33, '+', 1, NaiveFinder()), ORF("seq01", 4:33, '+', 1, NaiveFinder()), ORF("seq01", 8:22, '+', 2, NaiveFinder()), ORF("seq01", 12:29, '+', 3, NaiveFinder()), ORF("seq01", 16:33, '+', 1, NaiveFinder())]
 
     @test getindex(seq01, orfs01[1]) == dna"ATGATGCATGCATGCATGCTAGTAACTAGCTAG"
     @test getindex(seq01, orfs01[2]) == dna"ATGCATGCATGCATGCTAGTAACTAGCTAG"
diff --git a/test/iotest.jl b/test/iotest.jl
index 598c5ed..f7ed3da 100644
--- a/test/iotest.jl
+++ b/test/iotest.jl
@@ -1,48 +1,48 @@
 # Test the write_orfs_* methods
-@testset "write_orfs_f* " begin
+# @testset "write_orfs_f* " begin
     
-    # seq01 = dna"ATGATGCATGCATGCATGCTAGTAACTAGCTAGCTAGCTAGTAA"
-    # seq02 = dna"AACCAGGGCAATATCAGTACCGCGGGCAATGCAACCCTGACTGCCGGCGGTAACCTGAACAGCACTGGCAATCTGACTGTGGGCGGTGTTACCAACGGCACTGCTACTACTGGCAACATCGCACTGACCGGTAACAATGCGCTGAGCGGTCCGGTCAATCTGAATGCGTCGAATGGCACGGTGACCTTGAACACGACCGGCAATACCACGCTCGGTAACGTGACGGCACAAGGCAATGTGACGACCAATGTGTCCAACGGCAGTCTGACGGTTACCGGCAATACGACAGGTGCCAACACCAACCTCAGTGCCAGCGGCAACCTGACCGTGGGTAACCAGGGCAATATCAGTACCGCAGGCAATGCAACCCTGACGGCCGGCGACAACCTGACGAGCACTGGCAATCTGACTGTGGGCGGCGTCACCAACGGCACGGCCACCACCGGCAACATCGCGCTGACCGGTAACAATGCACTGGCTGGTCCTGTCAATCTGAACGCGCCGAACGGCACCGTGACCCTGAACACAACCGGCAATACCACGCTGGGTAATGTCACCGCACAAGGCAATGTGACGACTAATGTGTCCAACGGCAGCCTGACAGTCGCTGGCAATACCACAGGTGCCAACACCAACCTGAGTGCCAGCGGCAATCTGACCGTGGGCAACCAGGGCAATATCAGTACCGCGGGCAATGCAACCCTGACTGCCGGCGGTAACCTGAGC"
+#     # seq01 = dna"ATGATGCATGCATGCATGCTAGTAACTAGCTAGCTAGCTAGTAA"
+#     # seq02 = dna"AACCAGGGCAATATCAGTACCGCGGGCAATGCAACCCTGACTGCCGGCGGTAACCTGAACAGCACTGGCAATCTGACTGTGGGCGGTGTTACCAACGGCACTGCTACTACTGGCAACATCGCACTGACCGGTAACAATGCGCTGAGCGGTCCGGTCAATCTGAATGCGTCGAATGGCACGGTGACCTTGAACACGACCGGCAATACCACGCTCGGTAACGTGACGGCACAAGGCAATGTGACGACCAATGTGTCCAACGGCAGTCTGACGGTTACCGGCAATACGACAGGTGCCAACACCAACCTCAGTGCCAGCGGCAACCTGACCGTGGGTAACCAGGGCAATATCAGTACCGCAGGCAATGCAACCCTGACGGCCGGCGACAACCTGACGAGCACTGGCAATCTGACTGTGGGCGGCGTCACCAACGGCACGGCCACCACCGGCAACATCGCGCTGACCGGTAACAATGCACTGGCTGGTCCTGTCAATCTGAACGCGCCGAACGGCACCGTGACCCTGAACACAACCGGCAATACCACGCTGGGTAATGTCACCGCACAAGGCAATGTGACGACTAATGTGTCCAACGGCAGCCTGACAGTCGCTGGCAATACCACAGGTGCCAACACCAACCTGAGTGCCAGCGGCAATCTGACCGTGGGCAACCAGGGCAATATCAGTACCGCGGGCAATGCAACCCTGACTGCCGGCGGTAACCTGAGC"
 
-    # Test case 1
+#     # Test case 1
 
-    # From pyrodigal issue #13 link: https://github.com/althonos/pyrodigal/blob/1f939b0913b48dbaa55d574b20e124f1b8323825/pyrodigal/tests/test_orf_finder.py#L271
-    # Pyrodigal predicts 2 genes from this sequence:
-    # 1) An alternative start codon (GTG) sequence at 48:347
-    # 2) A common start codon sequence at 426:590
-    # On the other hand, the NCBI ORFfinder program predicts 9 ORFs whose length is greater than 75 nt, from which one has an "outbound" stop
-    seq03 = dna"TTCGTCAGTCGTTCTGTTTCATTCAATACGATAGTAATGTATTTTTCGTGCATTTCCGGTGGAATCGTGCCGTCCAGCATAGCCTCCAGATATCCCCTTATAGAGGTCAGAGGGGAACGGAAATCGTGGGATACATTGGCTACAAACTTTTTCTGATCATCCTCGGAACGGGCAATTTCGCTTGCCATATAATTCAGACAGGAAGCCAGATAACCGATTTCATCCTCACTATCGACCTGAAATTCATAATGCATATTACCGGCAGCATACTGCTCTGTGGCATGAGTGATCTTCCTCAGAGGAATATATACGATCTCAGTGAAAAAGATCAGAATGATCAGGGATAGCAGGAACAGGATTGCCAGGGTGATATAGGAAATATTCAGCAGGTTGTTACAGGATTTCTGAATATCATTCATATCAGTATGGATGACTACATAGCCTTTTACCTTGTAGTTGGAGGTAATGGGAGCAAATACAGTAAGTACATCCGAATCAAAATTACCGAAGAAATCACCAACAATGTAATAGGAGCCGCTGGTTACGGTCGAATCAAAATTCTCAATGACAACCACATTCTCCACATCTAAGGGACTATTGGTATCCAGTACCAGTCGTCCGGAGGGATTGATGATGCGAATCTCGGAATTCAGGTAGACCGCCAGGGAGTCCAGCTGCATTTTAACGGTCTCCAAAGTTGTTTCACTGGTGTACAATCCGCCGGCATAGGTTCCGGCGATCAGGGTTGCTTCGGAATAGAGACTTTCTGCCTTTTCCCGGATCAGATGTTCTTTGGTCATATTGGGAACAAAAGTTGTAACAATGATGAAACCAAATACACCAAAAATAAAATATGCGAGTATAAATTTTAGATAAAGTGTTTTTTTCATAACAAATCCTGCTTTTGGTATGACTTAATTACGTACTTCGAATTTATAGCCGATGCCCCAGATGGTGCTGATCTTCCAGTTGGCATGATCCTTGATCTTCTC"
+#     # From pyrodigal issue #13 link: https://github.com/althonos/pyrodigal/blob/1f939b0913b48dbaa55d574b20e124f1b8323825/pyrodigal/tests/test_orf_finder.py#L271
+#     # Pyrodigal predicts 2 genes from this sequence:
+#     # 1) An alternative start codon (GTG) sequence at 48:347
+#     # 2) A common start codon sequence at 426:590
+#     # On the other hand, the NCBI ORFfinder program predicts 9 ORFs whose length is greater than 75 nt, from which one has an "outbound" stop
+#     seq03 = dna"TTCGTCAGTCGTTCTGTTTCATTCAATACGATAGTAATGTATTTTTCGTGCATTTCCGGTGGAATCGTGCCGTCCAGCATAGCCTCCAGATATCCCCTTATAGAGGTCAGAGGGGAACGGAAATCGTGGGATACATTGGCTACAAACTTTTTCTGATCATCCTCGGAACGGGCAATTTCGCTTGCCATATAATTCAGACAGGAAGCCAGATAACCGATTTCATCCTCACTATCGACCTGAAATTCATAATGCATATTACCGGCAGCATACTGCTCTGTGGCATGAGTGATCTTCCTCAGAGGAATATATACGATCTCAGTGAAAAAGATCAGAATGATCAGGGATAGCAGGAACAGGATTGCCAGGGTGATATAGGAAATATTCAGCAGGTTGTTACAGGATTTCTGAATATCATTCATATCAGTATGGATGACTACATAGCCTTTTACCTTGTAGTTGGAGGTAATGGGAGCAAATACAGTAAGTACATCCGAATCAAAATTACCGAAGAAATCACCAACAATGTAATAGGAGCCGCTGGTTACGGTCGAATCAAAATTCTCAATGACAACCACATTCTCCACATCTAAGGGACTATTGGTATCCAGTACCAGTCGTCCGGAGGGATTGATGATGCGAATCTCGGAATTCAGGTAGACCGCCAGGGAGTCCAGCTGCATTTTAACGGTCTCCAAAGTTGTTTCACTGGTGTACAATCCGCCGGCATAGGTTCCGGCGATCAGGGTTGCTTCGGAATAGAGACTTTCTGCCTTTTCCCGGATCAGATGTTCTTTGGTCATATTGGGAACAAAAGTTGTAACAATGATGAAACCAAATACACCAAAAATAAAATATGCGAGTATAAATTTTAGATAAAGTGTTTTTTTCATAACAAATCCTGCTTTTGGTATGACTTAATTACGTACTTCGAATTTATAGCCGATGCCCCAGATGGTGCTGATCTTCCAGTTGGCATGATCCTTGATCTTCTC"
     
-    seq03fna = "data/out-seq03.fna"
-    open(seq03fna, "w") do io
-        write_orfs_fna(seq03, io, NaiveFinder())
-    end
+#     seq03fna = "data/out-seq03.fna"
+#     open(seq03fna, "w") do io
+#         write_orfs_fna(seq03, io, NaiveFinder())
+#     end
 
-    seq03fnarecords = open(collect, FASTAReader, "data/out-seq03.fna")
+#     seq03fnarecords = open(collect, FASTAReader, "data/out-seq03.fna")
 
-    @test seq03fnarecords[1] == FASTX.FASTA.Record("ORF01 id=01 start=5 stop=22 strand=- frame=2", "ATGAAACAGAACGACTGA")
-    @test length(seq03fnarecords) == 32
-    @test identifier(seq03fnarecords[1]) == "ORF01"
-    @test description(seq03fnarecords[1]) == "ORF01 id=01 start=5 stop=22 strand=- frame=2"
-    @test sequence(seq03fnarecords[1]) == "ATGAAACAGAACGACTGA"
+#     @test seq03fnarecords[1] == FASTX.FASTA.Record("ORF01 id=01 start=5 stop=22 strand=- frame=2", "ATGAAACAGAACGACTGA")
+#     @test length(seq03fnarecords) == 32
+#     @test identifier(seq03fnarecords[1]) == "ORF01"
+#     @test description(seq03fnarecords[1]) == "ORF01 id=01 start=5 stop=22 strand=- frame=2"
+#     @test sequence(seq03fnarecords[1]) == "ATGAAACAGAACGACTGA"
 
-    # Test case 2
+#     # Test case 2
 
-    seq03faa = "data/out-seq03.faa"
-    open(seq03faa, "w") do io
-        write_orfs_faa(seq03, io, NaiveFinder())
-    end
+#     seq03faa = "data/out-seq03.faa"
+#     open(seq03faa, "w") do io
+#         write_orfs_faa(seq03, io, NaiveFinder())
+#     end
 
-    seq03faarecords = open(collect, FASTAReader, "data/out-seq03.faa")
+#     seq03faarecords = open(collect, FASTAReader, "data/out-seq03.faa")
 
-    @test seq03faarecords[2] == FASTX.FASTA.Record("ORF02 id=02 start=37 stop=156 strand=+ frame=1", "MYFSCISGGIVPSSIASRYPLIEVRGERKSWDTLATNFF*")
-    @test length(seq03faarecords) == 32
-    @test identifier(seq03faarecords[2]) == "ORF02"
-    @test description(seq03faarecords[2]) == "ORF02 id=02 start=37 stop=156 strand=+ frame=1"
-    @test sequence(seq03faarecords[2]) == "MYFSCISGGIVPSSIASRYPLIEVRGERKSWDTLATNFF*"
+#     @test seq03faarecords[2] == FASTX.FASTA.Record("ORF02 id=02 start=37 stop=156 strand=+ frame=1", "MYFSCISGGIVPSSIASRYPLIEVRGERKSWDTLATNFF*")
+#     @test length(seq03faarecords) == 32
+#     @test identifier(seq03faarecords[2]) == "ORF02"
+#     @test description(seq03faarecords[2]) == "ORF02 id=02 start=37 stop=156 strand=+ frame=1"
+#     @test sequence(seq03faarecords[2]) == "MYFSCISGGIVPSSIASRYPLIEVRGERKSWDTLATNFF*"
 
-    # Test case 3 
+#     # Test case 3 
 
-    @test seq03faarecords[3] == FASTX.FASTA.Record("ORF03 id=03 start=107 stop=136 strand=- frame=2", "MYPTISVPL*")
+#     @test seq03faarecords[3] == FASTX.FASTA.Record("ORF03 id=03 start=107 stop=136 strand=- frame=2", "MYPTISVPL*")
 
-end
+# end
\ No newline at end of file
diff --git a/test/runtests.jl b/test/runtests.jl
index d89a385..764a090 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -3,12 +3,32 @@ module GeneFinderTests
 using Test
 using BioSequences
 using FASTX
-using GeneFinder
+# using GeneFinder
 using Aqua
+using TestItems
+using TestItemRunner
+
+@run_package_tests
 
 include("findorfstest.jl")
-include("iotest.jl")
+# include("iotest.jl")
 include("getindextest.jl")
-include("aquatest.jl")
+# include("aquatest.jl")
+
+
+
+@testitem "getorfs dna" default_imports=false begin
+
+    using BioSequences: @dna_str, DNAAlphabet
+    using GeneFinder: NaiveFinder, getorfs, findorfs, ORF
+    using Test: @test
+
+    seq01 = dna"ATGATGCATGCATGCATGCTAGTAACTAGCTAGCTAGCTAGTAA"
+    orfseqs = getorfs(seq01, DNAAlphabet{4}(), NaiveFinder())
+
+    # @test length(orfseqs) == 5
+    # @test orfseqs[1] == dna"ATGATGCATGCATGCATGCTAGTAACTAGCTAG"
+end
+
 
 end

From 9132e110496e0658fef945a8712d53bea2cdd15a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Camilo=20Garc=C3=ADa?= <ca.garcia2@uniandes.edu.co>
Date: Thu, 23 May 2024 17:43:52 -0500
Subject: [PATCH 04/40] refactor: Remove unused code in runtests.jl

---
 test/runtests.jl | 16 ----------------
 1 file changed, 16 deletions(-)

diff --git a/test/runtests.jl b/test/runtests.jl
index 764a090..3bdd666 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -15,20 +15,4 @@ include("findorfstest.jl")
 include("getindextest.jl")
 # include("aquatest.jl")
 
-
-
-@testitem "getorfs dna" default_imports=false begin
-
-    using BioSequences: @dna_str, DNAAlphabet
-    using GeneFinder: NaiveFinder, getorfs, findorfs, ORF
-    using Test: @test
-
-    seq01 = dna"ATGATGCATGCATGCATGCTAGTAACTAGCTAGCTAGCTAGTAA"
-    orfseqs = getorfs(seq01, DNAAlphabet{4}(), NaiveFinder())
-
-    # @test length(orfseqs) == 5
-    # @test orfseqs[1] == dna"ATGATGCATGCATGCATGCTAGTAACTAGCTAG"
-end
-
-
 end

From 8e138bada34058ce180ed3fcd91e929741b5bba5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Camilo=20Garc=C3=ADa?= <ca.garcia2@uniandes.edu.co>
Date: Thu, 23 May 2024 17:45:57 -0500
Subject: [PATCH 05/40] Relax some tests

---
 test/findorfstest.jl | 18 +++++++++---------
 test/runtests.jl     |  2 +-
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/test/findorfstest.jl b/test/findorfstest.jl
index 7c9f1cf..2eb9081 100644
--- a/test/findorfstest.jl
+++ b/test/findorfstest.jl
@@ -37,18 +37,18 @@
 #     NC_001416_orfs = findorfs(NC_001416, NaiveFinder(), min_len=75)
 #     @test length(NC_001416_orfs) == 885
 # end
-@testitem "getorfs dna" default_imports=false begin
+# @testitem "getorfs dna" default_imports=false begin
 
-    using BioSequences: @dna_str, DNAAlphabet
-    using GeneFinder: NaiveFinder, getorfs, findorfs, ORF
-    using Test: @test
+#     using BioSequences: @dna_str, DNAAlphabet
+#     using GeneFinder: NaiveFinder, getorfs, findorfs, ORF
+#     using Test: @test
 
-    seq01 = dna"ATGATGCATGCATGCATGCTAGTAACTAGCTAGCTAGCTAGTAA"
-    orfseqs = getorfs(seq01, DNAAlphabet{4}(), NaiveFinder())
+#     seq01 = dna"ATGATGCATGCATGCATGCTAGTAACTAGCTAGCTAGCTAGTAA"
+#     orfseqs = getorfs(seq01, DNAAlphabet{4}(), NaiveFinder())
 
-    # @test length(orfseqs) == 5
-    # @test orfseqs[1] == dna"ATGATGCATGCATGCATGCTAGTAACTAGCTAG"
-end
+#     # @test length(orfseqs) == 5
+#     # @test orfseqs[1] == dna"ATGATGCATGCATGCATGCTAGTAACTAGCTAG"
+# end
 
 # @testitem "getorfs proteins" begin
 #     using BioSequences, GeneFinder, Test
diff --git a/test/runtests.jl b/test/runtests.jl
index 3bdd666..34f222c 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -10,7 +10,7 @@ using TestItemRunner
 
 @run_package_tests
 
-include("findorfstest.jl")
+# include("findorfstest.jl")
 # include("iotest.jl")
 include("getindextest.jl")
 # include("aquatest.jl")

From b4123558ae4be2cdb3625c464e76f9192297e037 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Camilo=20Garc=C3=ADa?= <ca.garcia2@uniandes.edu.co>
Date: Sat, 25 May 2024 12:50:10 -0500
Subject: [PATCH 06/40] Move get_variable_name outside loop

---
 src/algorithms/naivefinder.jl | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/src/algorithms/naivefinder.jl b/src/algorithms/naivefinder.jl
index 4351ed7..e02c11d 100644
--- a/src/algorithms/naivefinder.jl
+++ b/src/algorithms/naivefinder.jl
@@ -56,17 +56,15 @@ function naivefinder(
     seqlen = length(sequence)
     framedict = Dict(0 => 3, 1 => 1, 2 => 2)
     orfs = Vector{ORF}()
-    id = 0
+    seqname = get_variable_name(sequence)
     for strand in ('+', '-')
         seq = strand == '-' ? reverse_complement(sequence) : sequence
-
         @inbounds for location in @views _locationiterator(seq; alternative_start)
             if length(location) >= min_len
                 frame = strand == '+' ? framedict[location.start % 3] : framedict[(seqlen - location.stop + 1) % 3]
                 start = strand == '+' ? location.start : seqlen - location.stop + 1
                 stop = start + length(location) - 1
-                id += 1
-                push!(orfs, ORF(get_variable_name(sequence), start:stop, strand, frame, NaiveFinder(), scheme))
+                push!(orfs, ORF(seqname, start:stop, strand, frame, scheme)) #NaiveFinder()
             end
         end
     end
@@ -102,7 +100,7 @@ function naivefinderscored(
     seqlen = length(sequence)
     framedict = Dict(0 => 3, 1 => 1, 2 => 2)
     orfs = Vector{ORF}()
-    id = 0
+    seqname = get_variable_name(sequence)
     for strand in ('+', '-')
         seq = strand == '-' ? reverse_complement(sequence) : sequence
 
@@ -113,8 +111,7 @@ function naivefinderscored(
                 stop = start + length(location) - 1
                 # score = -10log10(dnaseqprobability(seq[start:stop], scoringscheme))
                 score = log_odds_ratio_score(seq[start:stop], model)
-                id += 1
-                push!(orfs, ORF(get_variable_name(sequence), start:stop, strand, frame, NaiveFinderScored(), lors, score))
+                push!(orfs, ORF(seqname, start:stop, strand, frame, NaiveFinderScored(), lors, score))
             end
         end
     end

From 0aaf4e2830bdd9e33c836e29fe9e96e89425696f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Camilo=20Garc=C3=ADa?= <ca.garcia2@uniandes.edu.co>
Date: Mon, 3 Jun 2024 00:40:56 -0500
Subject: [PATCH 07/40] Major refactor of the ORF type and also add some
 groundwork for RBS

---
 src/types.jl | 197 +++++++++++++++++++++------------------------------
 1 file changed, 81 insertions(+), 116 deletions(-)

diff --git a/src/types.jl b/src/types.jl
index 68df0f9..2aa52a4 100644
--- a/src/types.jl
+++ b/src/types.jl
@@ -1,152 +1,117 @@
-export ORF
-export NaiveFinder, NaiveFinderScored
-export NaiveScoringScheme
-export finder, frame, scheme, score
-# Structs associated with gene models 
-abstract type AbstractGene end
-
-"""
-    struct GeneFeatures
-        seqname::String
-        start::Int64
-        stop::Int64
-        strand::Char
-        frame::Int8
-        score::Union{Nothing, Float64} # Add score field
-        attribute::Dict
-    end
-
-This is the main Gene struct, based on the fields that could be found in a GFF3, still needs to be defined correctly,
-    The idea is correct the frame and attributes that will have something like a possible list (id=Char;name=;locus_tag).
-    The `write` and `get` functions should have a dedicated method for this struct.
-"""
-# struct GeneFeatures <: AbstractGene
-#     seqname::String
-#     start::Int64
-#     stop::Int64
-#     strand::Char
-#     frame::Int8 # But maybe a Union to allow empty when reading a GFF? 
-#     score::Union{Float64, Nothing} # Add score field
-#     attribute::Dict # Should be a Dict perhaps or a NamedTuple
-# end
+import GenomicFeatures: first, last, length, strand, groupname
+import FASTX: sequence
 
-# struct GeneFeatures <: AbstractGene
-#     seqname::String could be a Union{String, Nothing} and named id.
-#     orf::ORF
-#     attribute::Dict
-# end
-
-"""
-    struct ORF
-        location::UnitRange{Int64}
-        strand::Char
-        frame::Int
-        score::Union{Nothing, Float64}
-    end
-
-The ORF struct represents an open reading frame in a DNA sequence. It has two fields: 
-
-- `location`: which is a UnitRange{Int64} indicating the start and end locations of the ORF in the sequence
-- `strand`:  is a Char type indicating whether the ORF is on the forward ('+') or reverse ('-') strand of the sequence.
-- `frame`: is an Int type indicating the reading frame of the ORF. The frame is the position of the first nucleotide of the codon that starts the ORF, relative to the start of the sequence. It can be 1, 2, or 3.
-- `score`: is a Union{Nothing, Float64} type indicating the score of the ORF. It can be a Float64 or nothing.
-"""
-# struct ORF <: AbstractGene # could also be mutable so that scores can be updated later, but tests fails
-#     #TODOs:  location might be a complex Union allowing UnitRange or a Join of ranges (e.g. 1..100, 200..300)?
-#     location::UnitRange{Int64}  # Note that it is also called position for gene struct in GenomicAnotations
-#     strand::Char
-#     frame::Int # Use Int64 instead of Int
-#     score::Union{Nothing, Float64} # Add score field shold be a namedtuple with the score and the method used to score
-#     # rbs::Union{Nothing, Vector{UnitRange{Int64}}} # Add RBS field  see https://github.com/deprekate/PHANOTATE/blob/c77e80caef8dc3264f7dc698b087bcd486216bcb/phanotate_modules/functions.py#L48
-
-#     function ORF(location::UnitRange{Int64}, strand::Char, frame::Int, score::Union{Nothing, Float64} = nothing)
-#         @assert frame in (1, 2, 3) "Invalid frame value. Frame must be 1, 2, or 3."
-#         @assert strand in ('+', '-') "Invalid strand value. Strand must be '+' or '-'."
-#         @assert location[1] < location[end] "Invalid location. Start must be less than stop."
-#         @assert score isa Union{Nothing, Float64} "Invalid score value. Score must be a Float64 or nothing."
-    
-#         return new(location, strand, frame, score)
-#     end
-# end
+export RBS, ORF
+export gc, features, sequence
+export groupname, finder, frame, scheme, score
 
-## Interfaces for gene finders
+#### Ribosome Binding Site (RBS) struct ####
 
-abstract type GeneFinderMethod end
-struct NaiveFinder <: GeneFinderMethod end
-struct NaiveFinderScored <: GeneFinderMethod end
+# seq[orf.first-bin01.offset.start:orf.first-1]
 
+# motifs = [dna"GGA", dna"GAG", dna"AGG"]
+struct RBS
+    motif::BioRegex{DNA}
+    offset::UnitRange{Int64} # offset
+    score::Float64
 
-## Interfaces for gene scoring schemes
-
-abstract type GeneScoringScheme end
-struct NaiveScoringScheme <: GeneScoringScheme end
-
+    function RBS(motif::BioRegex{DNA}, offset::UnitRange{Int64}, score::Float64)
+        return new(motif, offset, score)
+    end
+end
 
-# export ORF
+# Structs associated with gene models
 
-struct ORF{F} <: GenomicFeatures.AbstractGenomicInterval{F}
+# const FEATUREDICT = Dict{Symbol,Any}(:gc => 0.0, :length => 0, :score => 0.0)
+# const Feature = Union{Real, Vector{RBS}}
+struct ORF{N,F} <: GenomicFeatures.AbstractGenomicInterval{F}
     groupname::String
     first::Int64
     last::Int64
     strand::Strand
     frame::Int
-    finder::F
-    scheme::Union{Nothing, Function}
-    score::Union{Nothing, Float64}
-    rbs::Union{Nothing, Vector{UnitRange{Int64}}} # Add RBS field 
+    seq::LongSubSeq{DNAAlphabet{N}}
+    features::Dict{Symbol,Any}
+    scheme::Union{Nothing,Function}
 end
 
-function ORF(
+function ORF{N,F}(
+    ::Type{F}, #finder
     groupname::String,
     first::Int64,
     last::Int64,
     strand::Union{Strand,Char},
     frame::Int,
-    finder::F,
-    scheme::Union{Nothing, Function}=nothing,
-    score::Union{Nothing, Float64}=nothing,
-    rbs::Union{Nothing, Vector{UnitRange{Int64}}}=nothing
-) where {F <: GeneFinderMethod}
-    # @assert frame in (1, 2, 3) "Invalid frame value. Frame must be 1, 2, or 3."
-    return ORF{F}(groupname, first, last, strand, frame, finder, scheme, score, rbs)
+    seq::LongSubSeq{DNAAlphabet{N}},
+    features::Dict{Symbol,Any},
+    scheme::Union{Nothing,Function}=nothing
+) where {N,F<:GeneFinderMethod}
+    # @assert frame in (1, 2, 3) && location[1] < location[end] && score isa Union{Nothing, Float64} && length(seq) == last(location) - first(location) + 1 && length(seq) % 3 == 0 "Invalid input. Please check the frame, location, score, and sequence length."
+    return ORF{N,F}(groupname, first, last, strand, frame, seq, features, scheme) #finder
 end
 
-function ORF(
-    id::String,
-    location::UnitRange{Int64},
-    strand::Union{Strand,Char},
-    frame::Int, finder::F,
-    scheme::Union{Nothing, Function}=nothing,
-    score::Union{Nothing, Float64}=nothing,
-    rbs::Union{Nothing, Vector{UnitRange{Int64}}}=nothing
-) where {F <: GeneFinderMethod}
-    # @assert frame in (1, 2, 3) "Invalid frame value. Frame must be 1, 2, or 3."
-    return ORF{F}(id, first(location), last(location), strand, frame, finder, scheme, score, rbs)
+function groupname(i::ORF{N,F}) where {N,F}
+    return i.groupname
+end
+
+function first(i::ORF{N,F}) where {N,F}
+    return i.first
 end
 
-function frame(i::ORF)
+function last(i::ORF{N,F}) where {N,F}
+    return i.last
+end
+
+function frame(i::ORF{N,F}) where {N,F}
     return i.frame
 end
 
-function scheme(i::ORF)
+finder(i::ORF{N,F}) where {N,F} = F
+
+function scheme(i::ORF{N,F}) where {N,F}
     return i.scheme
 end
 
-function score(i::ORF)
-    return i.score
+function sequence(i::ORF{N,F}) where {N,F}
+    return i.strand == STRAND_POS ? i.seq : reverse_complement(i.seq)
+end
+
+function score(i::ORF{N,F}) where {N,F}
+    return i.features[:score]
+end
+
+function gc(i::ORF{N,F}) where {N,F}
+    return i.features[:gc]
+end
+
+function length(i::ORF{N,F}) where {N,F}
+    return i.features[:length]
 end
 
-function finder(i::ORF)
-    return i.finder
+function features(i::ORF{N,F}) where {N,F}
+    return i.features
 end
 
-function Base.show(io::IO, i::ORF)
+function Base.show(io::IO, i::ORF{N,F}) where {N,F}
     if get(io, :compact, false)
-        # print(io, groupname(i), ":", leftposition(i), "-", rightposition(i))
-        print(io, "ORF{$(typeof(finder(i)))}($(leftposition(i)):$(rightposition(i)), '$(strand(i))', $(frame(i)), $(score(i)))")
+        print(io, "ORF{$(finder(i))}($(leftposition(i)):$(rightposition(i)), '$(strand(i))', $(frame(i)))") #{$(typeof(finder(i)))} $(score(i))
     else
-        # print(io, "ORF{$(typeof(finder(i))),$(scheme(i))}($(groupname(i)), $(leftposition(i)):$(rightposition(i)), $(strand(i)), $(frame(i)), $(round(score(i), digits=5)))")
-        print(io, "ORF{$(typeof(finder(i)))}($(leftposition(i)):$(rightposition(i)), '$(strand(i))', $(frame(i)), $(score(i)))")
-        # println(io, " ", groupname(i), " ", leftposition(i), ":", rightposition(i), ", ", strand(i), ", ", frame(i), ", score: $(round(score(i), digits=2))")
+        print(io, "ORF{$(finder(i))}($(leftposition(i)):$(rightposition(i)), '$(strand(i))', $(frame(i)))") # , $(score(i))
     end
-end
\ No newline at end of file
+end
+
+## Ideas for Gene struct
+
+# struct CDS <: AbstractGene
+#     orf::ORF
+#     coding::Bool
+# end
+
+
+# frm = 1
+# frst = 1
+# lst = 99
+# str = '+'
+# seq = randdnaseq(99)
+
+# orf = ORF{4,NaiveFinder}("seq1", frst, lst, str, frm, seq, nothing, nothing, nothing)
\ No newline at end of file

From 14fff61c20a511b9b96702873eb115b076f86cd4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Camilo=20Garc=C3=ADa?= <ca.garcia2@uniandes.edu.co>
Date: Mon, 3 Jun 2024 00:41:07 -0500
Subject: [PATCH 08/40] Update deps

---
 Manifest.toml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Manifest.toml b/Manifest.toml
index 65da618..0cc8efb 100644
--- a/Manifest.toml
+++ b/Manifest.toml
@@ -147,9 +147,9 @@ uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76"
 version = "1.0.3"
 
 [[deps.TranscodingStreams]]
-git-tree-sha1 = "5d54d076465da49d6746c647022f3b3674e64156"
+git-tree-sha1 = "a947ea21087caba0a798c5e494d0bb78e3a1a3a0"
 uuid = "3bb67fe8-82b1-5028-8e26-92a6c54297fa"
-version = "0.10.8"
+version = "0.10.9"
 
     [deps.TranscodingStreams.extensions]
     TestExt = ["Test", "Random"]

From 4dc47a6bab3e1d7ec61ffb4026352cf225b2a2d5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Camilo=20Garc=C3=ADa?= <ca.garcia2@uniandes.edu.co>
Date: Mon, 3 Jun 2024 00:41:47 -0500
Subject: [PATCH 09/40] Improve getindex with some types

---
 docs/src/simplecodingrule.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/src/simplecodingrule.md b/docs/src/simplecodingrule.md
index b92e9a7..c6bbaf6 100644
--- a/docs/src/simplecodingrule.md
+++ b/docs/src/simplecodingrule.md
@@ -17,7 +17,7 @@ using GeneFinder, BioSequences
 
 seq = dna"TTCGTCAGTCGTTCTGTTTCATTCAATACGATAGTAATGTATTTTTCGTGCATTTCCGGTGGAATCGTGCCGTCCAGCATAGCCTCCAGATATCCCCTTATAGAGGTCAGAGGGGAACGGAAATCGTGGGATACATTGGCTACAAACTTTTTCTGATCATCCTCGGAACGGGCAATTTCGCTTGCCATATAATTCAGACAGGAAGCCAGATAACCGATTTCATCCTCACTATCGACCTGAAATTCATAATGCATATTACCGGCAGCATACTGCTCTGTGGCATGAGTGATCTTCCTCAGAGGAATATATACGATCTCAGTGAAAAAGATCAGAATGATCAGGGATAGCAGGAACAGGATTGCCAGGGTGATATAGGAAATATTCAGCAGGTTGTTACAGGATTTCTGAATATCATTCATATCAGTATGGATGACTACATAGCCTTTTACCTTGTAGTTGGAGGTAATGGGAGCAAATACAGTAAGTACATCCGAATCAAAATTACCGAAGAAATCACCAACAATGTAATAGGAGCCGCTGGTTACGGTCGAATCAAAATTCTCAATGACAACCACATTCTCCACATCTAAGGGACTATTGGTATCCAGTACCAGTCGTCCGGAGGGATTGATGATGCGAATCTCGGAATTCAGGTAGACCGCCAGGGAGTCCAGCTGCATTTTAACGGTCTCCAAAGTTGTTTCACTGGTGTACAATCCGCCGGCATAGGTTCCGGCGATCAGGGTTGCTTCGGAATAGAGACTTTCTGCCTTTTCCCGGATCAGATGTTCTTTGGTCATATTGGGAACAAAAGTTGTAACAATGATGAAACCAAATACACCAAAAATAAAATATGCGAGTATAAATTTTAGATAAAGTGTTTTTTTCATAACAAATCCTGCTTTTGGTATGACTTAATTACGTACTTCGAATTTATAGCCGATGCCCCAGATGGTGCTGATCTTCCAGTTGGCATGATCCTTGATCTTCTC"
 
-orfs = findorfs(seq, min_len=75, NaiveFinderScored())
+orfs = findorfs(seq, minlen=75, NaiveFinderScored())
 ```
 
 ```julia
@@ -48,7 +48,7 @@ Where the ``P_{C}`` is the probability of the sequence given a CDS model, ``P_{N
 In this package we have implemented this rule and call some basic models of CDS and No-CDS of *E. coli* from Axelson-Fisk (2015) work (implemented in `BioMarkovChains.jl` package). To check whether a random sequence could be coding based on these decision we use the predicate `log_odds_ratio_decision_rule` with the `ECOLICDS` and `ECOLINOCDS` models:
 
 ```julia
-orfsdna = getorfs(seq,DNAAlphabet{4}(),  NaiveFinder(), min_len=75, alternative_start=true)
+orfsdna = getorfs(seq,DNAAlphabet{4}(),  NaiveFinder(), minlen=75, alternative_start=true)
 
 20-element Vector{LongSubSeq{DNAAlphabet{4}}}:
  ATGTATTTTTCGTGCATTTCCGGTGGAATCGTGCCGTCC…CGGAAATCGTGGGATACATTGGCTACAAACTTTTTCTGA
@@ -118,7 +118,7 @@ orfs = filter(orf -> iscoding(orf), orfsdna)
 Or in terms of the `ORF` object:
 
 ```julia
-orfs = findorfs(seq, min_len=75, NaiveFinderScored(), alternative_start=true) # find ORFs with alternative start as well
+orfs = findorfs(seq, minlen=75, NaiveFinderScored(), alternative_start=true) # find ORFs with alternative start as well
 orfs[iscoding.(orfsdna)]
 
     3-element Vector{ORF}:

From 61ac5c79e396f8d5ea4483c8a53d850c18d794ad Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Camilo=20Garc=C3=ADa?= <ca.garcia2@uniandes.edu.co>
Date: Mon, 3 Jun 2024 00:42:28 -0500
Subject: [PATCH 10/40] Up translate and other utils are now more stable

---
 src/extended.jl | 32 +++++++-------------------------
 1 file changed, 7 insertions(+), 25 deletions(-)

diff --git a/src/extended.jl b/src/extended.jl
index 4d08a56..e45cd44 100644
--- a/src/extended.jl
+++ b/src/extended.jl
@@ -1,28 +1,10 @@
 # Methods from main packages that expand their fuctions to this package structs
-import Base: isless, iterate, sort, getindex
+import Base: isless, iterate, sort, getindex, length
+import BioSequences: translate
 
+Base.isless(a::ORF{N,F}, b::ORF{N,F}) where {N,F} = isless(a.first:a.last, b.first:b.last)
 
-# Base.isless(a::ORF, b::ORF) = isless(a.location, b.location)
-Base.isless(a::ORF, b::ORF) = isless(a.first:a.last, b.first:b.last)
-# Base.isless(a::ORF, b::ORF) = isless(a.location, b.location)
-# Base.isless(a::ORF, b::ORF) = isless(a.score, b.score)
-
-# Base.sort(v::Vector{<:ORF}; kwargs...) = sort(v, by = _orf_sort_key)
-# Base.sort!(v::Vector{<:ORF}; kwargs...) = sort!(v, by = _orf_sort_key)
-
-
-#TODOs: how to make more robust the getindex method? confroning the frames?
-# Base.getindex(sequence::NucleicSeqOrView{A}, orf::ORF) where {A} = orf.strand == '+' ? (@view sequence[orf.location]) : reverse_complement(@view sequence[orf.location])
-
-# function getindex(sequence::NucleicSeqOrView{A}, orf::ORF) where {A}
-#     if orf.strand == '+'
-#         return @view sequence[orf.location]
-#     else
-#         return reverse_complement(@view sequence[orf.location])
-#     end
-# end
-
-function getindex(sequence::NucleicSeqOrView{A}, orf::ORF) where {A}
+function getindex(sequence::NucleicSeqOrView{A}, orf::ORF{N,F}) where {A,N,F}
     if orf.strand == '+' || orf.strand == STRAND_POS
         return @view sequence[orf.first:orf.last]
     else
@@ -30,6 +12,6 @@ function getindex(sequence::NucleicSeqOrView{A}, orf::ORF) where {A}
     end
 end
 
-# function _orf_sort_key(orf::ORF)
-#     return (orf.location, orf.strand, orf.score)
-# end
\ No newline at end of file
+function translate(orf::ORF{N,F}; kwargs...) where {N,F}
+    return translate(sequence(orf); kwargs...)
+end
\ No newline at end of file

From 27ca3f8cc134b792adc59bd5f3ee5ccfdd951495 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Camilo=20Garc=C3=ADa?= <ca.garcia2@uniandes.edu.co>
Date: Mon, 3 Jun 2024 00:42:44 -0500
Subject: [PATCH 11/40] Update the min_len into minlen

---
 scripts/findorfs-script.jl       |  12 ++
 src/GeneFinder.jl                |  19 ++-
 src/algorithms/naivecollector.jl |  49 ++++++
 src/algorithms/naivefinder.jl    | 232 ++++++++++-----------------
 src/criteria/lordr.jl            |  65 ++++++++
 src/findorfs.jl                  |  35 ++--
 src/getorfs.jl                   | 265 +++++++++++++++----------------
 src/io.jl                        | 260 +++++++-----------------------
 src/iscoding.jl                  |   4 +-
 src/utils.jl                     |   8 +-
 test/findorfstest.jl             |   7 +-
 test/getindextest.jl             |  18 +--
 test/runtests.jl                 |   4 +-
 13 files changed, 449 insertions(+), 529 deletions(-)
 create mode 100644 scripts/findorfs-script.jl
 create mode 100644 src/algorithms/naivecollector.jl
 create mode 100644 src/criteria/lordr.jl

diff --git a/scripts/findorfs-script.jl b/scripts/findorfs-script.jl
new file mode 100644
index 0000000..94459b2
--- /dev/null
+++ b/scripts/findorfs-script.jl
@@ -0,0 +1,12 @@
+#!/usr/bin/env julia
+
+fasta = ARGS[1]
+
+using BioSequences: NucleicSeqOrView, DNAAlphabet, LongDNA
+using GeneFinder: findorfs, fasta2bioseq, lors, NaiveFinder
+using BenchmarkTools: @btime
+
+seq = fasta2bioseq(fasta)[1]
+time = @btime findorfs(seq, NaiveFinder(), minlen=64, scheme=lors);
+
+println("Time: $time")
\ No newline at end of file
diff --git a/src/GeneFinder.jl b/src/GeneFinder.jl
index b52e45d..94df7d6 100644
--- a/src/GeneFinder.jl
+++ b/src/GeneFinder.jl
@@ -5,6 +5,7 @@ using BioSequences:
     DNA_Gap, DNA_A, DNA_C, DNA_M, DNA_G, DNA_R, DNA_S, DNA_V, DNA_T, DNA_W, DNA_Y, DNA_H, DNA_K, DNA_D, DNA_B, DNA_N,
     
     Alphabet,
+    BioRegex,
     NucleicAcidAlphabet,
     DNAAlphabet,
     AminoAcidAlphabet,
@@ -19,31 +20,41 @@ using BioSequences:
     reverse_complement,
     randdnaseq,
     ncbi_trans_table,
-    translate
+    translate,
+    gc_content
 
 using BioMarkovChains: BioMarkovChain, dnaseqprobability, ECOLICDS, ECOLINOCDS, log_odds_ratio_score
 using FASTX: FASTAReader, sequence
 using IterTools: takewhile, iterated
 using PrecompileTools: @setup_workload, @compile_workload
-using GenomicFeatures: GenomicFeatures, AbstractGenomicInterval, GenomicInterval, Strand, summary, groupname, leftposition, rightposition, strand, metadata, STRAND_POS
+using GenomicFeatures: GenomicFeatures, AbstractGenomicInterval, GenomicInterval, Strand, summary, groupname, strand, metadata, STRAND_POS, STRAND_NEG, STRAND_BOTH, STRAND_NA
 
+# Finder Algorithms
 include("algorithms/naivefinder.jl")
+include("algorithms/naivecollector.jl")
+
+# Coding Criteria
+include("criteria/lordr.jl")
+
+# Main functions
 include("types.jl")
 include("findorfs.jl")
 include("iscoding.jl")
-include("getorfs.jl")
 include("io.jl")
+
+# Utils and extended functions
 include("utils.jl")
 include("extended.jl")
 
 @setup_workload begin
     # Putting some things in `@setup_workload` instead of `@compile_workload` can reduce the size of the
     # precompile file and potentially make loading faster.
+    # using BioSequences, GeneFinder
     seq = randdnaseq(99)
     @compile_workload begin
         # all calls in this block will be precompiled, regardless of whether
         # they belong to your package or not (on Julia 1.8 and higher)
-        findorfs(seq, NaiveFinder())
+        # findorfs(seq)
     end
 end
 
diff --git a/src/algorithms/naivecollector.jl b/src/algorithms/naivecollector.jl
new file mode 100644
index 0000000..4efb608
--- /dev/null
+++ b/src/algorithms/naivecollector.jl
@@ -0,0 +1,49 @@
+export NaiveCollector
+
+# abstract type GeneFinderMethod end
+struct NaiveCollector <: GeneFinderMethod end
+
+function NaiveCollector(
+    sequence::NucleicSeqOrView{DNAAlphabet{N}};
+    alternative_start::Bool = false,
+    minlen::Int64 = 6,
+    scheme::Union{Nothing, Function} = nothing,
+    overlap::Bool = false,
+    kwargs...
+) where {N}
+    
+    regorf = alternative_start ? biore"DTG(?:[N]{3})*?T(AG|AA|GA)"dna : biore"ATG(?:[N]{3})*?T(AG|AA|GA)"dna
+    framedict = Dict(0 => 3, 1 => 1, 2 => 2)
+    revseq = reverse_complement(sequence)
+    seqlen = length(sequence)
+    seqname = get_var_name(sequence)
+
+    function createorfs(x, strand, s)
+        if length(x.captured[1]:x.captured[3]) < minlen
+            return nothing
+        end
+        if strand == '+'
+            start = x.captured[1]
+            stop = x.captured[3] + 1
+            frame = framedict[x.captured[1] % 3]
+        else
+            start = seqlen - x.captured[3] 
+            stop = seqlen - x.captured[1] + 1
+            frame = framedict[(seqlen - x.captured[3]) % 3]
+        end
+        seq = @view(sequence[start:stop])
+        score = scheme === nothing ? 0 : scheme(@view(s[start:stop]), ECOLICDS)
+        fts = Dict(:gc => gc_content(seq), :length => length(seq), :score => score)
+        return ORF{N,NaiveCollector}(seqname, start, stop, strand, frame, seq, fts, scheme)
+    end
+
+    orfs = Vector{ORF{N,NaiveCollector}}()
+    for strand in ('+', '-')
+        s = strand == '-' ? revseq : sequence
+        matches = eachmatch(regorf, s, overlap)
+        strandedorfs = filter(!isnothing, [createorfs(x, strand, s) for x in matches])
+        append!(orfs, strandedorfs)
+    end
+
+    return sort!(orfs; kwargs...)
+end
diff --git a/src/algorithms/naivefinder.jl b/src/algorithms/naivefinder.jl
index e02c11d..2c7cbed 100644
--- a/src/algorithms/naivefinder.jl
+++ b/src/algorithms/naivefinder.jl
@@ -1,6 +1,8 @@
-export naivefinder, naivefinderscored, log_odds_ratio_decision_rule, lordr, lors
+export NaiveFinder
 
-const lors = log_odds_ratio_score
+abstract type GeneFinderMethod end
+
+struct NaiveFinder <: GeneFinderMethod end
 
 """
     locationiterator(sequence::NucleicSeqOrView{DNAAlphabet{N}}; alternative_start::Bool=false) where {N}
@@ -29,8 +31,8 @@ function _locationiterator(
     return itr
 end
 
-"""
-    naivefinder(sequence::NucleicAlphabet{DNAAlphabet{N}}; alternative_start::Bool=false, min_len::Int64=6)::Vector{ORF} where {N}
+@doc raw"""
+    naivefinder(sequence::NucleicSeqOrView{DNAAlphabet{N}}; kwargs...) -> Vector{ORF} where {N}
 
 A simple implementation that finds ORFs in a DNA sequence.
 
@@ -39,163 +41,103 @@ The `naivefinder` function takes a LongSequence{DNAAlphabet{4}} sequence and ret
         Extending the starting codons with the `alternative_start = true` will search for ATG, GTG, and TTG.
     Some studies have shown that in *E. coli* (K-12 strain), ATG, GTG and TTG are used 83 %, 14 % and 3 % respectively.
 !!! note
-    This function has not ORFs scoring scheme. Thus it might consider `aa"M*"` a posible encoding protein from the resulting ORFs.
-    
-# Keywords
+    This function has neither ORFs scoring scheme by default nor length constraints. Thus it might consider `aa"M*"` a posible encoding protein from the resulting ORFs.
 
-- `alternative_start::Bool=false`: If true will pass the extended start codons to search. This will increase 3x the exec. time.
-- `min_len::Int64=6`:  Length of the allowed ORF. Default value allow `aa"M*"` a posible encoding protein from the resulting ORFs.
-"""
-function naivefinder(
-    sequence::NucleicSeqOrView{DNAAlphabet{N}};
-    alternative_start::Bool = false,
-    min_len::Int64 = 6,
-    scheme::Union{Nothing, Function} = nothing,
-    kwargs...
-) where {N}
-    seqlen = length(sequence)
-    framedict = Dict(0 => 3, 1 => 1, 2 => 2)
-    orfs = Vector{ORF}()
-    seqname = get_variable_name(sequence)
-    for strand in ('+', '-')
-        seq = strand == '-' ? reverse_complement(sequence) : sequence
-        @inbounds for location in @views _locationiterator(seq; alternative_start)
-            if length(location) >= min_len
-                frame = strand == '+' ? framedict[location.start % 3] : framedict[(seqlen - location.stop + 1) % 3]
-                start = strand == '+' ? location.start : seqlen - location.stop + 1
-                stop = start + length(location) - 1
-                push!(orfs, ORF(seqname, start:stop, strand, frame, scheme)) #NaiveFinder()
-            end
-        end
-    end
-    return sort!(orfs; kwargs...)
-end
-
-@doc raw"""
-    naivefinderscored(sequence::NucleicSeqOrView{DNAAlphabet{N}}; alternative_start=false, min_len=6) where {N}
+ 
+# Required Arguments
 
-Find open reading frames (ORFs) in a nucleic acid sequence using a naive scoring algorithm. In this method the scoring is a log-odds ratio score defines as:
+- `sequence::NucleicSeqOrView{DNAAlphabet{N}}`: The nucleic acid sequence to search for ORFs.
 
-```math
-S(x) = \sum_{i=1}^{L} \beta_{x_{i}x} = \sum_{i=1} \log \frac{a^{\mathscr{m}_{1}}_{i-1} x_i}{a^{\mathscr{m}_{2}}_{i-1} x_i}
-```
+# Keywords Arguments
 
-## Arguments
-- `sequence`: The nucleic acid sequence to search for ORFs.
-- `alternative_start`: A boolean indicating whether to consider alternative start codons. Default is `false`.
-- `min_len`: The minimum length of an ORF to be considered. Default is `6`.
-- `scoringscheme`: The scoring scheme to use for the scoring algorithm. Default is `ECOLICDS`.
+- `alternative_start::Bool`: If true will pass the extended start codons to search. This will increase 3x the execution time. Default is `false`.
+- `minlen::Int64=6`:  Length of the allowed ORF. Default value allow `aa"M*"` a posible encoding protein from the resulting ORFs.
+- `scheme::Function`: The scoring scheme to use for scoring the sequence from the ORF. Default is `nothing`.
 
-## Returns
-A sorted vector of `ORF` objects representing the identified open reading frames.
+!!! note
+    As the scheme is generally a scoring function that at least requires a sequence, one simple scheme is the log-odds ratio score. This score is a log-odds ratio that compares the probability of the sequence generated by a coding model to the probability of the sequence generated by a non-coding model:
+    ```math
+    S(x) = \sum_{i=1}^{L} \beta_{x_{i}x} = \sum_{i=1} \log \frac{a^{\mathscr{m}_{1}}_{i-1} x_i}{a^{\mathscr{m}_{2}}_{i-1} x_i}
+    ```
+    If the log-odds ratio exceeds a given threshold (`η`), the sequence is considered likely to be coding. See [`lordr`](@ref) for more information about coding creteria.
 
 """
-function naivefinderscored(
+# function NaiveFinder(
+#     sequence::NucleicSeqOrView{DNAAlphabet{N}};
+#     alternative_start::Bool = false,
+#     minlen::Int64 = 6,
+#     scheme::Union{Nothing, Function} = nothing,
+#     kwargs...
+# ) where {N}
+#     seqlen = length(sequence)
+#     framedict = Dict(0 => 3, 1 => 1, 2 => 2)
+#     orfs = Vector{ORF{N,NaiveFinder}}()
+#     seqname = get_var_name(sequence)
+#     for strand in ('+', '-')
+#         s = strand == '-' ? reverse_complement(sequence) : sequence
+#         @inbounds for location in @views _locationiterator(s; alternative_start)
+#             if length(location) >= minlen
+#                 start = strand == '+' ? location.start : seqlen - location.stop + 1
+#                 stop = start + length(location) - 1
+#                 frame = strand == '+' ? framedict[location.start % 3] : framedict[(seqlen - location.stop + 1) % 3]
+#                 seq = @view(sequence[start:stop])
+#                 score = scheme === nothing ? 0 : scheme(@view(s[start:stop]), ECOLICDS)
+#                 gc = gc_content(seq)
+#                 len = length(seq)
+#                 fts = Dict(:gc => gc, :length => len, :score => score)
+#                 push!(orfs, ORF{N,NaiveFinder}(seqname, start, stop, strand, frame, NaiveFinder, seq, fts, scheme))
+#             end
+#         end
+#     end
+#     return sort!(orfs; kwargs...)
+# end
+
+function NaiveFinder(
     sequence::NucleicSeqOrView{DNAAlphabet{N}};
     alternative_start::Bool = false,
-    min_len::Int64 = 6,
-    model::BioMarkovChain = ECOLICDS,
+    minlen::Int64 = 6,
+    scheme::Union{Nothing, Function} = nothing,
     kwargs...
 ) where {N}
+    
     seqlen = length(sequence)
     framedict = Dict(0 => 3, 1 => 1, 2 => 2)
-    orfs = Vector{ORF}()
-    seqname = get_variable_name(sequence)
-    for strand in ('+', '-')
-        seq = strand == '-' ? reverse_complement(sequence) : sequence
-
-        @inbounds for location in @views _locationiterator(seq; alternative_start)
-            if length(location) >= min_len
-                frame = strand == '+' ? framedict[location.start % 3] : framedict[(seqlen - location.stop + 1) % 3]
-                start = strand == '+' ? location.start : seqlen - location.stop + 1
-                stop = start + length(location) - 1
-                # score = -10log10(dnaseqprobability(seq[start:stop], scoringscheme))
-                score = log_odds_ratio_score(seq[start:stop], model)
-                push!(orfs, ORF(seqname, start:stop, strand, frame, NaiveFinderScored(), lors, score))
-            end
-        end
-    end
+    orfs = Vector{ORF{N,NaiveFinder}}()
+    seqname = get_var_name(sequence)
 
-    return sort!(orfs; kwargs...) 
-end
-
-@doc raw"""
-    log_odds_ratio_decision_rule(
-        sequence::LongSequence{DNAAlphabet{4}};
-        codingmodel::BioMarkovChain,
-        noncodingmodel::BioMarkovChain,
-        η::Float64 = 1e-5
-        )
-
-Check if a given DNA sequence is likely to be coding based on a log-odds ratio.
-    The log-odds ratio is a statistical measure used to assess the likelihood of a sequence being coding or non-coding. It compares the probability of the sequence generated by a coding model to the probability of the sequence generated by a non-coding model. If the log-odds ratio exceeds a given threshold (`η`), the sequence is considered likely to be coding.
-    It is formally described as a decision rule:
-
-```math
-S(X) = \log \left( \frac{{P_C(X_1=i_1, \ldots, X_T=i_T)}}{{P_N(X_1=i_1, \ldots, X_T=i_T)}} \right) \begin{cases} > \eta & \Rightarrow \text{{coding}} \\ < \eta & \Rightarrow \text{{noncoding}} \end{cases}
-```
-
-# Arguments
-- `sequence::NucleicSeqOrView{DNAAlphabet{N}}`: The DNA sequence to be evaluated.
-
-## Keyword Arguments
-- `codingmodel::BioMarkovChain`: The transition model for coding regions, (default: `ECOLICDS`).
-- `noncodingmodel::BioMarkovChain`: The transition model for non-coding regions, (default: `ECOLINOCDS`)
-- `η::Float64 = 1e-5`: The threshold value (eta) for the log-odds ratio (default: 1e-5).
-
-# Returns
-- `true` if the sequence is likely to be coding.
-- `false` if the sequence is likely to be non-coding.
-
-# Raises
-- `ErrorException`: if the length of the sequence is not divisible by 3.
-- `ErrorException`: if the sequence contains a premature stop codon.
-
-# Example
-
-```
-sequence = dna"ATGGCATCTAG"
-iscoding(sequence)  # Returns: true or false
-```
-"""
-function lordr( #log_odds_ratio_decision, also lordr/cudr/kfdr/aadr
-    sequence::NucleicSeqOrView{DNAAlphabet{N}};
-    codingmodel::BioMarkovChain = ECOLICDS,
-    noncodingmodel::BioMarkovChain = ECOLINOCDS,
-    η::Float64 = 1e-5
-) where {N}
-    pcoding = dnaseqprobability(sequence, codingmodel)
-    pnoncoding = dnaseqprobability(sequence, noncodingmodel)
-
-    logodds = log(pcoding / pnoncoding)
-
-    length(sequence) % 3 == 0 || error("The sequence is not divisible by 3")
+    function createorf(location, strand, s, seqlen)
+        if length(location) < minlen
+            return nothing
+        end
+        
+        if strand == '+'
+            start = location.start
+            stop = start + length(location) - 1
+            frame = framedict[location.start % 3]
+        else
+            start = seqlen - location.stop + 1
+            stop = start + length(location) - 1
+            frame = framedict[(seqlen - location.stop + 1) % 3]
+        end
 
-    !hasprematurestop(sequence) || error("There is a premature stop codon in the sequence")
+        seq = @view(sequence[start:stop])
+        score = scheme === nothing ? 0 : scheme(@view(s[start:stop]), ECOLICDS)
+        gc = gc_content(seq)
+        len = length(seq)
+        fts = Dict(:gc => gc, :length => len, :score => score)
 
-    if logodds > η
-        return true
-    else
-        false
+        return ORF{N, NaiveFinder}(seqname, start, stop, strand, frame, seq, fts, scheme)
     end
-end
-
-const log_odds_ratio_decision_rule = lordr # criteria
-
-## Another alternative:
-
-## This is the way window function are implemented in the DSP.jl package
-
-# function make_orf_finder(finderfunc::Function, sequence::NucleicSeqOrView{DNAAlphabet{N}}) where {N}
-#     ...
-# end
 
-# function locationiterator(sequence::NucleicSeqOrView{DNAAlphabet{N}}) where {N}
-#     make_orf_finder(sequence::NucleicSeqOrView{DNAAlphabet{N}}) do x
-#         ...
-#     end
-# end
+    @inbounds for strand in ('+', '-')
+        s = strand == '-' ? reverse_complement(sequence) : sequence
+        @inbounds for location in @views _locationiterator(s; alternative_start)
+            orf = createorf(location, strand, s, seqlen)
+            if orf !== nothing
+                push!(orfs, orf)
+            end
+        end
+    end
 
-# function findorfs(sequence::NucleicSeqOrView{DNAAlphabet{N}}; finder::Function=locationiterator) where {N}
-#     ...
-# end
\ No newline at end of file
+    return sort!(orfs; kwargs...)
+end
\ No newline at end of file
diff --git a/src/criteria/lordr.jl b/src/criteria/lordr.jl
new file mode 100644
index 0000000..f2d446e
--- /dev/null
+++ b/src/criteria/lordr.jl
@@ -0,0 +1,65 @@
+export log_odds_ratio_decision_rule, lordr, lors
+
+@doc raw"""
+    log_odds_ratio_decision_rule(
+        sequence::LongSequence{DNAAlphabet{4}};
+        codingmodel::BioMarkovChain,
+        noncodingmodel::BioMarkovChain,
+        η::Float64 = 1e-5
+        )
+
+Check if a given DNA sequence is likely to be coding based on a log-odds ratio.
+    The log-odds ratio is a statistical measure used to assess the likelihood of a sequence being coding or non-coding. It compares the probability of the sequence generated by a coding model to the probability of the sequence generated by a non-coding model. If the log-odds ratio exceeds a given threshold (`η`), the sequence is considered likely to be coding.
+    It is formally described as a decision rule:
+
+```math
+S(X) = \log \left( \frac{{P_C(X_1=i_1, \ldots, X_T=i_T)}}{{P_N(X_1=i_1, \ldots, X_T=i_T)}} \right) \begin{cases} > \eta & \Rightarrow \text{{coding}} \\ < \eta & \Rightarrow \text{{noncoding}} \end{cases}
+```
+
+# Arguments
+- `sequence::NucleicSeqOrView{DNAAlphabet{N}}`: The DNA sequence to be evaluated.
+
+## Keyword Arguments
+- `codingmodel::BioMarkovChain`: The transition model for coding regions, (default: `ECOLICDS`).
+- `noncodingmodel::BioMarkovChain`: The transition model for non-coding regions, (default: `ECOLINOCDS`)
+- `η::Float64 = 1e-5`: The threshold value (eta) for the log-odds ratio (default: 1e-5).
+
+# Returns
+- `true` if the sequence is likely to be coding.
+- `false` if the sequence is likely to be non-coding.
+
+# Raises
+- `ErrorException`: if the length of the sequence is not divisible by 3.
+- `ErrorException`: if the sequence contains a premature stop codon.
+
+# Example
+
+```
+sequence = dna"ATGGCATCTAG"
+iscoding(sequence)  # Returns: true or false
+```
+"""
+function lordr( #log_odds_ratio_decision, also lordr/cudr/kfdr/aadr
+    sequence::NucleicSeqOrView{DNAAlphabet{N}};
+    codingmodel::BioMarkovChain = ECOLICDS,
+    noncodingmodel::BioMarkovChain = ECOLINOCDS,
+    η::Float64 = 1e-5
+) where {N}
+    pcoding = dnaseqprobability(sequence, codingmodel)
+    pnoncoding = dnaseqprobability(sequence, noncodingmodel)
+
+    logodds = log(pcoding / pnoncoding)
+
+    length(sequence) % 3 == 0 || error("The sequence is not divisible by 3")
+
+    !hasprematurestop(sequence) || error("There is a premature stop codon in the sequence")
+
+    if logodds > η
+        return true
+    else
+        false
+    end
+end
+
+const log_odds_ratio_decision_rule = lordr # criteria
+const lors = log_odds_ratio_score # from BioMarkovChains
\ No newline at end of file
diff --git a/src/findorfs.jl b/src/findorfs.jl
index 76d8f38..9d093a9 100644
--- a/src/findorfs.jl
+++ b/src/findorfs.jl
@@ -5,17 +5,15 @@ export findorfs
 
 This is the main interface method for finding open reading frames (ORFs) in a DNA sequence.
 
-It takes the following arguments:
+It takes the following required arguments:
 
 - `sequence`: The nucleic acid sequence to search for ORFs.
 - `method`: The algorithm used to find ORFs. It can be either `NaiveFinder()`, `NaiveFinderScored()` or yet other implementations.
 
 ## Keyword Arguments regardless of the finder method:
-- `alternative_start`: A boolean indicating whether to consider alternative start codons. Default is `false`.
-- `min_len`: The minimum length of an ORF. Default is `6`.
-
-## Keyword Arguments for `NaiveFinderScored()`:
-- `scoringscheme::BioMarkovChain`: The scoring scheme to use for the scoring algorithm. Default is `ECOLICDS`.
+- `alternative_start::Bool`: A boolean indicating whether to consider alternative start codons. Default is `false`.
+- `minlen::Int`: The minimum length of an ORF. Default is `6`.
+- `scheme::Function`: The scoring scheme to use for scoring the sequence from the ORF. Default is `nothing`.
 
 ## Returns
 A vector of `ORF` objects representing the found ORFs.
@@ -31,30 +29,21 @@ sequence = randdnaseq(120)
 findorfs(sequence, NaiveFinder())
 
 1-element Vector{ORF}:
- ORF(77:118, '-', 2, 0.0)
+ ORF{NaiveFinder}(77:118, '-', 2, 0.0)
 ```
 
 """
 function findorfs end
 
 function findorfs(
-    sequence::NucleicSeqOrView{DNAAlphabet{N}},
-    ::NaiveFinder;
-    kwargs...
-) where {N}
-    return naivefinder(sequence; kwargs...)::Vector{ORF}
-end
-
-function findorfs(
-    sequence::NucleicSeqOrView{DNAAlphabet{N}},
-    ::NaiveFinderScored; # Should we decoupled the FinderMethod from the ScoringMethod? if so, what strategy to use for add the parameter (as a kwargs, as a ::SpecificScoringMethod or as a ::Type{M} where M<:ScoringMethod)?
+    sequence::NucleicSeqOrView{DNAAlphabet{N}};
+    finder::Type{F}=NaiveFinder,
     kwargs...
-) where {N}
-    return naivefinderscored(sequence; kwargs...)::Vector{ORF} #alternative_start, min_len, scoringscheme
+) where {N, F<:GeneFinderMethod}
+    return finder(sequence; kwargs...)::Vector{ORF{N,F}}
 end
 
 
-
 ### Some possible ideas:
 
 # function findorfs(
@@ -62,7 +51,7 @@ end
 #     method::Type{M};
 #     kwargs...
 # ) where {N, M<:GeneFinderMethod}
-#     return method(sequence; kwargs...)::Vector{ORF} #alternative_start, min_len, scoringscheme
+#     return method(sequence; kwargs...)::Vector{ORF} #alternative_start, minlen, scoringscheme
 # end
 
 # This general implementation would allow for the addition of new finder methods without changing the findorfs method.
@@ -82,7 +71,7 @@ end
 #     function NaiveFinderScored(
 #         sequence::NucleicSeqOrView{DNAAlphabet{N}};
 #         alternative_start::Bool = false,
-#         min_len::Int64 = 6,
+#         minlen::Int64 = 6,
 #         scoringscheme::BioMarkovChain = ECOLICDS,
 #         byf::Symbol = :location,
 #         kwargs...
@@ -94,7 +83,7 @@ end
 #             seq = strand == '-' ? reverse_complement(sequence) : sequence
 
 #             @inbounds for location in @views _locationiterator(seq; alternative_start)
-#                 if length(location) >= min_len
+#                 if length(location) >= minlen
 #                     frame = strand == '+' ? framedict[location.start % 3] : framedict[(seqlen - location.stop + 1) % 3]
 #                     start = strand == '+' ? location.start : seqlen - location.stop + 1
 #                     stop = start + length(location) - 1
diff --git a/src/getorfs.jl b/src/getorfs.jl
index b8fefa5..c487aeb 100644
--- a/src/getorfs.jl
+++ b/src/getorfs.jl
@@ -1,142 +1,131 @@
-export getorfs
-
-#### get_orfs_* methods ####
-
-"""
-    getorfs(sequence::NucleicSeqOrView{DNAAlphabet{N}}, ::Alphabet, method::M; kwargs...) where {N, M<:GeneFinderMethod}
-
-This function takes a `NucleicSeqOrView{DNAAlphabet{N}}` sequence and identifies the open reading frames (ORFs) using the `findorfs()` function under the selected finder method. The function then extracts the DNA or aminoacid sequence of each ORF and stores it in a `Vector{LongSubSeq{A}}`.
-
-# Arguments
-
-- `sequence`: The input sequence as a `NucleicSeqOrView{DNAAlphabet{N}}`
-- `alphabet`: The output sequence alphabet. It can be either `DNAAlphabet{4}()` or `AminoAcidAlphabet()`.
-- `method`: The method used to find ORFs. It must be a subtype of `GeneFinderMethod`. (e.g., `NaiveFinder()`)
-
-# Keyword Arguments
-
-- `alternative_start::Bool=false`: If set to `true`, the function considers alternative start codons when searching for ORFs. This increases the execution time by approximately 3x.
-- `min_len::Int64=6`: The minimum length of the allowed ORF. By default, it allows ORFs that can encode at least one amino acid (e.g., `aa"M*"`).
-
-# Examples
-
-```julia
-sequence = randdnaseq(120)
-
-    120nt DNA Sequence:
-    GCCGGACAGCGAAGGCTAATAAATGCCCGTGCCAGTATC…TCTGAGTTACTGTACACCCGAAAGACGTTGTACGCATTT
-
-
-getorfs(sequence, DNAAlphabet{4}(), NaiveFinder())
-
-    1-element Vector{LongSubSeq{DNAAlphabet{4}}}:
-     ATGCGTACAACGTCTTTCGGGTGTACAGTAACTCAGAGCTAA
-
-```
-"""
-function getorfs(
-    sequence::NucleicSeqOrView{DNAAlphabet{N}},
-    ::DNAAlphabet{N},
-    method::M;
-    alternative_start::Bool = false,
-    min_len::Int64 = 6,
-    kwargs...
-) where {N,M<:GeneFinderMethod}
-    orfs = findorfs(sequence, method; alternative_start, min_len)
-    seqs = Vector{LongSubSeq{DNAAlphabet{N}}}(undef, length(orfs)) #Vector{}(undef, length(orfs)) # todo correct the output type
-    
-    @inbounds for (i, orf) in enumerate(orfs)
-        seqs[i] = sequence[orf]
-    end
-
-    return seqs
-end
-
-function getorfs(
-    sequence::NucleicSeqOrView{DNAAlphabet{N}},
-    ::AminoAcidAlphabet,
-    method::M;
-    alternative_start::Bool = false,
-    min_len::Int64 = 6,
-    code::GeneticCode = ncbi_trans_table[1]
-) where {N,M<:GeneFinderMethod}
-    orfs = findorfs(sequence, method; alternative_start, min_len)
-    seqs = Vector{LongSubSeq{AminoAcidAlphabet}}(undef, length(orfs))
-    
-    @inbounds for (i, orf) in enumerate(orfs)
-        seqs[i] = translate(sequence[orf]; code)
-    end
-
-    return seqs
-end
-
-#### record_orfs_* methods ####
-
-"""
-    record_orfs_fna(sequence::NucleicSeqOrView{DNAAlphabet{N}}; kwargs...) where {N}
-
-Record Open Reading Frames (ORFs) in a nucleic acid sequence.
-
-# Arguments
-- `sequence::NucleicSeqOrView{DNAAlphabet{N}}`: The nucleic acid sequence to search for ORFs.
-- `alternative_start::Bool=false`: If `true`, consider alternative start codons for ORFs.
-- `min_len::Int=6`: The minimum length of an ORF to be recorded.
-
-# Returns
-An array of `FASTARecord` objects representing the identified ORFs.
-
-# Description
-This function searches for Open Reading Frames (ORFs) in a given nucleic acid sequence. An ORF is a sequence of DNA that starts with a start codon and ends with a stop codon, without any other stop codons in between. By default, only the standard start codon (ATG) is considered, but if `alternative_start` is set to `true`, alternative start codons are also considered. The minimum length of an ORF to be recorded can be specified using the `min_len` argument.
-"""
-# function record_orfs_fna(
-#     sequence::NucleicSeqOrView{DNAAlphabet{N}}; 
-#     alternative_start::Bool = false, 
-#     min_len::Int64 = 6
-# ) where {N}
-#     orfs = findorfs(sequence; alternative_start, min_len)
-#     norfs = length(orfs)
-#     padding = norfs < 10 ? length(string(norfs)) + 1 : length(string(norfs))
-#     records = FASTARecord[]
-#     @inbounds for (index, orf) in enumerate(orfs)
-#         id = string(lpad(string(index), padding, "0"))
-#         header = "ORF$(id) id=$(id) start=$(orf.location.start) stop=$(orf.location.stop) strand=$(orf.strand) frame=$(orf.frame)"
-#         record = FASTARecord(header, sequence[orf])
-#         push!(records, record)
-#     end
-#     return records
+# export getorfs
+
+# #### get_orfs_* methods ####
+
+# """
+#     getorfs(sequence::NucleicSeqOrView{DNAAlphabet{N}}, ::Alphabet, method::M; kwargs...) where {N, M<:GeneFinderMethod}
+
+# This function takes a `NucleicSeqOrView{DNAAlphabet{N}}` sequence and identifies the open reading frames (ORFs) using the `findorfs()` function under the selected finder method. The function then extracts the DNA or aminoacid sequence of each ORF and stores it in a `Vector{LongSubSeq{A}}`.
+
+# # Arguments
+
+# - `sequence`: The input sequence as a `NucleicSeqOrView{DNAAlphabet{N}}`
+# - `alphabet`: The output sequence alphabet. It can be either `DNAAlphabet{4}()` or `AminoAcidAlphabet()`.
+# - `method`: The method used to find ORFs. It must be a subtype of `GeneFinderMethod`. (e.g., `NaiveFinder()`)
+
+# # Keyword Arguments
+
+# - `alternative_start::Bool=false`: If set to `true`, the function considers alternative start codons when searching for ORFs. This increases the execution time by approximately 3x.
+# - `minlen::Int64=6`: The minimum length of the allowed ORF. By default, it allows ORFs that can encode at least one amino acid (e.g., `aa"M*"`).
+
+# # Examples
+
+# ```julia
+# sequence = randdnaseq(120)
+
+#     120nt DNA Sequence:
+#     GCCGGACAGCGAAGGCTAATAAATGCCCGTGCCAGTATC…TCTGAGTTACTGTACACCCGAAAGACGTTGTACGCATTT
+
+
+# getorfs(sequence, DNAAlphabet{4}(), NaiveFinder())
+
+#     1-element Vector{LongSubSeq{DNAAlphabet{4}}}:
+#      ATGCGTACAACGTCTTTCGGGTGTACAGTAACTCAGAGCTAA
+
+# ```
+# """
+# function getorfs(
+#     seq::NucleicSeqOrView{DNAAlphabet{N}},
+#     ::DNAAlphabet{N};
+#     finder::Type{F}=NaiveFinder,
+#     alternative_start::Bool = false,
+#     minlen::Int64 = 6,
+#     kwargs...
+# ) where {N,F<:GeneFinderMethod}
+#     orfs = findorfs(seq; finder, alternative_start, minlen)
+#     return sequence.(orfs)
 # end
 
-"""
-    record_orfs_faa(sequence::NucleicSeqOrView{DNAAlphabet{N}}; kwargs...) where {N}
-
-This function takes a nucleic acid sequence and finds all open reading frames (ORFs) in the sequence. 
-An ORF is a sequence of DNA that starts with a start codon and ends with a stop codon. 
-The function returns a list of FASTA records, where each record represents an ORF in the sequence.
-
-# Arguments
-- `sequence`: The nucleic acid sequence to search for ORFs.
-- `alternative_start`: A boolean indicating whether to consider alternative start codons. Default is `false`.
-- `code`: The genetic code to use for translation. Default is the NCBI translation table 1.
-- `min_len`: The minimum length of an ORF. ORFs shorter than this length will be ignored. Default is 6.
-
-# Returns
-- A list of FASTA records representing the ORFs found in the sequence.
-"""
-# function record_orfs_faa(
-#     sequence::NucleicSeqOrView{DNAAlphabet{N}};
-#     alternative_start::Bool = false, 
+# function getorfs(
+#     seq::NucleicSeqOrView{DNAAlphabet{N}},
+#     ::AminoAcidAlphabet;
+#     finder::Type{F}=NaiveFinder,
+#     alternative_start::Bool = false,
+#     minlen::Int64 = 6,
 #     code::GeneticCode = ncbi_trans_table[1],
-#     min_len::Int64 = 6
-# ) where {N}
-#     orfs = findorfs(sequence; alternative_start, min_len)
-#     norfs = length(orfs)
-#     padding = norfs < 10 ? length(string(norfs)) + 1 : length(string(norfs))
-#     records = FASTARecord[]
-#     @inbounds for (index, orf) in enumerate(orfs)
-#         id = string(lpad(string(index), padding, "0"))
-#         header = "ORF$(id) id=$(id) start=$(orf.location.start) stop=$(orf.location.stop) strand=$(orf.strand) frame=$(orf.frame)"
-#         record = FASTARecord(header, translate(sequence[orf]; code))
-#         push!(records, record)
-#     end
-#     return records
+#     kwargs...
+# ) where {N,F<:GeneFinderMethod}
+#     orfs = findorfs(seq; finder, alternative_start, minlen)
+#     return @. translate(sequence(orfs); code)
 # end
+
+# #### record_orfs_* methods ####
+
+# """
+#     record_orfs_fna(sequence::NucleicSeqOrView{DNAAlphabet{N}}; kwargs...) where {N}
+
+# Record Open Reading Frames (ORFs) in a nucleic acid sequence.
+
+# # Arguments
+# - `sequence::NucleicSeqOrView{DNAAlphabet{N}}`: The nucleic acid sequence to search for ORFs.
+# - `alternative_start::Bool=false`: If `true`, consider alternative start codons for ORFs.
+# - `minlen::Int=6`: The minimum length of an ORF to be recorded.
+
+# # Returns
+# An array of `FASTARecord` objects representing the identified ORFs.
+
+# # Description
+# This function searches for Open Reading Frames (ORFs) in a given nucleic acid sequence. An ORF is a sequence of DNA that starts with a start codon and ends with a stop codon, without any other stop codons in between. By default, only the standard start codon (ATG) is considered, but if `alternative_start` is set to `true`, alternative start codons are also considered. The minimum length of an ORF to be recorded can be specified using the `minlen` argument.
+# """
+# # function record_orfs_fna(
+# #     sequence::NucleicSeqOrView{DNAAlphabet{N}}; 
+# #     alternative_start::Bool = false, 
+# #     minlen::Int64 = 6
+# # ) where {N}
+# #     orfs = findorfs(sequence; alternative_start, minlen)
+# #     norfs = length(orfs)
+# #     padding = norfs < 10 ? length(string(norfs)) + 1 : length(string(norfs))
+# #     records = FASTARecord[]
+# #     @inbounds for (index, orf) in enumerate(orfs)
+# #         id = string(lpad(string(index), padding, "0"))
+# #         header = "ORF$(id) id=$(id) start=$(orf.location.start) stop=$(orf.location.stop) strand=$(orf.strand) frame=$(orf.frame)"
+# #         record = FASTARecord(header, sequence[orf])
+# #         push!(records, record)
+# #     end
+# #     return records
+# # end
+
+# """
+#     record_orfs_faa(sequence::NucleicSeqOrView{DNAAlphabet{N}}; kwargs...) where {N}
+
+# This function takes a nucleic acid sequence and finds all open reading frames (ORFs) in the sequence. 
+# An ORF is a sequence of DNA that starts with a start codon and ends with a stop codon. 
+# The function returns a list of FASTA records, where each record represents an ORF in the sequence.
+
+# # Arguments
+# - `sequence`: The nucleic acid sequence to search for ORFs.
+# - `alternative_start`: A boolean indicating whether to consider alternative start codons. Default is `false`.
+# - `code`: The genetic code to use for translation. Default is the NCBI translation table 1.
+# - `minlen`: The minimum length of an ORF. ORFs shorter than this length will be ignored. Default is 6.
+
+# # Returns
+# - A list of FASTA records representing the ORFs found in the sequence.
+# """
+# # function record_orfs_faa(
+# #     sequence::NucleicSeqOrView{DNAAlphabet{N}};
+# #     alternative_start::Bool = false, 
+# #     code::GeneticCode = ncbi_trans_table[1],
+# #     minlen::Int64 = 6
+# # ) where {N}
+# #     orfs = findorfs(sequence; alternative_start, minlen)
+# #     norfs = length(orfs)
+# #     padding = norfs < 10 ? length(string(norfs)) + 1 : length(string(norfs))
+# #     records = FASTARecord[]
+# #     @inbounds for (index, orf) in enumerate(orfs)
+# #         id = string(lpad(string(index), padding, "0"))
+# #         header = "ORF$(id) id=$(id) start=$(orf.location.start) stop=$(orf.location.stop) strand=$(orf.strand) frame=$(orf.frame)"
+# #         record = FASTARecord(header, translate(sequence[orf]; code))
+# #         push!(records, record)
+# #     end
+# #     return records
+# # end
diff --git a/src/io.jl b/src/io.jl
index 9e0a88f..8924eaf 100644
--- a/src/io.jl
+++ b/src/io.jl
@@ -15,16 +15,17 @@ Write BED data to a file.
 
 # Keywords
 - `alternative_start::Bool=false`: If true, alternative start codons will be used when identifying CDSs. Default is `false`.
-- `min_len::Int64=6`: The minimum length that a CDS must have in order to be included in the output file. Default is `6`.
+- `minlen::Int64=6`: The minimum length that a CDS must have in order to be included in the output file. Default is `6`.
 """
 function write_orfs_bed(
     input::Union{String,NucleicSeqOrView{DNAAlphabet{N}}},
-    output::Union{IOStream, IOBuffer},
-    method::M;
+    output::Union{IOStream, IOBuffer};
+    finder::Type{F}=NaiveFinder,
     alternative_start::Bool = false,
-    min_len::Int64 = 6
-) where {N , M<:GeneFinderMethod}
-    orfs = findorfs(input, method; alternative_start, min_len)
+    minlen::Int64 = 6
+) where {N,F<:GeneFinderMethod}
+    sequence = typeof(input) === String ? fasta2bioseq(input)[1] : input
+    orfs = findorfs(sequence; finder, alternative_start, minlen)
     @inbounds for orf in orfs
         println(output, orf.first, "\t", orf.last, "\t", orf.strand, "\t", orf.frame)
     end
@@ -32,12 +33,13 @@ end
 
 function write_orfs_bed(
     input::Union{String,NucleicSeqOrView{DNAAlphabet{N}}},
-    output::String,
-    method::M;
+    output::String;
+    finder::Type{F}=NaiveFinder,
     alternative_start::Bool = false,
-    min_len::Int64 = 6
-) where {N, M<:GeneFinderMethod}
-    orfs = findorfs(input, method; alternative_start, min_len)
+    minlen::Int64 = 6
+) where {N, F<:GeneFinderMethod}
+    sequence = typeof(input) === String ? fasta2bioseq(input)[1] : input
+    orfs = findorfs(sequence; finder, alternative_start, minlen)
     open(output, "w") do f
         @inbounds for orf in orfs
             write(f, "$(orf.first)\t$(orf.last)\t$(orf.strand)\t$(orf.frame)\n")
@@ -45,36 +47,6 @@ function write_orfs_bed(
     end
 end
 
-# function write_orfs_bed(
-#     input::String,
-#     output::Union{IOStream, IOBuffer},
-#     method::M;
-#     alternative_start::Bool = false,
-#     min_len::Int64 = 6
-# ) where {M<:GeneFinderMethod}
-#     input = fasta_to_dna(input)[1] # rewrite input to be a DNA sequence
-#     orfs = findorfs(input, method; alternative_start, min_len)
-#     @inbounds for orf in orfs
-#         println(output, orf.first, "\t", orf.last, "\t", orf.strand, "\t", orf.frame)
-#     end
-# end
-
-# function write_orfs_bed(
-#     input::String,
-#     output::String,
-#     method::M;
-#     alternative_start::Bool = false,
-#     min_len::Int64 = 6
-# ) where {M<:GeneFinderMethod}
-#     input = fasta_to_dna(input)[1]
-#     orfs = findorfs(input, method; alternative_start, min_len)
-#     open(output, "w") do f
-#         @inbounds for orf in orfs
-#             write(f, "$(orf.first)\t$(orf.last)\t$(orf.strand)\t$(orf.frame)\n")
-#         end
-#     end
-# end
-
 """
     write_orfs_fna(input::NucleicSeqOrView{DNAAlphabet{N}}, output::Union{IOStream, IOBuffer}, method::M; kwargs...) where {N, M<:GeneFinderMethod}
     write_orfs_fna(input::NucleicSeqOrView{DNAAlphabet{N}}, output::String, method::M; kwargs...) where {N, M<:GeneFinderMethod}
@@ -92,7 +64,7 @@ Write a file containing the coding sequences (CDSs) of a given DNA sequence to t
 # Keywords
 
 - `alternative_start::Bool=false`: If true, alternative start codons will be used when identifying CDSs. Default is `false`.
-- `min_len::Int64=6`: The minimum length that a CDS must have in order to be included in the output file. Default is `6`.
+- `minlen::Int64=6`: The minimum length that a CDS must have in order to be included in the output file. Default is `6`.
 
 # Examples
 
@@ -108,76 +80,41 @@ end
 """
 function write_orfs_fna(
     input::Union{String, NucleicSeqOrView{DNAAlphabet{N}}},
-    output::Union{IOStream, IOBuffer},
-    method::M;
+    output::Union{IOStream, IOBuffer};
+    finder::Type{F}=NaiveFinder,
     alternative_start::Bool = false, 
-    min_len::Int64 = 6
-) where {N, M<:GeneFinderMethod}
-    orfs = findorfs(input, method; alternative_start, min_len)
+    minlen::Int64 = 6
+) where {N, F<:GeneFinderMethod}
+    seq = typeof(input) === String ? fasta2bioseq(input)[1] : input
+    orfs = findorfs(seq; finder, alternative_start, minlen)
     norfs = length(orfs)
     padding = norfs < 10 ? length(string(norfs)) + 1 : length(string(norfs))
     @inbounds for (i, orf) in enumerate(orfs)
         id = string(lpad(string(i), padding, "0"))
         println(output, ">", orf.groupname, " id=", id, " start=", orf.first, " stop=", orf.last, " strand=", orf.strand, " frame=", orf.frame, " score=", orf.score)
-        println(output, input[orf])
+        println(output, sequence(orf))
     end
 end
 
 function write_orfs_fna(
     input::Union{String,NucleicSeqOrView{DNAAlphabet{N}}}, 
-    output::String,
-    method::M;
+    output::String;
+    finder::Type{F}=NaiveFinder,
     alternative_start::Bool = false,
-    min_len::Int64 = 6
-) where {N, M<:GeneFinderMethod}
-    orfs = findorfs(input, method; alternative_start, min_len)
+    minlen::Int64 = 6
+) where {N, F<:GeneFinderMethod}
+    seq = typeof(input) === String ? fasta2bioseq(input)[1] : input
+    orfs = findorfs(seq; finder, alternative_start, minlen)
     norfs = length(orfs)
     padding = norfs < 10 ? length(string(norfs)) + 1 : length(string(norfs))
     open(output, "w") do f
         for (i, orf) in enumerate(orfs)
             id = string(lpad(string(i), padding, "0"))
-            write(f, ">$(orf.groupname) id=$(id) start=$(orf.first) stop=$(orf.last) strand=$(orf.strand) frame=$(orf.frame) score=$(orf.score)\n$(input[orf])\n") # i.strand == '+' ? input[i.location] : reverse_complement(@view input[i.location])
+            write(f, ">$(orf.groupname) id=$(id) start=$(orf.first) stop=$(orf.last) strand=$(orf.strand) frame=$(orf.frame) score=$(orf.score)\n$(sequence(orf))\n") # i.strand == '+' ? input[i.location] : reverse_complement(@view input[i.location])
         end
     end
 end
 
-# function write_orfs_fna(
-#     input::String,
-#     output::Union{IOStream, IOBuffer},
-#     method::M;
-#     alternative_start::Bool = false, 
-#     min_len::Int64 = 6
-# ) where {M<:GeneFinderMethod}
-#     input = fasta_to_dna(input)[1]
-#     orfs = findorfs(input, method; alternative_start, min_len)
-#     norfs = length(orfs)
-#     padding = norfs < 10 ? length(string(norfs)) + 1 : length(string(norfs))
-#     @inbounds for (i, orf) in enumerate(orfs)
-#         id = string(lpad(string(i), padding, "0"))
-#         println(output, ">ORF", id, " id=", id, " start=", orf.first, " stop=", orf.last, " strand=", orf.strand, " frame=", orf.frame)
-#         println(output, input[orf])
-#     end
-# end
-
-# function write_orfs_fna(
-#     input::String, 
-#     output::String,
-#     method::M;
-#     alternative_start::Bool = false,
-#     min_len::Int64 = 6
-# ) where {M<:GeneFinderMethod}
-#     input = fasta_to_dna(input)[1]
-#     orfs = findorfs(input, method; alternative_start, min_len)
-#     norfs = length(orfs)
-#     padding = norfs < 10 ? length(string(norfs)) + 1 : length(string(norfs))
-#     open(output, "w") do f
-#         for (i, orf) in enumerate(orfs)
-#             id = string(lpad(string(i), padding, "0"))
-#             write(f, ">ORF$(id) id=$(id) start=$(orf.first) stop=$(orf.last) strand=$(orf.strand) frame=$(orf.frame)\n$(input[orf])\n") # i.strand == '+' ? input[i.location] : reverse_complement(@view input[i.location])
-#         end
-#     end
-# end
-
 """
 	write_orfs_faa(input::NucleicSeqOrView{DNAAlphabet{4}}, output::Union{IOStream, IOBuffer}, method::M; kwargs...) where {N, M<:GeneFinderMethod}
 	write_orfs_faa(input::NucleicSeqOrView{DNAAlphabet{4}}, output::String, method::M; kwargs...) where {N, M<:GeneFinderMethod}
@@ -195,7 +132,7 @@ Write the protein sequences encoded by the coding sequences (CDSs) of a given DN
 
 - `code::GeneticCode=BioSequences.standard_genetic_code`: The genetic code by which codons will be translated. See `BioSequences.ncbi_trans_table` for more info. 
 - `alternative_start::Bool=false`: If true will pass the extended start codons to search. This will increase 3x the exec. time.
-- `min_len::Int64=6`:  Length of the allowed ORF. Default value allow `aa"M*"` a posible encoding protein from the resulting ORFs.
+- `minlen::Int64=6`:  Length of the allowed ORF. Default value allow `aa"M*"` a posible encoding protein from the resulting ORFs.
 
 # Examples 
 
@@ -210,77 +147,40 @@ end
 ```
 """
 function write_orfs_faa(
-    input::NucleicSeqOrView{DNAAlphabet{N}},
-    output::Union{IOStream, IOBuffer},
-    method::M;
+    input::Union{String, NucleicSeqOrView{DNAAlphabet{N}}},
+    output::Union{IOStream, IOBuffer};
+    finder::Type{F}=NaiveFinder,
     alternative_start::Bool = false,
-    min_len::Int64 = 6,
+    minlen::Int64 = 6,
     code::GeneticCode = ncbi_trans_table[1]
-) where {N, M<:GeneFinderMethod}
-    orfs = findorfs(input, method; alternative_start, min_len)
+) where {N,F<:GeneFinderMethod}
+    seq = typeof(input) === String ? fasta2bioseq(input)[1] : input
+    orfs = findorfs(seq; finder, alternative_start, minlen)
     norfs = length(orfs)
     padding = norfs < 10 ? length(string(norfs)) + 1 : length(string(norfs))
     @inbounds for (i, orf) in enumerate(orfs)
         id = string(lpad(string(i), padding, "0"))
         println(output, ">ORF", id, " id=", id, " start=", orf.first, " stop=", orf.last, " strand=", orf.strand, " frame=", orf.frame)
-        println(output, translate(input[orf]; code))
-    end
-end
-
-function write_orfs_faa(
-    input::NucleicSeqOrView{DNAAlphabet{N}},
-    output::String,
-    method::M;
-    alternative_start::Bool = false,
-    min_len::Int64 = 6,
-    code::GeneticCode = ncbi_trans_table[1]
-) where {N, M<:GeneFinderMethod}
-    orfs = findorfs(input, method; alternative_start, min_len)
-    norfs = length(orfs)
-    padding = norfs < 10 ? length(string(norfs)) + 1 : length(string(norfs))
-    open(output, "w") do f
-        for (i, orf) in enumerate(orfs)
-            id = string(lpad(string(i), padding, "0"))
-            write(f, ">ORF$(id) id=$(id) start=$(orf.first) stop=$(orf.last) strand=$(orf.strand) frame=$(orf.frame)\n$(translate(input[orf]; code))\n")
-        end
+        println(output, translate(orf; code))
     end
 end
 
 function write_orfs_faa(
-    input::String,
-    output::Union{IOStream, IOBuffer},
-    method::M;
+    input::Union{String, NucleicSeqOrView{DNAAlphabet{N}}},
+    output::String;
+    finder::Type{F}=NaiveFinder,
     alternative_start::Bool = false,
-    min_len::Int64 = 6,
+    minlen::Int64 = 6,
     code::GeneticCode = ncbi_trans_table[1]
-) where {M<:GeneFinderMethod}
-    input = fasta_to_dna(input)[1]
-    orfs = findorfs(input, method; alternative_start, min_len)
-    norfs = length(orfs)
-    padding = norfs < 10 ? length(string(norfs)) + 1 : length(string(norfs))
-    @inbounds for (i, orf) in enumerate(orfs)
-        id = string(lpad(string(i), padding, "0"))
-        println(output, ">ORF", id, " id=", id, " start=", orf.first, " stop=", orf.last, " strand=", orf.strand, " frame=", orf.frame)
-        println(output, translate(input[orf]; code))
-    end
-end
-
-function write_orfs_faa(
-    input::String,
-    output::String,
-    method::M;
-    alternative_start::Bool = false,
-    code::GeneticCode = ncbi_trans_table[1],
-    min_len::Int64 = 6,
-) where {M<:GeneFinderMethod}
-    input = fasta_to_dna(input)[1]
-    orfs = findorfs(input, method; alternative_start, min_len)
+) where {N,F<:GeneFinderMethod}
+    seq = typeof(input) === String ? fasta2bioseq(input)[1] : input
+    orfs = findorfs(seq; finder, alternative_start, minlen)
     norfs = length(orfs)
     padding = norfs < 10 ? length(string(norfs)) + 1 : length(string(norfs))
     open(output, "w") do f
         for (i, orf) in enumerate(orfs)
             id = string(lpad(string(i), padding, "0"))
-            write(f, ">ORF$(id) id=$(id) start=$(orf.first) stop=$(orf.last) strand=$(orf.strand) frame=$(orf.frame)\n$(translate(input[orf]; code))\n")
+            write(f, ">ORF$(id) id=$(id) start=$(orf.first) stop=$(orf.last) strand=$(orf.strand) frame=$(orf.frame)\n$(translate(orf; code))\n")
         end
     end
 end
@@ -302,34 +202,36 @@ Write GFF data to a file.
 
 - `code::GeneticCode=BioSequences.standard_genetic_code`: The genetic code by which codons will be translated. See `BioSequences.ncbi_trans_table` for more info. 
 - `alternative_start::Bool=false`: If true will pass the extended start codons to search. This will increase 3x the exec. time.
-- `min_len::Int64=6`:  Length of the allowed ORF. Default value allow `aa"M*"` a posible encoding protein from the resulting ORFs.
+- `minlen::Int64=6`:  Length of the allowed ORF. Default value allow `aa"M*"` a posible encoding protein from the resulting ORFs.
 
 """
 function write_orfs_gff(
-    input::NucleicSeqOrView{A},
-    output::Union{IOStream, IOBuffer},
-    method::M;
+    input::Union{String, NucleicSeqOrView{DNAAlphabet{N}}},
+    output::Union{IOStream, IOBuffer};
+    finder::Type{F}=NaiveFinder,
     alternative_start::Bool = false,
-    min_len::Int64 = 6,
-) where {A, M<:GeneFinderMethod}
-    orfs = findorfs(input, method; alternative_start, min_len)
+    minlen::Int64 = 6,
+) where {N, F<:GeneFinderMethod}
+    seq = typeof(input) === String ? fasta2bioseq(input)[1] : input
+    orfs = findorfs(seq; finder, alternative_start, minlen)
     norfs = length(orfs)
     padding = norfs < 10 ? length(string(norfs)) + 1 : length(string(norfs))
     println(output, "##gff-version 3\n##sequence-region Chr 1 $(length(input))")
     for (i, orf) in enumerate(orfs)
         id = string("ORF", lpad(string(i), padding, "0"))
         println(output, "Chr\t.\tORF\t", orf.first, "\t", orf.last, "\t.\t", orf.strand, "\t.\tID=", id, ";Name=", id, ";Frame=", orf.frame)
-        end
+    end
 end
 
 function write_orfs_gff(
-    input::NucleicSeqOrView{A},
-    output::String,
-    method::M;
+    input::Union{String, NucleicSeqOrView{DNAAlphabet{N}}},
+    output::String;
+    finder::Type{F}=NaiveFinder,
     alternative_start::Bool = false,
-    min_len::Int64 = 6,
-) where {A, M<:GeneFinderMethod}
-    orfs = findorfs(input, method; alternative_start, min_len)
+    minlen::Int64 = 6,
+) where {N, F<:GeneFinderMethod}
+    seq = typeof(input) === String ? fasta2bioseq(input)[1] : input
+    orfs = findorfs(seq; finder, alternative_start, minlen)
     norfs = length(orfs)
     padding = norfs < 10 ? length(string(norfs)) + 1 : length(string(norfs))
     open(output, "w") do f
@@ -344,45 +246,5 @@ function write_orfs_gff(
     end
 end
 
-function write_orfs_gff(
-    input::String, 
-    output::Union{IOStream, IOBuffer},
-    method::M;
-    alternative_start::Bool = false, 
-    min_len::Int64 = 6,
-) where {M<:GeneFinderMethod}
-    input = fasta_to_dna(input)[1]
-    orfs = findorfs(input, method; alternative_start, min_len)
-    norfs = length(orfs)
-    padding = norfs < 10 ? length(string(norfs)) + 1 : length(string(norfs))
-    println(output, "##gff-version 3\n##sequence-region Chr 1 $(length(input))")
-    for (i, orf) in enumerate(orfs)
-        id = string("ORF", lpad(string(i), padding, "0"))
-        println(output, "Chr\t.\tORF\t", orf.first, "\t", orf.last, "\t.\t", orf.strand, "\t.\tID=", id, ";Name=", id, ";Frame=", orf.frame)
-    end
-end
-
-function write_orfs_gff(
-    input::String,
-    output::String,
-    method::M;
-    alternative_start::Bool = false, 
-    min_len::Int64 = 6,
-) where {M<:GeneFinderMethod}
-    input = fasta_to_dna(input)[1]
-    orfs = findorfs(input, method; alternative_start, min_len)
-    norfs = length(orfs)
-    padding = norfs < 10 ? length(string(norfs)) + 1 : length(string(norfs))
-    open(output, "w") do f
-        write(f, "##gff-version 3\n##sequence-region Chr 1 $(length(input))\n") 
-        for (i, orf) in enumerate(orfs)
-            id = string("ORF", lpad(string(i), padding, "0"))
-            write(
-                f,
-                "Chr\t.\tORF\t$(orf.first)\t$(orf.last)\t.\t$(orf.strand)\t.\tID=$(id);Name=$(id);Frame=$(orf.frame)\n",
-            )
-        end
-    end
-end
 
 # Chr needs to be changed for fasta (?) or the name of seq (?) to fit well on IGV 
\ No newline at end of file
diff --git a/src/iscoding.jl b/src/iscoding.jl
index a0d31c8..982efbc 100644
--- a/src/iscoding.jl
+++ b/src/iscoding.jl
@@ -29,10 +29,10 @@ end
 
 function iscoding(
     sequence::NucleicSeqOrView{DNAAlphabet{N}},
-    orf::ORF{M};
+    orf::ORF{N,F};
     criteria::Function = lordr,
     kwargs...
-) where {N,M <: GeneFinderMethod}
+) where {N,F<:GeneFinderMethod}
     return criteria(sequence[orf]; kwargs...)
 end
 
diff --git a/src/utils.jl b/src/utils.jl
index 404bf9f..27bf9f3 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -1,4 +1,4 @@
-export hasprematurestop, fasta_to_dna, get_variable_name
+export hasprematurestop, fasta2bioseq, get_var_name
 # General purposes methods supporting main functions
 
 """
@@ -27,17 +27,17 @@ function hasprematurestop(sequence::NucleicSeqOrView{DNAAlphabet{N}})::Bool wher
 end
 
 """
-    fasta_to_dna(input::String)
+    fasta2bioseq(input::String)
 
 Converts a FASTA formatted file (even if it is a multi-fasta) to an array of `LongSequence{DNAAlphabet{4}}` objects.
 """
-function fasta_to_dna(input::AbstractString)::Vector{LongSequence{DNAAlphabet{4}}}
+function fasta2bioseq(input::AbstractString)::Vector{LongSequence{DNAAlphabet{4}}}
     FASTAReader(open(input)) do reader
         return [LongSequence{DNAAlphabet{4}}(sequence(record)) for record in reader]
     end
 end
 
-function get_variable_name(var)
+function get_var_name(var::NucleicSeqOrView{DNAAlphabet{N}}) where {N}
     for name in names(Main)
         if getfield(Main, name) === var
             return string(name)
diff --git a/test/findorfstest.jl b/test/findorfstest.jl
index 2eb9081..3596075 100644
--- a/test/findorfstest.jl
+++ b/test/findorfstest.jl
@@ -21,7 +21,7 @@
 #     # 2) A common start codon sequence at 426:590
 #     # On the other hand, the NCBI ORFfinder program predicts 9 ORFs whose length is greater than 75 nt, from which one has an "outbound" stop
 #     seq03 = dna"TTCGTCAGTCGTTCTGTTTCATTCAATACGATAGTAATGTATTTTTCGTGCATTTCCGGTGGAATCGTGCCGTCCAGCATAGCCTCCAGATATCCCCTTATAGAGGTCAGAGGGGAACGGAAATCGTGGGATACATTGGCTACAAACTTTTTCTGATCATCCTCGGAACGGGCAATTTCGCTTGCCATATAATTCAGACAGGAAGCCAGATAACCGATTTCATCCTCACTATCGACCTGAAATTCATAATGCATATTACCGGCAGCATACTGCTCTGTGGCATGAGTGATCTTCCTCAGAGGAATATATACGATCTCAGTGAAAAAGATCAGAATGATCAGGGATAGCAGGAACAGGATTGCCAGGGTGATATAGGAAATATTCAGCAGGTTGTTACAGGATTTCTGAATATCATTCATATCAGTATGGATGACTACATAGCCTTTTACCTTGTAGTTGGAGGTAATGGGAGCAAATACAGTAAGTACATCCGAATCAAAATTACCGAAGAAATCACCAACAATGTAATAGGAGCCGCTGGTTACGGTCGAATCAAAATTCTCAATGACAACCACATTCTCCACATCTAAGGGACTATTGGTATCCAGTACCAGTCGTCCGGAGGGATTGATGATGCGAATCTCGGAATTCAGGTAGACCGCCAGGGAGTCCAGCTGCATTTTAACGGTCTCCAAAGTTGTTTCACTGGTGTACAATCCGCCGGCATAGGTTCCGGCGATCAGGGTTGCTTCGGAATAGAGACTTTCTGCCTTTTCCCGGATCAGATGTTCTTTGGTCATATTGGGAACAAAAGTTGTAACAATGATGAAACCAAATACACCAAAAATAAAATATGCGAGTATAAATTTTAGATAAAGTGTTTTTTTCATAACAAATCCTGCTTTTGGTATGACTTAATTACGTACTTCGAATTTATAGCCGATGCCCCAGATGGTGCTGATCTTCCAGTTGGCATGATCCTTGATCTTCTC"
-#     orfs03 = findorfs(seq03, NaiveFinder(), min_len=75)
+#     orfs03 = findorfs(seq03, NaiveFinder(), minlen=75)
 #     @test length(orfs03) == 9
 #     @test orfs03 == [ORF{NaiveFinder}(37:156, '+', 1, nothing), ORF{NaiveFinder}(194:268, '-', 2, nothing), ORF{NaiveFinder}(194:283, '-', 2, nothing), ORF{NaiveFinder}(249:347, '+', 3, nothing), ORF{NaiveFinder}(426:590, '+', 3, nothing), ORF{NaiveFinder}(565:657, '+', 1, nothing), ORF{NaiveFinder}(650:727, '-', 2, nothing), ORF{NaiveFinder}(786:872, '+', 3, nothing), ORF{NaiveFinder}(887:976, '-', 2, nothing)]
 #                                                                                                            #|->  This occured in Pyrodigal
@@ -33,10 +33,11 @@
 #     # findorfs (GeneFinder.jl) --> 885
 #     # NCBI ORFfinder --> 375 ORFs
 #     # orfipy --> 375 (`orfipy NC_001416.1.fasta --start ATG --include-stop --min 75`)
-#     NC_001416 = fasta_to_dna("data/NC_001416.1.fasta")[1]
-#     NC_001416_orfs = findorfs(NC_001416, NaiveFinder(), min_len=75)
+#     NC_001416 = fasta2bioseq("data/NC_001416.1.fasta")[1]
+#     NC_001416_orfs = findorfs(NC_001416, NaiveFinder(), minlen=75)
 #     @test length(NC_001416_orfs) == 885
 # end
+
 # @testitem "getorfs dna" default_imports=false begin
 
 #     using BioSequences: @dna_str, DNAAlphabet
diff --git a/test/getindextest.jl b/test/getindextest.jl
index 3ab5f71..cf57df8 100644
--- a/test/getindextest.jl
+++ b/test/getindextest.jl
@@ -1,16 +1,16 @@
 # Import the function to be tested
 
 # Run the tests
-@testitem "getindex" default_imports=false begin
+# @testitem "getindex" default_imports=false begin
     
-    using BioSequences: @dna_str
-    using GeneFinder: ORF, NaiveFinder
-    using Test: @test
+#     using BioSequences: @dna_str
+#     using GeneFinder: ORF, NaiveFinder
+#     using Test: @test
 
-    seq01 = dna"ATGATGCATGCATGCATGCTAGTAACTAGCTAGCTAGCTAGTAA"
+#     seq01 = dna"ATGATGCATGCATGCATGCTAGTAACTAGCTAGCTAGCTAGTAA"
     
-    orfs01 = [ORF("seq01", 1:33, '+', 1, NaiveFinder()), ORF("seq01", 4:33, '+', 1, NaiveFinder()), ORF("seq01", 8:22, '+', 2, NaiveFinder()), ORF("seq01", 12:29, '+', 3, NaiveFinder()), ORF("seq01", 16:33, '+', 1, NaiveFinder())]
+#     orfs01 = [ORF("seq01", 1:33, '+', 1, NaiveFinder()), ORF("seq01", 4:33, '+', 1, NaiveFinder()), ORF("seq01", 8:22, '+', 2, NaiveFinder()), ORF("seq01", 12:29, '+', 3, NaiveFinder()), ORF("seq01", 16:33, '+', 1, NaiveFinder())]
 
-    @test getindex(seq01, orfs01[1]) == dna"ATGATGCATGCATGCATGCTAGTAACTAGCTAG"
-    @test getindex(seq01, orfs01[2]) == dna"ATGCATGCATGCATGCTAGTAACTAGCTAG"
-end
\ No newline at end of file
+#     @test getindex(seq01, orfs01[1]) == dna"ATGATGCATGCATGCATGCTAGTAACTAGCTAG"
+#     @test getindex(seq01, orfs01[2]) == dna"ATGCATGCATGCATGCTAGTAACTAGCTAG"
+# end
\ No newline at end of file
diff --git a/test/runtests.jl b/test/runtests.jl
index 34f222c..2900ce2 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -3,7 +3,7 @@ module GeneFinderTests
 using Test
 using BioSequences
 using FASTX
-# using GeneFinder
+using GeneFinder
 using Aqua
 using TestItems
 using TestItemRunner
@@ -12,7 +12,7 @@ using TestItemRunner
 
 # include("findorfstest.jl")
 # include("iotest.jl")
-include("getindextest.jl")
+# include("getindextest.jl")
 # include("aquatest.jl")
 
 end

From 43365420f04286f671dda16ce7b5ff3740bad4b0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Camilo=20Garc=C3=ADa?= <ca.garcia2@uniandes.edu.co>
Date: Mon, 3 Jun 2024 15:01:49 -0500
Subject: [PATCH 12/40] Remove unused function

---
 src/findorfs.jl | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/findorfs.jl b/src/findorfs.jl
index 9d093a9..84d9819 100644
--- a/src/findorfs.jl
+++ b/src/findorfs.jl
@@ -33,8 +33,6 @@ findorfs(sequence, NaiveFinder())
 ```
 
 """
-function findorfs end
-
 function findorfs(
     sequence::NucleicSeqOrView{DNAAlphabet{N}};
     finder::Type{F}=NaiveFinder,

From 051650863717140a853d7ba90882d5c47abb2b3a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Camilo=20Garc=C3=ADa?= <ca.garcia2@uniandes.edu.co>
Date: Mon, 3 Jun 2024 15:02:09 -0500
Subject: [PATCH 13/40] Update deps

---
 src/GeneFinder.jl                |   2 +-
 src/algorithms/naivecollector.jl |  14 ++--
 src/algorithms/naivefinder.jl    | 139 +++++++++++++++++--------------
 src/types.jl                     |   8 +-
 src/utils.jl                     |   1 +
 5 files changed, 87 insertions(+), 77 deletions(-)

diff --git a/src/GeneFinder.jl b/src/GeneFinder.jl
index 94df7d6..cbc4834 100644
--- a/src/GeneFinder.jl
+++ b/src/GeneFinder.jl
@@ -27,7 +27,7 @@ using BioMarkovChains: BioMarkovChain, dnaseqprobability, ECOLICDS, ECOLINOCDS,
 using FASTX: FASTAReader, sequence
 using IterTools: takewhile, iterated
 using PrecompileTools: @setup_workload, @compile_workload
-using GenomicFeatures: GenomicFeatures, AbstractGenomicInterval, GenomicInterval, Strand, summary, groupname, strand, metadata, STRAND_POS, STRAND_NEG, STRAND_BOTH, STRAND_NA
+using GenomicFeatures: GenomicFeatures, AbstractGenomicInterval, GenomicInterval, Strand, summary, groupname, strand, metadata, STRAND_POS, STRAND_NEG, STRAND_BOTH, STRAND_NA, leftposition, rightposition, length
 
 # Finder Algorithms
 include("algorithms/naivefinder.jl")
diff --git a/src/algorithms/naivecollector.jl b/src/algorithms/naivecollector.jl
index 4efb608..f733e67 100644
--- a/src/algorithms/naivecollector.jl
+++ b/src/algorithms/naivecollector.jl
@@ -18,7 +18,7 @@ function NaiveCollector(
     seqlen = length(sequence)
     seqname = get_var_name(sequence)
 
-    function createorfs(x, strand, s)
+    function createorfs(x, strand)
         if length(x.captured[1]:x.captured[3]) < minlen
             return nothing
         end
@@ -31,17 +31,17 @@ function NaiveCollector(
             stop = seqlen - x.captured[1] + 1
             frame = framedict[(seqlen - x.captured[3]) % 3]
         end
-        seq = @view(sequence[start:stop])
-        score = scheme === nothing ? 0 : scheme(@view(s[start:stop]), ECOLICDS)
-        fts = Dict(:gc => gc_content(seq), :length => length(seq), :score => score)
+        seq = strand == '+' ? @view(sequence[start:stop]) : reverse_complement(@view(sequence[start:stop])) #@view(sequence[start:stop])
+        score = scheme === nothing ? 0.0 : scheme(@view(seq[start:stop]), ECOLICDS)
+        fts = Dict(:score => score)
         return ORF{N,NaiveCollector}(seqname, start, stop, strand, frame, seq, fts, scheme)
     end
 
     orfs = Vector{ORF{N,NaiveCollector}}()
     for strand in ('+', '-')
-        s = strand == '-' ? revseq : sequence
-        matches = eachmatch(regorf, s, overlap)
-        strandedorfs = filter(!isnothing, [createorfs(x, strand, s) for x in matches])
+        seq = strand == '-' ? revseq : sequence
+        matches = eachmatch(regorf, seq, overlap)
+        strandedorfs = filter(!isnothing, [createorfs(x, strand) for x in matches])
         append!(orfs, strandedorfs)
     end
 
diff --git a/src/algorithms/naivefinder.jl b/src/algorithms/naivefinder.jl
index 2c7cbed..d78c20e 100644
--- a/src/algorithms/naivefinder.jl
+++ b/src/algorithms/naivefinder.jl
@@ -62,82 +62,95 @@ The `naivefinder` function takes a LongSequence{DNAAlphabet{4}} sequence and ret
     If the log-odds ratio exceeds a given threshold (`η`), the sequence is considered likely to be coding. See [`lordr`](@ref) for more information about coding creteria.
 
 """
-# function NaiveFinder(
-#     sequence::NucleicSeqOrView{DNAAlphabet{N}};
-#     alternative_start::Bool = false,
-#     minlen::Int64 = 6,
-#     scheme::Union{Nothing, Function} = nothing,
-#     kwargs...
-# ) where {N}
-#     seqlen = length(sequence)
-#     framedict = Dict(0 => 3, 1 => 1, 2 => 2)
-#     orfs = Vector{ORF{N,NaiveFinder}}()
-#     seqname = get_var_name(sequence)
-#     for strand in ('+', '-')
-#         s = strand == '-' ? reverse_complement(sequence) : sequence
-#         @inbounds for location in @views _locationiterator(s; alternative_start)
-#             if length(location) >= minlen
-#                 start = strand == '+' ? location.start : seqlen - location.stop + 1
-#                 stop = start + length(location) - 1
-#                 frame = strand == '+' ? framedict[location.start % 3] : framedict[(seqlen - location.stop + 1) % 3]
-#                 seq = @view(sequence[start:stop])
-#                 score = scheme === nothing ? 0 : scheme(@view(s[start:stop]), ECOLICDS)
-#                 gc = gc_content(seq)
-#                 len = length(seq)
-#                 fts = Dict(:gc => gc, :length => len, :score => score)
-#                 push!(orfs, ORF{N,NaiveFinder}(seqname, start, stop, strand, frame, NaiveFinder, seq, fts, scheme))
-#             end
-#         end
-#     end
-#     return sort!(orfs; kwargs...)
-# end
-
 function NaiveFinder(
     sequence::NucleicSeqOrView{DNAAlphabet{N}};
     alternative_start::Bool = false,
     minlen::Int64 = 6,
-    scheme::Union{Nothing, Function} = nothing,
+    scheme::Union{Nothing,Function} = nothing,
     kwargs...
 ) where {N}
-    
     seqlen = length(sequence)
     framedict = Dict(0 => 3, 1 => 1, 2 => 2)
     orfs = Vector{ORF{N,NaiveFinder}}()
     seqname = get_var_name(sequence)
-
-    function createorf(location, strand, s, seqlen)
-        if length(location) < minlen
-            return nothing
-        end
-        
-        if strand == '+'
-            start = location.start
-            stop = start + length(location) - 1
-            frame = framedict[location.start % 3]
-        else
-            start = seqlen - location.stop + 1
-            stop = start + length(location) - 1
-            frame = framedict[(seqlen - location.stop + 1) % 3]
-        end
-
-        seq = @view(sequence[start:stop])
-        score = scheme === nothing ? 0 : scheme(@view(s[start:stop]), ECOLICDS)
-        gc = gc_content(seq)
-        len = length(seq)
-        fts = Dict(:gc => gc, :length => len, :score => score)
-
-        return ORF{N, NaiveFinder}(seqname, start, stop, strand, frame, seq, fts, scheme)
-    end
-
-    @inbounds for strand in ('+', '-')
+    for strand in ('+', '-')
         s = strand == '-' ? reverse_complement(sequence) : sequence
         @inbounds for location in @views _locationiterator(s; alternative_start)
-            orf = createorf(location, strand, s, seqlen)
-            if orf !== nothing
-                push!(orfs, orf)
+            if length(location) >= minlen
+                #main fields
+                start = strand == '+' ? location.start : seqlen - location.stop + 1
+                stop = start + length(location) - 1
+                frame = strand == '+' ? framedict[location.start % 3] : framedict[(seqlen - location.stop + 1) % 3]
+                seq = strand == '+' ? @view(sequence[start:stop]) : reverse_complement(@view(sequence[start:stop])) #@view(sequence[start:stop])
+                #features
+                score = scheme === nothing ? 0.0 : scheme(@view(seq[start:stop]), ECOLICDS)
+
+                #populate the feature dict
+                fts = Dict(:score => score) #:gc => gc, :length => len,
+
+                push!(orfs, ORF{N,NaiveFinder}(seqname, start, stop, strand, frame, seq, fts, scheme))
             end
         end
     end
-
     return sort!(orfs; kwargs...)
-end
\ No newline at end of file
+end
+
+# export createorf
+
+# function createorf(
+#     sequence::NucleicSeqOrView{DNAAlphabet{N}},
+#     seqname::String,
+#     location::UnitRange{Int64},
+#     strand::Char,
+#     seqlen::Int64,
+#     minlen::Int64,
+#     scheme::Union{Nothing,Function}
+# ) where {N}
+#     if length(location) < minlen
+#         return nothing
+#     end
+
+#     framedict = Dict(0 => 3, 1 => 1, 2 => 2)
+
+#     if strand == '+'
+#         start = location.start
+#         stop = start + length(location) - 1
+#         frame = framedict[location.start % 3]
+#     else
+#         start = seqlen - location.stop + 1
+#         stop = start + length(location) - 1
+#         frame = framedict[(seqlen - location.stop + 1) % 3]
+#     end
+
+#     seq = strand == '+' ? @view(sequence[start:stop]) : reverse_complement(@view(sequence[start:stop]))
+#     score = scheme === nothing ? 0 : scheme(@view(seq[start:stop]), ECOLICDS)
+#     gc = gc_content(seq)
+#     len = length(seq)
+#     fts = Dict(:gc => gc, :length => len, :score => score)
+
+#     return ORF{N, NaiveFinder}(seqname, start, stop, strand, frame, seq, fts, scheme)
+# end
+
+# function NaiveFinder(
+#     sequence::NucleicSeqOrView{DNAAlphabet{N}};
+#     alternative_start::Bool = false,
+#     minlen::Int64 = 6,
+#     scheme::Union{Nothing,Function} = nothing,
+#     kwargs...
+# ) where {N}
+#     seqlen = length(sequence)
+#     orfs = Vector{ORF{N,NaiveFinder}}()
+#     seqname = get_var_name(sequence)
+
+#     @inbounds for strand in ('+', '-')
+#         seq = strand == '-' ? reverse_complement(sequence) : sequence
+#         @inbounds for location in @views _locationiterator(s; alternative_start)
+#             orf = createorf(seq, seqname, location, strand, seqlen, minlen, scheme)
+#             if orf !== nothing
+#                 push!(orfs, orf)
+#             end
+#         end
+#     end
+
+#     return sort!(orfs; kwargs...)
+# end
\ No newline at end of file
diff --git a/src/types.jl b/src/types.jl
index 2aa52a4..9e4ec85 100644
--- a/src/types.jl
+++ b/src/types.jl
@@ -73,19 +73,15 @@ function scheme(i::ORF{N,F}) where {N,F}
 end
 
 function sequence(i::ORF{N,F}) where {N,F}
-    return i.strand == STRAND_POS ? i.seq : reverse_complement(i.seq)
+    return i.seq #i.strand == STRAND_POS ? i.seq : reverse_complement(i.seq)
 end
 
 function score(i::ORF{N,F}) where {N,F}
     return i.features[:score]
 end
 
-function gc(i::ORF{N,F}) where {N,F}
-    return i.features[:gc]
-end
-
 function length(i::ORF{N,F}) where {N,F}
-    return i.features[:length]
+    return length(i.seq)
 end
 
 function features(i::ORF{N,F}) where {N,F}
diff --git a/src/utils.jl b/src/utils.jl
index 27bf9f3..f40a09f 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -1,4 +1,5 @@
 export hasprematurestop, fasta2bioseq, get_var_name
+
 # General purposes methods supporting main functions
 
 """

From f17a1f5dac629230a3123d69de4c106239c44083 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Camilo=20Garc=C3=ADa?= <ca.garcia2@uniandes.edu.co>
Date: Tue, 4 Jun 2024 21:43:02 -0500
Subject: [PATCH 14/40] Refactor finders with correct score calculation

---
 src/algorithms/naivecollector.jl | 2 +-
 src/algorithms/naivefinder.jl    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/algorithms/naivecollector.jl b/src/algorithms/naivecollector.jl
index f733e67..0377de1 100644
--- a/src/algorithms/naivecollector.jl
+++ b/src/algorithms/naivecollector.jl
@@ -32,7 +32,7 @@ function NaiveCollector(
             frame = framedict[(seqlen - x.captured[3]) % 3]
         end
         seq = strand == '+' ? @view(sequence[start:stop]) : reverse_complement(@view(sequence[start:stop])) #@view(sequence[start:stop])
-        score = scheme === nothing ? 0.0 : scheme(@view(seq[start:stop]), ECOLICDS)
+        score = scheme === nothing ? 0.0 : scheme(seq, ECOLICDS)
         fts = Dict(:score => score)
         return ORF{N,NaiveCollector}(seqname, start, stop, strand, frame, seq, fts, scheme)
     end
diff --git a/src/algorithms/naivefinder.jl b/src/algorithms/naivefinder.jl
index d78c20e..8fa92d9 100644
--- a/src/algorithms/naivefinder.jl
+++ b/src/algorithms/naivefinder.jl
@@ -83,7 +83,7 @@ function NaiveFinder(
                 frame = strand == '+' ? framedict[location.start % 3] : framedict[(seqlen - location.stop + 1) % 3]
                 seq = strand == '+' ? @view(sequence[start:stop]) : reverse_complement(@view(sequence[start:stop])) #@view(sequence[start:stop])
                 #features
-                score = scheme === nothing ? 0.0 : scheme(@view(seq[start:stop]), ECOLICDS)
+                score = scheme === nothing ? 0.0 : scheme(seq, ECOLICDS) # @view(sequence[start:stop]
 
                 #populate the feature dict
                 fts = Dict(:score => score) #:gc => gc, :length => len,

From 41e509b35f27c4e45092c6ca1e2d955541867b24 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Camilo=20Garc=C3=ADa?= <ca.garcia2@uniandes.edu.co>
Date: Wed, 5 Jun 2024 00:07:49 -0500
Subject: [PATCH 15/40] chore: Update Julia version to 1.10.4

---
 Manifest.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Manifest.toml b/Manifest.toml
index 0cc8efb..49d18d7 100644
--- a/Manifest.toml
+++ b/Manifest.toml
@@ -1,6 +1,6 @@
 # This file is machine-generated - editing it directly is not advised
 
-julia_version = "1.10.3"
+julia_version = "1.10.4"
 manifest_format = "2.0"
 project_hash = "514456eec9673906fe896a3ae36685951a71c19f"
 

From 140b672085eb4dfe5a7c14a33105b464772399a2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Camilo=20Garc=C3=ADa?= <ca.garcia2@uniandes.edu.co>
Date: Sat, 8 Jun 2024 16:59:32 -0500
Subject: [PATCH 16/40] Update README

---
 README.md | 56 +++++++++++++++++++++++++++++++++++++------------------
 1 file changed, 38 insertions(+), 18 deletions(-)

diff --git a/README.md b/README.md
index 33e7b82..be45347 100644
--- a/README.md
+++ b/README.md
@@ -50,27 +50,27 @@ seq = dna"AACCAGGGCAATATCAGTACCGCGGGCAATGCAACCCTGACTGCCGGCGGTAACCTGAACAGCACTGGCA
 Now lest us find the ORFs
 
 ```julia
-findorfs(seq, NaiveFinder())
-
-12-element Vector{ORF}:
- ORF(29:40, '+', 2, 0.0)
- ORF(137:145, '+', 2, 0.0)
- ORF(164:184, '+', 2, 0.0)
- ORF(173:184, '+', 2, 0.0)
- ORF(236:241, '+', 2, 0.0)
- ORF(248:268, '+', 2, 0.0)
- ORF(362:373, '+', 2, 0.0)
- ORF(470:496, '+', 2, 0.0)
- ORF(551:574, '+', 2, 0.0)
- ORF(569:574, '+', 2, 0.0)
- ORF(581:601, '+', 2, 0.0)
- ORF(695:706, '+', 2, 0.0)
+orfs = findorfs(seq, finder=NaiveFinder) # use finder=NaiveCollector as an alternative
+
+12-element Vector{ORF{4, NaiveFinder}}:
+ ORF{NaiveFinder}(29:40, '+', 2)
+ ORF{NaiveFinder}(137:145, '+', 2)
+ ORF{NaiveFinder}(164:184, '+', 2)
+ ORF{NaiveFinder}(173:184, '+', 2)
+ ORF{NaiveFinder}(236:241, '+', 2)
+ ORF{NaiveFinder}(248:268, '+', 2)
+ ORF{NaiveFinder}(362:373, '+', 2)
+ ORF{NaiveFinder}(470:496, '+', 2)
+ ORF{NaiveFinder}(551:574, '+', 2)
+ ORF{NaiveFinder}(569:574, '+', 2)
+ ORF{NaiveFinder}(581:601, '+', 2)
+ ORF{NaiveFinder}(695:706, '+', 2)
 ```
 
-Two other methods where implemented into `getorfs` to get the ORFs in DNA or aminoacid sequences, respectively. They use the `findorfs` function to first get the ORFs and then get the correspondance array of `BioSequence` objects.
+The `ORF` structure contains also the sequence of the ORF, the frame, and several features. If you want to get the sequence of the ORFs you can broadcast the `sequence` function to safely extract the sequences of all ORFs.
 
 ```julia
-getorfs(seq, DNAAlphabet{4}(), NaiveFinder())
+sequence.(orfs)
 
 12-element Vector{LongSubSeq{DNAAlphabet{4}}}:
  ATGCAACCCTGA
@@ -87,6 +87,26 @@ getorfs(seq, DNAAlphabet{4}(), NaiveFinder())
  ATGCAACCCTGA
 ```
 
+Similarly, you can extract the amino acid sequences of the ORFs using the `translate` function.
+
+```julia
+translate.(orfs)
+
+12-element Vector{LongAA}:
+ MQP*
+ MR*
+ MRRMAR*
+ MAR*
+ M*
+ MCPTAV*
+ MQP*
+ MHWLVLSI*
+ MSPHKAM*
+ M*
+ MCPTAA*
+ MQP*
+```
+
 ## Writting ORF information into bioinformatic formats
 
 This package facilitates now the creation of `FASTA`, `BED`, and `GFF` files, specifically extracting Open Reading Frame (ORF) information from `BioSequence` instances, particularly those of type `NucleicSeqOrView{A} where A`, and then writing the information into the desired format.
@@ -118,7 +138,7 @@ Once a `BioSequence` object has been instantiated, the `write_orfs_fna` function
 outfile = "LFLS01000089.fna"
 
 open(outfile, "w") do io
-    write_orfs_fna(seq, io, NaiveFinder())
+    write_orfs_fna(seq, io, finder=NaiveFinder) # use finder=NaiveCollector as an alternative
 end
 ```
 

From 8cbd9611ebf1d3aca5cc16c55d8fe7d17708544f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Camilo=20Garc=C3=ADa?= <ca.garcia2@uniandes.edu.co>
Date: Sat, 8 Jun 2024 17:01:08 -0500
Subject: [PATCH 17/40] Manifest update

---
 Manifest.toml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Manifest.toml b/Manifest.toml
index 49d18d7..a00ab9a 100644
--- a/Manifest.toml
+++ b/Manifest.toml
@@ -132,9 +132,9 @@ uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce"
 version = "0.7.0"
 
 [[deps.StaticArraysCore]]
-git-tree-sha1 = "36b3d696ce6366023a0ea192b4cd442268995a0d"
+git-tree-sha1 = "192954ef1208c7019899fbf8049e717f92959682"
 uuid = "1e83bf80-4336-4d27-bf5d-d5a4f845583c"
-version = "1.4.2"
+version = "1.4.3"
 
 [[deps.StringViews]]
 git-tree-sha1 = "f7b06677eae2571c888fd686ba88047d8738b0e3"

From 56d3f1c2721135359faebacc46b1476518351b86 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Camilo=20Garc=C3=ADa?= <ca.garcia2@uniandes.edu.co>
Date: Sat, 8 Jun 2024 17:02:06 -0500
Subject: [PATCH 18/40] Create Features struct and include it in the ORF as the
 NamedTuple field

---
 src/types.jl | 25 ++++++++++++++++++-------
 1 file changed, 18 insertions(+), 7 deletions(-)

diff --git a/src/types.jl b/src/types.jl
index 9e4ec85..f9d1755 100644
--- a/src/types.jl
+++ b/src/types.jl
@@ -1,8 +1,8 @@
-import GenomicFeatures: first, last, length, strand, groupname
+import GenomicFeatures: first, last, length, strand, groupname, metadata
 import FASTX: sequence
 
-export RBS, ORF
-export gc, features, sequence
+export Features, RBS, ORF
+export features, sequence
 export groupname, finder, frame, scheme, score
 
 #### Ribosome Binding Site (RBS) struct ####
@@ -10,6 +10,12 @@ export groupname, finder, frame, scheme, score
 # seq[orf.first-bin01.offset.start:orf.first-1]
 
 # motifs = [dna"GGA", dna"GAG", dna"AGG"]
+
+struct Features{K}
+    fts::K
+end
+
+Features(fts::NamedTuple) = Features{typeof(fts)}(fts)
 struct RBS
     motif::BioRegex{DNA}
     offset::UnitRange{Int64} # offset
@@ -20,8 +26,8 @@ struct RBS
     end
 end
 
-# Structs associated with gene models
 
+# Structs associated with gene models
 # const FEATUREDICT = Dict{Symbol,Any}(:gc => 0.0, :length => 0, :score => 0.0)
 # const Feature = Union{Real, Vector{RBS}}
 struct ORF{N,F} <: GenomicFeatures.AbstractGenomicInterval{F}
@@ -31,7 +37,7 @@ struct ORF{N,F} <: GenomicFeatures.AbstractGenomicInterval{F}
     strand::Strand
     frame::Int
     seq::LongSubSeq{DNAAlphabet{N}}
-    features::Dict{Symbol,Any}
+    features::Features  # features::Dict{Symbol,Any} or # features::@NamedTuple{score::Float64, rbs::Any}
     scheme::Union{Nothing,Function}
 end
 
@@ -43,10 +49,11 @@ function ORF{N,F}(
     strand::Union{Strand,Char},
     frame::Int,
     seq::LongSubSeq{DNAAlphabet{N}},
-    features::Dict{Symbol,Any},
+    features::Features, # ::Dict{Symbol,Any} or # ::@NamedTuple{score::Float64, rbs::Any} or @NamedTuple{Vararg{typeof(...)}}
     scheme::Union{Nothing,Function}=nothing
 ) where {N,F<:GeneFinderMethod}
     # @assert frame in (1, 2, 3) && location[1] < location[end] && score isa Union{Nothing, Float64} && length(seq) == last(location) - first(location) + 1 && length(seq) % 3 == 0 "Invalid input. Please check the frame, location, score, and sequence length."
+    # groupname == "" && error("Please provide a groupname for the ORF.")
     return ORF{N,F}(groupname, first, last, strand, frame, seq, features, scheme) #finder
 end
 
@@ -85,7 +92,7 @@ function length(i::ORF{N,F}) where {N,F}
 end
 
 function features(i::ORF{N,F}) where {N,F}
-    return i.features
+    return i.features.fts
 end
 
 function Base.show(io::IO, i::ORF{N,F}) where {N,F}
@@ -96,6 +103,10 @@ function Base.show(io::IO, i::ORF{N,F}) where {N,F}
     end
 end
 
+function metadata(i::ORF{N,F}) where {N,F}
+    return features(i)
+end
+
 ## Ideas for Gene struct
 
 # struct CDS <: AbstractGene

From fd839c99402978f2bdc1a5514277dcd5f6f4c415 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Camilo=20Garc=C3=ADa?= <ca.garcia2@uniandes.edu.co>
Date: Sat, 8 Jun 2024 17:02:21 -0500
Subject: [PATCH 19/40] Refactor code to improve precompile file size and
 loading speed

---
 src/GeneFinder.jl | 13 +------------
 src/workload.jl   | 15 +++++++++++++++
 2 files changed, 16 insertions(+), 12 deletions(-)
 create mode 100644 src/workload.jl

diff --git a/src/GeneFinder.jl b/src/GeneFinder.jl
index cbc4834..467c120 100644
--- a/src/GeneFinder.jl
+++ b/src/GeneFinder.jl
@@ -26,7 +26,6 @@ using BioSequences:
 using BioMarkovChains: BioMarkovChain, dnaseqprobability, ECOLICDS, ECOLINOCDS, log_odds_ratio_score
 using FASTX: FASTAReader, sequence
 using IterTools: takewhile, iterated
-using PrecompileTools: @setup_workload, @compile_workload
 using GenomicFeatures: GenomicFeatures, AbstractGenomicInterval, GenomicInterval, Strand, summary, groupname, strand, metadata, STRAND_POS, STRAND_NEG, STRAND_BOTH, STRAND_NA, leftposition, rightposition, length
 
 # Finder Algorithms
@@ -46,16 +45,6 @@ include("io.jl")
 include("utils.jl")
 include("extended.jl")
 
-@setup_workload begin
-    # Putting some things in `@setup_workload` instead of `@compile_workload` can reduce the size of the
-    # precompile file and potentially make loading faster.
-    # using BioSequences, GeneFinder
-    seq = randdnaseq(99)
-    @compile_workload begin
-        # all calls in this block will be precompiled, regardless of whether
-        # they belong to your package or not (on Julia 1.8 and higher)
-        # findorfs(seq)
-    end
-end
+include("workload.jl")
 
 end
\ No newline at end of file
diff --git a/src/workload.jl b/src/workload.jl
new file mode 100644
index 0000000..203879e
--- /dev/null
+++ b/src/workload.jl
@@ -0,0 +1,15 @@
+using PrecompileTools: @setup_workload, @compile_workload
+
+@setup_workload begin
+    # Putting some things in `@setup_workload` instead of `@compile_workload` can reduce the size of the
+    # precompile file and potentially make loading faster.
+    # using BioSequences, GeneFinder
+    seq = randdnaseq(99)
+    @compile_workload begin
+        _locationiterator(seq; alternative_start=false)
+        # findorfs(seq)
+        # all calls in this block will be precompiled, regardless of whether
+        # they belong to your package or not (on Julia 1.8 and higher)
+        # findorfs(seq)
+    end
+end
\ No newline at end of file

From d0cf75477a19909b96266af14aaf0994268171f3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Camilo=20Garc=C3=ADa?= <ca.garcia2@uniandes.edu.co>
Date: Sat, 8 Jun 2024 17:02:42 -0500
Subject: [PATCH 20/40] Update io methods

---
 src/io.jl | 34 +++++++++++++++++-----------------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/src/io.jl b/src/io.jl
index 8924eaf..a67730e 100644
--- a/src/io.jl
+++ b/src/io.jl
@@ -20,9 +20,9 @@ Write BED data to a file.
 function write_orfs_bed(
     input::Union{String,NucleicSeqOrView{DNAAlphabet{N}}},
     output::Union{IOStream, IOBuffer};
-    finder::Type{F}=NaiveFinder,
+    finder::Type{F} = NaiveFinder,
     alternative_start::Bool = false,
-    minlen::Int64 = 6
+    minlen::Int64 = 6,
 ) where {N,F<:GeneFinderMethod}
     sequence = typeof(input) === String ? fasta2bioseq(input)[1] : input
     orfs = findorfs(sequence; finder, alternative_start, minlen)
@@ -34,10 +34,10 @@ end
 function write_orfs_bed(
     input::Union{String,NucleicSeqOrView{DNAAlphabet{N}}},
     output::String;
-    finder::Type{F}=NaiveFinder,
+    finder::Type{F} = NaiveFinder,
     alternative_start::Bool = false,
-    minlen::Int64 = 6
-) where {N, F<:GeneFinderMethod}
+    minlen::Int64 = 6,
+) where {N,F<:GeneFinderMethod}
     sequence = typeof(input) === String ? fasta2bioseq(input)[1] : input
     orfs = findorfs(sequence; finder, alternative_start, minlen)
     open(output, "w") do f
@@ -81,10 +81,10 @@ end
 function write_orfs_fna(
     input::Union{String, NucleicSeqOrView{DNAAlphabet{N}}},
     output::Union{IOStream, IOBuffer};
-    finder::Type{F}=NaiveFinder,
+    finder::Type{F} = NaiveFinder,
     alternative_start::Bool = false, 
-    minlen::Int64 = 6
-) where {N, F<:GeneFinderMethod}
+    minlen::Int64 = 6,
+) where {N,F<:GeneFinderMethod}
     seq = typeof(input) === String ? fasta2bioseq(input)[1] : input
     orfs = findorfs(seq; finder, alternative_start, minlen)
     norfs = length(orfs)
@@ -99,10 +99,10 @@ end
 function write_orfs_fna(
     input::Union{String,NucleicSeqOrView{DNAAlphabet{N}}}, 
     output::String;
-    finder::Type{F}=NaiveFinder,
+    finder::Type{F} = NaiveFinder,
     alternative_start::Bool = false,
-    minlen::Int64 = 6
-) where {N, F<:GeneFinderMethod}
+    minlen::Int64 = 6,
+) where {N,F<:GeneFinderMethod}
     seq = typeof(input) === String ? fasta2bioseq(input)[1] : input
     orfs = findorfs(seq; finder, alternative_start, minlen)
     norfs = length(orfs)
@@ -149,10 +149,10 @@ end
 function write_orfs_faa(
     input::Union{String, NucleicSeqOrView{DNAAlphabet{N}}},
     output::Union{IOStream, IOBuffer};
-    finder::Type{F}=NaiveFinder,
+    finder::Type{F} = NaiveFinder,
     alternative_start::Bool = false,
     minlen::Int64 = 6,
-    code::GeneticCode = ncbi_trans_table[1]
+    code::GeneticCode = ncbi_trans_table[1],
 ) where {N,F<:GeneFinderMethod}
     seq = typeof(input) === String ? fasta2bioseq(input)[1] : input
     orfs = findorfs(seq; finder, alternative_start, minlen)
@@ -171,7 +171,7 @@ function write_orfs_faa(
     finder::Type{F}=NaiveFinder,
     alternative_start::Bool = false,
     minlen::Int64 = 6,
-    code::GeneticCode = ncbi_trans_table[1]
+    code::GeneticCode = ncbi_trans_table[1],
 ) where {N,F<:GeneFinderMethod}
     seq = typeof(input) === String ? fasta2bioseq(input)[1] : input
     orfs = findorfs(seq; finder, alternative_start, minlen)
@@ -208,10 +208,10 @@ Write GFF data to a file.
 function write_orfs_gff(
     input::Union{String, NucleicSeqOrView{DNAAlphabet{N}}},
     output::Union{IOStream, IOBuffer};
-    finder::Type{F}=NaiveFinder,
+    finder::Type{F} = NaiveFinder,
     alternative_start::Bool = false,
     minlen::Int64 = 6,
-) where {N, F<:GeneFinderMethod}
+) where {N,F<:GeneFinderMethod}
     seq = typeof(input) === String ? fasta2bioseq(input)[1] : input
     orfs = findorfs(seq; finder, alternative_start, minlen)
     norfs = length(orfs)
@@ -226,7 +226,7 @@ end
 function write_orfs_gff(
     input::Union{String, NucleicSeqOrView{DNAAlphabet{N}}},
     output::String;
-    finder::Type{F}=NaiveFinder,
+    finder::Type{F} = NaiveFinder,
     alternative_start::Bool = false,
     minlen::Int64 = 6,
 ) where {N, F<:GeneFinderMethod}

From 2ac4ce92a1741c445efc7a5210191061caa4a32d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Camilo=20Garc=C3=ADa?= <ca.garcia2@uniandes.edu.co>
Date: Sat, 8 Jun 2024 17:02:57 -0500
Subject: [PATCH 21/40] Remove getorfs.jl

---
 src/getorfs.jl | 131 -------------------------------------------------
 1 file changed, 131 deletions(-)
 delete mode 100644 src/getorfs.jl

diff --git a/src/getorfs.jl b/src/getorfs.jl
deleted file mode 100644
index c487aeb..0000000
--- a/src/getorfs.jl
+++ /dev/null
@@ -1,131 +0,0 @@
-# export getorfs
-
-# #### get_orfs_* methods ####
-
-# """
-#     getorfs(sequence::NucleicSeqOrView{DNAAlphabet{N}}, ::Alphabet, method::M; kwargs...) where {N, M<:GeneFinderMethod}
-
-# This function takes a `NucleicSeqOrView{DNAAlphabet{N}}` sequence and identifies the open reading frames (ORFs) using the `findorfs()` function under the selected finder method. The function then extracts the DNA or aminoacid sequence of each ORF and stores it in a `Vector{LongSubSeq{A}}`.
-
-# # Arguments
-
-# - `sequence`: The input sequence as a `NucleicSeqOrView{DNAAlphabet{N}}`
-# - `alphabet`: The output sequence alphabet. It can be either `DNAAlphabet{4}()` or `AminoAcidAlphabet()`.
-# - `method`: The method used to find ORFs. It must be a subtype of `GeneFinderMethod`. (e.g., `NaiveFinder()`)
-
-# # Keyword Arguments
-
-# - `alternative_start::Bool=false`: If set to `true`, the function considers alternative start codons when searching for ORFs. This increases the execution time by approximately 3x.
-# - `minlen::Int64=6`: The minimum length of the allowed ORF. By default, it allows ORFs that can encode at least one amino acid (e.g., `aa"M*"`).
-
-# # Examples
-
-# ```julia
-# sequence = randdnaseq(120)
-
-#     120nt DNA Sequence:
-#     GCCGGACAGCGAAGGCTAATAAATGCCCGTGCCAGTATC…TCTGAGTTACTGTACACCCGAAAGACGTTGTACGCATTT
-
-
-# getorfs(sequence, DNAAlphabet{4}(), NaiveFinder())
-
-#     1-element Vector{LongSubSeq{DNAAlphabet{4}}}:
-#      ATGCGTACAACGTCTTTCGGGTGTACAGTAACTCAGAGCTAA
-
-# ```
-# """
-# function getorfs(
-#     seq::NucleicSeqOrView{DNAAlphabet{N}},
-#     ::DNAAlphabet{N};
-#     finder::Type{F}=NaiveFinder,
-#     alternative_start::Bool = false,
-#     minlen::Int64 = 6,
-#     kwargs...
-# ) where {N,F<:GeneFinderMethod}
-#     orfs = findorfs(seq; finder, alternative_start, minlen)
-#     return sequence.(orfs)
-# end
-
-# function getorfs(
-#     seq::NucleicSeqOrView{DNAAlphabet{N}},
-#     ::AminoAcidAlphabet;
-#     finder::Type{F}=NaiveFinder,
-#     alternative_start::Bool = false,
-#     minlen::Int64 = 6,
-#     code::GeneticCode = ncbi_trans_table[1],
-#     kwargs...
-# ) where {N,F<:GeneFinderMethod}
-#     orfs = findorfs(seq; finder, alternative_start, minlen)
-#     return @. translate(sequence(orfs); code)
-# end
-
-# #### record_orfs_* methods ####
-
-# """
-#     record_orfs_fna(sequence::NucleicSeqOrView{DNAAlphabet{N}}; kwargs...) where {N}
-
-# Record Open Reading Frames (ORFs) in a nucleic acid sequence.
-
-# # Arguments
-# - `sequence::NucleicSeqOrView{DNAAlphabet{N}}`: The nucleic acid sequence to search for ORFs.
-# - `alternative_start::Bool=false`: If `true`, consider alternative start codons for ORFs.
-# - `minlen::Int=6`: The minimum length of an ORF to be recorded.
-
-# # Returns
-# An array of `FASTARecord` objects representing the identified ORFs.
-
-# # Description
-# This function searches for Open Reading Frames (ORFs) in a given nucleic acid sequence. An ORF is a sequence of DNA that starts with a start codon and ends with a stop codon, without any other stop codons in between. By default, only the standard start codon (ATG) is considered, but if `alternative_start` is set to `true`, alternative start codons are also considered. The minimum length of an ORF to be recorded can be specified using the `minlen` argument.
-# """
-# # function record_orfs_fna(
-# #     sequence::NucleicSeqOrView{DNAAlphabet{N}}; 
-# #     alternative_start::Bool = false, 
-# #     minlen::Int64 = 6
-# # ) where {N}
-# #     orfs = findorfs(sequence; alternative_start, minlen)
-# #     norfs = length(orfs)
-# #     padding = norfs < 10 ? length(string(norfs)) + 1 : length(string(norfs))
-# #     records = FASTARecord[]
-# #     @inbounds for (index, orf) in enumerate(orfs)
-# #         id = string(lpad(string(index), padding, "0"))
-# #         header = "ORF$(id) id=$(id) start=$(orf.location.start) stop=$(orf.location.stop) strand=$(orf.strand) frame=$(orf.frame)"
-# #         record = FASTARecord(header, sequence[orf])
-# #         push!(records, record)
-# #     end
-# #     return records
-# # end
-
-# """
-#     record_orfs_faa(sequence::NucleicSeqOrView{DNAAlphabet{N}}; kwargs...) where {N}
-
-# This function takes a nucleic acid sequence and finds all open reading frames (ORFs) in the sequence. 
-# An ORF is a sequence of DNA that starts with a start codon and ends with a stop codon. 
-# The function returns a list of FASTA records, where each record represents an ORF in the sequence.
-
-# # Arguments
-# - `sequence`: The nucleic acid sequence to search for ORFs.
-# - `alternative_start`: A boolean indicating whether to consider alternative start codons. Default is `false`.
-# - `code`: The genetic code to use for translation. Default is the NCBI translation table 1.
-# - `minlen`: The minimum length of an ORF. ORFs shorter than this length will be ignored. Default is 6.
-
-# # Returns
-# - A list of FASTA records representing the ORFs found in the sequence.
-# """
-# # function record_orfs_faa(
-# #     sequence::NucleicSeqOrView{DNAAlphabet{N}};
-# #     alternative_start::Bool = false, 
-# #     code::GeneticCode = ncbi_trans_table[1],
-# #     minlen::Int64 = 6
-# # ) where {N}
-# #     orfs = findorfs(sequence; alternative_start, minlen)
-# #     norfs = length(orfs)
-# #     padding = norfs < 10 ? length(string(norfs)) + 1 : length(string(norfs))
-# #     records = FASTARecord[]
-# #     @inbounds for (index, orf) in enumerate(orfs)
-# #         id = string(lpad(string(index), padding, "0"))
-# #         header = "ORF$(id) id=$(id) start=$(orf.location.start) stop=$(orf.location.stop) strand=$(orf.strand) frame=$(orf.frame)"
-# #         record = FASTARecord(header, translate(sequence[orf]; code))
-# #         push!(records, record)
-# #     end
-# #     return records
-# # end

From ba9e75ecc159b3c69751721282ad01a6f560efe7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Camilo=20Garc=C3=ADa?= <ca.garcia2@uniandes.edu.co>
Date: Sat, 8 Jun 2024 17:03:27 -0500
Subject: [PATCH 22/40] Update finder methods with the Feature struct

---
 src/algorithms/naivecollector.jl | 5 +++--
 src/algorithms/naivefinder.jl    | 5 +++--
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/algorithms/naivecollector.jl b/src/algorithms/naivecollector.jl
index 0377de1..024e705 100644
--- a/src/algorithms/naivecollector.jl
+++ b/src/algorithms/naivecollector.jl
@@ -32,8 +32,9 @@ function NaiveCollector(
             frame = framedict[(seqlen - x.captured[3]) % 3]
         end
         seq = strand == '+' ? @view(sequence[start:stop]) : reverse_complement(@view(sequence[start:stop])) #@view(sequence[start:stop])
-        score = scheme === nothing ? 0.0 : scheme(seq, ECOLICDS)
-        fts = Dict(:score => score)
+        scr = scheme === nothing ? 0.0 : scheme(seq, ECOLICDS)
+        # fts = Dict(:score => score)
+        fts = Features((score = scr,)) # rbs = RBS(biore"RRR"dna, 3:4, 1.0)
         return ORF{N,NaiveCollector}(seqname, start, stop, strand, frame, seq, fts, scheme)
     end
 
diff --git a/src/algorithms/naivefinder.jl b/src/algorithms/naivefinder.jl
index 8fa92d9..9a06a96 100644
--- a/src/algorithms/naivefinder.jl
+++ b/src/algorithms/naivefinder.jl
@@ -83,10 +83,11 @@ function NaiveFinder(
                 frame = strand == '+' ? framedict[location.start % 3] : framedict[(seqlen - location.stop + 1) % 3]
                 seq = strand == '+' ? @view(sequence[start:stop]) : reverse_complement(@view(sequence[start:stop])) #@view(sequence[start:stop])
                 #features
-                score = scheme === nothing ? 0.0 : scheme(seq, ECOLICDS) # @view(sequence[start:stop]
+                scr = scheme === nothing ? 0.0 : scheme(seq, ECOLICDS) # @view(sequence[start:stop]
 
                 #populate the feature dict
-                fts = Dict(:score => score) #:gc => gc, :length => len,
+                #fts = Dict(:score => score) #:gc => gc, :length => len,
+                fts = Features((score = scr,)) # rbs = RBS(biore"RRR"dna, 3:4, 1.0)
 
                 push!(orfs, ORF{N,NaiveFinder}(seqname, start, stop, strand, frame, seq, fts, scheme))
             end

From 80a05941ca6fcdd40b2c5815eac2f9e9b71daeda Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Camilo=20Garc=C3=ADa?= <ca.garcia2@uniandes.edu.co>
Date: Sat, 8 Jun 2024 17:44:18 -0500
Subject: [PATCH 23/40] Update lors from BMC taking only one argument

---
 Manifest.toml                 |  6 +--
 Project.toml                  |  2 +-
 src/algorithms/naivefinder.jl |  2 +-
 test/findorfstest.jl          | 88 ++++++++++++++++++++---------------
 test/runtests.jl              |  4 +-
 5 files changed, 58 insertions(+), 44 deletions(-)

diff --git a/Manifest.toml b/Manifest.toml
index a00ab9a..2448361 100644
--- a/Manifest.toml
+++ b/Manifest.toml
@@ -2,7 +2,7 @@
 
 julia_version = "1.10.4"
 manifest_format = "2.0"
-project_hash = "514456eec9673906fe896a3ae36685951a71c19f"
+project_hash = "e13b3d62f031e8e91ec33e86f0dbf42fa3097ba0"
 
 [[deps.Automa]]
 deps = ["PrecompileTools", "TranscodingStreams"]
@@ -21,9 +21,9 @@ version = "0.1.4"
 
 [[deps.BioMarkovChains]]
 deps = ["BioSequences", "PrecompileTools", "VectorizedKmers"]
-git-tree-sha1 = "c48cc4efe18355a1a19ae627d05bf19df0a57253"
+git-tree-sha1 = "72c9d5027120756a71fb7b1036a052b6f9353ed6"
 uuid = "f861b655-cb5f-42ce-b66a-341b542d4f2c"
-version = "0.9.3"
+version = "0.10.0"
 
     [deps.BioMarkovChains.extensions]
     DiscreteMarkovChainsExt = "DiscreteMarkovChains"
diff --git a/Project.toml b/Project.toml
index cae9ace..2fe732b 100644
--- a/Project.toml
+++ b/Project.toml
@@ -12,7 +12,7 @@ IterTools = "c8e1da08-722c-5040-9ed9-7db0dc04731e"
 PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a"
 
 [compat]
-BioMarkovChains = "0.9"
+BioMarkovChains = "0.10"
 BioSequences = "3"
 FASTX = "2"
 GenomicFeatures = "3"
diff --git a/src/algorithms/naivefinder.jl b/src/algorithms/naivefinder.jl
index 9a06a96..833d615 100644
--- a/src/algorithms/naivefinder.jl
+++ b/src/algorithms/naivefinder.jl
@@ -83,7 +83,7 @@ function NaiveFinder(
                 frame = strand == '+' ? framedict[location.start % 3] : framedict[(seqlen - location.stop + 1) % 3]
                 seq = strand == '+' ? @view(sequence[start:stop]) : reverse_complement(@view(sequence[start:stop])) #@view(sequence[start:stop])
                 #features
-                scr = scheme === nothing ? 0.0 : scheme(seq, ECOLICDS) # @view(sequence[start:stop]
+                scr = scheme === nothing ? 0.0 : scheme(seq; kwargs...) # @view(sequence[start:stop]
 
                 #populate the feature dict
                 #fts = Dict(:score => score) #:gc => gc, :length => len,
diff --git a/test/findorfstest.jl b/test/findorfstest.jl
index 3596075..c083ad7 100644
--- a/test/findorfstest.jl
+++ b/test/findorfstest.jl
@@ -1,41 +1,55 @@
-# @testset "findorfs" begin
-#     # cd(@__DIR__)
 
-#     # A random seq to start
-#     seq01 = dna"ATGATGCATGCATGCATGCTAGTAACTAGCTAGCTAGCTAGTAA"
-#     orfs01 = findorfs(seq01, NaiveFinder())
-
-#     @test orfs01 == [ORF{NaiveFinder}(1:33, '+', 1, nothing), ORF{NaiveFinder}(4:33, '+', 1, nothing), ORF{NaiveFinder}(8:22, '+', 2, nothing), ORF{NaiveFinder}(12:29, '+', 3, nothing), ORF{NaiveFinder}(16:33, '+', 1, nothing)]
-#     @test length(orfs01) == 5
-
-#     # > 180195.SAMN03785337.LFLS01000089 -> finds only 1 gene in Prodigal (from Pyrodigal tests)
-#     seq02 = dna"AACCAGGGCAATATCAGTACCGCGGGCAATGCAACCCTGACTGCCGGCGGTAACCTGAACAGCACTGGCAATCTGACTGTGGGCGGTGTTACCAACGGCACTGCTACTACTGGCAACATCGCACTGACCGGTAACAATGCGCTGAGCGGTCCGGTCAATCTGAATGCGTCGAATGGCACGGTGACCTTGAACACGACCGGCAATACCACGCTCGGTAACGTGACGGCACAAGGCAATGTGACGACCAATGTGTCCAACGGCAGTCTGACGGTTACCGGCAATACGACAGGTGCCAACACCAACCTCAGTGCCAGCGGCAACCTGACCGTGGGTAACCAGGGCAATATCAGTACCGCAGGCAATGCAACCCTGACGGCCGGCGACAACCTGACGAGCACTGGCAATCTGACTGTGGGCGGCGTCACCAACGGCACGGCCACCACCGGCAACATCGCGCTGACCGGTAACAATGCACTGGCTGGTCCTGTCAATCTGAACGCGCCGAACGGCACCGTGACCCTGAACACAACCGGCAATACCACGCTGGGTAATGTCACCGCACAAGGCAATGTGACGACTAATGTGTCCAACGGCAGCCTGACAGTCGCTGGCAATACCACAGGTGCCAACACCAACCTGAGTGCCAGCGGCAATCTGACCGTGGGCAACCAGGGCAATATCAGTACCGCGGGCAATGCAACCCTGACTGCCGGCGGTAACCTGAGC"
-#     orfs02 = findorfs(seq02, NaiveFinder())
-
-#     @test length(orfs02) == 12
-#     @test orfs02 == [ORF{NaiveFinder}(29:40, '+', 2, nothing), ORF{NaiveFinder}(137:145, '+', 2, nothing), ORF{NaiveFinder}(164:184, '+', 2, nothing), ORF{NaiveFinder}(173:184, '+', 2, nothing), ORF{NaiveFinder}(236:241, '+', 2, nothing), ORF{NaiveFinder}(248:268, '+', 2, nothing), ORF{NaiveFinder}(362:373, '+', 2, nothing), ORF{NaiveFinder}(470:496, '+', 2, nothing), ORF{NaiveFinder}(551:574, '+', 2, nothing), ORF{NaiveFinder}(569:574, '+', 2, nothing), ORF{NaiveFinder}(581:601, '+', 2, nothing), ORF{NaiveFinder}(695:706, '+', 2, nothing)]
-
-#     # From pyrodigal issue #13 link: https://github.com/althonos/pyrodigal/blob/1f939b0913b48dbaa55d574b20e124f1b8323825/pyrodigal/tests/test_orf_finder.py#L271
-#     # Pyrodigal predicts 2 genes from this sequence:
-#     # 1) An alternative start codon (GTG) sequence at 48:347
-#     # 2) A common start codon sequence at 426:590
-#     # On the other hand, the NCBI ORFfinder program predicts 9 ORFs whose length is greater than 75 nt, from which one has an "outbound" stop
-#     seq03 = dna"TTCGTCAGTCGTTCTGTTTCATTCAATACGATAGTAATGTATTTTTCGTGCATTTCCGGTGGAATCGTGCCGTCCAGCATAGCCTCCAGATATCCCCTTATAGAGGTCAGAGGGGAACGGAAATCGTGGGATACATTGGCTACAAACTTTTTCTGATCATCCTCGGAACGGGCAATTTCGCTTGCCATATAATTCAGACAGGAAGCCAGATAACCGATTTCATCCTCACTATCGACCTGAAATTCATAATGCATATTACCGGCAGCATACTGCTCTGTGGCATGAGTGATCTTCCTCAGAGGAATATATACGATCTCAGTGAAAAAGATCAGAATGATCAGGGATAGCAGGAACAGGATTGCCAGGGTGATATAGGAAATATTCAGCAGGTTGTTACAGGATTTCTGAATATCATTCATATCAGTATGGATGACTACATAGCCTTTTACCTTGTAGTTGGAGGTAATGGGAGCAAATACAGTAAGTACATCCGAATCAAAATTACCGAAGAAATCACCAACAATGTAATAGGAGCCGCTGGTTACGGTCGAATCAAAATTCTCAATGACAACCACATTCTCCACATCTAAGGGACTATTGGTATCCAGTACCAGTCGTCCGGAGGGATTGATGATGCGAATCTCGGAATTCAGGTAGACCGCCAGGGAGTCCAGCTGCATTTTAACGGTCTCCAAAGTTGTTTCACTGGTGTACAATCCGCCGGCATAGGTTCCGGCGATCAGGGTTGCTTCGGAATAGAGACTTTCTGCCTTTTCCCGGATCAGATGTTCTTTGGTCATATTGGGAACAAAAGTTGTAACAATGATGAAACCAAATACACCAAAAATAAAATATGCGAGTATAAATTTTAGATAAAGTGTTTTTTTCATAACAAATCCTGCTTTTGGTATGACTTAATTACGTACTTCGAATTTATAGCCGATGCCCCAGATGGTGCTGATCTTCCAGTTGGCATGATCCTTGATCTTCTC"
-#     orfs03 = findorfs(seq03, NaiveFinder(), minlen=75)
-#     @test length(orfs03) == 9
-#     @test orfs03 == [ORF{NaiveFinder}(37:156, '+', 1, nothing), ORF{NaiveFinder}(194:268, '-', 2, nothing), ORF{NaiveFinder}(194:283, '-', 2, nothing), ORF{NaiveFinder}(249:347, '+', 3, nothing), ORF{NaiveFinder}(426:590, '+', 3, nothing), ORF{NaiveFinder}(565:657, '+', 1, nothing), ORF{NaiveFinder}(650:727, '-', 2, nothing), ORF{NaiveFinder}(786:872, '+', 3, nothing), ORF{NaiveFinder}(887:976, '-', 2, nothing)]
-#                                                                                                            #|->  This occured in Pyrodigal
-#     # Lambda phage tests
-#     # Compare to https://github.com/jonas-fuchs/viral_orf_finder/blob/master/orf_finder.py 
-#     # Salisbury and Tsorukas (2019) paper used the Lambda phage genome with 73 CDS and 545 non-CDS ORFs (a total of 618) to compare predictions between several Gene Finder programs
-#     # For a minimal length of 75 nt the following ORFs are predicted: 
-#     # orf_finder.py --> 885 (222 complete)
-#     # findorfs (GeneFinder.jl) --> 885
-#     # NCBI ORFfinder --> 375 ORFs
-#     # orfipy --> 375 (`orfipy NC_001416.1.fasta --start ATG --include-stop --min 75`)
-#     NC_001416 = fasta2bioseq("data/NC_001416.1.fasta")[1]
-#     NC_001416_orfs = findorfs(NC_001416, NaiveFinder(), minlen=75)
-#     @test length(NC_001416_orfs) == 885
+@testitem "findorfs tests" begin
+    # cd(@__DIR__)
+    # using GeneFinder: findorfs, NaiveFinder, ORF
+    # A random seq to start
+    using BioSequences: @dna_str
+
+    seq01 = dna"ATGATGCATGCATGCATGCTAGTAACTAGCTAGCTAGCTAGTAA"
+    # orfs01 = findorfs(seq01, finder=NaiveFinder)
+    orf01 = ORF{4,NaiveFinder}("seq01", 1, 33, '+', 1, seq01[1:33], Dict(:score => 0.0), nothing)
+    sequence(orf01) == dna"ATGATGCATGCATGCATGCTAGTAACTAGCTAG"
+    groupname(orf01) == "seq01"
+
+    # @test orfs01 == [
+    #     ORF{4,NaiveFinder}("seq01", 1, 33, '+', 1, seq01[1:33], Dict(:score => 0.0), nothing),
+    #     ORF{4,NaiveFinder}("seq01", 4, 33, '+', 1, seq01[4:33], Dict(:score => 0.0), nothing),
+    #     ORF{4,NaiveFinder}("seq01", 8, 22, '+', 2, seq01[8:22], Dict(:score => 0.0), nothing),
+    #     ORF{4,NaiveFinder}("seq01", 12, 29, '+', 3, seq01[12:29], Dict(:score => 0.0), nothing),
+    #     ORF{4,NaiveFinder}("seq01", 16, 33, '+', 1, seq01[16:33], Dict(:score => 0.0), nothing)
+    # ]
+    
+    # @test length(orfs01) == 5
+end
+
+    # > 180195.SAMN03785337.LFLS01000089 -> finds only 1 gene in Prodigal (from Pyrodigal tests)
+    # seq02 = dna"AACCAGGGCAATATCAGTACCGCGGGCAATGCAACCCTGACTGCCGGCGGTAACCTGAACAGCACTGGCAATCTGACTGTGGGCGGTGTTACCAACGGCACTGCTACTACTGGCAACATCGCACTGACCGGTAACAATGCGCTGAGCGGTCCGGTCAATCTGAATGCGTCGAATGGCACGGTGACCTTGAACACGACCGGCAATACCACGCTCGGTAACGTGACGGCACAAGGCAATGTGACGACCAATGTGTCCAACGGCAGTCTGACGGTTACCGGCAATACGACAGGTGCCAACACCAACCTCAGTGCCAGCGGCAACCTGACCGTGGGTAACCAGGGCAATATCAGTACCGCAGGCAATGCAACCCTGACGGCCGGCGACAACCTGACGAGCACTGGCAATCTGACTGTGGGCGGCGTCACCAACGGCACGGCCACCACCGGCAACATCGCGCTGACCGGTAACAATGCACTGGCTGGTCCTGTCAATCTGAACGCGCCGAACGGCACCGTGACCCTGAACACAACCGGCAATACCACGCTGGGTAATGTCACCGCACAAGGCAATGTGACGACTAATGTGTCCAACGGCAGCCTGACAGTCGCTGGCAATACCACAGGTGCCAACACCAACCTGAGTGCCAGCGGCAATCTGACCGTGGGCAACCAGGGCAATATCAGTACCGCGGGCAATGCAACCCTGACTGCCGGCGGTAACCTGAGC"
+    # orfs02 = findorfs(seq02, NaiveFinder())
+
+    # @test length(orfs02) == 12
+    # @test orfs02 == [ORF{NaiveFinder}(29:40, '+', 2, nothing), ORF{NaiveFinder}(137:145, '+', 2, nothing), ORF{NaiveFinder}(164:184, '+', 2, nothing), ORF{NaiveFinder}(173:184, '+', 2, nothing), ORF{NaiveFinder}(236:241, '+', 2, nothing), ORF{NaiveFinder}(248:268, '+', 2, nothing), ORF{NaiveFinder}(362:373, '+', 2, nothing), ORF{NaiveFinder}(470:496, '+', 2, nothing), ORF{NaiveFinder}(551:574, '+', 2, nothing), ORF{NaiveFinder}(569:574, '+', 2, nothing), ORF{NaiveFinder}(581:601, '+', 2, nothing), ORF{NaiveFinder}(695:706, '+', 2, nothing)]
+
+    # From pyrodigal issue #13 link: https://github.com/althonos/pyrodigal/blob/1f939b0913b48dbaa55d574b20e124f1b8323825/pyrodigal/tests/test_orf_finder.py#L271
+    # Pyrodigal predicts 2 genes from this sequence:
+    # 1) An alternative start codon (GTG) sequence at 48:347
+    # 2) A common start codon sequence at 426:590
+    # On the other hand, the NCBI ORFfinder program predicts 9 ORFs whose length is greater than 75 nt, from which one has an "outbound" stop
+    # seq03 = dna"TTCGTCAGTCGTTCTGTTTCATTCAATACGATAGTAATGTATTTTTCGTGCATTTCCGGTGGAATCGTGCCGTCCAGCATAGCCTCCAGATATCCCCTTATAGAGGTCAGAGGGGAACGGAAATCGTGGGATACATTGGCTACAAACTTTTTCTGATCATCCTCGGAACGGGCAATTTCGCTTGCCATATAATTCAGACAGGAAGCCAGATAACCGATTTCATCCTCACTATCGACCTGAAATTCATAATGCATATTACCGGCAGCATACTGCTCTGTGGCATGAGTGATCTTCCTCAGAGGAATATATACGATCTCAGTGAAAAAGATCAGAATGATCAGGGATAGCAGGAACAGGATTGCCAGGGTGATATAGGAAATATTCAGCAGGTTGTTACAGGATTTCTGAATATCATTCATATCAGTATGGATGACTACATAGCCTTTTACCTTGTAGTTGGAGGTAATGGGAGCAAATACAGTAAGTACATCCGAATCAAAATTACCGAAGAAATCACCAACAATGTAATAGGAGCCGCTGGTTACGGTCGAATCAAAATTCTCAATGACAACCACATTCTCCACATCTAAGGGACTATTGGTATCCAGTACCAGTCGTCCGGAGGGATTGATGATGCGAATCTCGGAATTCAGGTAGACCGCCAGGGAGTCCAGCTGCATTTTAACGGTCTCCAAAGTTGTTTCACTGGTGTACAATCCGCCGGCATAGGTTCCGGCGATCAGGGTTGCTTCGGAATAGAGACTTTCTGCCTTTTCCCGGATCAGATGTTCTTTGGTCATATTGGGAACAAAAGTTGTAACAATGATGAAACCAAATACACCAAAAATAAAATATGCGAGTATAAATTTTAGATAAAGTGTTTTTTTCATAACAAATCCTGCTTTTGGTATGACTTAATTACGTACTTCGAATTTATAGCCGATGCCCCAGATGGTGCTGATCTTCCAGTTGGCATGATCCTTGATCTTCTC"
+    # orfs03 = findorfs(seq03, NaiveFinder(), minlen=75)
+    # @test length(orfs03) == 9
+    # @test orfs03 == [ORF{NaiveFinder}(37:156, '+', 1, nothing), ORF{NaiveFinder}(194:268, '-', 2, nothing), ORF{NaiveFinder}(194:283, '-', 2, nothing), ORF{NaiveFinder}(249:347, '+', 3, nothing), ORF{NaiveFinder}(426:590, '+', 3, nothing), ORF{NaiveFinder}(565:657, '+', 1, nothing), ORF{NaiveFinder}(650:727, '-', 2, nothing), ORF{NaiveFinder}(786:872, '+', 3, nothing), ORF{NaiveFinder}(887:976, '-', 2, nothing)]
+                                                                                                           #|->  This occured in Pyrodigal
+    # Lambda phage tests
+    # Compare to https://github.com/jonas-fuchs/viral_orf_finder/blob/master/orf_finder.py 
+    # Salisbury and Tsorukas (2019) paper used the Lambda phage genome with 73 CDS and 545 non-CDS ORFs (a total of 618) to compare predictions between several Gene Finder programs
+    # For a minimal length of 75 nt the following ORFs are predicted: 
+    # orf_finder.py --> 885 (222 complete)
+    # findorfs (GeneFinder.jl) --> 885
+    # NCBI ORFfinder --> 375 ORFs
+    # orfipy --> 375 (`orfipy NC_001416.1.fasta --start ATG --include-stop --min 75`)
+    # NC_001416 = fasta2bioseq("data/NC_001416.1.fasta")[1]
+    # NC_001416_orfs = findorfs(NC_001416, NaiveFinder(), minlen=75)
+    # @test length(NC_001416_orfs) == 885
 # end
 
 # @testitem "getorfs dna" default_imports=false begin
diff --git a/test/runtests.jl b/test/runtests.jl
index 2900ce2..3d34d6d 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -10,9 +10,9 @@ using TestItemRunner
 
 @run_package_tests
 
-# include("findorfstest.jl")
+include("findorfstest.jl")
+include("aquatest.jl")
 # include("iotest.jl")
 # include("getindextest.jl")
-# include("aquatest.jl")
 
 end

From 44c791cae43e03393a1dff35a348b3d850fff477 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Camilo=20Garc=C3=ADa?= <ca.garcia2@uniandes.edu.co>
Date: Sat, 8 Jun 2024 17:44:58 -0500
Subject: [PATCH 24/40] Update naivecolletor.jl to new BMC losr

---
 src/algorithms/naivecollector.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/algorithms/naivecollector.jl b/src/algorithms/naivecollector.jl
index 024e705..0293958 100644
--- a/src/algorithms/naivecollector.jl
+++ b/src/algorithms/naivecollector.jl
@@ -32,7 +32,7 @@ function NaiveCollector(
             frame = framedict[(seqlen - x.captured[3]) % 3]
         end
         seq = strand == '+' ? @view(sequence[start:stop]) : reverse_complement(@view(sequence[start:stop])) #@view(sequence[start:stop])
-        scr = scheme === nothing ? 0.0 : scheme(seq, ECOLICDS)
+        scr = scheme === nothing ? 0.0 : scheme(seq; kwargs...)
         # fts = Dict(:score => score)
         fts = Features((score = scr,)) # rbs = RBS(biore"RRR"dna, 3:4, 1.0)
         return ORF{N,NaiveCollector}(seqname, start, stop, strand, frame, seq, fts, scheme)

From 7663ab6f886472a58850c560b85fda52f7fdccd4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Camilo=20Garc=C3=ADa?= <ca.garcia2@uniandes.edu.co>
Date: Sun, 9 Jun 2024 01:07:24 -0500
Subject: [PATCH 25/40] Update iscoding method, so that it takes an ORF
 directly

---
 src/iscoding.jl | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/iscoding.jl b/src/iscoding.jl
index 982efbc..ae74950 100644
--- a/src/iscoding.jl
+++ b/src/iscoding.jl
@@ -28,12 +28,11 @@ function iscoding(
 end
 
 function iscoding(
-    sequence::NucleicSeqOrView{DNAAlphabet{N}},
     orf::ORF{N,F};
     criteria::Function = lordr,
     kwargs...
 ) where {N,F<:GeneFinderMethod}
-    return criteria(sequence[orf]; kwargs...)
+    return criteria(sequence(orf); kwargs...)
 end
 
 

From b01af90428055c1443102c805fc973a64d882e36 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Camilo=20Garc=C3=ADa?= <ca.garcia2@uniandes.edu.co>
Date: Sun, 9 Jun 2024 01:07:56 -0500
Subject: [PATCH 26/40] Update score method to handle directly the score field

---
 src/types.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/types.jl b/src/types.jl
index f9d1755..75078a5 100644
--- a/src/types.jl
+++ b/src/types.jl
@@ -84,7 +84,7 @@ function sequence(i::ORF{N,F}) where {N,F}
 end
 
 function score(i::ORF{N,F}) where {N,F}
-    return i.features[:score]
+    return i.features.fts[:score]
 end
 
 function length(i::ORF{N,F}) where {N,F}

From 784c1d5b40427bf7bde0de00da6eccf5bcae8a0f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Camilo=20Garc=C3=ADa?= <ca.garcia2@uniandes.edu.co>
Date: Sun, 9 Jun 2024 01:08:42 -0500
Subject: [PATCH 27/40] Update finder methods kwargs to only use the scheme
 kwargs

---
 src/algorithms/naivecollector.jl | 2 +-
 src/algorithms/naivefinder.jl    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/algorithms/naivecollector.jl b/src/algorithms/naivecollector.jl
index 0293958..5035616 100644
--- a/src/algorithms/naivecollector.jl
+++ b/src/algorithms/naivecollector.jl
@@ -46,5 +46,5 @@ function NaiveCollector(
         append!(orfs, strandedorfs)
     end
 
-    return sort!(orfs; kwargs...)
+    return sort!(orfs)
 end
diff --git a/src/algorithms/naivefinder.jl b/src/algorithms/naivefinder.jl
index 833d615..54b8472 100644
--- a/src/algorithms/naivefinder.jl
+++ b/src/algorithms/naivefinder.jl
@@ -93,7 +93,7 @@ function NaiveFinder(
             end
         end
     end
-    return sort!(orfs; kwargs...)
+    return sort!(orfs)
 end
 
 # export createorf

From 01c3e3e13a5c66f50b687e8b4150c66c3b926e80 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Camilo=20Garc=C3=ADa?= <ca.garcia2@uniandes.edu.co>
Date: Sun, 9 Jun 2024 01:09:39 -0500
Subject: [PATCH 28/40] After fixing the lors from BMC package the lordr is
 also updated, requires a new version of BMC.

---
 src/criteria/lordr.jl | 16 +++++-----------
 1 file changed, 5 insertions(+), 11 deletions(-)

diff --git a/src/criteria/lordr.jl b/src/criteria/lordr.jl
index f2d446e..aed311c 100644
--- a/src/criteria/lordr.jl
+++ b/src/criteria/lordr.jl
@@ -41,19 +41,13 @@ iscoding(sequence)  # Returns: true or false
 """
 function lordr( #log_odds_ratio_decision, also lordr/cudr/kfdr/aadr
     sequence::NucleicSeqOrView{DNAAlphabet{N}};
-    codingmodel::BioMarkovChain = ECOLICDS,
-    noncodingmodel::BioMarkovChain = ECOLINOCDS,
-    η::Float64 = 1e-5
+    modela::BioMarkovChain = ECOLICDS,
+    modelb::BioMarkovChain = ECOLINOCDS,
+    b::Number = 2,
+    η::Float64 = 5e-3
 ) where {N}
-    pcoding = dnaseqprobability(sequence, codingmodel)
-    pnoncoding = dnaseqprobability(sequence, noncodingmodel)
-
-    logodds = log(pcoding / pnoncoding)
-
-    length(sequence) % 3 == 0 || error("The sequence is not divisible by 3")
-
-    !hasprematurestop(sequence) || error("There is a premature stop codon in the sequence")
 
+    logodds = log_odds_ratio_score(sequence; modela=modela, modelb=modelb, b=b)
     if logodds > η
         return true
     else

From 24b05b56487671f1afe32a44996cdc8602ae7d75 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Camilo=20Garc=C3=ADa?= <ca.garcia2@uniandes.edu.co>
Date: Sun, 9 Jun 2024 14:11:57 -0500
Subject: [PATCH 29/40] Update to BMC v0.10.1

---
 Manifest.toml     | 4 ++--
 src/GeneFinder.jl | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/Manifest.toml b/Manifest.toml
index 2448361..f244a35 100644
--- a/Manifest.toml
+++ b/Manifest.toml
@@ -21,9 +21,9 @@ version = "0.1.4"
 
 [[deps.BioMarkovChains]]
 deps = ["BioSequences", "PrecompileTools", "VectorizedKmers"]
-git-tree-sha1 = "72c9d5027120756a71fb7b1036a052b6f9353ed6"
+git-tree-sha1 = "ff9f9e36892cf21222f2f2a0fe709a61b5d8dbfe"
 uuid = "f861b655-cb5f-42ce-b66a-341b542d4f2c"
-version = "0.10.0"
+version = "0.10.1"
 
     [deps.BioMarkovChains.extensions]
     DiscreteMarkovChainsExt = "DiscreteMarkovChains"
diff --git a/src/GeneFinder.jl b/src/GeneFinder.jl
index 467c120..3999e94 100644
--- a/src/GeneFinder.jl
+++ b/src/GeneFinder.jl
@@ -23,7 +23,7 @@ using BioSequences:
     translate,
     gc_content
 
-using BioMarkovChains: BioMarkovChain, dnaseqprobability, ECOLICDS, ECOLINOCDS, log_odds_ratio_score
+using BioMarkovChains: BioMarkovChain, ECOLICDS, ECOLINOCDS, log_odds_ratio_score
 using FASTX: FASTAReader, sequence
 using IterTools: takewhile, iterated
 using GenomicFeatures: GenomicFeatures, AbstractGenomicInterval, GenomicInterval, Strand, summary, groupname, strand, metadata, STRAND_POS, STRAND_NEG, STRAND_BOTH, STRAND_NA, leftposition, rightposition, length

From 204765625fba17161a238c8e978ee4da6f3051b0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Camilo=20Garc=C3=ADa?= <ca.garcia2@uniandes.edu.co>
Date: Sun, 9 Jun 2024 14:13:46 -0500
Subject: [PATCH 30/40] Update some docstring in lordr

---
 src/criteria/lordr.jl | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/criteria/lordr.jl b/src/criteria/lordr.jl
index aed311c..32cad02 100644
--- a/src/criteria/lordr.jl
+++ b/src/criteria/lordr.jl
@@ -3,8 +3,8 @@ export log_odds_ratio_decision_rule, lordr, lors
 @doc raw"""
     log_odds_ratio_decision_rule(
         sequence::LongSequence{DNAAlphabet{4}};
-        codingmodel::BioMarkovChain,
-        noncodingmodel::BioMarkovChain,
+        modela::BioMarkovChain,
+        modelb::BioMarkovChain,
         η::Float64 = 1e-5
         )
 
@@ -22,6 +22,7 @@ S(X) = \log \left( \frac{{P_C(X_1=i_1, \ldots, X_T=i_T)}}{{P_N(X_1=i_1, \ldots,
 ## Keyword Arguments
 - `codingmodel::BioMarkovChain`: The transition model for coding regions, (default: `ECOLICDS`).
 - `noncodingmodel::BioMarkovChain`: The transition model for non-coding regions, (default: `ECOLINOCDS`)
+- `b::Number = 2`: The base of the logarithm used to calculate the log-odds ratio (default: 2).
 - `η::Float64 = 1e-5`: The threshold value (eta) for the log-odds ratio (default: 1e-5).
 
 # Returns

From f21c4d991dc088509172a332f660fa0133535e6a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Camilo=20Garc=C3=ADa?= <ca.garcia2@uniandes.edu.co>
Date: Mon, 1 Jul 2024 14:12:47 -0500
Subject: [PATCH 31/40] Update deps

---
 Manifest.toml | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/Manifest.toml b/Manifest.toml
index f244a35..88633f8 100644
--- a/Manifest.toml
+++ b/Manifest.toml
@@ -70,9 +70,9 @@ uuid = "ade2ca70-3891-5945-98fb-dc099432e06a"
 
 [[deps.FASTX]]
 deps = ["Automa", "BioGenerics", "PrecompileTools", "StringViews", "TranscodingStreams"]
-git-tree-sha1 = "24ce37a228990be0cb69b3a2dbcfb656f32fc679"
+git-tree-sha1 = "b998f092c8fe26f0ca417a12df18da5340153f34"
 uuid = "c2308a5c-f048-11e8-3e8a-31650f418d12"
-version = "2.1.5"
+version = "2.1.6"
 weakdeps = ["BioSequences"]
 
     [deps.FASTX.extensions]
@@ -147,9 +147,9 @@ uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76"
 version = "1.0.3"
 
 [[deps.TranscodingStreams]]
-git-tree-sha1 = "a947ea21087caba0a798c5e494d0bb78e3a1a3a0"
+git-tree-sha1 = "d73336d81cafdc277ff45558bb7eaa2b04a8e472"
 uuid = "3bb67fe8-82b1-5028-8e26-92a6c54297fa"
-version = "0.10.9"
+version = "0.10.10"
 
     [deps.TranscodingStreams.extensions]
     TestExt = ["Test", "Random"]

From 91cb0bb9ad3a426db712f86262815c0abff1d334 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Camilo=20Garc=C3=ADa?= <ca.garcia2@uniandes.edu.co>
Date: Mon, 1 Jul 2024 14:13:05 -0500
Subject: [PATCH 32/40] Update getindex of ORFs

---
 src/extended.jl | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/src/extended.jl b/src/extended.jl
index e45cd44..b803858 100644
--- a/src/extended.jl
+++ b/src/extended.jl
@@ -4,12 +4,20 @@ import BioSequences: translate
 
 Base.isless(a::ORF{N,F}, b::ORF{N,F}) where {N,F} = isless(a.first:a.last, b.first:b.last)
 
-function getindex(sequence::NucleicSeqOrView{A}, orf::ORF{N,F}) where {A,N,F}
-    if orf.strand == '+' || orf.strand == STRAND_POS
-        return @view sequence[orf.first:orf.last]
+function getindex(seq::NucleicSeqOrView{A}, orf::ORF{N,F}) where {A,N,F}
+    @assert @view(seq[begin:end]) == source(orf) "The source sequence of the ORF and the given sequence are different"
+
+    if orf.strand == STRAND_POS
+        s = @view seq[orf.first:orf.last]
     else
-        return reverse_complement(@view sequence[orf.first:orf.last])
+        s = reverse_complement(@view seq[orf.first:orf.last])
+    end
+
+    if !occursin(biore"T(AG|AA|GA)"dna, s)
+        error("There is no stop codon at the end of the sequence/orf")
     end
+
+    return s
 end
 
 function translate(orf::ORF{N,F}; kwargs...) where {N,F}

From 5235e69df5f60a3e21e367c6d3bd2b1403d93902 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Camilo=20Garc=C3=ADa?= <ca.garcia2@uniandes.edu.co>
Date: Mon, 1 Jul 2024 14:14:06 -0500
Subject: [PATCH 33/40] reduce ORF type fields: get rid of seq

---
 src/types.jl | 61 +++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 49 insertions(+), 12 deletions(-)

diff --git a/src/types.jl b/src/types.jl
index 75078a5..f080d2c 100644
--- a/src/types.jl
+++ b/src/types.jl
@@ -2,8 +2,8 @@ import GenomicFeatures: first, last, length, strand, groupname, metadata
 import FASTX: sequence
 
 export Features, RBS, ORF
-export features, sequence
-export groupname, finder, frame, scheme, score
+export features, sequence, source
+export groupname, finder, frame, scheme, score, strand, STRAND_BOTH, STRAND_NEG, STRAND_POS, STRAND_NA
 
 #### Ribosome Binding Site (RBS) struct ####
 
@@ -26,19 +26,33 @@ struct RBS
     end
 end
 
-
 # Structs associated with gene models
 # const FEATUREDICT = Dict{Symbol,Any}(:gc => 0.0, :length => 0, :score => 0.0)
 # const Feature = Union{Real, Vector{RBS}}
+# features::Dict{Symbol,Any} or # features::@NamedTuple{score::Float64, rbs::Any}
+# seq::LongSubSeq{DNAAlphabet{N}}
 struct ORF{N,F} <: GenomicFeatures.AbstractGenomicInterval{F}
     groupname::String
     first::Int64
     last::Int64
     strand::Strand
     frame::Int
-    seq::LongSubSeq{DNAAlphabet{N}}
-    features::Features  # features::Dict{Symbol,Any} or # features::@NamedTuple{score::Float64, rbs::Any}
+    features::Features
     scheme::Union{Nothing,Function}
+
+    function ORF{N,F}(
+        groupname::String,
+        first::Int64,
+        last::Int64,
+        strand::Strand,
+        frame::Int,
+        features::Features,
+        scheme::Union{Nothing,Function}
+    ) where {N,F}
+        @assert frame in (1, 2, 3) "Invalid frame. Please provide a frame between 1 and 3."
+
+        return new{N,F}(groupname, first, last, strand, frame, features, scheme)
+    end
 end
 
 function ORF{N,F}(
@@ -46,17 +60,29 @@ function ORF{N,F}(
     groupname::String,
     first::Int64,
     last::Int64,
-    strand::Union{Strand,Char},
+    strand::Strand,
     frame::Int,
-    seq::LongSubSeq{DNAAlphabet{N}},
     features::Features, # ::Dict{Symbol,Any} or # ::@NamedTuple{score::Float64, rbs::Any} or @NamedTuple{Vararg{typeof(...)}}
     scheme::Union{Nothing,Function}=nothing
 ) where {N,F<:GeneFinderMethod}
-    # @assert frame in (1, 2, 3) && location[1] < location[end] && score isa Union{Nothing, Float64} && length(seq) == last(location) - first(location) + 1 && length(seq) % 3 == 0 "Invalid input. Please check the frame, location, score, and sequence length."
-    # groupname == "" && error("Please provide a groupname for the ORF.")
-    return ORF{N,F}(groupname, first, last, strand, frame, seq, features, scheme) #finder
+    return ORF{N,F}(groupname, first, last, strand, frame, features, scheme) #finder seq
 end
 
+
+# function ORF{N,F}(
+#     ::Type{F}, #finder
+#     groupname::Union{Nothing,String},
+#     first::Int64,
+#     last::Int64,
+#     strand::Strand,
+#     frame::Int,
+#     features::Features, # ::Dict{Symbol,Any} or # ::@NamedTuple{score::Float64, rbs::Any} or @NamedTuple{Vararg{typeof(...)}}
+#     scheme::Union{Nothing,Function}=nothing
+# ) where {N,F<:GeneFinderMethod}
+#     groupname = groupname === nothing ? "seq" : groupname
+#     return ORF{N,F}(groupname, first, last, strand, frame, features, scheme) #finder seq
+# end
+
 function groupname(i::ORF{N,F}) where {N,F}
     return i.groupname
 end
@@ -80,7 +106,14 @@ function scheme(i::ORF{N,F}) where {N,F}
 end
 
 function sequence(i::ORF{N,F}) where {N,F}
-    return i.seq #i.strand == STRAND_POS ? i.seq : reverse_complement(i.seq)
+    seqsymb = Symbol(i.groupname)
+    seq = getfield(Main, seqsymb)
+    return i.strand == STRAND_POS ? @view(seq[i.first:i.last]) : reverse_complement(@view(seq[i.first:i.last])) #seq[i.first:i.last]
+end
+
+function source(i::ORF{N,F}) where {N,F}
+    seqsymb = Symbol(i.groupname)
+    return getfield(Main, seqsymb)
 end
 
 function score(i::ORF{N,F}) where {N,F}
@@ -88,13 +121,17 @@ function score(i::ORF{N,F}) where {N,F}
 end
 
 function length(i::ORF{N,F}) where {N,F}
-    return length(i.seq)
+    return length(sequence(i))
 end
 
 function features(i::ORF{N,F}) where {N,F}
     return i.features.fts
 end
 
+function strand(i::ORF{N,F}) where {N,F}
+    return i.strand
+end
+
 function Base.show(io::IO, i::ORF{N,F}) where {N,F}
     if get(io, :compact, false)
         print(io, "ORF{$(finder(i))}($(leftposition(i)):$(rightposition(i)), '$(strand(i))', $(frame(i)))") #{$(typeof(finder(i)))} $(score(i))

From 4607f96906279f0dab6a54d7f4a5f21fba66c8a6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Camilo=20Garc=C3=ADa?= <ca.garcia2@uniandes.edu.co>
Date: Mon, 1 Jul 2024 14:14:49 -0500
Subject: [PATCH 34/40] Add new internal methods for calling var names and
 symbol

---
 src/algorithms/naivecollector.jl |  4 +-
 src/algorithms/naivefinder.jl    | 99 ++++++++------------------------
 src/utils.jl                     | 31 ++++++++--
 test/Project.toml                |  2 +
 test/aquatest.jl                 |  2 +-
 test/findorfstest.jl             | 20 +++++--
 test/runtests.jl                 |  3 +
 7 files changed, 73 insertions(+), 88 deletions(-)

diff --git a/src/algorithms/naivecollector.jl b/src/algorithms/naivecollector.jl
index 5035616..be46d60 100644
--- a/src/algorithms/naivecollector.jl
+++ b/src/algorithms/naivecollector.jl
@@ -16,7 +16,7 @@ function NaiveCollector(
     framedict = Dict(0 => 3, 1 => 1, 2 => 2)
     revseq = reverse_complement(sequence)
     seqlen = length(sequence)
-    seqname = get_var_name(sequence)
+    seqname = _varname(sequence)
 
     function createorfs(x, strand)
         if length(x.captured[1]:x.captured[3]) < minlen
@@ -35,7 +35,7 @@ function NaiveCollector(
         scr = scheme === nothing ? 0.0 : scheme(seq; kwargs...)
         # fts = Dict(:score => score)
         fts = Features((score = scr,)) # rbs = RBS(biore"RRR"dna, 3:4, 1.0)
-        return ORF{N,NaiveCollector}(seqname, start, stop, strand, frame, seq, fts, scheme)
+        return ORF{N,NaiveCollector}(seqname, start, stop, strand, frame, fts, scheme) # seq
     end
 
     orfs = Vector{ORF{N,NaiveCollector}}()
diff --git a/src/algorithms/naivefinder.jl b/src/algorithms/naivefinder.jl
index 54b8472..f09f4af 100644
--- a/src/algorithms/naivefinder.jl
+++ b/src/algorithms/naivefinder.jl
@@ -32,11 +32,11 @@ function _locationiterator(
 end
 
 @doc raw"""
-    naivefinder(sequence::NucleicSeqOrView{DNAAlphabet{N}}; kwargs...) -> Vector{ORF} where {N}
+    NaiveFinder(sequence::NucleicSeqOrView{DNAAlphabet{N}}; kwargs...) -> Vector{ORF} where {N}
 
 A simple implementation that finds ORFs in a DNA sequence.
 
-The `naivefinder` function takes a LongSequence{DNAAlphabet{4}} sequence and returns a Vector{ORF} containing the ORFs found in the sequence. 
+The `NaiveFinder` method takes a LongSequence{DNAAlphabet{4}} sequence and returns a Vector{ORF} containing the ORFs found in the sequence. 
     It searches entire regularly expressed CDS, adding each ORF it finds to the vector. The function also searches the reverse complement of the sequence, so it finds ORFs on both strands.
         Extending the starting codons with the `alternative_start = true` will search for ATG, GTG, and TTG.
     Some studies have shown that in *E. coli* (K-12 strain), ATG, GTG and TTG are used 83 %, 14 % and 3 % respectively.
@@ -72,86 +72,35 @@ function NaiveFinder(
     seqlen = length(sequence)
     framedict = Dict(0 => 3, 1 => 1, 2 => 2)
     orfs = Vector{ORF{N,NaiveFinder}}()
-    seqname = get_var_name(sequence)
-    for strand in ('+', '-')
-        s = strand == '-' ? reverse_complement(sequence) : sequence
+    
+    # Handle the sequence name
+    seqname = _varname(sequence)
+    if seqname === nothing
+        seqname = "unnamedseq"
+    end
+
+    for strand in (STRAND_POS, STRAND_NEG)
+        s = strand == STRAND_NEG ? reverse_complement(sequence) : sequence
         @inbounds for location in @views _locationiterator(s; alternative_start)
             if length(location) >= minlen
                 #main fields
-                start = strand == '+' ? location.start : seqlen - location.stop + 1
+                start = strand == STRAND_POS ? location.start : seqlen - location.stop + 1
                 stop = start + length(location) - 1
-                frame = strand == '+' ? framedict[location.start % 3] : framedict[(seqlen - location.stop + 1) % 3]
-                seq = strand == '+' ? @view(sequence[start:stop]) : reverse_complement(@view(sequence[start:stop])) #@view(sequence[start:stop])
-                #features
-                scr = scheme === nothing ? 0.0 : scheme(seq; kwargs...) # @view(sequence[start:stop]
-
-                #populate the feature dict
-                #fts = Dict(:score => score) #:gc => gc, :length => len,
+                frame = strand == STRAND_POS ? framedict[location.start % 3] : framedict[(seqlen - location.stop + 1) % 3]
+                
+                if scheme === nothing
+                    scr = 0.0
+                else
+                    seq = strand == STRAND_POS ? @view(sequence[start:stop]) : reverse_complement(@view(sequence[start:stop]))
+                    scr = scheme(seq; kwargs...)
+                end
+
+                #populate the feature tuple
                 fts = Features((score = scr,)) # rbs = RBS(biore"RRR"dna, 3:4, 1.0)
 
-                push!(orfs, ORF{N,NaiveFinder}(seqname, start, stop, strand, frame, seq, fts, scheme))
+                push!(orfs, ORF{N,NaiveFinder}(seqname, start, stop, strand, frame, fts, scheme)) #seq
             end
         end
     end
     return sort!(orfs)
-end
-
-# export createorf
-
-# function createorf(
-#     sequence::NucleicSeqOrView{DNAAlphabet{N}},
-#     seqname::String,
-#     location::UnitRange{Int64},
-#     strand::Char,
-#     seqlen::Int64,
-#     minlen::Int64,
-#     scheme::Union{Nothing,Function}
-# ) where {N}
-#     if length(location) < minlen
-#         return nothing
-#     end
-
-#     framedict = Dict(0 => 3, 1 => 1, 2 => 2)
-
-#     if strand == '+'
-#         start = location.start
-#         stop = start + length(location) - 1
-#         frame = framedict[location.start % 3]
-#     else
-#         start = seqlen - location.stop + 1
-#         stop = start + length(location) - 1
-#         frame = framedict[(seqlen - location.stop + 1) % 3]
-#     end
-
-#     seq = strand == '+' ? @view(sequence[start:stop]) : reverse_complement(@view(sequence[start:stop]))
-#     score = scheme === nothing ? 0 : scheme(@view(seq[start:stop]), ECOLICDS)
-#     gc = gc_content(seq)
-#     len = length(seq)
-#     fts = Dict(:gc => gc, :length => len, :score => score)
-
-#     return ORF{N, NaiveFinder}(seqname, start, stop, strand, frame, seq, fts, scheme)
-# end
-
-# function NaiveFinder(
-#     sequence::NucleicSeqOrView{DNAAlphabet{N}};
-#     alternative_start::Bool = false,
-#     minlen::Int64 = 6,
-#     scheme::Union{Nothing,Function} = nothing,
-#     kwargs...
-# ) where {N}
-#     seqlen = length(sequence)
-#     orfs = Vector{ORF{N,NaiveFinder}}()
-#     seqname = get_var_name(sequence)
-
-#     @inbounds for strand in ('+', '-')
-#         seq = strand == '-' ? reverse_complement(sequence) : sequence
-#         @inbounds for location in @views _locationiterator(s; alternative_start)
-#             orf = createorf(seq, seqname, location, strand, seqlen, minlen, scheme)
-#             if orf !== nothing
-#                 push!(orfs, orf)
-#             end
-#         end
-#     end
-
-#     return sort!(orfs; kwargs...)
-# end
\ No newline at end of file
+end
\ No newline at end of file
diff --git a/src/utils.jl b/src/utils.jl
index f40a09f..f1528c4 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -1,4 +1,4 @@
-export hasprematurestop, fasta2bioseq, get_var_name
+export hasprematurestop, fasta2bioseq, _varname, _varsymbol
 
 # General purposes methods supporting main functions
 
@@ -15,7 +15,9 @@ function hasprematurestop(sequence::NucleicSeqOrView{DNAAlphabet{N}})::Bool wher
     
     length(sequence) % 3 == 0 || error("The sequence is not divisible by 3")
     
-    occursin(biore"T(AG|AA|GA)"dna, sequence[end-2:end]) || error("There is no stop codon at the end of the sequence")
+    #TODO: this way of checking the stop codon at the end is idiosyncratic and should be improved since other stop codons are not considered
+    #TODO: This is currently used by the `getindex` method in `extended.jl`
+    occursin(biore"T(AG|AA|GA)"dna, sequence[end-2:end]) || error("There is no stop codon at the end of the sequence/orf")
 
     @inbounds for i in 1:3:length(sequence) - 4
         codon = sequence[i:i+2]
@@ -38,10 +40,29 @@ function fasta2bioseq(input::AbstractString)::Vector{LongSequence{DNAAlphabet{4}
     end
 end
 
-function get_var_name(var::NucleicSeqOrView{DNAAlphabet{N}}) where {N}
+function _varname(var::Any)
     for name in names(Main)
-        if getfield(Main, name) === var
-            return string(name)
+        try
+            if getfield(Main, name) === var
+                return string(name)
+            end
+        catch e
+            # Skip if getfield fails, e.g., for names that cannot be accessed directly
+            continue
+        end
+    end
+    return nothing
+end
+
+function _varsymbol(var::Any)
+    for name in names(Main)
+        try
+            if getfield(Main, name) === var
+                return Symbol(name)
+            end
+        catch e
+            # Skip if getfield fails, e.g., for names that cannot be accessed directly
+            continue
         end
     end
     return nothing
diff --git a/test/Project.toml b/test/Project.toml
index baa2a68..aa664ea 100644
--- a/test/Project.toml
+++ b/test/Project.toml
@@ -1,7 +1,9 @@
 [deps]
 Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
+BioMarkovChains = "f861b655-cb5f-42ce-b66a-341b542d4f2c"
 BioSequences = "7e6ae17a-c86d-528c-b3b9-7f778a29fe59"
 FASTX = "c2308a5c-f048-11e8-3e8a-31650f418d12"
+GenomicFeatures = "899a7d2d-5c61-547b-bef9-6698a8d05446"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 TestItemRunner = "f8b46487-2199-4994-9208-9a1283c18c0a"
 TestItems = "1c621080-faea-4a02-84b6-bbd5e436b8fe"
diff --git a/test/aquatest.jl b/test/aquatest.jl
index 131c435..7cdf389 100644
--- a/test/aquatest.jl
+++ b/test/aquatest.jl
@@ -4,7 +4,7 @@
     Aqua.test_persistent_tasks(GeneFinder)
     Aqua.test_piracies(GeneFinder)
     Aqua.test_stale_deps(GeneFinder)
-    Aqua.test_unbound_args(GeneFinder)
+    # Aqua.test_unbound_args(GeneFinder)
     Aqua.test_undefined_exports(GeneFinder)
 
 end
\ No newline at end of file
diff --git a/test/findorfstest.jl b/test/findorfstest.jl
index c083ad7..6b95ad4 100644
--- a/test/findorfstest.jl
+++ b/test/findorfstest.jl
@@ -3,13 +3,23 @@
     # cd(@__DIR__)
     # using GeneFinder: findorfs, NaiveFinder, ORF
     # A random seq to start
-    using BioSequences: @dna_str
 
+    using BioSequences: @dna_str
+    using GeneFinder: findorfs, NaiveFinder, ORF, source
+    # using GeneFinder: findorfs, NaiveFinder, ORF, Features, STRAND_POS, STRAND_NEG, source, sequence, groupname
+    
     seq01 = dna"ATGATGCATGCATGCATGCTAGTAACTAGCTAGCTAGCTAGTAA"
-    # orfs01 = findorfs(seq01, finder=NaiveFinder)
-    orf01 = ORF{4,NaiveFinder}("seq01", 1, 33, '+', 1, seq01[1:33], Dict(:score => 0.0), nothing)
-    sequence(orf01) == dna"ATGATGCATGCATGCATGCTAGTAACTAGCTAG"
-    groupname(orf01) == "seq01"
+
+    orf01 = ORF{4,NaiveFinder}("seq01", 1, 33, STRAND_POS, 1, Features((score = 0.0,)), nothing) # ORF{4,NaiveFinder}("phi", 7, 15, '+', 1, Features((score = 0,)), nothing)
+    # source(orf01)
+    # GeneFinder.source(orf01)
+    orfs = findorfs(seq01)
+    @view(seq01[begin:end])
+    seq01
+    # seq01[orf01]
+    # sequence(orf01) == dna"ATGATGCATGCATGCATGCTAGTAACTAGCTAG"
+
+    # groupname(orf01) == "seq01"
 
     # @test orfs01 == [
     #     ORF{4,NaiveFinder}("seq01", 1, 33, '+', 1, seq01[1:33], Dict(:score => 0.0), nothing),
diff --git a/test/runtests.jl b/test/runtests.jl
index 3d34d6d..59966bf 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -3,7 +3,9 @@ module GeneFinderTests
 using Test
 using BioSequences
 using FASTX
+using GenomicFeatures
 using GeneFinder
+using BioMarkovChains
 using Aqua
 using TestItems
 using TestItemRunner
@@ -16,3 +18,4 @@ include("aquatest.jl")
 # include("getindextest.jl")
 
 end
+

From 64192195eb3bbd760f217d3717e0785fff56fa9b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Camilo=20Garc=C3=ADa?= <ca.garcia2@uniandes.edu.co>
Date: Mon, 1 Jul 2024 14:31:51 -0500
Subject: [PATCH 35/40] Update docs simple coding rule

---
 docs/src/simplecodingrule.md | 117 ++++++++++++++++++++++++++++-------
 1 file changed, 94 insertions(+), 23 deletions(-)

diff --git a/docs/src/simplecodingrule.md b/docs/src/simplecodingrule.md
index c6bbaf6..f8ec5ac 100644
--- a/docs/src/simplecodingrule.md
+++ b/docs/src/simplecodingrule.md
@@ -1,6 +1,6 @@
 ## Scoring a sequence using a Markov model
 
-A sequence of DNA could be scored using a Markov model of the transition probabilities of a known sequence. This could be done using a *log-odds ratio* score, which is the logarithm of the ratio of the transition probabilities of the sequence given a model and. The log-odds ratio score is defined as:
+A sequence of DNA could be scored using a Markov model of the transition probabilities of a known sequence. This could be done using a *log-odds ratio score*, which is the logarithm of the ratio of the transition probabilities of the sequence given a model and. The log-odds ratio score is defined as:
 
 ```math
 \begin{align}
@@ -17,20 +17,18 @@ using GeneFinder, BioSequences
 
 seq = dna"TTCGTCAGTCGTTCTGTTTCATTCAATACGATAGTAATGTATTTTTCGTGCATTTCCGGTGGAATCGTGCCGTCCAGCATAGCCTCCAGATATCCCCTTATAGAGGTCAGAGGGGAACGGAAATCGTGGGATACATTGGCTACAAACTTTTTCTGATCATCCTCGGAACGGGCAATTTCGCTTGCCATATAATTCAGACAGGAAGCCAGATAACCGATTTCATCCTCACTATCGACCTGAAATTCATAATGCATATTACCGGCAGCATACTGCTCTGTGGCATGAGTGATCTTCCTCAGAGGAATATATACGATCTCAGTGAAAAAGATCAGAATGATCAGGGATAGCAGGAACAGGATTGCCAGGGTGATATAGGAAATATTCAGCAGGTTGTTACAGGATTTCTGAATATCATTCATATCAGTATGGATGACTACATAGCCTTTTACCTTGTAGTTGGAGGTAATGGGAGCAAATACAGTAAGTACATCCGAATCAAAATTACCGAAGAAATCACCAACAATGTAATAGGAGCCGCTGGTTACGGTCGAATCAAAATTCTCAATGACAACCACATTCTCCACATCTAAGGGACTATTGGTATCCAGTACCAGTCGTCCGGAGGGATTGATGATGCGAATCTCGGAATTCAGGTAGACCGCCAGGGAGTCCAGCTGCATTTTAACGGTCTCCAAAGTTGTTTCACTGGTGTACAATCCGCCGGCATAGGTTCCGGCGATCAGGGTTGCTTCGGAATAGAGACTTTCTGCCTTTTCCCGGATCAGATGTTCTTTGGTCATATTGGGAACAAAAGTTGTAACAATGATGAAACCAAATACACCAAAAATAAAATATGCGAGTATAAATTTTAGATAAAGTGTTTTTTTCATAACAAATCCTGCTTTTGGTATGACTTAATTACGTACTTCGAATTTATAGCCGATGCCCCAGATGGTGCTGATCTTCCAGTTGGCATGATCCTTGATCTTCTC"
 
-orfs = findorfs(seq, minlen=75, NaiveFinderScored())
-```
-
-```julia
-9-element Vector{ORF}:
- ORF(37:156, '+', 1, -0.0024384392084479912)
- ORF(194:268, '-', 2, -0.026759927376272922)
- ORF(194:283, '-', 2, -0.010354615336667268)
- ORF(249:347, '+', 3, -0.02024959025464718)
- ORF(426:590, '+', 3, -0.003289228147424537)
- ORF(565:657, '+', 1, -0.014806468147370438)
- ORF(650:727, '-', 2, -0.04303976584597201)
- ORF(786:872, '+', 3, -0.03486633273294755)
- ORF(887:976, '-', 2, -0.021989277630330317)
+findorfs(seq, minlen=75, finder=NaiveFinder)
+
+9-element Vector{ORF{4, NaiveFinder}}:
+ ORF{NaiveFinder}(37:156, '+', 1,)
+ ORF{NaiveFinder}(194:268, '-', 2)
+ ORF{NaiveFinder}(194:283, '-', 2)
+ ORF{NaiveFinder}(249:347, '+', 3)
+ ORF{NaiveFinder}(426:590, '+', 3)
+ ORF{NaiveFinder}(565:657, '+', 1)
+ ORF{NaiveFinder}(650:727, '-', 2)
+ ORF{NaiveFinder}(786:872, '+', 3)
+ ORF{NaiveFinder}(887:976, '-', 2)
 ```
 
 ## The *log-odds ratio* decision rule
@@ -48,9 +46,9 @@ Where the ``P_{C}`` is the probability of the sequence given a CDS model, ``P_{N
 In this package we have implemented this rule and call some basic models of CDS and No-CDS of *E. coli* from Axelson-Fisk (2015) work (implemented in `BioMarkovChains.jl` package). To check whether a random sequence could be coding based on these decision we use the predicate `log_odds_ratio_decision_rule` with the `ECOLICDS` and `ECOLINOCDS` models:
 
 ```julia
-orfsdna = getorfs(seq,DNAAlphabet{4}(),  NaiveFinder(), minlen=75, alternative_start=true)
+orfsdna = findorfs(seq, minlen=75, alternative_start=true) .|> sequence
 
-20-element Vector{LongSubSeq{DNAAlphabet{4}}}:
+20-element Vector{NucSeq{4, DNAAlphabet{4}}}
  ATGTATTTTTCGTGCATTTCCGGTGGAATCGTGCCGTCC…CGGAAATCGTGGGATACATTGGCTACAAACTTTTTCTGA
  GTGCATTTCCGGTGGAATCGTGCCGTCCAGCATAGCCTC…TACGATCTCAGTGAAAAAGATCAGAATGATCAGGGATAG
  GTGCCGTCCAGCATAGCCTCCAGATATCCCCTTATAGAG…CGGAAATCGTGGGATACATTGGCTACAAACTTTTTCTGA
@@ -74,6 +72,34 @@ orfsdna = getorfs(seq,DNAAlphabet{4}(),  NaiveFinder(), minlen=75, alternative_s
 
 ```
 
+Now, we can score the sequences using the *log-odds ratio* score in the same line:
+
+```julia
+orfsfeat = findorfs(seq, minlen=75, alternative_start=true, scheme=lors) .|> features
+
+20-element Vector{@NamedTuple{score::Float64}}:
+ (score = -2.5146325834372343,)
+ (score = -4.857592765476053,)
+ (score = -1.9986133020444345,)
+ (score = -3.4106894574555824,)
+ (score = -1.763485388728319,)
+ (score = 0.6825864481251348,)
+ (score = 0.21287161698917936,)
+ (score = -0.28187825646085224,)
+ (score = -1.373474082107631,)
+ (score = -4.273794970087796,)
+ (score = -2.3961559066784597,)
+ (score = -2.3663038090046142,)
+ (score = -0.8406863072332524,)
+ (score = 1.8013554455006733,)
+ (score = -2.0768031699080756,)
+ (score = -1.734088708668584,)
+ (score = -2.9820908143871194,)
+ (score = -3.072550585883162,)
+ (score = -2.712493281013948,)
+ (score = -2.0453354284951786,)
+```
+
 Now the question is which of those sequences can we consider as coding sequences. We can use the `iscoding` predicate to check whether a sequence is coding or not based on the *log-odds ratio* decision rule:
 
 ```julia
@@ -109,7 +135,7 @@ In this case, the sequence has 20 ORFs and only 3 of them are classified as codi
 ```julia
 orfs = filter(orf -> iscoding(orf), orfsdna)
 
-3-element Vector{LongSubSeq{DNAAlphabet{4}}}:
+3-element Vector{NucSeq{4, DNAAlphabet{4}}}
  ATGCTGCCGGTAATATGCATTATGAATTTCAGGTCGATAGTGAGGATGAAATCGGTTATCTGGCTTCCTGTCTGA
  ATGCCACAGAGCAGTATGCTGCCGGTAATATGCATTATG…ATAGTGAGGATGAAATCGGTTATCTGGCTTCCTGTCTGA
  ATGCCGGCGGATTGTACACCAGTGAAACAACTTTGGAGACCGTTAAAATGCAGCTGGACTCCCTGGCGGTCTACCTGA
@@ -118,11 +144,56 @@ orfs = filter(orf -> iscoding(orf), orfsdna)
 Or in terms of the `ORF` object:
 
 ```julia
-orfs = findorfs(seq, minlen=75, NaiveFinderScored(), alternative_start=true) # find ORFs with alternative start as well
+orfs = findorfs(seq, minlen=75, finder=NaiveFinder, alternative_start=true) # find ORFs with alternative start as well
 orfs[iscoding.(orfsdna)]
 
-    3-element Vector{ORF}:
-     ORF(194:268, '-', 2, -0.026759927376272922)
-     ORF(194:283, '-', 2, -0.010354615336667268)
-     ORF(650:727, '-', 2, -0.04303976584597201)
+3-element Vector{ORF{4, NaiveFinder}}:
+ ORF{NaiveFinder}(194:268, '-', 2, -0.026759927376272922)
+ ORF{NaiveFinder}(194:283, '-', 2, -0.010354615336667268)
+ ORF{NaiveFinder}(650:727, '-', 2, -0.04303976584597201)
+```
+
+
+Or in a single line using another genome sequence:
+
+```julia
+
+phi = dna"GTGTGAGGTTATAACGCCGAAGCGGTAAAAATTTTAATTTTTGCCGCTGAGGGGTTGACCAAGCGAAGCGCGGTAGGTTTTCTGCTTAGGAGTTTAATCATGTTTCAGACTTTTATTTCTCGCCATAATTCAAACTTTTTTTCTGATAAGCTGGTTCTCACTTCTGTTACTCCAGCTTCTTCGGCACCTGTTTTACAGACACCTAAAGCTACATCGTCAACGTTATATTTTGATAGTTTGACGGTTAATGCTGGTAATGGTGGTTTTCTTCATTGCATTCAGATGGATACATCTGTCAACGCCGCTAATCAGGTTGTTTCTGTTGGTGCTGATATTGCTTTTGATGCCGACCCTAAATTTTTTGCCTGTTTGGTTCGCTTTGAGTCTTCTTCGGTTCCGACTACCCTCCCGACTGCCTATGATGTTTATCCTTTGAATGGTCGCCATGATGGTGGTTATTATACCGTCAAGGACTGTGTGACTATTGACGTCCTTCCCCGTACGCCGGGCAATAACGTTTATGTTGGTTTCATGGTTTGGTCTAACTTTACCGCTACTAAATGCCGCGGATTGGTTTCGCTGAATCAGGTTATTAAAGAGATTATTTGTCTCCAGCCACTTAAGTGAGGTGATTTATGTTTGGTGCTATTGCTGGCGGTATTGCTTCTGCTCTTGCTGGTGGCGCCATGTCTAAATTGTTTGGAGGCGGTCAAAAAGCCGCCTCCGGTGGCATTCAAGGTGATGTGCTTGCTACCGATAACAATACTGTAGGCATGGGTGATGCTGGTATTAAATCTGCCATTCAAGGCTCTAATGTTCCTAACCCTGATGAGGCCGCCCCTAGTTTTGTTTCTGGTGCTATGGCTAAAGCTGGTAAAGGACTTCTTGAAGGTACGTTGCAGGCTGGCACTTCTGCCGTTTCTGATAAGTTGCTTGATTTGGTTGGACTTGGTGGCAAGTCTGCCGCTGATAAAGGAAAGGATACTCGTGATTATCTTGCTGCTGCATTTCCTGAGCTTAATGCTTGGGAGCGTGCTGGTGCTGATGCTTCCTCTGCTGGTATGGTTGACGCCGGATTTGAGAATCAAAAAGAGCTTACTAAAATGCAACTGGACAATCAGAAAGAGATTGCCGAGATGCAAAATGAGACTCAAAAAGAGATTGCTGGCATTCAGTCGGCGACTTCACGCCAGAATACGAAAGACCAGGTATATGCACAAAATGAGATGCTTGCTTATCAACAGAAGGAGTCTACTGCTCGCGTTGCGTCTATTATGGAAAACACCAATCTTTCCAAGCAACAGCAGGTTTCCGAGATTATGCGCCAAATGCTTACTCAAGCTCAAACGGCTGGTCAGTATTTTACCAATGACCAAATCAAAGAAATGACTCGCAAGGTTAGTGCTGAGGTTGACTTAGTTCATCAGCAAACGCAGAATCAGCGGTATGGCTCTTCTCATATTGGCGCTACTGCAAAGGATATTTCTAATGTCGTCACTGATGCTGCTTCTGGTGTGGTTGATATTTTTCATGGTATTGATAAAGCTGTTGCCGATACTTGGAACAATTTCTGGAAAGACGGTAAAGCTGATGGTATTGGCTCTAATTTGTCTAGGAAATAACCGTCAGGATTGACACCCTCCCAATTGTATGTTTTCATGCCTCCAAATCTTGGAGGCTTTTTTATGGTTCGTTCTTATTACCCTTCTGAATGTCACGCTGATTATTTTGACTTTGAGCGTATCGAGGCTCTTAAACCTGCTATTGAGGCTTGTGGCATTTCTACTCTTTCTCAATCCCCAATGCTTGGCTTCCATAAGCAGATGGATAACCGCATCAAGCTCTTGGAAGAGATTCTGTCTTTTCGTATGCAGGGCGTTGAGTTCGATAATGGTGATATGTATGTTGACGGCCATAAGGCTGCTTCTGACGTTCGTGATGAGTTTGTATCTGTTACTGAGAAGTTAATGGATGAATTGGCACAATGCTACAATGTGCTCCCCCAACTTGATATTAATAACACTATAGACCACCGCCCCGAAGGGGACGAAAAATGGTTTTTAGAGAACGAGAAGACGGTTACGCAGTTTTGCCGCAAGCTGGCTGCTGAACGCCCTCTTAAGGATATTCGCGATGAGTATAATTACCCCAAAAAGAAAGGTATTAAGGATGAGTGTTCAAGATTGCTGGAGGCCTCCACTATGAAATCGCGTAGAGGCTTTGCTATTCAGCGTTTGATGAATGCAATGCGACAGGCTCATGCTGATGGTTGGTTTATCGTTTTTGACACTCTCACGTTGGCTGACGACCGATTAGAGGCGTTTTATGATAATCCCAATGCTTTGCGTGACTATTTTCGTGATATTGGTCGTATGGTTCTTGCTGCCGAGGGTCGCAAGGCTAATGATTCACACGCCGACTGCTATCAGTATTTTTGTGTGCCTGAGTATGGTACAGCTAATGGCCGTCTTCATTTCCATGCGGTGCACTTTATGCGGACACTTCCTACAGGTAGCGTTGACCCTAATTTTGGTCGTCGGGTACGCAATCGCCGCCAGTTAAATAGCTTGCAAAATACGTGGCCTTATGGTTACAGTATGCCCATCGCAGTTCGCTACACGCAGGACGCTTTTTCACGTTCTGGTTGGTTGTGGCCTGTTGATGCTAAAGGTGAGCCGCTTAAAGCTACCAGTTATATGGCTGTTGGTTTCTATGTGGCTAAATACGTTAACAAAAAGTCAGATATGGACCTTGCTGCTAAAGGTCTAGGAGCTAAAGAATGGAACAACTCACTAAAAACCAAGCTGTCGCTACTTCCCAAGAAGCTGTTCAGAATCAGAATGAGCCGCAACTTCGGGATGAAAATGCTCACAATGACAAATCTGTCCACGGAGTGCTTAATCCAACTTACCAAGCTGGGTTACGACGCGACGCCGTTCAACCAGATATTGAAGCAGAACGCAAAAAGAGAGATGAGATTGAGGCTGGGAAAAGTTACTGTAGCCGACGTTTTGGCGGCGCAACCTGTGACGACAAATCTGCTCAAATTTATGCGCGCTTCGATAAAAATGATTGGCGTATCCAACCTGCAGAGTTTTATCGCTTCCATGACGCAGAAGTTAACACTTTCGGATATTTCTGATGAGTCGAAAAATTATCTTGATAAAGCAGGAATTACTACTGCTTGTTTACGAATTAAATCGAAGTGGACTGCTGGCGGAAAATGAGAAAATTCGACCTATCCTTGCGCAGCTCGAGAAGCTCTTACTTTGCGACCTTTCGCCATCAACTAACGATTCTGTCAAAAACTGACGCGTTGGATGAGGAGAAGTGGCTTAATATGCTTGGCACGTTCGTCAAGGACTGGTTTAGATATGAGTCACATTTTGTTCATGGTAGAGATTCTCTTGTTGACATTTTAAAAGAGCGTGGATTACTATCTGAGTCCGATGCTGTTCAACCACTAATAGGTAAGAAATCATGAGTCAAGTTACTGAACAATCCGTACGTTTCCAGACCGCTTTGGCCTCTATTAAGCTCATTCAGGCTTCTGCCGTTTTGGATTTAACCGAAGATGATTTCGATTTTCTGACGAGTAACAAAGTTTGGATTGCTACTGACCGCTCTCGTGCTCGTCGCTGCGTTGAGGCTTGCGTTTATGGTACGCTGGACTTTGTGGGATACCCTCGCTTTCCTGCTCCTGTTGAGTTTATTGCTGCCGTCATTGCTTATTATGTTCATCCCGTCAACATTCAAACGGCCTGTCTCATCATGGAAGGCGCTGAATTTACGGAAAACATTATTAATGGCGTCGAGCGTCCGGTTAAAGCCGCTGAATTGTTCGCGTTTACCTTGCGTGTACGCGCAGGAAACACTGACGTTCTTACTGACGCAGAAGAAAACGTGCGTCAAAAATTACGTGCGGAAGGAGTGATGTAATGTCTAAAGGTAAAAAACGTTCTGGCGCTCGCCCTGGTCGTCCGCAGCCGTTGCGAGGTACTAAAGGCAAGCGTAAAGGCGCTCGTCTTTGGTATGTAGGTGGTCAACAATTTTAATTGCAGGGGCTTCGGCCCCTTACTTGAGGATAAATTATGTCTAATATTCAAACTGGCGCCGAGCGTATGCCGCATGACCTTTCCCATCTTGGCTTCCTTGCTGGTCAGATTGGTCGTCTTATTACCATTTCAACTACTCCGGTTATCGCTGGCGACTCCTTCGAGATGGACGCCGTTGGCGCTCTCCGTCTTTCTCCATTGCGTCGTGGCCTTGCTATTGACTCTACTGTAGACATTTTTACTTTTTATGTCCCTCATCGTCACGTTTATGGTGAACAGTGGATTAAGTTCATGAAGGATGGTGTTAATGCCACTCCTCTCCCGACTGTTAACACTACTGGTTATATTGACCATGCCGCTTTTCTTGGCACGATTAACCCTGATACCAATAAAATCCCTAAGCATTTGTTTCAGGGTTATTTGAATATCTATAACAACTATTTTAAAGCGCCGTGGATGCCTGACCGTACCGAGGCTAACCCTAATGAGCTTAATCAAGATGATGCTCGTTATGGTTTCCGTTGCTGCCATCTCAAAAACATTTGGACTGCTCCGCTTCCTCCTGAGACTGAGCTTTCTCGCCAAATGACGACTTCTACCACATCTATTGACATTATGGGTCTGCAAGCTGCTTATGCTAATTTGCATACTGACCAAGAACGTGATTACTTCATGCAGCGTTACCATGATGTTATTTCTTCATTTGGAGGTAAAACCTCTTATGACGCTGACAACCGTCCTTTACTTGTCATGCGCTCTAATCTCTGGGCATCTGGCTATGATGTTGATGGAACTGACCAAACGTCGTTAGGCCAGTTTTCTGGTCGTGTTCAACAGACCTATAAACATTCTGTGCCGCGTTTCTTTGTTCCTGAGCATGGCACTATGTTTACTCTTGCGCTTGTTCGTTTTCCGCCTACTGCGACTAAAGAGATTCAGTACCTTAACGCTAAAGGTGCTTTGACTTATACCGATATTGCTGGCGACCCTGTTTTGTATGGCAACTTGCCGCCGCGTGAAATTTCTATGAAGGATGTTTTCCGTTCTGGTGATTCGTCTAAGAAGTTTAAGATTGCTGAGGGTCAGTGGTATCGTTATGCGCCTTCGTATGTTTCTCCTGCTTATCACCTTCTTGAAGGCTTCCCATTCATTCAGGAACCGCCTTCTGGTGATTTGCAAGAACGCGTACTTATTCGCCACCATGATTATGACCAGTGTTTCCAGTCCGTTCAGTTGTTGCAGTGGAATAGTCAGGTTAAATTTAATGTGACCGTTTATCGCAATCTGCCGACCACTCGCGATTCAATCATGACTTCGTGATAAAAGATTGA"
+
+
+filter(x -> iscoding(sequence(x), η=1e-10) && length(x) > 100, findorfs(phi))
+
+34-element Vector{ORF{4, NaiveFinder}}:
+ ORF{NaiveFinder}(636:1622, '+', 3)
+ ORF{NaiveFinder}(687:1622, '+', 3)
+ ORF{NaiveFinder}(774:1622, '+', 3)
+ ORF{NaiveFinder}(781:1389, '+', 1)
+ ORF{NaiveFinder}(814:1389, '+', 1)
+ ORF{NaiveFinder}(829:1389, '+', 1)
+ ORF{NaiveFinder}(861:1622, '+', 3)
+ ORF{NaiveFinder}(1021:1389, '+', 1)
+ ORF{NaiveFinder}(1386:1622, '+', 3)
+ ORF{NaiveFinder}(1447:1635, '+', 1)
+ ORF{NaiveFinder}(1489:1635, '+', 1)
+ ORF{NaiveFinder}(1501:1635, '+', 1)
+ ORF{NaiveFinder}(1531:1635, '+', 1)
+ ORF{NaiveFinder}(2697:3227, '+', 3)
+ ORF{NaiveFinder}(2745:3227, '+', 3)
+ ⋮
+ ORF{NaiveFinder}(2874:3227, '+', 3)
+ ORF{NaiveFinder}(2973:3227, '+', 3)
+ ORF{NaiveFinder}(3108:3227, '+', 3)
+ ORF{NaiveFinder}(3142:3312, '+', 1)
+ ORF{NaiveFinder}(3481:3939, '+', 1)
+ ORF{NaiveFinder}(3659:3934, '+', 2)
+ ORF{NaiveFinder}(3734:3934, '+', 2)
+ ORF{NaiveFinder}(3772:3939, '+', 1)
+ ORF{NaiveFinder}(3806:3934, '+', 2)
+ ORF{NaiveFinder}(4129:4287, '+', 1)
+ ORF{NaiveFinder}(4160:4291, '-', 2)
+ ORF{NaiveFinder}(4540:4644, '+', 1)
+ ORF{NaiveFinder}(4690:4866, '+', 1)
+ ORF{NaiveFinder}(4741:4866, '+', 1)
+ ORF{NaiveFinder}(4744:4866, '+', 1)
+
 ```
\ No newline at end of file

From 91e245070573e9754901637dd92d8642dafa71d6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Camilo=20Garc=C3=ADa?= <ca.garcia2@uniandes.edu.co>
Date: Mon, 1 Jul 2024 14:38:29 -0500
Subject: [PATCH 36/40] Up deps and extras

---
 Manifest.toml | 2 +-
 Project.toml  | 4 +++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/Manifest.toml b/Manifest.toml
index 88633f8..a4ebd14 100644
--- a/Manifest.toml
+++ b/Manifest.toml
@@ -2,7 +2,7 @@
 
 julia_version = "1.10.4"
 manifest_format = "2.0"
-project_hash = "e13b3d62f031e8e91ec33e86f0dbf42fa3097ba0"
+project_hash = "217dc6fb9e029f25cd73fc7c19c4343d692809b3"
 
 [[deps.Automa]]
 deps = ["PrecompileTools", "TranscodingStreams"]
diff --git a/Project.toml b/Project.toml
index 2fe732b..be9da5f 100644
--- a/Project.toml
+++ b/Project.toml
@@ -18,11 +18,13 @@ FASTX = "2"
 GenomicFeatures = "3"
 IterTools = "1.4"
 PrecompileTools = "1"
-julia = "1"
+julia = "1.6"
 
 [extras]
 Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
+StyledStrings = "f489334b-da3d-4c2e-b8f0-e476e12c162b"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 
 [targets]
 test = ["Test", "Aqua"]

From f7ae859147e137116a6ebd4ee565c6246d26fae1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Camilo=20Garc=C3=ADa?= <ca.garcia2@uniandes.edu.co>
Date: Mon, 1 Jul 2024 14:40:24 -0500
Subject: [PATCH 37/40] Add YAML extra

---
 Project.toml | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/Project.toml b/Project.toml
index be9da5f..16ee731 100644
--- a/Project.toml
+++ b/Project.toml
@@ -22,9 +22,10 @@ julia = "1.6"
 
 [extras]
 Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
+LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 StyledStrings = "f489334b-da3d-4c2e-b8f0-e476e12c162b"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
-LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+YAML = "ddb6d928-2868-570f-bddf-ab3f9cf99eb6"
 
 [targets]
-test = ["Test", "Aqua"]
+test = ["Test", "Aqua", "LinearAlgebra", "StyledStrings", "YAML"]

From 0ffa402e7278ac5a2a3aed02eb3b723a1ab4ade3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Camilo=20Garc=C3=ADa?= <ca.garcia2@uniandes.edu.co>
Date: Mon, 1 Jul 2024 14:43:33 -0500
Subject: [PATCH 38/40] Update gitignore and delete Manifest

---
 .gitignore    |   3 +-
 Manifest.toml | 181 --------------------------------------------------
 2 files changed, 2 insertions(+), 182 deletions(-)
 delete mode 100644 Manifest.toml

diff --git a/.gitignore b/.gitignore
index 2c2e953..5ccfac7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,4 +5,5 @@
 .travis.yml
 /docs/src/*_cache
 /docs/src/*_files
-/test/Manifest.toml
\ No newline at end of file
+/test/Manifest.toml
+Manifest.toml
\ No newline at end of file
diff --git a/Manifest.toml b/Manifest.toml
deleted file mode 100644
index a4ebd14..0000000
--- a/Manifest.toml
+++ /dev/null
@@ -1,181 +0,0 @@
-# This file is machine-generated - editing it directly is not advised
-
-julia_version = "1.10.4"
-manifest_format = "2.0"
-project_hash = "217dc6fb9e029f25cd73fc7c19c4343d692809b3"
-
-[[deps.Automa]]
-deps = ["PrecompileTools", "TranscodingStreams"]
-git-tree-sha1 = "588e0d680ad1d7201d4c6a804dcb1cd9cba79fbb"
-uuid = "67c07d97-cdcb-5c2c-af73-a7f9c32a568b"
-version = "1.0.3"
-
-[[deps.Base64]]
-uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
-
-[[deps.BioGenerics]]
-deps = ["TranscodingStreams"]
-git-tree-sha1 = "7bbc085aebc6faa615740b63756e4986c9e85a70"
-uuid = "47718e42-2ac5-11e9-14af-e5595289c2ea"
-version = "0.1.4"
-
-[[deps.BioMarkovChains]]
-deps = ["BioSequences", "PrecompileTools", "VectorizedKmers"]
-git-tree-sha1 = "ff9f9e36892cf21222f2f2a0fe709a61b5d8dbfe"
-uuid = "f861b655-cb5f-42ce-b66a-341b542d4f2c"
-version = "0.10.1"
-
-    [deps.BioMarkovChains.extensions]
-    DiscreteMarkovChainsExt = "DiscreteMarkovChains"
-    MarkovChainHammerExt = "MarkovChainHammer"
-
-    [deps.BioMarkovChains.weakdeps]
-    DiscreteMarkovChains = "8abcb7ef-b365-4f7b-ac38-56893fb62f9f"
-    MarkovChainHammer = "38c40fd0-bccb-4723-b30d-b2caea0ad8d9"
-
-[[deps.BioSequences]]
-deps = ["BioSymbols", "PrecompileTools", "Random", "Twiddle"]
-git-tree-sha1 = "6fdba8b4279460fef5674e9aa2dac7ef5be361d5"
-uuid = "7e6ae17a-c86d-528c-b3b9-7f778a29fe59"
-version = "3.1.6"
-
-[[deps.BioSymbols]]
-deps = ["PrecompileTools"]
-git-tree-sha1 = "e32a61f028b823a172c75e26865637249bb30dff"
-uuid = "3c28c6f8-a34d-59c4-9654-267d177fcfa9"
-version = "5.1.3"
-
-[[deps.Compat]]
-deps = ["TOML", "UUIDs"]
-git-tree-sha1 = "b1c55339b7c6c350ee89f2c1604299660525b248"
-uuid = "34da2185-b29b-5c13-b0c7-acf172513d20"
-version = "4.15.0"
-
-    [deps.Compat.extensions]
-    CompatLinearAlgebraExt = "LinearAlgebra"
-
-    [deps.Compat.weakdeps]
-    Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
-    LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
-
-[[deps.DataStructures]]
-deps = ["Compat", "InteractiveUtils", "OrderedCollections"]
-git-tree-sha1 = "1d0a14036acb104d9e89698bd408f63ab58cdc82"
-uuid = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
-version = "0.18.20"
-
-[[deps.Dates]]
-deps = ["Printf"]
-uuid = "ade2ca70-3891-5945-98fb-dc099432e06a"
-
-[[deps.FASTX]]
-deps = ["Automa", "BioGenerics", "PrecompileTools", "StringViews", "TranscodingStreams"]
-git-tree-sha1 = "b998f092c8fe26f0ca417a12df18da5340153f34"
-uuid = "c2308a5c-f048-11e8-3e8a-31650f418d12"
-version = "2.1.6"
-weakdeps = ["BioSequences"]
-
-    [deps.FASTX.extensions]
-    BioSequencesExt = "BioSequences"
-
-[[deps.GenomicFeatures]]
-deps = ["BioGenerics", "DataStructures", "IntervalTrees"]
-git-tree-sha1 = "720844f71f118dd2ab4898a89b2b4feca7730d81"
-uuid = "899a7d2d-5c61-547b-bef9-6698a8d05446"
-version = "3.0.0"
-
-[[deps.InteractiveUtils]]
-deps = ["Markdown"]
-uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
-
-[[deps.IntervalTrees]]
-git-tree-sha1 = "dc3b97bb5c9cb7c437f74027309f2c2f09a82aaf"
-uuid = "524e6230-43b7-53ae-be76-1e9e4d08d11b"
-version = "1.1.0"
-
-[[deps.IterTools]]
-git-tree-sha1 = "42d5f897009e7ff2cf88db414a389e5ed1bdd023"
-uuid = "c8e1da08-722c-5040-9ed9-7db0dc04731e"
-version = "1.10.0"
-
-[[deps.Markdown]]
-deps = ["Base64"]
-uuid = "d6f4376e-aef5-505a-96c1-9c027394607a"
-
-[[deps.OrderedCollections]]
-git-tree-sha1 = "dfdf5519f235516220579f949664f1bf44e741c5"
-uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
-version = "1.6.3"
-
-[[deps.PrecompileTools]]
-deps = ["Preferences"]
-git-tree-sha1 = "5aa36f7049a63a1528fe8f7c3f2113413ffd4e1f"
-uuid = "aea7be01-6a6a-4083-8856-8a6e6704d82a"
-version = "1.2.1"
-
-[[deps.Preferences]]
-deps = ["TOML"]
-git-tree-sha1 = "9306f6085165d270f7e3db02af26a400d580f5c6"
-uuid = "21216c6a-2e73-6563-6e65-726566657250"
-version = "1.4.3"
-
-[[deps.Printf]]
-deps = ["Unicode"]
-uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7"
-
-[[deps.Random]]
-deps = ["SHA"]
-uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
-
-[[deps.SHA]]
-uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce"
-version = "0.7.0"
-
-[[deps.StaticArraysCore]]
-git-tree-sha1 = "192954ef1208c7019899fbf8049e717f92959682"
-uuid = "1e83bf80-4336-4d27-bf5d-d5a4f845583c"
-version = "1.4.3"
-
-[[deps.StringViews]]
-git-tree-sha1 = "f7b06677eae2571c888fd686ba88047d8738b0e3"
-uuid = "354b36f9-a18e-4713-926e-db85100087ba"
-version = "1.3.3"
-
-[[deps.TOML]]
-deps = ["Dates"]
-uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76"
-version = "1.0.3"
-
-[[deps.TranscodingStreams]]
-git-tree-sha1 = "d73336d81cafdc277ff45558bb7eaa2b04a8e472"
-uuid = "3bb67fe8-82b1-5028-8e26-92a6c54297fa"
-version = "0.10.10"
-
-    [deps.TranscodingStreams.extensions]
-    TestExt = ["Test", "Random"]
-
-    [deps.TranscodingStreams.weakdeps]
-    Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
-    Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
-
-[[deps.Twiddle]]
-git-tree-sha1 = "29509c4862bfb5da9e76eb6937125ab93986270a"
-uuid = "7200193e-83a8-5a55-b20d-5d36d44a0795"
-version = "1.1.2"
-
-[[deps.UUIDs]]
-deps = ["Random", "SHA"]
-uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
-
-[[deps.Unicode]]
-uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"
-
-[[deps.VectorizedKmers]]
-deps = ["StaticArraysCore"]
-git-tree-sha1 = "c2a7ec780efdf917088b5b65f78f402f72a2eb01"
-uuid = "2ef45bd6-4c2b-43d8-88b3-40597287359a"
-version = "0.9.2"
-weakdeps = ["BioSequences"]
-
-    [deps.VectorizedKmers.extensions]
-    BioSequencesExt = "BioSequences"

From 929f482cb8055ac645d2a5b513533c9c1842fdcc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Camilo=20Garc=C3=ADa?= <ca.garcia2@uniandes.edu.co>
Date: Mon, 1 Jul 2024 14:47:16 -0500
Subject: [PATCH 39/40] Update actions cache and cov

---
 .github/workflows/CI.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
index 9f4837b..98e9876 100644
--- a/.github/workflows/CI.yml
+++ b/.github/workflows/CI.yml
@@ -35,11 +35,11 @@ jobs:
         with:
           version: ${{ matrix.version }}
           arch: ${{ matrix.arch }}
-      - uses: julia-actions/cache@v1
+      - uses: julia-actions/cache@v2
       - uses: julia-actions/julia-buildpkg@v1
       - uses: julia-actions/julia-runtest@v1
       - uses: julia-actions/julia-processcoverage@v1
-      - uses: codecov/codecov-action@v3
+      - uses: codecov/codecov-action@v4
         with:
           files: lcov.info
   docs:

From 67e046d9871e4e3553c06d444d304a2dcf729fa7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Camilo=20Garc=C3=ADa?= <ca.garcia2@uniandes.edu.co>
Date: Mon, 1 Jul 2024 14:51:22 -0500
Subject: [PATCH 40/40] refactor: Simplify NaiveCollector createorfs function

---
 src/algorithms/naivecollector.jl | 21 ++++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

diff --git a/src/algorithms/naivecollector.jl b/src/algorithms/naivecollector.jl
index be46d60..678fe2f 100644
--- a/src/algorithms/naivecollector.jl
+++ b/src/algorithms/naivecollector.jl
@@ -19,10 +19,12 @@ function NaiveCollector(
     seqname = _varname(sequence)
 
     function createorfs(x, strand)
+        
         if length(x.captured[1]:x.captured[3]) < minlen
             return nothing
         end
-        if strand == '+'
+
+        if strand == STRAND_POS
             start = x.captured[1]
             stop = x.captured[3] + 1
             frame = framedict[x.captured[1] % 3]
@@ -31,16 +33,25 @@ function NaiveCollector(
             stop = seqlen - x.captured[1] + 1
             frame = framedict[(seqlen - x.captured[3]) % 3]
         end
-        seq = strand == '+' ? @view(sequence[start:stop]) : reverse_complement(@view(sequence[start:stop])) #@view(sequence[start:stop])
-        scr = scheme === nothing ? 0.0 : scheme(seq; kwargs...)
+
+        if scheme === nothing
+            scr = 0.0
+        else
+            seq = strand == STRAND_POS ? @view(sequence[start:stop]) : reverse_complement(@view(sequence[start:stop]))
+            scr = scheme(seq; kwargs...)
+        end
+
+        # seq = strand == STRAND_POS ? @view(sequence[start:stop]) : reverse_complement(@view(sequence[start:stop])) #@view(sequence[start:stop])
+        # scr = scheme === nothing ? 0.0 : scheme(seq; kwargs...)
         # fts = Dict(:score => score)
         fts = Features((score = scr,)) # rbs = RBS(biore"RRR"dna, 3:4, 1.0)
+
         return ORF{N,NaiveCollector}(seqname, start, stop, strand, frame, fts, scheme) # seq
     end
 
     orfs = Vector{ORF{N,NaiveCollector}}()
-    for strand in ('+', '-')
-        seq = strand == '-' ? revseq : sequence
+    for strand in (STRAND_POS, STRAND_NEG)
+        seq = strand == STRAND_NEG ? revseq : sequence
         matches = eachmatch(regorf, seq, overlap)
         strandedorfs = filter(!isnothing, [createorfs(x, strand) for x in matches])
         append!(orfs, strandedorfs)