Skip to content

Commit

Permalink
Merge pull request #32 from camilogarciabotero/iscoding
Browse files Browse the repository at this point in the history
Improve interface for further methods
  • Loading branch information
camilogarciabotero authored May 13, 2024
2 parents a298442 + ff1da7b commit de99586
Show file tree
Hide file tree
Showing 18 changed files with 438 additions and 317 deletions.
10 changes: 5 additions & 5 deletions Manifest.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# This file is machine-generated - editing it directly is not advised

julia_version = "1.10.2"
julia_version = "1.10.3"
manifest_format = "2.0"
project_hash = "b881dd48d80d6b19dfa1756a0b2d6e2e42d93e33"

Expand Down Expand Up @@ -48,9 +48,9 @@ uuid = "ade2ca70-3891-5945-98fb-dc099432e06a"

[[deps.FASTX]]
deps = ["Automa", "BioGenerics", "PrecompileTools", "StringViews", "TranscodingStreams"]
git-tree-sha1 = "bff5d62bf5e1c382a370ac701bcaea9a24115ac6"
git-tree-sha1 = "24ce37a228990be0cb69b3a2dbcfb656f32fc679"
uuid = "c2308a5c-f048-11e8-3e8a-31650f418d12"
version = "2.1.4"
version = "2.1.5"
weakdeps = ["BioSequences"]

[deps.FASTX.extensions]
Expand Down Expand Up @@ -101,9 +101,9 @@ uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76"
version = "1.0.3"

[[deps.TranscodingStreams]]
git-tree-sha1 = "71509f04d045ec714c4748c785a59045c3736349"
git-tree-sha1 = "5d54d076465da49d6746c647022f3b3674e64156"
uuid = "3bb67fe8-82b1-5028-8e26-92a6c54297fa"
version = "0.10.7"
version = "0.10.8"

[deps.TranscodingStreams.extensions]
TestExt = ["Test", "Random"]
Expand Down
10 changes: 5 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
[![GitHub Actions](https://github.com/camilogarciabotero/GeneFinder.jl/actions/workflows/CI.yml/badge.svg)](https://github.com/camilogarciabotero/GeneFinder.jl/actions/workflows/CI.yml)
[![License](https://img.shields.io/badge/license-MIT-green.svg)](https://github.com/camilogarciabotero/GeneFinder.jl/blob/main/LICENSE)
[![Repo Status](https://www.repostatus.org/badges/latest/wip.svg)](https://www.repostatus.org/#wip)
[![Downloads](https://shields.io/endpoint?url=https://pkgs.genieframework.com/api/v1/badge/GeneFinder&label=downloads)](https://pkgs.genieframework.com?packages=GeneFinder)
[![Downloads](https://img.shields.io/badge/dynamic/json?url=http%3A%2F%2Fjuliapkgstats.com%2Fapi%2Fv1%2Fmonthly_downloads%2FGeneFinder&query=total_requests&suffix=%2Fmonth&label=Downloads)](http://juliapkgstats.com/pkg/GeneFinder)
[![Aqua QA](https://raw.githubusercontent.com/JuliaTesting/Aqua.jl/master/badge.svg)](https://github.com/JuliaTesting/Aqua.jl)

</div>
Expand Down Expand Up @@ -50,7 +50,7 @@ seq = dna"AACCAGGGCAATATCAGTACCGCGGGCAATGCAACCCTGACTGCCGGCGGTAACCTGAACAGCACTGGCA
Now let us find the ORFs

```julia
findorfs(seq)
findorfs(seq, NaiveFinder())

12-element Vector{ORF}:
ORF(29:40, '+', 2, 0.0)
Expand All @@ -67,10 +67,10 @@ findorfs(seq)
ORF(695:706, '+', 2, 0.0)
```

Two other functions (`get_orfs_dna` and `get_orfs_aa`) are implemented to get the ORFs in DNA and amino acid sequences, respectively. They use the `findorfs` function to first get the ORFs and then get the correspondance array of `BioSequence` objects.
Two other methods were implemented in `getorfs` to get the ORFs as DNA or amino acid sequences, respectively. They use the `findorfs` function to first get the ORFs and then get the corresponding array of `BioSequence` objects.

```julia
get_orfs_dna(seq)
getorfs(seq, DNAAlphabet{4}(), NaiveFinder())

12-element Vector{LongSubSeq{DNAAlphabet{4}}}:
ATGCAACCCTGA
Expand Down Expand Up @@ -118,7 +118,7 @@ Once a `BioSequence` object has been instantiated, the `write_orfs_fna` function
outfile = "LFLS01000089.fna"

open(outfile, "w") do io
write_orfs_fna(seq, io)
write_orfs_fna(seq, io, NaiveFinder())
end
```

Expand Down
40 changes: 20 additions & 20 deletions docs/Manifest.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# This file is machine-generated - editing it directly is not advised

julia_version = "1.10.2"
julia_version = "1.10.3"
manifest_format = "2.0"
project_hash = "e796693959137d83427b789f5054fa800312cae0"

Expand Down Expand Up @@ -91,9 +91,9 @@ version = "0.9.3"

[[deps.Documenter]]
deps = ["ANSIColoredPrinters", "AbstractTrees", "Base64", "CodecZlib", "Dates", "DocStringExtensions", "Downloads", "Git", "IOCapture", "InteractiveUtils", "JSON", "LibGit2", "Logging", "Markdown", "MarkdownAST", "Pkg", "PrecompileTools", "REPL", "RegistryInstances", "SHA", "TOML", "Test", "Unicode"]
git-tree-sha1 = "4a40af50e8b24333b9ec6892546d9ca5724228eb"
git-tree-sha1 = "5461b2a67beb9089980e2f8f25145186b6d34f91"
uuid = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
version = "1.3.0"
version = "1.4.1"

[[deps.Downloads]]
deps = ["ArgTools", "FileWatching", "LibCURL", "NetworkOptions"]
Expand All @@ -108,15 +108,15 @@ version = "0.1.10"

[[deps.Expat_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl"]
git-tree-sha1 = "4558ab818dcceaab612d1bb8c19cee87eda2b83c"
git-tree-sha1 = "1c6317308b9dc757616f0b5cb379db10494443a7"
uuid = "2e619515-83b5-522b-bb60-26c02a35a201"
version = "2.5.0+0"
version = "2.6.2+0"

[[deps.FASTX]]
deps = ["Automa", "BioGenerics", "PrecompileTools", "StringViews", "TranscodingStreams"]
git-tree-sha1 = "bff5d62bf5e1c382a370ac701bcaea9a24115ac6"
git-tree-sha1 = "24ce37a228990be0cb69b3a2dbcfb656f32fc679"
uuid = "c2308a5c-f048-11e8-3e8a-31650f418d12"
version = "2.1.4"
version = "2.1.5"
weakdeps = ["BioSequences"]

[deps.FASTX.extensions]
Expand All @@ -129,7 +129,7 @@ uuid = "7b1f6079-737a-58dc-b8bc-7a2ca5c1b5ee"
deps = ["BioMarkovChains", "BioSequences", "FASTX", "IterTools", "PrecompileTools"]
path = ".."
uuid = "2bc6ee46-2158-4f5a-a720-91cb2d7cee64"
version = "0.2.0"
version = "0.3.0"

[[deps.Git]]
deps = ["Git_jll"]
Expand All @@ -139,15 +139,15 @@ version = "1.3.1"

[[deps.Git_jll]]
deps = ["Artifacts", "Expat_jll", "JLLWrappers", "LibCURL_jll", "Libdl", "Libiconv_jll", "OpenSSL_jll", "PCRE2_jll", "Zlib_jll"]
git-tree-sha1 = "12945451c5d0e2d0dca0724c3a8d6448b46bbdf9"
git-tree-sha1 = "d18fb8a1f3609361ebda9bf029b60fd0f120c809"
uuid = "f8c6e375-362e-5223-8a59-34ff63f689eb"
version = "2.44.0+1"
version = "2.44.0+2"

[[deps.HTTP]]
deps = ["Base64", "CodecZlib", "ConcurrentUtilities", "Dates", "ExceptionUnwrapping", "Logging", "LoggingExtras", "MbedTLS", "NetworkOptions", "OpenSSL", "Random", "SimpleBufferStream", "Sockets", "URIs", "UUIDs"]
git-tree-sha1 = "8e59b47b9dc525b70550ca082ce85bcd7f5477cd"
git-tree-sha1 = "d1d712be3164d61d1fb98e7ce9bcbc6cc06b45ed"
uuid = "cd3eb016-35fb-5094-929b-558a96fad6f3"
version = "1.10.5"
version = "1.10.8"

[[deps.IOCapture]]
deps = ["Logging", "Random"]
Expand Down Expand Up @@ -216,9 +216,9 @@ version = "1.17.0+0"

[[deps.LiveServer]]
deps = ["HTTP", "LoggingExtras", "MIMEs", "Pkg", "Sockets", "Test"]
git-tree-sha1 = "24d05efe53436b22a42bf2ae459f47c48b0c2603"
git-tree-sha1 = "1e46b873b8ef176e23ee43f96e72cd45c20bafb4"
uuid = "16fef848-5104-11e9-1b77-fb7a48bbb589"
version = "1.2.7"
version = "1.3.1"

[[deps.Logging]]
uuid = "56ddb016-857b-54e1-b83d-db4d58db5568"
Expand Down Expand Up @@ -268,15 +268,15 @@ version = "1.2.0"

[[deps.OpenSSL]]
deps = ["BitFlags", "Dates", "MozillaCACerts_jll", "OpenSSL_jll", "Sockets"]
git-tree-sha1 = "af81a32750ebc831ee28bdaaba6e1067decef51e"
git-tree-sha1 = "38cb508d080d21dc1128f7fb04f20387ed4c0af4"
uuid = "4d8831e6-92b7-49fb-bdf8-b643e874388c"
version = "1.4.2"
version = "1.4.3"

[[deps.OpenSSL_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl"]
git-tree-sha1 = "60e3045590bd104a16fefb12836c00c0ef8c7f8c"
git-tree-sha1 = "3da7367955dcc5c54c1ba4d402ccdc09a1a3e046"
uuid = "458c3c95-2e84-50aa-8efc-19380b2a3a95"
version = "3.0.13+0"
version = "3.0.13+1"

[[deps.PCRE2_jll]]
deps = ["Artifacts", "Libdl"]
Expand Down Expand Up @@ -364,9 +364,9 @@ deps = ["InteractiveUtils", "Logging", "Random", "Serialization"]
uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[[deps.TranscodingStreams]]
git-tree-sha1 = "71509f04d045ec714c4748c785a59045c3736349"
git-tree-sha1 = "5d54d076465da49d6746c647022f3b3674e64156"
uuid = "3bb67fe8-82b1-5028-8e26-92a6c54297fa"
version = "0.10.7"
version = "0.10.8"
weakdeps = ["Random", "Test"]

[deps.TranscodingStreams.extensions]
Expand Down
9 changes: 1 addition & 8 deletions docs/src/api.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,20 +23,13 @@ Modules = [GeneFinder]
Pages = ["findorfs.jl"]
```

## Finding ORFs using BioRegex
## Finding ORFs using BioRegex and scoring

```@autodocs
Modules = [GeneFinder]
Pages = ["algorithms/naivefinder.jl"]
```

## Finding ORFs and simple scoring

```@autodocs
Modules = [GeneFinder]
Pages = ["algorithms/naivefinderscored.jl"]
```

## Getting ORF sequences

```@autodocs
Expand Down
4 changes: 2 additions & 2 deletions docs/src/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@
<a href="https://www.repostatus.org/#wip">
<img src="https://www.repostatus.org/badges/latest/wip.svg" alt="Repo Status">
</a>
<a href="https://pkgs.genieframework.com?packages=GeneFinder">
<img src="https://shields.io/endpoint?url=https://pkgs.genieframework.com/api/v1/badge/GeneFinder&label=downloads" alt="Downloads">
<a href="http://juliapkgstats.com/pkg/GeneFinder">
<img src="https://img.shields.io/badge/dynamic/json?url=http%3A%2F%2Fjuliapkgstats.com%2Fapi%2Fv1%2Fmonthly_downloads%2FGeneFinder&query=total_requests&suffix=%2Fmonth&label=Downloads" alt="Downloads">
</a>
<a href="https://github.com/JuliaTesting/Aqua.jl">
<img src="https://raw.githubusercontent.com/JuliaTesting/Aqua.jl/master/badge.svg" alt="Aqua QA">
Expand Down
4 changes: 2 additions & 2 deletions docs/src/iodocs.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ Once a `BioSequence` object has been instantiated, the `write_orfs_fna` function
outfile = "LFLS01000089.fna"

open(outfile, "w") do io
write_orfs_fna(seq, io)
write_orfs_fna(seq, io, NaiveFinder())
end
```

Expand Down Expand Up @@ -71,7 +71,7 @@ infile = "test/data/NC_001884.fasta"
outfile = "test/data/NC_001884-orfs.faa"

open(inputfile) do io
write_orfs_faa(infile, outfile)
write_orfs_faa(infile, outfile, NaiveFinder())
end
```

Expand Down
8 changes: 4 additions & 4 deletions docs/src/naivefinder.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ seq = dna"AACCAGGGCAATATCAGTACCGCGGGCAATGCAACCCTGACTGCCGGCGGTAACCTGAACAGCACTGGCA
Now let us find the ORFs

```julia
findorfs(seq)
findorfs(seq, NaiveFinder())

12-element Vector{ORF}:
ORF(29:40, '+', 2, 0.0)
Expand All @@ -58,10 +58,10 @@ findorfs(seq)
ORF(695:706, '+', 2, 0.0)
```

Two other functions (`get_orfs_dna` and `get_orfs_aa`) are implemented to get the ORFs in DNA and amino acid sequences, respectively. They use the `findorfs` function to first get the ORFs and then get the correspondance array of `BioSequence` objects.
Two other methods were implemented in `getorfs` to get the ORFs as DNA or amino acid sequences, respectively. They use the `findorfs` function to first get the ORFs and then get the corresponding array of `BioSequence` objects.

```julia
get_orfs_dna(seq)
getorfs(seq, DNAAlphabet{4}(), NaiveFinder())

12-element Vector{LongSubSeq{DNAAlphabet{4}}}:
ATGCAACCCTGA
Expand All @@ -79,7 +79,7 @@ get_orfs_dna(seq)
```

```julia
get_orfs_aa(seq)
getorfs(seq, AminoAcidAlphabet(), NaiveFinder())

12-element Vector{LongSubSeq{AminoAcidAlphabet}}:
MQP*
Expand Down
25 changes: 14 additions & 11 deletions docs/src/simplecodingrule.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,17 +17,20 @@ using GeneFinder, BioSequences

seq = dna"TTCGTCAGTCGTTCTGTTTCATTCAATACGATAGTAATGTATTTTTCGTGCATTTCCGGTGGAATCGTGCCGTCCAGCATAGCCTCCAGATATCCCCTTATAGAGGTCAGAGGGGAACGGAAATCGTGGGATACATTGGCTACAAACTTTTTCTGATCATCCTCGGAACGGGCAATTTCGCTTGCCATATAATTCAGACAGGAAGCCAGATAACCGATTTCATCCTCACTATCGACCTGAAATTCATAATGCATATTACCGGCAGCATACTGCTCTGTGGCATGAGTGATCTTCCTCAGAGGAATATATACGATCTCAGTGAAAAAGATCAGAATGATCAGGGATAGCAGGAACAGGATTGCCAGGGTGATATAGGAAATATTCAGCAGGTTGTTACAGGATTTCTGAATATCATTCATATCAGTATGGATGACTACATAGCCTTTTACCTTGTAGTTGGAGGTAATGGGAGCAAATACAGTAAGTACATCCGAATCAAAATTACCGAAGAAATCACCAACAATGTAATAGGAGCCGCTGGTTACGGTCGAATCAAAATTCTCAATGACAACCACATTCTCCACATCTAAGGGACTATTGGTATCCAGTACCAGTCGTCCGGAGGGATTGATGATGCGAATCTCGGAATTCAGGTAGACCGCCAGGGAGTCCAGCTGCATTTTAACGGTCTCCAAAGTTGTTTCACTGGTGTACAATCCGCCGGCATAGGTTCCGGCGATCAGGGTTGCTTCGGAATAGAGACTTTCTGCCTTTTCCCGGATCAGATGTTCTTTGGTCATATTGGGAACAAAAGTTGTAACAATGATGAAACCAAATACACCAAAAATAAAATATGCGAGTATAAATTTTAGATAAAGTGTTTTTTTCATAACAAATCCTGCTTTTGGTATGACTTAATTACGTACTTCGAATTTATAGCCGATGCCCCAGATGGTGCTGATCTTCCAGTTGGCATGATCCTTGATCTTCTC"

orfs = findorfs(seq, min_len=75, findermethod=naivefinderscored)
orfs = findorfs(seq, min_len=75, NaiveFinderScored())
```

```julia
9-element Vector{ORF}:
ORF(37:156, '+', 1, -0.0024384392084479912)
ORF(194:268, '-', 2, -0.003991702119459298)
ORF(194:283, '-', 2, -0.01431767026931985)
ORF(249:347, '+', 3, -0.02024959025464718)
ORF(426:590, '+', 3, -0.003289228147424537)
ORF(565:657, '+', 1, -0.014806468147370438)
ORF(650:727, '-', 2, -0.009087704913650461)
ORF(786:872, '+', 3, -0.03486633273294755)
ORF(887:976, '-', 2, -0.005778301450517392)
ORF(37:156, '+', 1, 731.0630113217583)
ORF(194:268, '-', 2, 451.39296840114787)
ORF(194:283, '-', 2, 539.7348273169179)
ORF(249:347, '+', 3, 603.7889048870026)
ORF(426:590, '+', 3, 1005.854258504278)
ORF(565:657, '+', 1, 573.9794470785512)
ORF(650:727, '-', 2, 482.1313855694985)
ORF(786:872, '+', 3, 514.7538884163761)
ORF(887:976, '-', 2, 539.2671724379366)
```

## The *log-odds ratio* decision rule
Expand All @@ -45,7 +48,7 @@ Where the ``P_{C}`` is the probability of the sequence given a CDS model, ``P_{N
In this package we have implemented this rule and use some basic CDS and No-CDS models of *E. coli* from the work of Axelson-Fisk (2015) (implemented in the `BioMarkovChains.jl` package). To check whether a random sequence could be coding based on this decision rule, we use the predicate `isnaivecoding` with the `ECOLICDS` and `ECOLINOCDS` models:

```julia
orfsdna = get_orfs_dna(seq, findermethod=naivefinderscored, min_len=75, alternative_start=true);
orfsdna = getorfs(seq,DNAAlphabet{4}(), NaiveFinderScored(), min_len=75, alternative_start=true)
isnaivecoding.(orfsdna)
```
```julia
Expand Down
5 changes: 3 additions & 2 deletions src/GeneFinder.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ using BioSequences:
DNA,
DNA_Gap, DNA_A, DNA_C, DNA_M, DNA_G, DNA_R, DNA_S, DNA_V, DNA_T, DNA_W, DNA_Y, DNA_H, DNA_K, DNA_D, DNA_B, DNA_N,

Alphabet,
NucleicAcidAlphabet,
DNAAlphabet,
AminoAcidAlphabet,
Expand All @@ -26,9 +27,9 @@ using IterTools: takewhile, iterated
using PrecompileTools: @setup_workload, @compile_workload

include("algorithms/naivefinder.jl")
include("algorithms/naivefinderscored.jl")
include("types.jl")
include("findorfs.jl")
include("iscoding.jl")
include("getorfs.jl")
include("io.jl")
include("utils.jl")
Expand All @@ -41,7 +42,7 @@ include("extended.jl")
@compile_workload begin
# all calls in this block will be precompiled, regardless of whether
# they belong to your package or not (on Julia 1.8 and higher)
findorfs(seq)
findorfs(seq, NaiveFinder())
end
end

Expand Down
Loading

0 comments on commit de99586

Please sign in to comment.