diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md new file mode 100644 index 0000000..e352c42 --- /dev/null +++ b/.github/ISSUE_TEMPLATE.md @@ -0,0 +1,5 @@ +Thank you for making an issue. +If you are submitting a bug report, it will help us if you include the following information: + +- Your version of Julia and all packages in your activated Julia environment +- A small example that demonstrates the bug. If possible, please make the code copy-pastable into a fresh REPL. diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 0000000..e9b3e4d --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,7 @@ +Thank you for your contribution! +If you have any questions about your PR, or need help completing it, you can ping the maintainers of this repository, who will be happy to help if they can find time. + +You can optionally use the following checklist when you work on your PR: +- [ ] I have updated any relevant documentation and docstrings. +- [ ] I have added unit tests, and the CodeCov bot shows tests cover my new code. +- [ ] I have mentioned my changes in the CHANGELOG.md file. 
diff --git a/.github/codecov.yml b/.github/codecov.yml new file mode 100644 index 0000000..6515f07 --- /dev/null +++ b/.github/codecov.yml @@ -0,0 +1,28 @@ +coverage: + status: + project: + default: + # basic + target: auto + threshold: 10% + base: auto + flags: + - unittest + paths: + - "src" + # advanced settings + if_ci_failed: ignore + informational: true + patch: + default: + # basic + target: auto + threshold: 10% + base: auto + flags: + - unittest + paths: + - "src" + # advanced settings + if_ci_failed: ignore + informational: true diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml new file mode 100644 index 0000000..4c7bd84 --- /dev/null +++ b/.github/workflows/documentation.yml @@ -0,0 +1,29 @@ +name: Documentation + +on: + push: + branches: + - master # update to match your development branch (master, main, dev, trunk, ...) + tags: '*' + pull_request: + +jobs: + build: + permissions: + contents: write + pull-requests: read + statuses: write + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: julia-actions/setup-julia@v2 + with: + version: 'nightly' + - uses: julia-actions/cache@v1 + - name: Install dependencies + run: julia --project=docs/ -e 'using Pkg; Pkg.develop(PackageSpec(path=pwd())); Pkg.instantiate()' + - name: Build and deploy + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # If authenticating with GitHub Actions token + DOCUMENTER_KEY: ${{ secrets.DOCUMENTER_KEY }} # If authenticating with SSH deploy key + run: julia --project=docs/ docs/make.jl diff --git a/.github/workflows/unittest.yml b/.github/workflows/unittest.yml new file mode 100644 index 0000000..03206a0 --- /dev/null +++ b/.github/workflows/unittest.yml @@ -0,0 +1,45 @@ +name: CI + +on: + push: + branches: [ master ] + pull_request: + branches: [ master ] + +jobs: + test: + name: Julia ${{ matrix.julia-version }} - ${{ matrix.os }} - ${{ matrix.arch }} + runs-on: ${{ matrix.os }} + strategy: + matrix: + julia-version: + - 
'nightly' + os: [ ubuntu-latest, windows-latest ] + arch: [ x64 ] + include: + - julia-version: nightly + julia-arch: x86 + os: ubuntu-latest + experimental: true + # - julia-version: 1 + # os: macOS-latest + # experimental: false + steps: + - name: Checkout Repository + uses: actions/checkout@v4 + - name: Setup Julia + uses: julia-actions/setup-julia@v2 + with: + version: ${{ matrix.julia-version }} + - name: Run Tests + uses: julia-actions/julia-runtest@latest + - name: Create CodeCov + uses: julia-actions/julia-processcoverage@v1 + - name: Upload CodeCov + uses: codecov/codecov-action@v4 + with: + files: ./lcov.info + flags: unittest + name: codecov-umbrella + fail_ci_if_error: false + token: ${{ secrets.CODECOV_TOKEN }} diff --git a/Project.toml b/Project.toml index fa110cb..c010b93 100644 --- a/Project.toml +++ b/Project.toml @@ -12,7 +12,7 @@ XAMAuxData = "e99d641e-1821-45d7-9150-ecb7bf333fe1" [compat] Aqua = "0.8.7" FormatSpecimens = "1.3" -MemViews = "0.1" +MemViews = "0.2" PrecompileTools = "1.2.1" StringViews = "1.3.3" Test = "1.11" diff --git a/README.md b/README.md index 2ce5cda..6534219 100644 --- a/README.md +++ b/README.md @@ -2,12 +2,13 @@ [![Latest Release](https://img.shields.io/github/release/BioJulia/PairwiseMappingFormat.jl.svg)](https://github.com/BioJulia/PairwiseMappingFormat.jl/releases/latest) [![MIT license](https://img.shields.io/badge/license-MIT-green.svg)](https://github.com/BioJulia/PairwiseMappingFormat.jl/blob/master/LICENSE) -[![Documentation](https://img.shields.io/badge/docs-stable-blue.svg)](https://biojulia.github.io/PairwiseMappingFormat.jl/stable) +[![Documentation](https://img.shields.io/badge/docs-dev-blue.svg)](https://biojulia.github.io/PairwiseMappingFormat.jl/dev) +[![](https://codecov.io/gh/BioJulia/PairwiseMappingFormat.jl/branch/master/graph/badge.svg)](https://codecov.io/gh/BioJulia/PairwiseMappingFormat.jl) PairwiseMappingFormat.jl provide a parser for Pairwise Mapping Format (PAF) files. 
PAF is a simple, tab-delimited format created by programs such as minimap2. -To learn how to use the package, [read the documentation](https://biojulia.github.io/PairwiseMappingFormat.jl/stable/) +To learn how to use the package, [read the documentation](https://biojulia.github.io/PairwiseMappingFormat.jl/dev/) ## Example ```julia @@ -29,4 +30,4 @@ add PairwiseMappingFormat ``` ## Contributing -Get in touch with the BioJulia community over at the [Julia Slack](https://julialang.org/slack/) or Zulip servers. +Make an issue or PR on this repository, or get in touch with the BioJulia community over at the [Julia Slack](https://julialang.org/slack/) or Zulip servers. diff --git a/docs/make.jl b/docs/make.jl index 443ba6e..292a3c9 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -8,22 +8,20 @@ meta = quote using PairwiseMappingFormat: try_parse end -DocMeta.setdocmeta!( - PairwiseMappingFormat, - :DocTestSetup, - meta, - recursive=true -) +DocMeta.setdocmeta!(PairwiseMappingFormat, :DocTestSetup, meta; recursive=true) -makedocs( - sitename = "PairwiseMappingFormat.jl", - modules = [PairwiseMappingFormat], - pages = [ - "Home" => "index.md", - ], - authors = "Jakob Nybo Nissen", - checkdocs = :public, - remotes=nothing, # TODO: Remove +makedocs(; + sitename="PairwiseMappingFormat.jl", + modules=[PairwiseMappingFormat], + pages=["Home" => "index.md"], + authors="Jakob Nybo Nissen", + checkdocs=:public, + remotes=nothing, ) -# TODO: Call deploydocs +deploydocs(; + repo="github.com/BioJulia/PairwiseMappingFormat.jl.git", + push_preview=true, + deps=nothing, + make=nothing, +) diff --git a/docs/src/index.md b/docs/src/index.md index 815df34..79ed10a 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -170,9 +170,7 @@ NaN ## Low-level interface Iterating `PAFReader`s, and the `parse` function will throw a `PairwiseMappingFormat.ParserException` if the data is invalid: ```jldoctest -parse(PAFRecord, "not a PAF line") - -# output +julia> parse(PAFRecord, "not a PAF line") 
ERROR: Error when parsing PAF record on line 1, near byte number 14 in line: Not enough tab-separated fields. Each line must have at least 12 fields [...] ``` @@ -186,13 +184,10 @@ It either returns a valid `PAFRecord`, or else returns the [`ParserException`](@ but does not throw the exception: ```jldoctest public -const PAF = PairwiseMappingFormat +julia> const PAF = PairwiseMappingFormat; -println(PAF.try_parse("not a PAF line")) - -# output +julia> println(PAF.try_parse("not a PAF line")) PairwiseMappingFormat.ParserException(PairwiseMappingFormat.Errors.TooFewFields, 14, 1) - ``` Similarly, the next record of a `PAFReader` may be obtained with the unexported [`try_next!`](@ref) @@ -234,11 +229,10 @@ The [`ParserException`](@ref) type contains the error type as an `Enum`, and the the exception occurred. These can be obtained with the `.kind` and `.line` properties. ```jldoctest public -println(err.line) -println(err.kind) - -# output +julia> err.line 1 + +julia> err.kind TooFewFields ``` diff --git a/src/PairwiseMappingFormat.jl b/src/PairwiseMappingFormat.jl index 7611c78..0ef272d 100644 --- a/src/PairwiseMappingFormat.jl +++ b/src/PairwiseMappingFormat.jl @@ -149,9 +149,9 @@ function Base.show(io::IO, ::MIME"text/plain", record::PAFRecord) buf = IOBuffer() is_mapped(record) || print(buf, "Unmapped ") println(buf, "PAFRecord:") - println(buf, " Query: ", qname(record)) + println(buf, " Query: \"", qname(record), '"') if is_mapped(record) - println(buf, " Target: ", tname(record)) + println(buf, " Target: \"", tname(record), '"') println(buf, " Q cov: ", round(query_coverage(record); digits=4)) println(buf, " T cov: ", round(target_coverage(record); digits=4)) println(buf, " Identity: ", round(aln_identity(record); digits=4)) @@ -165,7 +165,7 @@ function Base.show(io::IO, ::MIME"text/plain", record::PAFRecord) end # Print the AUX fields indented -function repr_aux(record::PAFRecord) +function repr_aux(record::PAFRecord)::Vector{UInt8} buf = IOBuffer() out = 
IOBuffer() show(buf, MIME"text/plain"(), aux_data(record)) @@ -220,10 +220,7 @@ end """ query_coverage(rec::Record) -> Float64 -Compute the approximate fraction of the query covered by the alignment. -This is computed as the alignment length divided by the query length, -and thus may be inaccurate if there are deletions in the alignment. -The result is guaranteed to be non-negative. +Compute the fraction of the query covered by the alignment. # Examples ```jldoctest @@ -240,10 +237,7 @@ end """ target_coverage(rec::Record) -> Float64 -Compute the approximate fraction of the target covered by the alignment. -This is computed as the alignment length divided by the target length, -and thus may be inaccurate if there are deletions in the alignment. -The result is guaranteed to be non-negative. +Compute the fraction of the target covered by the alignment. # Examples ```jldoctest @@ -450,9 +444,9 @@ function parse_line!( strand = if b == UInt8('*') return finish_unmapped!(record, mem, qname, qlen, i + 2) elseif b == UInt8('-') - 0x01 - elseif b == UInt8('+') 0x02 + elseif b == UInt8('+') + 0x01 else return ParserException(i % Int32, Errors.InvalidStrand) end @@ -484,11 +478,11 @@ function parse_line!( length(data) == filled || resize!(data, filled) doff = 1 dataview = MemView(data) - unsafe_copyto!(dataview, doff, mem, first(qname), length(qname)) + unsafe_copyto!(dataview, mem[qname]) doff += length(qname) - unsafe_copyto!(dataview, doff, mem, first(tname), length(tname)) + unsafe_copyto!(dataview[doff:end], mem[tname]) doff += length(tname) - iszero(auxlen) || unsafe_copyto!(dataview, doff, mem, i, auxlen) + iszero(auxlen) || unsafe_copyto!(dataview[doff:end], mem[i:end]) # Fill in fields of the struct @@ -529,7 +523,7 @@ function finish_unmapped!( i + 1 end end - i = findnext(==(0x00), mem, i) + i = findnext(==(UInt8('\t')), mem, i) aux_end = lastindex(mem) aux_start = isnothing(i) ? 
aux_end + 1 : i + 1 aux = aux_start:aux_end @@ -539,8 +533,8 @@ function finish_unmapped!( data = getfield(record, :data) dataview = MemView(data) length(data) == filled || resize!(data, filled) - unsafe_copyto!(dataview, 1, mem, first(qname), length(qname)) - unsafe_copyto!(dataview, length(qname) + 1, mem, aux_start, length(aux)) + unsafe_copyto!(dataview, mem[qname]) + unsafe_copyto!(dataview[length(qname)+1:end], mem[aux]) # Fill in fields record.qname_len = length(qname) % Int32 @@ -594,13 +588,15 @@ function parse_int( n < 0 && return ParserException(i % Int32, Errors.IntegerOverflow) n = Int32(10) * n + (b - Int32(48)) end + # Note: It is not possible to get an InvalidZero error here, because + # the last field (mapq) does allow zero, and for any other fields, + # if they do not end with a \t and thus reach this line, a TooFewFields + # error will be returned. # at_end is if this is the mapq field, which does not need to end with a tab if !at_end ParserException(lastindex(v) % Int32, Errors.TooFewFields) elseif from > lastindex(v) ParserException(i % Int32, Errors.EmptyInteger) - elseif !allow_zero & iszero(n) - ParserException(i % Int32, Errors.InvalidZero) else (n, i + 1) end @@ -660,9 +656,8 @@ mutable struct PAFReader{I <: IO} copy::Bool end -function PAFReader(io::IO; copy::Bool=true) - # 2^16 and 512 are reasonable buffer sizes, they are arbitrary - mem = Memory{UInt8}(undef, 2^16) +function PAFReader(io::IO; buf_size::Int=2^16, copy::Bool=true) + mem = Memory{UInt8}(undef, max(buf_size, 16)) rec = PAFRecord(512) PAFReader{typeof(io)}(io, rec, mem, 1, 0, 1, copy) end @@ -671,8 +666,8 @@ Base.IteratorSize(::Type{<:PAFReader}) = Base.SizeUnknown() Base.eltype(::Type{<:PAFReader}) = PAFRecord Base.close(reader::PAFReader) = close(reader.io) # TODO: Docstring -function PAFReader(f::Function, io::IO; copy::Bool=true) - reader = PAFReader(io; copy) +function PAFReader(f::Function, io::IO; kwargs...) + reader = PAFReader(io; kwargs...) 
 try f(reader) finally diff --git a/test/runtests.jl b/test/runtests.jl index 2938d36..51b6b1e 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -42,6 +42,12 @@ end @test rec1.matches == 301142 @test rec1.alnlen == 301142 @test rec1.mapq == 0 + + @test_throws Exception rec1.foobar + @test_throws Exception rec1.strand + @test_throws Exception rec1.data + @test_throws Exception rec1.qname_len + @test_throws Exception rec1.tname_len end @testset "Aux data" begin @@ -64,7 +70,7 @@ end cmp_aux(rec3, ["dv" => +17f21, "rl" => 38]) # Empty record - rec = PAFReader(only, open(joinpath(path_of_format("PAF"), "good3.paf"))) + rec = PAFReader(first, open(joinpath(path_of_format("PAF"), "good3.paf"))) @test isnothing(rec.tname) @test isnothing(rec.is_rc) @test !is_mapped(rec) @@ -77,6 +83,9 @@ end "dv" => Float32(0.0117), "rl" => 0, ) + + # Unmapped record with too few fields + @test try_parse("query\t123\t1\t100\t*\t*\t5\t19").kind == Errors.TooFewFields end function with_replaced(s::String, field::Integer, new::String) @@ -87,7 +96,7 @@ function with_replaced(s::String, field::Integer, new::String) end @testset "Errors" begin - good = "my_qname 301156 3 301145 + my_tname 6701780 2764860 3066002 301142 301142 0" + good = "my_qname\t301156\t3\t301145\t+\tmy_tname\t6701780\t2764860\t3066002\t301142\t301142\t0" @test with_replaced(good, 1, "my other name") isa PAFRecord # Negative numbers, zero numbers @@ -113,7 +122,7 @@ end @test with_replaced(good, 4, "310000") == Errors.PositionOutOfBounds @test with_replaced(good, 7, "3060002") == Errors.PositionOutOfBounds @test with_replaced(good, 9, "6711780") == Errors.PositionOutOfBounds - + # Char not valid @test with_replaced(good, 5, "/") == Errors.InvalidStrand @@ -133,6 +142,31 @@ end @test with_replaced(good, 12, "256") == Errors.IntegerOverflow end +@testset "Bad reader" begin + # Iterating a reader with bad data will throw + good = "my_qname\t301156\t3\t301145\t+\tmy_tname\t6701780\t2764860\t3066002\t301142\t301142\t0" + + 
@test only(PAFReader(collect, IOBuffer(good))) isa PAFRecord + bad = good[1:end-2] + @test_throws PairwiseMappingFormat.ParserException only(PAFReader(collect, IOBuffer(bad))) +end + +@testset "Reader buffer size" begin + # Large data where buffer needs to be shifted + good = "my_qname\t301156\t3\t301145\t+\tmy_tname\t6701780\t2764860\t3066002\t301142\t301142\t0" + large = join([good for _ in 1:1000], '\n') + PAFReader(IOBuffer(large); buf_size=16) do reader + (allgood, n) = foldl(reader; init=(true, 0)) do (isgood, n), record + isgood &= record.qlen == 301156 + isgood &= record.mapq == 0 + isgood &= record.tstart == 2764861 + (isgood, n + 1) + end + @test n == 1000 + @test allgood + end +end + # We assume that aux data is correctly tested in its own package, here we just test # that the aux data has the right span and byte content. function test_aux_content(record_data::String, target_aux::String)