Skip to content

Commit

Permalink
wip: movemask_will_have_non_zero
Browse files Browse the repository at this point in the history
  • Loading branch information
jprochazk committed Jun 24, 2024
1 parent 089529b commit a49590a
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 3 deletions.
4 changes: 3 additions & 1 deletion src/irc/tags/simd.rs
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,9 @@ fn find_first(data: &[u8], byte: u8) -> Option<usize> {
let chunk_3 = V::load_aligned(data, offset + V::SIZE * 3).eq(byte);

// early exit if we know that all chunks will have a zero mask
if V::SUPPORTS_MOVEMASK_WILL_HAVE_NON_ZERO && !chunk_0.movemask_will_have_non_zero() {
if V::SUPPORTS_MOVEMASK_WILL_HAVE_NON_ZERO
&& !(chunk_0 | chunk_1 | chunk_2 | chunk_3).movemask_will_have_non_zero()
{
offset += V::SIZE;
continue;
}
Expand Down
23 changes: 21 additions & 2 deletions src/irc/wide/aarch64/neon.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@
// To obtain the position of the character, divide its trailing zeros by 4.

use core::arch::aarch64::{
uint8x16_t, vceqq_u8, vget_lane_u64, vld1q_u8, vreinterpret_u64_u8, vreinterpretq_u16_u8,
vshrn_n_u16,
uint8x16_t, vceqq_u8, vget_lane_u64, vgetq_lane_u64, vld1q_u8, vorrq_u8, vpmaxq_u8,
vreinterpret_u64_u8, vreinterpretq_u16_u8, vreinterpretq_u64_u8, vshrn_n_u16,
};

// NOTE: neon has no alignment requirements for loads,
Expand Down Expand Up @@ -104,6 +104,25 @@ impl Vector {
Mask(matches)
}
}

/// Whether this backend provides `movemask_will_have_non_zero`.
///
/// Callers test this flag before using that method as an early-exit check.
pub const SUPPORTS_MOVEMASK_WILL_HAVE_NON_ZERO: bool = true;

/// Returns `true` if at least one byte of this vector is non-zero.
///
/// A pairwise max of the vector with itself folds every adjacent byte pair
/// into a single byte, so the low 64 bits of the result summarize all 16
/// input bytes. That half is zero exactly when the whole vector is zero.
#[inline(always)]
pub fn movemask_will_have_non_zero(self) -> bool {
    // SAFETY: these intrinsics only operate on register values and access no
    // memory. NOTE(review): they require NEON, which this aarch64-only module
    // presumably assumes is always available - confirm.
    let folded: u64 = unsafe {
        let pairwise_max = vpmaxq_u8(self.0, self.0);
        vgetq_lane_u64(vreinterpretq_u64_u8(pairwise_max), 0)
    };
    folded != 0
}
}

/// Bitwise OR of two vectors, so several vectors can be merged into one
/// before a single check.
impl std::ops::BitOr for Vector {
    type Output = Self;

    /// Combines `self` and `rhs` with a bitwise OR across the full register.
    #[inline(always)]
    fn bitor(self, rhs: Self) -> Self {
        let (left, right) = (self.0, rhs.0);
        // SAFETY: `vorrq_u8` only combines two register values and accesses no
        // memory. NOTE(review): it requires NEON, which this aarch64-only
        // module presumably assumes is always available - confirm.
        let merged = unsafe { vorrq_u8(left, right) };
        Self(merged)
    }
}

#[derive(Clone, Copy)]
Expand Down

0 comments on commit a49590a

Please sign in to comment.