[CHORE] clean up cache, distance, index, storage clippy warnings (#2860)
## Description of changes

*Summarize the changes made by this PR.*
 - clean up cache, distance, index, storage clippy warnings

## Test plan
*How are these changes tested?*
- Ran `cargo clippy` locally

- [ ] Tests pass locally with `pytest` for python, `yarn test` for js,
`cargo test` for rust

## Documentation Changes
*Are all docstrings for user-facing APIs updated if required? Do we need
to make documentation changes in the [docs
repository](https://github.com/chroma-core/docs)?*
rescrv authored Sep 26, 2024
1 parent bed6a22 commit 38ba3be
Showing 14 changed files with 323 additions and 469 deletions.
7 changes: 2 additions & 5 deletions rust/cache/src/cache.rs
@@ -125,10 +125,7 @@ where
    pub fn get(&self, key: &K) -> Option<V> {
        let read_guard = self.cache.read();
        let value = read_guard.get(key);
-       match value {
-           Some(v) => Some(v.clone()),
-           None => None,
-       }
+       value.cloned()
    }

    pub fn pop(&self) -> Option<(K, V)> {
@@ -261,6 +258,6 @@ impl ChromaError for CacheConfigError {
pub trait Cacheable {
    // By default the weight of a type that is cacheable is 1.
    fn weight(&self) -> usize {
-       return 1;
+       1
    }
}
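
Both hunks in this file are mechanical clippy fixes: a manual `match` over an `Option` collapses to `Option::cloned` (the `clippy::manual_map`/`clippy::map_clone` family), and the trailing `return 1;` trips `clippy::needless_return`, which also recurs in the index hunks below. A minimal standalone sketch, with illustrative names not taken from the Chroma codebase:

```rust
use std::collections::HashMap;

// Illustrative stand-in for the cache getter above.
fn get(map: &HashMap<String, Vec<u8>>, key: &str) -> Option<Vec<u8>> {
    let value = map.get(key); // Option<&Vec<u8>>
    // Before: match value { Some(v) => Some(v.clone()), None => None }
    value.cloned() // Option::cloned does the match-and-clone in one call
}

fn weight() -> usize {
    // Before: `return 1;` -- clippy prefers the tail-expression form.
    1
}

fn main() {
    let mut map = HashMap::new();
    map.insert("k".to_string(), vec![1, 2, 3]);
    assert_eq!(get(&map, "k"), Some(vec![1, 2, 3]));
    assert_eq!(get(&map, "missing"), None);
    assert_eq!(weight(), 1);
}
```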
1 change: 1 addition & 0 deletions rust/distance/src/distance_neon.rs
@@ -199,6 +199,7 @@ copyright Qdrant, licensed under the Apache 2.0 license.
See the License for the specific language governing permissions and
limitations under the License.
*/
+#![allow(clippy::missing_safety_doc)]

#[cfg(target_feature = "neon")]
use std::arch::aarch64::*;
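
`clippy::missing_safety_doc` fires on public `unsafe fn`s whose docs lack a `# Safety` section. This file opts out wholesale with the inner attribute, a reasonable call for vendored SIMD code. The per-function alternative looks like this hypothetical sketch (names and logic are illustrative, not from `distance_neon.rs`):

```rust
/// Sums `len` floats starting at `ptr`.
///
/// # Safety
///
/// `ptr` must be non-null, properly aligned, and point to at least `len`
/// initialized `f32` values. Writing this section is what satisfies
/// `clippy::missing_safety_doc` without an `allow`.
pub unsafe fn sum_unchecked(ptr: *const f32, len: usize) -> f32 {
    let mut total = 0.0;
    for i in 0..len {
        total += *ptr.add(i); // caller guarantees in-bounds reads
    }
    total
}

fn main() {
    let data = [1.0f32, 2.0, 3.0];
    // Safe here: the pointer and length come from a live array.
    let total = unsafe { sum_unchecked(data.as_ptr(), data.len()) };
    assert_eq!(total, 6.0);
}
```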
6 changes: 3 additions & 3 deletions rust/distance/src/types.rs
@@ -359,9 +359,9 @@ impl TryFrom<&str> for DistanceFunction {
    }
}

-impl Into<String> for DistanceFunction {
-    fn into(self) -> String {
-        match self {
+impl From<DistanceFunction> for String {
+    fn from(df: DistanceFunction) -> Self {
+        match df {
            DistanceFunction::Euclidean => "l2".to_string(),
            DistanceFunction::Cosine => "cosine".to_string(),
            DistanceFunction::InnerProduct => "ip".to_string(),
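
This is `clippy::from_over_into`: implement `From` rather than `Into`, because the standard library's blanket `impl<T, U> Into<U> for T where U: From<T>` then provides `Into` for free, while the reverse is not true. A small self-contained sketch with a stand-in enum:

```rust
// Stand-in enum; the real code does this for DistanceFunction.
enum Metric {
    Euclidean,
    Cosine,
}

impl From<Metric> for String {
    fn from(m: Metric) -> Self {
        match m {
            Metric::Euclidean => "l2".to_string(),
            Metric::Cosine => "cosine".to_string(),
        }
    }
}

fn main() {
    // Both directions of sugar work from the single From impl:
    assert_eq!(String::from(Metric::Euclidean), "l2");
    let s: String = Metric::Cosine.into(); // via the blanket `Into`
    assert_eq!(s, "cosine");
}
```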
3 changes: 2 additions & 1 deletion rust/index/src/fulltext/tokenizer.rs
@@ -46,8 +46,9 @@ impl ChromaTokenizer for TantivyChromaTokenizer {
    }
}

+#[cfg(test)]
mod test {
-    use super::*;
+    use super::{ChromaTokenizer, NgramTokenizer, TantivyChromaTokenizer};

    #[test]
    fn test_get_tokens() {
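
Two small fixes here: the test module gains `#[cfg(test)]` so it is compiled only under `cargo test`, and the glob `use super::*;` becomes an explicit import list, which keeps unused-import warnings meaningful. A hypothetical sketch of the pattern:

```rust
pub fn tokenize(text: &str) -> Vec<&str> {
    text.split_whitespace().collect()
}

#[cfg(test)] // excluded from non-test builds entirely
mod test {
    use super::tokenize; // explicit import instead of `use super::*;`

    #[test]
    fn test_tokenize() {
        assert_eq!(tokenize("hello world"), vec!["hello", "world"]);
    }
}
```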
95 changes: 39 additions & 56 deletions rust/index/src/fulltext/types.rs
@@ -4,7 +4,7 @@ use crate::utils::{merge_sorted_vecs_conjunction, merge_sorted_vecs_disjunction};
use chroma_blockstore::{BlockfileFlusher, BlockfileReader, BlockfileWriter};
use chroma_error::{ChromaError, ErrorCodes};
use chroma_types::{BooleanOperator, WhereDocument, WhereDocumentOperator};
-use parking_lot::Mutex;
+use std::collections::hash_map::Entry;
use std::collections::{HashMap, HashSet};
use std::sync::Arc;
use tantivy::tokenizer::Token;
@@ -29,7 +29,7 @@ impl ChromaError for FullTextIndexError {
    }
}

-#[derive(Debug)]
+#[derive(Debug, Default)]
pub struct UncommittedPostings {
    // token -> {doc -> [start positions]}
    positional_postings: HashMap<String, HashMap<u32, Vec<u32>>>,
@@ -39,10 +39,7 @@ pub struct UncommittedPostings {

impl UncommittedPostings {
    pub fn new() -> Self {
-        Self {
-            positional_postings: HashMap::new(),
-            deleted_token_doc_pairs: HashSet::new(),
-        }
+        Self::default()
    }
}
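
Deriving `Default` and delegating `new` to it removes the hand-rolled field initializers (empty `HashMap`/`HashSet` are exactly their `Default` values) and keeps `clippy::new_without_default` quiet, since a `Default` impl now exists alongside `new`. A condensed sketch of the same shape:

```rust
use std::collections::{HashMap, HashSet};

#[derive(Debug, Default)]
struct Postings {
    positional: HashMap<String, HashMap<u32, Vec<u32>>>,
    deleted: HashSet<(String, i32)>,
}

impl Postings {
    fn new() -> Self {
        // Same result as listing each field with HashMap::new()/HashSet::new().
        Self::default()
    }
}

fn main() {
    let p = Postings::new();
    assert!(p.positional.is_empty() && p.deleted.is_empty());
}
```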

@@ -101,13 +98,8 @@ impl<'me> FullTextIndexWriter<'me> {
                // Readers are uninitialized until the first compaction finishes
                // so there is a case when this is none hence not an error.
                None => 0,
-               Some(reader) => {
-                   match reader.get_frequencies_for_token(token.text.as_str()).await {
-                       Ok(frequency) => frequency,
-                       // New token so start with frequency of 0.
-                       Err(_) => 0,
-                   }
-               }
+               Some(reader) => (reader.get_frequencies_for_token(token.text.as_str()).await)
+                   .unwrap_or_default(),
            };
            uncommitted_frequencies
                .insert(token.text.clone(), (frequency as i32, frequency as i32));
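
The `match` on the reader result collapses to `Result::unwrap_or_default` because the fallback values used here (`0` for a count, `vec![]` in the next hunk) are exactly the types' `Default` values; errors are swallowed either way, so behavior is unchanged. A standalone sketch:

```rust
// Stand-in for the blockfile lookup; the error type is illustrative.
fn lookup(found: bool) -> Result<u64, String> {
    if found { Ok(7) } else { Err("unknown token".to_string()) }
}

fn main() {
    // Before: match lookup(..) { Ok(f) => f, Err(_) => 0 }
    assert_eq!(lookup(true).unwrap_or_default(), 7);
    assert_eq!(lookup(false).unwrap_or_default(), 0); // u64::default()

    let missing: Vec<u32> = Err::<Vec<u32>, ()>(()).unwrap_or_default();
    assert!(missing.is_empty()); // Vec::default() is the empty vec
}
```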
@@ -127,11 +119,9 @@ impl<'me> FullTextIndexWriter<'me> {
                // Readers are uninitialized until the first compaction finishes
                // so there is a case when this is none hence not an error.
                None => vec![],
-               Some(reader) => match reader.get_all_results_for_token(&token.text).await {
-                   Ok(results) => results,
-                   // New token so start with empty postings list.
-                   Err(_) => vec![],
-               },
+               Some(reader) => {
+                   (reader.get_all_results_for_token(&token.text).await).unwrap_or_default()
+               }
            };
            let mut doc_and_positions = HashMap::new();
            for result in results {
@@ -165,7 +155,7 @@ impl<'me> FullTextIndexWriter<'me> {
            // will have created it if this token is new to the system.
            uncommitted_frequencies
                .entry(token.text.clone())
-               .and_modify(|e| (*e).0 += 1);
+               .and_modify(|e| e.0 += 1);

            // For a new token, the uncommitted list will not contain any entry so insert
            // an empty builder in that case.
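
The closure parameter in `and_modify` is already a mutable reference, and field access auto-derefs through references, so the explicit `(*e).0` spelling is redundant; the same cleanup appears again below as `frequency.0 -= 1`, and the related `(*doc_id) as u32` hunk drops a cast on a value that is already `u32`. A sketch:

```rust
use std::collections::HashMap;

fn bump(counts: &mut HashMap<String, (i32, i32)>, token: &str) {
    counts
        .entry(token.to_string())
        .and_modify(|e| e.0 += 1) // e: &mut (i32, i32); `(*e).0` is unnecessary
        .or_insert((1, 1));
}

fn main() {
    let mut counts = HashMap::new();
    bump(&mut counts, "hello");
    bump(&mut counts, "hello");
    assert_eq!(counts["hello"], (2, 1));
}
```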
@@ -179,15 +169,13 @@ impl<'me> FullTextIndexWriter<'me> {
            // check full string match.
            //
            // See https://docs.rs/tantivy/latest/tantivy/tokenizer/struct.Token.html
-           if !builder.contains_key(&offset_id) {
-               // Casting to i32 is safe since we limit the size of the document.
-               builder.insert(offset_id, vec![token.offset_from as u32]);
-           } else {
-               // unwrap() is safe since we already verified that the key exists.
-               builder
-                   .get_mut(&offset_id)
-                   .unwrap()
-                   .push(token.offset_from as u32);
+           match builder.entry(offset_id) {
+               Entry::Vacant(v) => {
+                   v.insert(vec![token.offset_from as u32]);
+               }
+               Entry::Occupied(mut o) => {
+                   o.get_mut().push(token.offset_from as u32);
+               }
            }
        }
        Ok(())
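
The `contains_key`-then-`insert`/`get_mut` dance hashes the key twice and needs an `unwrap`; the `Entry` API does one lookup and makes both branches explicit (hence the new `hash_map::Entry` import at the top of the file). A minimal sketch of the same rewrite:

```rust
use std::collections::hash_map::Entry;
use std::collections::HashMap;

fn push_position(postings: &mut HashMap<u32, Vec<u32>>, doc_id: u32, pos: u32) {
    // One hash lookup covers both the "new key" and "existing key" cases.
    match postings.entry(doc_id) {
        Entry::Vacant(v) => {
            v.insert(vec![pos]);
        }
        Entry::Occupied(mut o) => {
            o.get_mut().push(pos);
        }
    }
    // When both arms reduce to "append", this shrinks further:
    // postings.entry(doc_id).or_default().push(pos);
}

fn main() {
    let mut postings = HashMap::new();
    push_position(&mut postings, 1, 0);
    push_position(&mut postings, 1, 42);
    assert_eq!(postings[&1], vec![0, 42]);
}
```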
@@ -209,36 +197,33 @@ impl<'me> FullTextIndexWriter<'me> {
        for token in tokens {
            match uncommitted_frequencies.get_mut(token.text.as_str()) {
                Some(frequency) => {
-                   (*frequency).0 -= 1;
+                   frequency.0 -= 1;
                }
                None => {
                    // Invariant violation -- we just populated this.
                    tracing::error!("Error decrementing frequency for token: {:?}", token.text);
                    return Err(FullTextIndexError::InvariantViolation);
                }
            }
-           match uncommitted_postings
+           if let Some(builder) = uncommitted_postings
                .positional_postings
                .get_mut(token.text.as_str())
            {
-               Some(builder) => {
-                   builder.remove(&offset_id);
-                   if builder.is_empty() {
-                       uncommitted_postings
-                           .positional_postings
-                           .remove(token.text.as_str());
-                   }
-                   // Track all the deleted (token, doc) pairs. This is needed
-                   // to remove the old postings list for this pair from storage.
-                   uncommitted_postings
-                       .deleted_token_doc_pairs
-                       .insert((token.text.clone(), offset_id as i32));
-               }
-               // This is fine since we delete all the positions of a token
-               // of a document at once so the next time we encounter this token
-               // (at a different position) the map could be empty.
-               None => {}
+               builder.remove(&offset_id);
+               if builder.is_empty() {
+                   uncommitted_postings
+                       .positional_postings
+                       .remove(token.text.as_str());
+               }
+               // Track all the deleted (token, doc) pairs. This is needed
+               // to remove the old postings list for this pair from storage.
+               uncommitted_postings
+                   .deleted_token_doc_pairs
+                   .insert((token.text.clone(), offset_id as i32));
            }
+           // This is fine since we delete all the positions of a token
+           // of a document at once so the next time we encounter this token
+           // (at a different position) the map could be empty.
        }
        Ok(())
    }
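
A `match` whose `None` arm is empty is what `clippy::single_match` flags; `if let Some(..)` drops a level of nesting and the dead arm. A sketch of the same shape, including the borrow pattern (under non-lexical lifetimes the `get_mut` borrow ends before the outer map is touched again):

```rust
use std::collections::HashMap;

fn delete_doc(postings: &mut HashMap<String, Vec<u32>>, token: &str, doc_id: u32) {
    // Before: match postings.get_mut(token) { Some(docs) => { ... } None => {} }
    if let Some(docs) = postings.get_mut(token) {
        docs.retain(|&d| d != doc_id);
        if docs.is_empty() {
            // Fine under NLL: `docs` is not used after this point.
            postings.remove(token);
        }
    }
}

fn main() {
    let mut postings = HashMap::from([("hello".to_string(), vec![1, 2])]);
    delete_doc(&mut postings, "hello", 1);
    delete_doc(&mut postings, "hello", 2);
    assert!(!postings.contains_key("hello"));
}
```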
@@ -276,7 +261,7 @@ impl<'me> FullTextIndexWriter<'me> {
        for (doc_id, positions) in value.drain() {
            // Don't add if postings list is empty for this (token, doc) combo.
            // This can happen with deletes.
-           if positions.len() > 0 {
+           if !positions.is_empty() {
                match self
                    .posting_lists_blockfile_writer
                    .set(key.as_str(), doc_id, positions)
@@ -415,7 +400,7 @@ impl<'me> FullTextIndexReader<'me> {
            .frequencies_blockfile_reader
            .get_by_prefix(token.text.as_str())
            .await?;
-       if res.len() == 0 {
+       if res.is_empty() {
            return Ok(vec![]);
        }
        if res.len() > 1 {
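
This and the similar `len() == 0` hunks above and below are `clippy::len_zero` fixes: `is_empty()` states the intent directly, so the count comparisons are rewritten. For example:

```rust
fn main() {
    let res: Vec<(String, u32)> = Vec::new();
    assert!(res.is_empty()); // instead of res.len() == 0
    let positions = vec![1u32, 5, 9];
    assert!(!positions.is_empty()); // instead of positions.len() > 0
}
```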
@@ -429,7 +414,7 @@ impl<'me> FullTextIndexReader<'me> {
            token_frequencies.push((token.text.to_string(), res.1));
        }

-       if token_frequencies.len() == 0 {
+       if token_frequencies.is_empty() {
            return Ok(vec![]);
        }
        // TODO sort by frequency. This adds an additional layer of complexity
@@ -488,7 +473,7 @@ impl<'me> FullTextIndexReader<'me> {
                }
            }
            if !new_positions.is_empty() {
-               new_candidates.insert((*doc_id) as u32, new_positions);
+               new_candidates.insert(*doc_id, new_positions);
            }
        }
        if new_candidates.is_empty() {
@@ -501,7 +486,7 @@ impl<'me> FullTextIndexReader<'me> {
        for (doc_id, _) in candidates.drain() {
            results.push(doc_id as i32);
        }
-       return Ok(results);
+       Ok(results)
    }

    // We use this to implement deletes in the Writer. A delete() is implemented
@@ -529,7 +514,7 @@ impl<'me> FullTextIndexReader<'me> {
            .frequencies_blockfile_reader
            .get_by_prefix(token)
            .await?;
-       if res.len() == 0 {
+       if res.is_empty() {
            return Ok(0);
        }
        if res.len() > 1 {
@@ -565,10 +550,8 @@ pub fn process_where_document_clause_with_callback<
            let mut first_iteration = true;
            for child in where_document_children.children.iter() {
                let child_results: Vec<usize> =
-                   match process_where_document_clause_with_callback(&child, callback) {
-                       Ok(result) => result,
-                       Err(_) => vec![],
-                   };
+                   process_where_document_clause_with_callback(child, callback)
+                       .unwrap_or_default();
                if first_iteration {
                    results = child_results;
                    first_iteration = false;
@@ -586,7 +569,7 @@ pub fn process_where_document_clause_with_callback<
        }
    }
    results.sort();
-   return Ok(results);
+   Ok(results)
}

#[cfg(test)]
