[CHORE] clean up cache, distance, index, storage clippy warnings (#2860)
## Description of changes

*Summarize the changes made by this PR.*
 - clean up cache, distance, index, storage clippy warnings

## Test plan
*How are these changes tested?*
- Ran `cargo clippy` locally

- [ ] Tests pass locally with `pytest` for python, `yarn test` for js,
`cargo test` for rust

## Documentation Changes
*Are all docstrings for user-facing APIs updated if required? Do we need
to make documentation changes in the [docs
repository](https://github.com/chroma-core/docs)?*
rescrv authored Sep 26, 2024
1 parent bed6a22 commit 38ba3be
Showing 14 changed files with 323 additions and 469 deletions.
7 changes: 2 additions & 5 deletions rust/cache/src/cache.rs
@@ -125,10 +125,7 @@ where
    pub fn get(&self, key: &K) -> Option<V> {
        let read_guard = self.cache.read();
        let value = read_guard.get(key);
-       match value {
-           Some(v) => Some(v.clone()),
-           None => None,
-       }
+       value.cloned()
    }

    pub fn pop(&self) -> Option<(K, V)> {
@@ -261,6 +258,6 @@ impl ChromaError for CacheConfigError {
pub trait Cacheable {
    // By default the weight of a type that is cacheable is 1.
    fn weight(&self) -> usize {
-       return 1;
+       1
    }
}
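
Both hunks in this file are mechanical clippy fixes: a manual `match` over an `Option` collapses to `Option::cloned` (the `clippy::manual_map`/`clippy::map_clone` family), and the trailing `return 1;` trips `clippy::needless_return`, which also recurs in the index hunks below. A minimal standalone sketch, with illustrative names not taken from the Chroma codebase:

```rust
use std::collections::HashMap;

// Illustrative stand-in for the cache getter above.
fn get(map: &HashMap<String, Vec<u8>>, key: &str) -> Option<Vec<u8>> {
    let value = map.get(key); // Option<&Vec<u8>>
    // Before: match value { Some(v) => Some(v.clone()), None => None }
    value.cloned() // Option::cloned does the match-and-clone in one call
}

fn weight() -> usize {
    // Before: `return 1;` -- clippy prefers the tail-expression form.
    1
}

fn main() {
    let mut map = HashMap::new();
    map.insert("k".to_string(), vec![1, 2, 3]);
    assert_eq!(get(&map, "k"), Some(vec![1, 2, 3]));
    assert_eq!(get(&map, "missing"), None);
    assert_eq!(weight(), 1);
}
```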
1 change: 1 addition & 0 deletions rust/distance/src/distance_neon.rs
@@ -199,6 +199,7 @@ copyright Qdrant, licensed under the Apache 2.0 license.
See the License for the specific language governing permissions and
limitations under the License.
*/
+#![allow(clippy::missing_safety_doc)]

#[cfg(target_feature = "neon")]
use std::arch::aarch64::*;
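
`clippy::missing_safety_doc` fires on public `unsafe fn`s whose docs lack a `# Safety` section. This file opts out wholesale with the inner attribute, a reasonable call for vendored SIMD code. The per-function alternative looks like this hypothetical sketch (names and logic are illustrative, not from `distance_neon.rs`):

```rust
/// Sums `len` floats starting at `ptr`.
///
/// # Safety
///
/// `ptr` must be non-null, properly aligned, and point to at least `len`
/// initialized `f32` values. Writing this section is what satisfies
/// `clippy::missing_safety_doc` without an `allow`.
pub unsafe fn sum_unchecked(ptr: *const f32, len: usize) -> f32 {
    let mut total = 0.0;
    for i in 0..len {
        total += *ptr.add(i); // caller guarantees in-bounds reads
    }
    total
}

fn main() {
    let data = [1.0f32, 2.0, 3.0];
    // Safe here: the pointer and length come from a live array.
    let total = unsafe { sum_unchecked(data.as_ptr(), data.len()) };
    assert_eq!(total, 6.0);
}
```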
6 changes: 3 additions & 3 deletions rust/distance/src/types.rs
@@ -359,9 +359,9 @@ impl TryFrom<&str> for DistanceFunction {
    }
}

-impl Into<String> for DistanceFunction {
-    fn into(self) -> String {
-        match self {
+impl From<DistanceFunction> for String {
+    fn from(df: DistanceFunction) -> Self {
+        match df {
            DistanceFunction::Euclidean => "l2".to_string(),
            DistanceFunction::Cosine => "cosine".to_string(),
            DistanceFunction::InnerProduct => "ip".to_string(),
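
This is `clippy::from_over_into`: implement `From` rather than `Into`, because the standard library's blanket `impl<T, U> Into<U> for T where U: From<T>` then provides `Into` for free, while the reverse is not true. A small self-contained sketch with a stand-in enum:

```rust
// Stand-in enum; the real code does this for DistanceFunction.
enum Metric {
    Euclidean,
    Cosine,
}

impl From<Metric> for String {
    fn from(m: Metric) -> Self {
        match m {
            Metric::Euclidean => "l2".to_string(),
            Metric::Cosine => "cosine".to_string(),
        }
    }
}

fn main() {
    // Both directions of sugar work from the single From impl:
    assert_eq!(String::from(Metric::Euclidean), "l2");
    let s: String = Metric::Cosine.into(); // via the blanket `Into`
    assert_eq!(s, "cosine");
}
```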
3 changes: 2 additions & 1 deletion rust/index/src/fulltext/tokenizer.rs
@@ -46,8 +46,9 @@ impl ChromaTokenizer for TantivyChromaTokenizer {
    }
}

+#[cfg(test)]
mod test {
-    use super::*;
+    use super::{ChromaTokenizer, NgramTokenizer, TantivyChromaTokenizer};

    #[test]
    fn test_get_tokens() {
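
Two small fixes here: the test module gains `#[cfg(test)]` so it is compiled only under `cargo test`, and the glob `use super::*;` becomes an explicit import list, which keeps unused-import warnings meaningful. A hypothetical sketch of the pattern:

```rust
pub fn tokenize(text: &str) -> Vec<&str> {
    text.split_whitespace().collect()
}

#[cfg(test)] // excluded from non-test builds entirely
mod test {
    use super::tokenize; // explicit import instead of `use super::*;`

    #[test]
    fn test_tokenize() {
        assert_eq!(tokenize("hello world"), vec!["hello", "world"]);
    }
}
```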
95 changes: 39 additions & 56 deletions rust/index/src/fulltext/types.rs
@@ -4,7 +4,7 @@ use crate::utils::{merge_sorted_vecs_conjunction, merge_sorted_vecs_disjunction};
use chroma_blockstore::{BlockfileFlusher, BlockfileReader, BlockfileWriter};
use chroma_error::{ChromaError, ErrorCodes};
use chroma_types::{BooleanOperator, WhereDocument, WhereDocumentOperator};
-use parking_lot::Mutex;
+use std::collections::hash_map::Entry;
use std::collections::{HashMap, HashSet};
use std::sync::Arc;
use tantivy::tokenizer::Token;
@@ -29,7 +29,7 @@ impl ChromaError for FullTextIndexError {
    }
}

-#[derive(Debug)]
+#[derive(Debug, Default)]
pub struct UncommittedPostings {
    // token -> {doc -> [start positions]}
    positional_postings: HashMap<String, HashMap<u32, Vec<u32>>>,
@@ -39,10 +39,7 @@ pub struct UncommittedPostings {

impl UncommittedPostings {
    pub fn new() -> Self {
-        Self {
-            positional_postings: HashMap::new(),
-            deleted_token_doc_pairs: HashSet::new(),
-        }
+        Self::default()
    }
}
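
Deriving `Default` and delegating `new` to it removes the hand-rolled field initializers (empty `HashMap`/`HashSet` are exactly their `Default` values) and keeps `clippy::new_without_default` quiet, since a `Default` impl now exists alongside `new`. A condensed sketch of the same shape:

```rust
use std::collections::{HashMap, HashSet};

#[derive(Debug, Default)]
struct Postings {
    positional: HashMap<String, HashMap<u32, Vec<u32>>>,
    deleted: HashSet<(String, i32)>,
}

impl Postings {
    fn new() -> Self {
        // Same result as listing each field with HashMap::new()/HashSet::new().
        Self::default()
    }
}

fn main() {
    let p = Postings::new();
    assert!(p.positional.is_empty() && p.deleted.is_empty());
}
```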

@@ -101,13 +98,8 @@ impl<'me> FullTextIndexWriter<'me> {
                // Readers are uninitialized until the first compaction finishes
                // so there is a case when this is none hence not an error.
                None => 0,
-               Some(reader) => {
-                   match reader.get_frequencies_for_token(token.text.as_str()).await {
-                       Ok(frequency) => frequency,
-                       // New token so start with frequency of 0.
-                       Err(_) => 0,
-                   }
-               }
+               Some(reader) => (reader.get_frequencies_for_token(token.text.as_str()).await)
+                   .unwrap_or_default(),
            };
            uncommitted_frequencies
                .insert(token.text.clone(), (frequency as i32, frequency as i32));
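
The `match` on the reader result collapses to `Result::unwrap_or_default` because the fallback values used here (`0` for a count, `vec![]` in the next hunk) are exactly the types' `Default` values; errors are swallowed either way, so behavior is unchanged. A standalone sketch:

```rust
// Stand-in for the blockfile lookup; the error type is illustrative.
fn lookup(found: bool) -> Result<u64, String> {
    if found { Ok(7) } else { Err("unknown token".to_string()) }
}

fn main() {
    // Before: match lookup(..) { Ok(f) => f, Err(_) => 0 }
    assert_eq!(lookup(true).unwrap_or_default(), 7);
    assert_eq!(lookup(false).unwrap_or_default(), 0); // u64::default()

    let missing: Vec<u32> = Err::<Vec<u32>, ()>(()).unwrap_or_default();
    assert!(missing.is_empty()); // Vec::default() is the empty vec
}
```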
@@ -127,11 +119,9 @@ impl<'me> FullTextIndexWriter<'me> {
                // Readers are uninitialized until the first compaction finishes
                // so there is a case when this is none hence not an error.
                None => vec![],
-               Some(reader) => match reader.get_all_results_for_token(&token.text).await {
-                   Ok(results) => results,
-                   // New token so start with empty postings list.
-                   Err(_) => vec![],
-               },
+               Some(reader) => {
+                   (reader.get_all_results_for_token(&token.text).await).unwrap_or_default()
+               }
            };
            let mut doc_and_positions = HashMap::new();
            for result in results {
@@ -165,7 +155,7 @@ impl<'me> FullTextIndexWriter<'me> {
            // will have created it if this token is new to the system.
            uncommitted_frequencies
                .entry(token.text.clone())
-               .and_modify(|e| (*e).0 += 1);
+               .and_modify(|e| e.0 += 1);

            // For a new token, the uncommitted list will not contain any entry so insert
            // an empty builder in that case.
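
The closure parameter in `and_modify` is already a mutable reference, and field access auto-derefs through references, so the explicit `(*e).0` spelling is redundant; the same cleanup appears again below as `frequency.0 -= 1`, and the related `(*doc_id) as u32` hunk drops a cast on a value that is already `u32`. A sketch:

```rust
use std::collections::HashMap;

fn bump(counts: &mut HashMap<String, (i32, i32)>, token: &str) {
    counts
        .entry(token.to_string())
        .and_modify(|e| e.0 += 1) // e: &mut (i32, i32); `(*e).0` is unnecessary
        .or_insert((1, 1));
}

fn main() {
    let mut counts = HashMap::new();
    bump(&mut counts, "hello");
    bump(&mut counts, "hello");
    assert_eq!(counts["hello"], (2, 1));
}
```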
@@ -179,15 +169,13 @@ impl<'me> FullTextIndexWriter<'me> {
            // check full string match.
            //
            // See https://docs.rs/tantivy/latest/tantivy/tokenizer/struct.Token.html
-           if !builder.contains_key(&offset_id) {
-               // Casting to i32 is safe since we limit the size of the document.
-               builder.insert(offset_id, vec![token.offset_from as u32]);
-           } else {
-               // unwrap() is safe since we already verified that the key exists.
-               builder
-                   .get_mut(&offset_id)
-                   .unwrap()
-                   .push(token.offset_from as u32);
+           match builder.entry(offset_id) {
+               Entry::Vacant(v) => {
+                   v.insert(vec![token.offset_from as u32]);
+               }
+               Entry::Occupied(mut o) => {
+                   o.get_mut().push(token.offset_from as u32);
+               }
            }
        }
        Ok(())
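
The `contains_key`-then-`insert`/`get_mut` dance hashes the key twice and needs an `unwrap`; the `Entry` API does one lookup and makes both branches explicit (hence the new `hash_map::Entry` import at the top of the file). A minimal sketch of the same rewrite:

```rust
use std::collections::hash_map::Entry;
use std::collections::HashMap;

fn push_position(postings: &mut HashMap<u32, Vec<u32>>, doc_id: u32, pos: u32) {
    // One hash lookup covers both the "new key" and "existing key" cases.
    match postings.entry(doc_id) {
        Entry::Vacant(v) => {
            v.insert(vec![pos]);
        }
        Entry::Occupied(mut o) => {
            o.get_mut().push(pos);
        }
    }
    // When both arms reduce to "append", this shrinks further:
    // postings.entry(doc_id).or_default().push(pos);
}

fn main() {
    let mut postings = HashMap::new();
    push_position(&mut postings, 1, 0);
    push_position(&mut postings, 1, 42);
    assert_eq!(postings[&1], vec![0, 42]);
}
```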
@@ -209,36 +197,33 @@ impl<'me> FullTextIndexWriter<'me> {
        for token in tokens {
            match uncommitted_frequencies.get_mut(token.text.as_str()) {
                Some(frequency) => {
-                   (*frequency).0 -= 1;
+                   frequency.0 -= 1;
                }
                None => {
                    // Invariant violation -- we just populated this.
                    tracing::error!("Error decrementing frequency for token: {:?}", token.text);
                    return Err(FullTextIndexError::InvariantViolation);
                }
            }
-           match uncommitted_postings
+           if let Some(builder) = uncommitted_postings
                .positional_postings
                .get_mut(token.text.as_str())
            {
-               Some(builder) => {
-                   builder.remove(&offset_id);
-                   if builder.is_empty() {
-                       uncommitted_postings
-                           .positional_postings
-                           .remove(token.text.as_str());
-                   }
-                   // Track all the deleted (token, doc) pairs. This is needed
-                   // to remove the old postings list for this pair from storage.
-                   uncommitted_postings
-                       .deleted_token_doc_pairs
-                       .insert((token.text.clone(), offset_id as i32));
-               }
-               // This is fine since we delete all the positions of a token
-               // of a document at once so the next time we encounter this token
-               // (at a different position) the map could be empty.
-               None => {}
+               builder.remove(&offset_id);
+               if builder.is_empty() {
+                   uncommitted_postings
+                       .positional_postings
+                       .remove(token.text.as_str());
+               }
+               // Track all the deleted (token, doc) pairs. This is needed
+               // to remove the old postings list for this pair from storage.
+               uncommitted_postings
+                   .deleted_token_doc_pairs
+                   .insert((token.text.clone(), offset_id as i32));
            }
+           // This is fine since we delete all the positions of a token
+           // of a document at once so the next time we encounter this token
+           // (at a different position) the map could be empty.
        }
        Ok(())
    }
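
A `match` whose `None` arm is empty is what `clippy::single_match` flags; `if let Some(..)` drops a level of nesting and the dead arm. A sketch of the same shape, including the borrow pattern (under non-lexical lifetimes the `get_mut` borrow ends before the outer map is touched again):

```rust
use std::collections::HashMap;

fn delete_doc(postings: &mut HashMap<String, Vec<u32>>, token: &str, doc_id: u32) {
    // Before: match postings.get_mut(token) { Some(docs) => { ... } None => {} }
    if let Some(docs) = postings.get_mut(token) {
        docs.retain(|&d| d != doc_id);
        if docs.is_empty() {
            // Fine under NLL: `docs` is not used after this point.
            postings.remove(token);
        }
    }
}

fn main() {
    let mut postings = HashMap::from([("hello".to_string(), vec![1, 2])]);
    delete_doc(&mut postings, "hello", 1);
    delete_doc(&mut postings, "hello", 2);
    assert!(!postings.contains_key("hello"));
}
```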
@@ -276,7 +261,7 @@ impl<'me> FullTextIndexWriter<'me> {
        for (doc_id, positions) in value.drain() {
            // Don't add if postings list is empty for this (token, doc) combo.
            // This can happen with deletes.
-           if positions.len() > 0 {
+           if !positions.is_empty() {
                match self
                    .posting_lists_blockfile_writer
                    .set(key.as_str(), doc_id, positions)
@@ -415,7 +400,7 @@ impl<'me> FullTextIndexReader<'me> {
            .frequencies_blockfile_reader
            .get_by_prefix(token.text.as_str())
            .await?;
-       if res.len() == 0 {
+       if res.is_empty() {
            return Ok(vec![]);
        }
        if res.len() > 1 {
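
This and the similar `len() == 0` hunks above and below are `clippy::len_zero` fixes: `is_empty()` states the intent directly, so the count comparisons are rewritten. For example:

```rust
fn main() {
    let res: Vec<(String, u32)> = Vec::new();
    assert!(res.is_empty()); // instead of res.len() == 0
    let positions = vec![1u32, 5, 9];
    assert!(!positions.is_empty()); // instead of positions.len() > 0
}
```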
@@ -429,7 +414,7 @@ impl<'me> FullTextIndexReader<'me> {
            token_frequencies.push((token.text.to_string(), res.1));
        }

-       if token_frequencies.len() == 0 {
+       if token_frequencies.is_empty() {
            return Ok(vec![]);
        }
        // TODO sort by frequency. This adds an additional layer of complexity
@@ -488,7 +473,7 @@ impl<'me> FullTextIndexReader<'me> {
                }
            }
            if !new_positions.is_empty() {
-               new_candidates.insert((*doc_id) as u32, new_positions);
+               new_candidates.insert(*doc_id, new_positions);
            }
        }
        if new_candidates.is_empty() {
@@ -501,7 +486,7 @@ impl<'me> FullTextIndexReader<'me> {
        for (doc_id, _) in candidates.drain() {
            results.push(doc_id as i32);
        }
-       return Ok(results);
+       Ok(results)
    }

    // We use this to implement deletes in the Writer. A delete() is implemented
@@ -529,7 +514,7 @@ impl<'me> FullTextIndexReader<'me> {
            .frequencies_blockfile_reader
            .get_by_prefix(token)
            .await?;
-       if res.len() == 0 {
+       if res.is_empty() {
            return Ok(0);
        }
        if res.len() > 1 {
@@ -565,10 +550,8 @@ pub fn process_where_document_clause_with_callback<
            let mut first_iteration = true;
            for child in where_document_children.children.iter() {
                let child_results: Vec<usize> =
-                   match process_where_document_clause_with_callback(&child, callback) {
-                       Ok(result) => result,
-                       Err(_) => vec![],
-                   };
+                   process_where_document_clause_with_callback(child, callback)
+                       .unwrap_or_default();
                if first_iteration {
                    results = child_results;
                    first_iteration = false;
@@ -586,7 +569,7 @@ pub fn process_where_document_clause_with_callback<
        }
    }
    results.sort();
-   return Ok(results);
+   Ok(results)
}

#[cfg(test)]
