Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions rust/lance-index/benches/inverted.rs
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ fn bench_inverted(c: &mut Criterion) {
InvertedIndexBuilder::new(InvertedIndexParams::default().with_position(false));
black_box({
builder
.update(stream, indexing_store.as_ref())
.update(stream, indexing_store.as_ref(), None)
.await
.unwrap();
builder
Expand All @@ -119,7 +119,7 @@ fn bench_inverted(c: &mut Criterion) {
InvertedIndexBuilder::new(InvertedIndexParams::default().with_position(true));
black_box({
builder
.update(stream, indexing_with_positions_store.as_ref())
.update(stream, indexing_with_positions_store.as_ref(), None)
.await
.unwrap();
builder
Expand All @@ -135,7 +135,7 @@ fn bench_inverted(c: &mut Criterion) {
let mut builder =
InvertedIndexBuilder::new(InvertedIndexParams::default().with_position(true));
builder
.update(stream, phrase_search_store.as_ref())
.update(stream, phrase_search_store.as_ref(), None)
.await
.unwrap();
});
Expand Down
11 changes: 7 additions & 4 deletions rust/lance-index/src/scalar.rs
Original file line number Diff line number Diff line change
Expand Up @@ -836,10 +836,13 @@ pub struct UpdateCriteria {
/// - stable row IDs: use exact row-id membership instead
#[derive(Debug, Clone)]
pub enum OldIndexDataFilter {
/// Keep old rows whose row-address fragment is in this bitmap.
/// Keeps track of which fragments are still valid and which are no longer valid.
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this fix work for tables with stable row IDs?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Probably not, but I'm a little vague on how indices work with stable row IDs in general. Do we store the stable row ID in the index, or are we still storing row addresses?

Can we just filter search results against the list of valid row IDs? If a dataset has an ordered list of valid row IDs in memory, then it should just be one O(N) pass against the index search results?

I'll add this as a follow-up task.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

///
/// This is valid for address-style row IDs.
Fragments(RoaringBitmap),
Fragments {
to_keep: RoaringBitmap,
to_remove: RoaringBitmap,
},
/// Keep old rows whose row IDs are in this exact allow-list.
///
/// This is required for stable row IDs, where row IDs are opaque and
Expand All @@ -851,9 +854,9 @@ impl OldIndexDataFilter {
/// Build a boolean mask that keeps only row IDs selected by this filter.
pub fn filter_row_ids(&self, row_ids: &UInt64Array) -> BooleanArray {
match self {
Self::Fragments(valid_fragments) => row_ids
Self::Fragments { to_keep, .. } => row_ids
.iter()
.map(|id| id.map(|id| valid_fragments.contains((id >> 32) as u32)))
.map(|id| id.map(|id| to_keep.contains((id >> 32) as u32)))
.collect(),
Self::RowIds(valid_row_ids) => row_ids
.iter()
Expand Down
2 changes: 1 addition & 1 deletion rust/lance-index/src/scalar/inverted.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ impl InvertedIndexPlugin {
let mut inverted_index =
InvertedIndexBuilder::new_with_fragment_mask(params, fragment_mask)
.with_progress(progress);
inverted_index.update(data, index_store).await?;
inverted_index.update(data, index_store, None).await?;
Ok(CreatedIndex {
index_details: prost_types::Any::from_msg(&details).unwrap(),
index_version: current_fts_format_version().index_version(),
Expand Down
Loading
Loading