Skip to content

Commit

Permalink
Remove acceptDocs argument from DocIdSetIterator#intoBitSet and i…
Browse files Browse the repository at this point in the history
…ntroduce `Bits#applyMask`. (#14134)

Most `DocIdSetIterator` implementations can no longer implement `#intoBitSet`
efficiently as soon as there are live docs. So this commit removes this argument
and instead introduces a new `Bits#applyMask` API that helps clear bits in a
bit set when the corresponding doc ID is not live.

Relates #14133
  • Loading branch information
jpountz authored Jan 14, 2025
1 parent c20e09e commit 5851f44
Show file tree
Hide file tree
Showing 13 changed files with 299 additions and 137 deletions.
3 changes: 3 additions & 0 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ API Changes
* GITHUB#14069: Added DocIdSetIterator#intoBitSet API to let implementations
optimize loading doc IDs into a bit set. (Adrien Grand)

* GITHUB#14134: Added Bits#applyMask API to help apply live docs as a mask on a
bit set of matches. (Adrien Grand)

New Features
---------------------
(No changes)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,6 @@
import org.apache.lucene.store.ReadAdvice;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BitUtil;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.IOUtils;
Expand Down Expand Up @@ -878,16 +877,13 @@ public int advance(int target) throws IOException {
}

@Override
public void intoBitSet(Bits acceptDocs, int upTo, FixedBitSet bitSet, int offset)
throws IOException {
public void intoBitSet(int upTo, FixedBitSet bitSet, int offset) throws IOException {
if (doc >= upTo) {
return;
}

// Handle the current doc separately, it may be on the previous docBuffer.
if (acceptDocs == null || acceptDocs.get(doc)) {
bitSet.set(doc - offset);
}
bitSet.set(doc - offset);

for (; ; ) {
if (docBufferUpto == BLOCK_SIZE) {
Expand All @@ -898,7 +894,7 @@ public void intoBitSet(Bits acceptDocs, int upTo, FixedBitSet bitSet, int offset
int start = docBufferUpto;
int end = computeBufferEndBoundary(upTo);
if (end != 0) {
bufferIntoBitSet(start, end, acceptDocs, bitSet, offset);
bufferIntoBitSet(start, end, bitSet, offset);
doc = docBuffer[end - 1];
}
docBufferUpto = end;
Expand All @@ -922,15 +918,12 @@ private int computeBufferEndBoundary(int upTo) {
}
}

private void bufferIntoBitSet(
int start, int end, Bits acceptDocs, FixedBitSet bitSet, int offset) throws IOException {
// acceptDocs#get (if backed by FixedBitSet), bitSet#set and `doc - offset` get
// auto-vectorized
private void bufferIntoBitSet(int start, int end, FixedBitSet bitSet, int offset)
throws IOException {
// bitSet#set and `doc - offset` get auto-vectorized
for (int i = start; i < end; ++i) {
int doc = docBuffer[i];
if (acceptDocs == null || acceptDocs.get(doc)) {
bitSet.set(doc - offset);
}
bitSet.set(doc - offset);
}
}

Expand Down
61 changes: 29 additions & 32 deletions lucene/core/src/java/org/apache/lucene/search/BooleanScorer.java
Original file line number Diff line number Diff line change
Expand Up @@ -164,37 +164,6 @@ public long cost() {
return cost;
}

private void scoreDisiWrapperIntoBitSet(DisiWrapper w, Bits acceptDocs, int min, int max)
throws IOException {
boolean needsScores = BooleanScorer.this.needsScores;
FixedBitSet matching = BooleanScorer.this.matching;
Bucket[] buckets = BooleanScorer.this.buckets;

DocIdSetIterator it = w.iterator;
Scorable scorer = w.scorable;
int doc = w.doc;
if (doc < min) {
doc = it.advance(min);
}
if (buckets == null) {
it.intoBitSet(acceptDocs, max, matching, doc & ~MASK);
} else {
for (; doc < max; doc = it.nextDoc()) {
if (acceptDocs == null || acceptDocs.get(doc)) {
final int i = doc & MASK;
matching.set(i);
final Bucket bucket = buckets[i];
bucket.freq++;
if (needsScores) {
bucket.score += scorer.score();
}
}
}
}

w.doc = it.docID();
}

private void scoreWindowIntoBitSetAndReplay(
LeafCollector collector,
Bits acceptDocs,
Expand All @@ -207,7 +176,35 @@ private void scoreWindowIntoBitSetAndReplay(
for (int i = 0; i < numScorers; ++i) {
final DisiWrapper w = scorers[i];
assert w.doc < max;
scoreDisiWrapperIntoBitSet(w, acceptDocs, min, max);

DocIdSetIterator it = w.iterator;
int doc = w.doc;
if (doc < min) {
doc = it.advance(min);
}
if (buckets == null) {
// This doesn't apply live docs, so we'll need to apply them later
it.intoBitSet(max, matching, base);
} else {
for (; doc < max; doc = it.nextDoc()) {
if (acceptDocs == null || acceptDocs.get(doc)) {
final int d = doc & MASK;
matching.set(d);
final Bucket bucket = buckets[d];
bucket.freq++;
if (needsScores) {
bucket.score += w.scorable.score();
}
}
}
}

w.doc = it.docID();
}

if (buckets == null && acceptDocs != null) {
// In this case, live docs have not been applied yet.
acceptDocs.applyMask(matching, base);
}

docIdStreamView.base = base;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,11 @@ private void scoreWindowUsingBitSet(
assert clauseWindowMatches.scanIsEmpty();

int offset = lead.docID();
lead.intoBitSet(acceptDocs, max, windowMatches, offset);
lead.intoBitSet(max, windowMatches, offset);
if (acceptDocs != null) {
// Apply live docs.
acceptDocs.applyMask(windowMatches, offset);
}

int upTo = 0;
for (;
Expand All @@ -116,9 +120,7 @@ private void scoreWindowUsingBitSet(
if (other.docID() < offset) {
other.advance(offset);
}
// No need to apply acceptDocs on other clauses since we already applied live docs on the
// leading clause.
other.intoBitSet(null, max, clauseWindowMatches, offset);
other.intoBitSet(max, clauseWindowMatches, offset);
windowMatches.and(clauseWindowMatches);
clauseWindowMatches.clear();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
import java.util.Collection;
import java.util.Comparator;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet;

/**
Expand Down Expand Up @@ -150,17 +149,16 @@ public int advance(int target) throws IOException {
}

@Override
public void intoBitSet(Bits acceptDocs, int upTo, FixedBitSet bitSet, int offset)
throws IOException {
public void intoBitSet(int upTo, FixedBitSet bitSet, int offset) throws IOException {
while (leadTop.doc < upTo) {
leadTop.approximation.intoBitSet(acceptDocs, upTo, bitSet, offset);
leadTop.approximation.intoBitSet(upTo, bitSet, offset);
leadTop.doc = leadTop.approximation.docID();
leadTop = leadIterators.updateTop();
}

minOtherDoc = Integer.MAX_VALUE;
for (DisiWrapper w : otherIterators) {
w.approximation.intoBitSet(acceptDocs, upTo, bitSet, offset);
w.approximation.intoBitSet(upTo, bitSet, offset);
w.doc = w.approximation.docID();
minOtherDoc = Math.min(minOtherDoc, w.doc);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
package org.apache.lucene.search;

import java.io.IOException;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet;

/**
Expand Down Expand Up @@ -220,9 +219,7 @@ protected final int slowAdvance(int target) throws IOException {
*
* <pre class="prettyprint">
* for (int doc = docID(); doc &lt; upTo; doc = nextDoc()) {
* if (acceptDocs == null || acceptDocs.get(doc)) {
* bitSet.set(doc - offset);
* }
* bitSet.set(doc - offset);
* }
* </pre>
*
Expand All @@ -233,13 +230,10 @@ protected final int slowAdvance(int target) throws IOException {
*
* @lucene.internal
*/
public void intoBitSet(Bits acceptDocs, int upTo, FixedBitSet bitSet, int offset)
throws IOException {
public void intoBitSet(int upTo, FixedBitSet bitSet, int offset) throws IOException {
assert offset <= docID();
for (int doc = docID(); doc < upTo; doc = nextDoc()) {
if (acceptDocs == null || acceptDocs.get(doc)) {
bitSet.set(doc - offset);
}
bitSet.set(doc - offset);
}
}
}
17 changes: 5 additions & 12 deletions lucene/core/src/java/org/apache/lucene/util/BitSetIterator.java
Original file line number Diff line number Diff line change
Expand Up @@ -99,20 +99,13 @@ public long cost() {
}

@Override
public void intoBitSet(Bits acceptDocs, int upTo, FixedBitSet bitSet, int offset)
throws IOException {
// TODO: Can we also optimize the case when acceptDocs is not null?
if (acceptDocs == null
&& offset < bits.length()
&& bits instanceof FixedBitSet fixedBits
// no bits are set between `offset` and `doc`
&& fixedBits.nextSetBit(offset) == doc
// the whole `bitSet` is getting filled
&& (upTo - offset == bitSet.length())) {
bitSet.orRange(fixedBits, offset);
public void intoBitSet(int upTo, FixedBitSet bitSet, int offset) throws IOException {
upTo = Math.min(upTo, bits.length());
if (upTo > doc && bits instanceof FixedBitSet fixedBits) {
FixedBitSet.orRange(fixedBits, doc, bitSet, doc - offset, upTo - doc);
advance(upTo); // set the current doc
} else {
super.intoBitSet(acceptDocs, upTo, bitSet, offset);
super.intoBitSet(upTo, bitSet, offset);
}
}
}
28 changes: 28 additions & 0 deletions lucene/core/src/java/org/apache/lucene/util/Bits.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
*/
package org.apache.lucene.util;

import org.apache.lucene.search.DocIdSetIterator;

/**
* Interface for Bitset-like structures.
*
Expand All @@ -34,6 +36,32 @@ public interface Bits {
/** Returns the number of bits in this set */
int length();

/**
 * Uses this {@code Bits} instance as a mask over the given {@link FixedBitSet}: every bit {@code
 * i} that is set in {@code bitSet} gets cleared when {@code get(offset + i)} returns {@code
 * false}. Bits that are not set in {@code bitSet} are left untouched.
 *
 * <p>Overriding implementations must produce the same result as this reference loop:
 *
 * <pre class="prettyprint">
 * for (int i = bitSet.nextSetBit(0);
 *     i != DocIdSetIterator.NO_MORE_DOCS;
 *     i = i + 1 >= bitSet.length() ? DocIdSetIterator.NO_MORE_DOCS : bitSet.nextSetBit(i + 1)) {
 *   if (get(offset + i) == false) {
 *     bitSet.clear(i);
 *   }
 * }
 * </pre>
 *
 * @param bitSet the bit set to filter in place
 * @param offset the index in this {@code Bits} instance that bit 0 of {@code bitSet} maps to
 */
default void applyMask(FixedBitSet bitSet, int offset) {
  final int numBits = bitSet.length();
  int index = bitSet.nextSetBit(0);
  while (index != DocIdSetIterator.NO_MORE_DOCS) {
    if (get(offset + index) == false) {
      bitSet.clear(index);
    }
    // Never ask for the next set bit past the end of the bit set; stop iterating instead.
    final int next = index + 1;
    index = next < numBits ? bitSet.nextSetBit(next) : DocIdSetIterator.NO_MORE_DOCS;
  }
}

Bits[] EMPTY_ARRAY = new Bits[0];

/** Bits impl of the specified length with all bits set. */
Expand Down
Loading

0 comments on commit 5851f44

Please sign in to comment.