Skip to content

Commit

Permalink
Optimize BitSetIterator#intoBitSet. (#14083)
Browse files Browse the repository at this point in the history
Calling `DocIdSetIterator#intoBitSet` on a `BitSetIterator` copies set bits one
by one from one `BitSet` to another. We can do better.
  • Loading branch information
jpountz authored Dec 26, 2024
1 parent 0494c82 commit c0fc700
Show file tree
Hide file tree
Showing 3 changed files with 99 additions and 10 deletions.
19 changes: 19 additions & 0 deletions lucene/core/src/java/org/apache/lucene/util/BitSetIterator.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
*/
package org.apache.lucene.util;

import java.io.IOException;
import org.apache.lucene.search.DocIdSetIterator;

/**
Expand Down Expand Up @@ -96,4 +97,22 @@ public int advance(int target) {
public long cost() {
return cost;
}

/**
 * Loads the matching docs into {@code bitSet}, using a bulk word-level copy when possible.
 *
 * <p>The bulk path is taken only when there are no {@code acceptDocs}, the underlying bits are a
 * {@link FixedBitSet}, the first set bit at or after {@code offset} is the current doc, and the
 * destination covers exactly the {@code [offset, upTo)} window. In every other case this
 * delegates to the superclass implementation.
 */
@Override
public void intoBitSet(Bits acceptDocs, int upTo, FixedBitSet bitSet, int offset)
    throws IOException {
  // TODO: Can we also optimize the case when acceptDocs is not null?
  if (acceptDocs != null || offset >= bits.length() || !(bits instanceof FixedBitSet source)) {
    super.intoBitSet(acceptDocs, upTo, bitSet, offset);
    return;
  }

  // Bulk copying is only safe when:
  //  - no bits are set between `offset` and `doc`, and
  //  - the whole `bitSet` is getting filled.
  if (source.nextSetBit(offset) == doc && (upTo - offset == bitSet.length())) {
    bitSet.orRange(source, offset);
    advance(upTo); // set the current doc
    return;
  }

  super.intoBitSet(acceptDocs, upTo, bitSet, offset);
}
}
48 changes: 38 additions & 10 deletions lucene/core/src/java/org/apache/lucene/util/FixedBitSet.java
Original file line number Diff line number Diff line change
Expand Up @@ -338,11 +338,8 @@ public int prevSetBit(int index) {

@Override
public void or(DocIdSetIterator iter) throws IOException {
if (BitSetIterator.getFixedBitSetOrNull(iter) != null) {
checkUnpositioned(iter);
final FixedBitSet bits = BitSetIterator.getFixedBitSetOrNull(iter);
or(bits);
} else if (iter instanceof DocBaseBitSetIterator) {
if (iter instanceof DocBaseBitSetIterator) {
// TODO: implement DocBaseBitSetIterator#intoBitSet instead
checkUnpositioned(iter);
DocBaseBitSetIterator baseIter = (DocBaseBitSetIterator) iter;
or(baseIter.getDocBase() >> 6, baseIter.getBitSet());
Expand All @@ -353,11 +350,6 @@ public void or(DocIdSetIterator iter) throws IOException {
}
}

/**
 * this = this OR other.
 *
 * <p>Forwards the backing words of {@code other} and its word count to the array-based overload,
 * using a word offset of 0.
 */
public void or(FixedBitSet other) {
  final long[] otherWords = other.bits;
  final int otherWordCount = other.numWords;
  or(0, otherWords, otherWordCount);
}

/**
 * Forwards the backing words of {@code other} and its word count to the array-based overload,
 * together with the given word offset.
 */
private void or(final int otherOffsetWords, FixedBitSet other) {
  final long[] otherWords = other.bits;
  final int otherWordCount = other.numWords;
  or(otherOffsetWords, otherWords, otherWordCount);
}
Expand All @@ -372,6 +364,42 @@ private void or(final int otherOffsetWords, final long[] otherArr, final int oth
}
}

/**
 * Or {@code min(length(), other.length() - from)} bits starting at {@code from} from {@code
 * other} into this bit set starting at 0.
 *
 * <p>Full 64-bit words are merged directly; the trailing bits that do not fill a whole word are
 * copied one at a time. Nothing happens when there are no bits to copy.
 */
void orRange(FixedBitSet other, int from) {
  final int bitCount = Math.min(length(), other.length() - from);
  if (bitCount <= 0) {
    return;
  }

  final int wholeWords = bitCount >> 6;
  final long[] source = other.getBits();
  final int firstSourceWord = from >> 6;
  final boolean wordAligned = (from & 0x3F) == 0;

  if (wordAligned) {
    // `from` sits on a word boundary: destination word i maps to exactly one source word.
    for (int dst = 0, src = firstSourceWord; dst < wholeWords; ++dst, ++src) {
      bits[dst] |= source[src];
    }
  } else {
    // Each destination word straddles two source words. Shifts on a long only use the low 6 bits
    // of the shift count, so `>>> from` drops the leading (from % 64) bits and `<< -from` moves
    // the start of the next word into the freed high positions.
    for (int dst = 0, src = firstSourceWord; dst < wholeWords; ++dst, ++src) {
      final long lowPart = source[src] >>> from;
      final long highPart = source[src + 1] << -from;
      bits[dst] |= lowPart | highPart;
    }
  }

  // Handle the remainder, i.e. the bits past the last full word
  int bit = wholeWords << 6;
  while (bit < bitCount) {
    if (other.get(from + bit)) {
      set(bit);
    }
    ++bit;
  }
}

/**
 * this = this OR other.
 *
 * <p>Implemented as a range-or starting at bit 0 of {@code other}, so at most {@code
 * min(length(), other.length())} bits are merged.
 */
public void or(FixedBitSet other) {
  final int firstBit = 0;
  orRange(other, firstBit);
}

/** this = this XOR other */
public void xor(FixedBitSet other) {
xor(other.bits, other.numWords);
Expand Down
42 changes: 42 additions & 0 deletions lucene/core/src/test/org/apache/lucene/util/TestFixedBitSet.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@
package org.apache.lucene.util;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Random;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.tests.util.BaseBitSetTestCase;
Expand Down Expand Up @@ -642,4 +644,44 @@ public void testScanIsEmpty() {
set.set(1028);
assertFalse(set.scanIsEmpty());
}

/**
 * Checks {@code orRange} for every alignment of the source offset, both when the copy is bounded
 * by the destination size and when it is bounded by the end of the source.
 */
public void testOrRange() {
  final FixedBitSet target = new FixedBitSet(1_000);
  final FixedBitSet source = new FixedBitSet(10_000);
  // Every third bit of the source is set
  for (int bit = 0; bit < source.length(); bit += 3) {
    source.set(bit);
  }

  // Check different values of `offset`
  final List<Integer> offsets = new ArrayList<>();
  // Test all possible alignments, with the copy bounded by the size of the destination
  for (int delta = 0; delta < 64; ++delta) {
    offsets.add(64 + delta);
  }
  // Again, test all possible alignments, but this time we stop or-ing bits when exceeding the
  // size of the source rather than the size of the destination
  final int nearEnd = source.length() - 128;
  for (int delta = 0; delta < 64; ++delta) {
    offsets.add(nearEnd + delta);
  }

  for (int offset : offsets) {
    // Reset the destination so that only every tenth bit is set
    target.clear();
    for (int bit = 0; bit < target.length(); bit += 10) {
      target.set(bit);
    }

    target.orRange(source, offset);

    final int copied = Math.min(target.length(), source.length() - offset);
    for (int bit = 0; bit < target.length(); ++bit) {
      // Bits that were set in the destination beforehand must still be set
      final boolean presetInTarget = bit % 10 == 0;
      // Bits within the copied range that are set in the source must now be set in the
      // destination; bits past the copied range must be left untouched
      final boolean orredFromSource = bit < copied && (offset + bit) % 3 == 0;
      assertEquals(presetInTarget || orredFromSource, target.get(bit));
    }
  }
}
}

0 comments on commit c0fc700

Please sign in to comment.