Skip to content

Commit

Permalink
Fix optimization to help inline calls to live docs. (#14294)
Browse files Browse the repository at this point in the history
While doing benchmarks on indexes with deletions, I found a bug in
`ScorerUtil`, which optimizes live docs for the wrong class: `FixedBitSet`
instead of `FixedBit`. Another performance bug is that `FixedBits` did not
override `Bits#applyMask` with a more efficient implementation.
  • Loading branch information
jpountz authored Feb 27, 2025
1 parent b0a8727 commit 88d7709
Show file tree
Hide file tree
Showing 5 changed files with 117 additions and 22 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -852,7 +852,7 @@ protected void searchLeaf(
}
try {
// Optimize for the case when live docs are stored in a FixedBitSet.
Bits acceptDocs = ScorerUtil.likelyFixedBitSet(ctx.reader().getLiveDocs());
Bits acceptDocs = ScorerUtil.likelyLiveDocs(ctx.reader().getLiveDocs());
scorer.score(leafCollector, acceptDocs, minDocId, maxDocId);
} catch (
@SuppressWarnings("unused")
Expand Down
20 changes: 11 additions & 9 deletions lucene/core/src/java/org/apache/lucene/search/ScorerUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
class ScorerUtil {

private static final Class<?> DEFAULT_IMPACTS_ENUM_CLASS;
private static final Class<?> DEFAULT_ACCEPT_DOCS_CLASS;

static {
try (Directory dir = new ByteBuffersDirectory();
Expand All @@ -58,6 +59,8 @@ class ScorerUtil {
} catch (IOException e) {
throw new Error(e);
}

DEFAULT_ACCEPT_DOCS_CLASS = new FixedBitSet(1).asReadOnlyBits().getClass();
}

static long costWithMinShouldMatch(LongStream costs, int numScorers, int minShouldMatch) {
Expand Down Expand Up @@ -113,18 +116,17 @@ static Scorable likelyTermScorer(Scorable scorable) {

/**
* Optimize {@link Bits} representing the set of accepted documents for the case when it is likely
* implemented via a {@link FixedBitSet}. This helps make calls to {@link Bits#get(int)}
* inlinable, which in-turn helps speed up query evaluation. This is especially helpful as
* inlining will sometimes enable auto-vectorizing shifts and masks that are done in {@link
* FixedBitSet#get(int)}.
* implemented as live docs. This helps make calls to {@link Bits#get(int)} inlinable, which
* in-turn helps speed up query evaluation. This is especially helpful as inlining will sometimes
* enable auto-vectorizing shifts and masks that are done in {@link FixedBitSet#get(int)}.
*/
static Bits likelyFixedBitSet(Bits acceptDocs) {
if (acceptDocs instanceof FixedBitSet) {
static Bits likelyLiveDocs(Bits acceptDocs) {
if (acceptDocs == null) {
return acceptDocs;
} else if (acceptDocs.getClass() == DEFAULT_ACCEPT_DOCS_CLASS) {
return acceptDocs;
} else if (acceptDocs != null) {
return new FilterBits(acceptDocs);
} else {
return null;
return new FilterBits(acceptDocs);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -749,7 +749,7 @@ public int hashCode() {
public static FixedBitSet copyOf(Bits bits) {
if (bits instanceof FixedBits fixedBits) {
// restore the original FixedBitSet
bits = new FixedBitSet(fixedBits.bits, fixedBits.length);
bits = fixedBits.bitSet;
}

if (bits instanceof FixedBitSet) {
Expand Down
20 changes: 9 additions & 11 deletions lucene/core/src/java/org/apache/lucene/util/FixedBits.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,26 +19,24 @@
/** Immutable twin of FixedBitSet. */
final class FixedBits implements Bits {

final long[] bits;
final int length;
final FixedBitSet bitSet;

FixedBits(long[] bits, int length) {
this.bits = bits;
this.length = length;
this.bitSet = new FixedBitSet(bits, length);
}

@Override
public boolean get(int index) {
assert index >= 0 && index < length : "index=" + index + ", numBits=" + length;
int i = index >> 6; // div 64
// signed shift will keep a negative index and force an
// array-index-out-of-bounds-exception, removing the need for an explicit check.
long bitmask = 1L << index;
return (bits[i] & bitmask) != 0;
return bitSet.get(index);
}

@Override
public void applyMask(FixedBitSet dest, int offset) {
bitSet.applyMask(dest, offset);
}

@Override
public int length() {
return length;
return bitSet.length();
}
}
95 changes: 95 additions & 0 deletions lucene/core/src/test/org/apache/lucene/search/TestScorerUtil.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;

import java.io.IOException;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FeatureField;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.NoMergePolicy;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;
import org.apache.lucene.tests.util.LuceneTestCase;
import org.apache.lucene.tests.util.TestUtil;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.SparseFixedBitSet;

public class TestScorerUtil extends LuceneTestCase {

public void testLikelyFixedBits() throws IOException {
assertNull(ScorerUtil.likelyLiveDocs(null));

Bits bits1 = new SparseFixedBitSet(10);
assertNotSame(bits1, ScorerUtil.likelyLiveDocs(bits1));
Bits bits2 = new Bits.MatchAllBits(10);
assertNotSame(bits2, ScorerUtil.likelyLiveDocs(bits2));
assertEquals(
ScorerUtil.likelyLiveDocs(bits1).getClass(), ScorerUtil.likelyLiveDocs(bits2).getClass());

try (Directory dir = new ByteBuffersDirectory();
IndexWriter w =
new IndexWriter(
dir,
new IndexWriterConfig()
.setCodec(TestUtil.getDefaultCodec())
.setMergePolicy(NoMergePolicy.INSTANCE))) {
Document doc = new Document();
doc.add(new StringField("id", "1", Store.NO));
w.addDocument(doc);
doc = new Document();
doc.add(new StringField("id", "2", Store.NO));
w.addDocument(doc);
w.deleteDocuments(new Term("id", "1"));
try (DirectoryReader reader = DirectoryReader.open(w)) {
LeafReader leafReader = reader.leaves().get(0).reader();
Bits acceptDocs = leafReader.getLiveDocs();
assertNotNull(acceptDocs);
assertSame(acceptDocs, ScorerUtil.likelyLiveDocs(acceptDocs));
}
}
}

public void testLikelyImpactsEnum() throws IOException {
DocIdSetIterator iterator = DocIdSetIterator.all(10);
assertTrue(ScorerUtil.likelyImpactsEnum(iterator) instanceof FilterDocIdSetIterator);

try (Directory dir = new ByteBuffersDirectory();
IndexWriter w =
new IndexWriter(dir, new IndexWriterConfig().setCodec(TestUtil.getDefaultCodec()))) {
Document doc = new Document();
doc.add(new FeatureField("field", "value", 1f));
w.addDocument(doc);
try (DirectoryReader reader = DirectoryReader.open(w)) {
LeafReader leafReader = reader.leaves().get(0).reader();
TermsEnum te = leafReader.terms("field").iterator();
assertTrue(te.seekExact(new BytesRef("value")));
ImpactsEnum ie = te.impacts(PostingsEnum.FREQS);
assertSame(ie, ScorerUtil.likelyImpactsEnum(ie));
}
}
}
}

0 comments on commit 88d7709

Please sign in to comment.