diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index cd39edc32279..761fafeba09a 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -52,6 +52,9 @@ API Changes
* GITHUB#14209: Deprecate Operations.union(Automaton,Automaton) and
concatenate(Automaton,Automaton) in favor of the methods taking List. (Robert Muir)
+* GITHUB#14236: CombinedFieldQuery moved from lucene-sandbox to lucene-core.
+ (Adrien Grand)
+
New Features
---------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/CombinedFieldQuery.java b/lucene/core/src/java/org/apache/lucene/search/CombinedFieldQuery.java
new file mode 100644
index 000000000000..42659e3b37e9
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/search/CombinedFieldQuery.java
@@ -0,0 +1,462 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.search;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.TreeMap;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.PostingsEnum;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermState;
+import org.apache.lucene.index.TermStates;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.search.similarities.BM25Similarity;
+import org.apache.lucene.search.similarities.DFRSimilarity;
+import org.apache.lucene.search.similarities.Similarity;
+import org.apache.lucene.util.Accountable;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IOSupplier;
+import org.apache.lucene.util.RamUsageEstimator;
+import org.apache.lucene.util.SmallFloat;
+
+/**
+ * A {@link Query} that treats multiple fields as a single stream and scores terms as if they had
+ * been indexed in a single field whose values would be the union of the values of the provided
+ * fields.
+ *
+ * <p>The query works as follows:
+ *
+ * <ol>
+ * <li>Given a list of fields and weights, it pretends there is a synthetic combined field where
+ * all terms have been indexed. It computes new term and collection statistics for this
+ * combined field.
+ * <li>It uses a disjunction iterator and {@link IndexSearcher#getSimilarity} to score documents.
+ * </ol>
+ *
+ * <p>In order for a similarity to be compatible, {@link Similarity#computeNorm} must be additive:
+ * the norm of the combined field is the sum of norms for each individual field. The norms must also
+ * be encoded using {@link SmallFloat#intToByte4}. These requirements hold for all similarities that
+ * don't customize {@link Similarity#computeNorm}, which includes {@link BM25Similarity} and {@link
+ * DFRSimilarity}. Per-field similarities are not supported.
+ *
+ * <p>The query also requires that either all fields or no fields have norms enabled. Having only
+ * some fields with norms enabled can result in errors.
+ *
+ * <p>This query assumes that all fields share the same analyzer. Scores may not make much sense if
+ * all fields don't have the same analyzer.
+ *
+ * <p>The scoring is based on BM25F's simple formula described in:
+ * http://www.staff.city.ac.uk/~sb317/papers/foundations_bm25_review.pdf. This query implements the
+ * same approach but allows other similarities besides {@link
+ * org.apache.lucene.search.similarities.BM25Similarity}.
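+ *
+ * <p>As a usage sketch (the field names and weights here are illustrative), a term can be scored
+ * across a boosted "title" field and a "body" field as follows:
+ *
+ * <pre class="prettyprint">
+ * Query query = new CombinedFieldQuery.Builder("apache")
+ * .addField("title", 2f)
+ * .addField("body", 1f)
+ * .build();
+ * </pre>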
+ *
+ * @lucene.experimental
+ */
+public final class CombinedFieldQuery extends Query implements Accountable {
+ private static final long BASE_RAM_BYTES =
+ RamUsageEstimator.shallowSizeOfInstance(CombinedFieldQuery.class);
+
+ /** A builder for {@link CombinedFieldQuery}. */
+ public static class Builder {
+ private final Map<String, FieldAndWeight> fieldAndWeights = new HashMap<>();
+ private final BytesRef term;
+
+ /** Create a builder for the given term {@link String}. */
+ public Builder(String term) {
+ this.term = new BytesRef(term);
+ }
+
+ /** Create a builder for the given term bytes. */
+ public Builder(BytesRef term) {
+ this.term = BytesRef.deepCopyOf(term);
+ }
+
+ /**
+ * Adds a field to this builder.
+ *
+ * @param field The field name.
+ */
+ public Builder addField(String field) {
+ return addField(field, 1f);
+ }
+
+ /**
+ * Adds a field to this builder.
+ *
+ * @param field The field name.
+ * @param weight The weight associated to this field.
+ */
+ public Builder addField(String field, float weight) {
+ if (weight < 1) {
+ throw new IllegalArgumentException("weight must be greater than or equal to 1");
+ }
+ fieldAndWeights.put(field, new FieldAndWeight(field, weight));
+ return this;
+ }
+
+ /** Builds the {@link CombinedFieldQuery}. */
+ public CombinedFieldQuery build() {
+ if (fieldAndWeights.size() > IndexSearcher.getMaxClauseCount()) {
+ throw new IndexSearcher.TooManyClauses();
+ }
+ return new CombinedFieldQuery(new TreeMap<>(fieldAndWeights), term);
+ }
+ }
+
+ record FieldAndWeight(String field, float weight) {}
+
+ // sorted map for fields.
+ private final TreeMap<String, FieldAndWeight> fieldAndWeights;
+ // term bytes
+ private final BytesRef term;
+ // array of terms per field, sorted by field
+ private final Term[] fieldTerms;
+
+ private final long ramBytesUsed;
+
+ private CombinedFieldQuery(TreeMap<String, FieldAndWeight> fieldAndWeights, BytesRef term) {
+ this.fieldAndWeights = fieldAndWeights;
+ this.term = Objects.requireNonNull(term);
+ if (fieldAndWeights.size() > IndexSearcher.getMaxClauseCount()) {
+ throw new IndexSearcher.TooManyClauses();
+ }
+ this.fieldTerms = new Term[fieldAndWeights.size()];
+ int pos = 0;
+ for (String field : fieldAndWeights.keySet()) {
+ fieldTerms[pos++] = new Term(field, term);
+ }
+
+ this.ramBytesUsed =
+ BASE_RAM_BYTES
+ + RamUsageEstimator.sizeOfObject(fieldAndWeights)
+ + RamUsageEstimator.sizeOfObject(fieldTerms)
+ + RamUsageEstimator.sizeOfObject(term);
+ }
+
+ @Override
+ public String toString(String field) {
+ StringBuilder builder = new StringBuilder("CombinedFieldQuery((");
+ int pos = 0;
+ for (FieldAndWeight fieldWeight : fieldAndWeights.values()) {
+ if (pos++ != 0) {
+ builder.append(" ");
+ }
+ builder.append(fieldWeight.field);
+ if (fieldWeight.weight != 1f) {
+ builder.append("^");
+ builder.append(fieldWeight.weight);
+ }
+ }
+ builder.append(")(");
+ builder.append(Term.toString(term));
+ builder.append("))");
+ return builder.toString();
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) return true;
+ if (sameClassAs(o) == false) return false;
+ CombinedFieldQuery that = (CombinedFieldQuery) o;
+ return Objects.equals(fieldAndWeights, that.fieldAndWeights) && term.equals(that.term);
+ }
+
+ @Override
+ public int hashCode() {
+ int result = classHash();
+ result = 31 * result + Objects.hash(fieldAndWeights);
+ result = 31 * result + term.hashCode();
+ return result;
+ }
+
+ @Override
+ public long ramBytesUsed() {
+ return ramBytesUsed;
+ }
+
+ @Override
+ public Query rewrite(IndexSearcher indexSearcher) throws IOException {
+ if (fieldAndWeights.isEmpty()) {
+ return new BooleanQuery.Builder().build();
+ }
+ return this;
+ }
+
+ @Override
+ public void visit(QueryVisitor visitor) {
+ Term[] selectedTerms =
+ Arrays.stream(fieldTerms).filter(t -> visitor.acceptField(t.field())).toArray(Term[]::new);
+ if (selectedTerms.length > 0) {
+ QueryVisitor v = visitor.getSubVisitor(BooleanClause.Occur.SHOULD, this);
+ v.consumeTerms(this, selectedTerms);
+ }
+ }
+
+ private BooleanQuery rewriteToBoolean() {
+ // rewrite to a simple disjunction if the score is not needed.
+ BooleanQuery.Builder bq = new BooleanQuery.Builder();
+ for (Term term : fieldTerms) {
+ bq.add(new TermQuery(term), BooleanClause.Occur.SHOULD);
+ }
+ return bq.build();
+ }
+
+ @Override
+ public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost)
+ throws IOException {
+ validateConsistentNorms(searcher.getIndexReader());
+ if (scoreMode.needsScores()) {
+ return new CombinedFieldWeight(this, searcher, scoreMode, boost);
+ } else {
+ // rewrite to a simple disjunction if the score is not needed.
+ Query bq = rewriteToBoolean();
+ return searcher.rewrite(bq).createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, boost);
+ }
+ }
+
+ private void validateConsistentNorms(IndexReader reader) {
+ boolean allFieldsHaveNorms = true;
+ boolean noFieldsHaveNorms = true;
+
+ for (LeafReaderContext context : reader.leaves()) {
+ FieldInfos fieldInfos = context.reader().getFieldInfos();
+ for (String field : fieldAndWeights.keySet()) {
+ FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
+ if (fieldInfo != null) {
+ allFieldsHaveNorms &= fieldInfo.hasNorms();
+ noFieldsHaveNorms &= fieldInfo.omitsNorms();
+ }
+ }
+ }
+
+ if (allFieldsHaveNorms == false && noFieldsHaveNorms == false) {
+ throw new IllegalArgumentException(
+ getClass().getSimpleName()
+ + " requires norms to be consistent across fields: some fields cannot "
+ + " have norms enabled, while others have norms disabled");
+ }
+ }
+
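+ // Scores against pseudo term/collection statistics that merge the statistics of the
+ // individual fields, as if all field values had been indexed into one combined field.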
+ class CombinedFieldWeight extends Weight {
+ private final IndexSearcher searcher;
+ private final TermStates[] termStates;
+ private final Similarity.SimScorer simWeight;
+
+ CombinedFieldWeight(Query query, IndexSearcher searcher, ScoreMode scoreMode, float boost)
+ throws IOException {
+ super(query);
+ assert scoreMode.needsScores();
+ this.searcher = searcher;
+ long docFreq = 0;
+ long totalTermFreq = 0;
+ termStates = new TermStates[fieldTerms.length];
+ for (int i = 0; i < termStates.length; i++) {
+ FieldAndWeight field = fieldAndWeights.get(fieldTerms[i].field());
+ TermStates ts = TermStates.build(searcher, fieldTerms[i], true);
+ termStates[i] = ts;
+ if (ts.docFreq() > 0) {
+ TermStatistics termStats =
+ searcher.termStatistics(fieldTerms[i], ts.docFreq(), ts.totalTermFreq());
+ docFreq = Math.max(termStats.docFreq(), docFreq);
+ totalTermFreq += (double) field.weight * termStats.totalTermFreq();
+ }
+ }
+ if (docFreq > 0) {
+ CollectionStatistics pseudoCollectionStats = mergeCollectionStatistics(searcher);
+ TermStatistics pseudoTermStatistics =
+ new TermStatistics(new BytesRef("pseudo_term"), docFreq, Math.max(1, totalTermFreq));
+ this.simWeight =
+ searcher.getSimilarity().scorer(boost, pseudoCollectionStats, pseudoTermStatistics);
+ } else {
+ this.simWeight = null;
+ }
+ }
+
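+ // Builds collection statistics for the pseudo combined field: maxDoc, docCount and
+ // sumDocFreq take the maximum across fields, while sumTotalTermFreq is the weight-scaled
+ // sum, mirroring how per-field term frequencies are combined at scoring time.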
+ private CollectionStatistics mergeCollectionStatistics(IndexSearcher searcher)
+ throws IOException {
+ long maxDoc = 0;
+ long docCount = 0;
+ long sumTotalTermFreq = 0;
+ long sumDocFreq = 0;
+ for (FieldAndWeight fieldWeight : fieldAndWeights.values()) {
+ CollectionStatistics collectionStats = searcher.collectionStatistics(fieldWeight.field);
+ if (collectionStats != null) {
+ maxDoc = Math.max(collectionStats.maxDoc(), maxDoc);
+ docCount = Math.max(collectionStats.docCount(), docCount);
+ sumDocFreq = Math.max(collectionStats.sumDocFreq(), sumDocFreq);
+ sumTotalTermFreq += (double) fieldWeight.weight * collectionStats.sumTotalTermFreq();
+ }
+ }
+
+ return new CollectionStatistics(
+ "pseudo_field", maxDoc, docCount, sumTotalTermFreq, sumDocFreq);
+ }
+
+ @Override
+ public Matches matches(LeafReaderContext context, int doc) throws IOException {
+ Weight weight =
+ searcher.rewrite(rewriteToBoolean()).createWeight(searcher, ScoreMode.COMPLETE, 1f);
+ return weight.matches(context, doc);
+ }
+
+ @Override
+ public Explanation explain(LeafReaderContext context, int doc) throws IOException {
+ Scorer scorer = scorer(context);
+ if (scorer != null) {
+ int newDoc = scorer.iterator().advance(doc);
+ if (newDoc == doc) {
+ assert scorer instanceof CombinedFieldScorer;
+ float freq = ((CombinedFieldScorer) scorer).freq();
+ MultiNormsLeafSimScorer docScorer =
+ new MultiNormsLeafSimScorer(
+ simWeight, context.reader(), fieldAndWeights.values(), true);
+ Explanation freqExplanation = Explanation.match(freq, "termFreq=" + freq);
+ Explanation scoreExplanation = docScorer.explain(doc, freqExplanation);
+ return Explanation.match(
+ scoreExplanation.getValue(),
+ "weight(" + getQuery() + " in " + doc + "), result of:",
+ scoreExplanation);
+ }
+ }
+ return Explanation.noMatch("no matching term");
+ }
+
+ @Override
+ public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
+ List<PostingsEnum> iterators = new ArrayList<>();
+ List<FieldAndWeight> fields = new ArrayList<>();
+ long cost = 0;
+ for (int i = 0; i < fieldTerms.length; i++) {
+ IOSupplier<TermState> supplier = termStates[i].get(context);
+ TermState state = supplier == null ? null : supplier.get();
+ if (state != null) {
+ TermsEnum termsEnum = context.reader().terms(fieldTerms[i].field()).iterator();
+ termsEnum.seekExact(fieldTerms[i].bytes(), state);
+ PostingsEnum postingsEnum = termsEnum.postings(null, PostingsEnum.FREQS);
+ iterators.add(postingsEnum);
+ fields.add(fieldAndWeights.get(fieldTerms[i].field()));
+ cost += postingsEnum.cost();
+ }
+ }
+
+ if (iterators.isEmpty()) {
+ return null;
+ }
+
+ MultiNormsLeafSimScorer scoringSimScorer =
+ new MultiNormsLeafSimScorer(simWeight, context.reader(), fieldAndWeights.values(), true);
+
+ final long finalCost = cost;
+ return new ScorerSupplier() {
+
+ @Override
+ public Scorer get(long leadCost) throws IOException {
+ // we use termscorers + disjunction as an impl detail
+ List<DisiWrapper> wrappers = new ArrayList<>(iterators.size());
+ for (int i = 0; i < iterators.size(); i++) {
+ float weight = fields.get(i).weight;
+ wrappers.add(
+ new WeightedDisiWrapper(new TermScorer(iterators.get(i), simWeight, null), weight));
+ }
+ // Even though it is called approximation, it is accurate since none of
+ // the sub iterators are two-phase iterators.
+ DisjunctionDISIApproximation iterator =
+ new DisjunctionDISIApproximation(wrappers, leadCost);
+ return new CombinedFieldScorer(iterator, scoringSimScorer);
+ }
+
+ @Override
+ public long cost() {
+ return finalCost;
+ }
+ };
+ }
+
+ @Override
+ public boolean isCacheable(LeafReaderContext ctx) {
+ return false;
+ }
+ }
+
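+ // Wraps a term's postings so that its reported frequency is scaled by the field weight: a
+ // weight of k behaves as if the field content had been indexed k times.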
+ private static class WeightedDisiWrapper extends DisiWrapper {
+ final PostingsEnum postingsEnum;
+ final float weight;
+
+ WeightedDisiWrapper(Scorer scorer, float weight) {
+ super(scorer, false);
+ this.weight = weight;
+ this.postingsEnum = (PostingsEnum) scorer.iterator();
+ }
+
+ float freq() throws IOException {
+ return weight * postingsEnum.freq();
+ }
+ }
+
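+ // Scores a document by summing the weighted frequencies of the term across all matching
+ // fields and passing the total, along with the combined norm, to the similarity.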
+ private static class CombinedFieldScorer extends Scorer {
+ private final DisjunctionDISIApproximation iterator;
+ private final MultiNormsLeafSimScorer simScorer;
+ private final float maxScore;
+
+ CombinedFieldScorer(DisjunctionDISIApproximation iterator, MultiNormsLeafSimScorer simScorer) {
+ this.iterator = iterator;
+ this.simScorer = simScorer;
+ this.maxScore = simScorer.getSimScorer().score(Float.POSITIVE_INFINITY, 1L);
+ }
+
+ @Override
+ public int docID() {
+ return iterator.docID();
+ }
+
+ float freq() throws IOException {
+ DisiWrapper w = iterator.topList();
+ float freq = ((WeightedDisiWrapper) w).freq();
+ for (w = w.next; w != null; w = w.next) {
+ freq += ((WeightedDisiWrapper) w).freq();
+ if (freq < 0) { // overflow
+ return Integer.MAX_VALUE;
+ }
+ }
+ return freq;
+ }
+
+ @Override
+ public float score() throws IOException {
+ return simScorer.score(iterator.docID(), freq());
+ }
+
+ @Override
+ public DocIdSetIterator iterator() {
+ return iterator;
+ }
+
+ @Override
+ public float getMaxScore(int upTo) throws IOException {
+ return maxScore;
+ }
+ }
+}
diff --git a/lucene/core/src/java/org/apache/lucene/search/MultiNormsLeafSimScorer.java b/lucene/core/src/java/org/apache/lucene/search/MultiNormsLeafSimScorer.java
new file mode 100644
index 000000000000..c6720b362d9c
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/search/MultiNormsLeafSimScorer.java
@@ -0,0 +1,177 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.search;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Objects;
+import java.util.Set;
+import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.NumericDocValues;
+import org.apache.lucene.search.CombinedFieldQuery.FieldAndWeight;
+import org.apache.lucene.search.similarities.Similarity.SimScorer;
+import org.apache.lucene.util.SmallFloat;
+
+/**
+ * Scorer that sums a document's norms from multiple fields.
+ *
+ * <p>For all fields, norms must be encoded using {@link SmallFloat#intToByte4}. This scorer also
+ * requires that either all fields or no fields have norms enabled. Having only some fields with
+ * norms enabled can result in errors or undefined behavior.
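+ *
+ * <p>Conceptually, for fields with weights {@code w_i} whose decoded norms yield lengths
+ * {@code len_i}, the combined norm is {@code SmallFloat.intToByte4(Math.round(sum(w_i * len_i)))}.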
+ */
+final class MultiNormsLeafSimScorer {
+ /** Cache of decoded norms. */
+ private static final float[] LENGTH_TABLE = new float[256];
+
+ static {
+ for (int i = 0; i < 256; i++) {
+ LENGTH_TABLE[i] = SmallFloat.byte4ToInt((byte) i);
+ }
+ }
+
+ private final SimScorer scorer;
+ private final NumericDocValues norms;
+
+ /** Sole constructor: Score documents of {@code reader} with {@code scorer}. */
+ MultiNormsLeafSimScorer(
+ SimScorer scorer,
+ LeafReader reader,
+ Collection<FieldAndWeight> normFields,
+ boolean needsScores)
+ throws IOException {
+ this.scorer = Objects.requireNonNull(scorer);
+ if (needsScores) {
+ final List<NumericDocValues> normsList = new ArrayList<>();
+ final List<Float> weightList = new ArrayList<>();
+ final Set<String> duplicateCheckingSet = new HashSet<>();
+ for (FieldAndWeight field : normFields) {
+ assert duplicateCheckingSet.add(field.field())
+ : "There is a duplicated field ["
+ + field.field()
+ + "] used to construct MultiNormsLeafSimScorer";
+
+ NumericDocValues norms = reader.getNormValues(field.field());
+ if (norms != null) {
+ normsList.add(norms);
+ weightList.add(field.weight());
+ }
+ }
+
+ if (normsList.isEmpty()) {
+ norms = null;
+ } else {
+ final NumericDocValues[] normsArr = normsList.toArray(new NumericDocValues[0]);
+ final float[] weightArr = new float[normsList.size()];
+ for (int i = 0; i < weightList.size(); i++) {
+ weightArr[i] = weightList.get(i);
+ }
+ norms = new MultiFieldNormValues(normsArr, weightArr);
+ }
+ } else {
+ norms = null;
+ }
+ }
+
+ SimScorer getSimScorer() {
+ return scorer;
+ }
+
+ private long getNormValue(int doc) throws IOException {
+ if (norms != null) {
+ boolean found = norms.advanceExact(doc);
+ assert found;
+ return norms.longValue();
+ } else {
+ return 1L; // default norm
+ }
+ }
+
+ /**
+ * Score the provided document assuming the given term document frequency. This method must be
+ * called on non-decreasing sequences of doc ids.
+ *
+ * @see SimScorer#score(float, long)
+ */
+ public float score(int doc, float freq) throws IOException {
+ return scorer.score(freq, getNormValue(doc));
+ }
+
+ /**
+ * Explain the score for the provided document assuming the given term document frequency. This
+ * method must be called on non-decreasing sequences of doc ids.
+ *
+ * @see SimScorer#explain(Explanation, long)
+ */
+ public Explanation explain(int doc, Explanation freqExpl) throws IOException {
+ return scorer.explain(freqExpl, getNormValue(doc));
+ }
+
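+ // A NumericDocValues view over multiple per-field norms: decodes each field's norm, sums
+ // the weighted lengths, and re-encodes the total with SmallFloat#intToByte4 so that the
+ // similarity sees a single norm for the combined field.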
+ private static class MultiFieldNormValues extends NumericDocValues {
+ private final NumericDocValues[] normsArr;
+ private final float[] weightArr;
+ private long current;
+ private int docID = -1;
+
+ MultiFieldNormValues(NumericDocValues[] normsArr, float[] weightArr) {
+ this.normsArr = normsArr;
+ this.weightArr = weightArr;
+ }
+
+ @Override
+ public long longValue() {
+ return current;
+ }
+
+ @Override
+ public boolean advanceExact(int target) throws IOException {
+ float normValue = 0;
+ boolean found = false;
+ for (int i = 0; i < normsArr.length; i++) {
+ if (normsArr[i].advanceExact(target)) {
+ normValue +=
+ weightArr[i] * LENGTH_TABLE[Byte.toUnsignedInt((byte) normsArr[i].longValue())];
+ found = true;
+ }
+ }
+ current = SmallFloat.intToByte4(Math.round(normValue));
+ return found;
+ }
+
+ @Override
+ public int docID() {
+ return docID;
+ }
+
+ @Override
+ public int nextDoc() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public int advance(int target) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public long cost() {
+ throw new UnsupportedOperationException();
+ }
+ }
+}
diff --git a/lucene/core/src/java/org/apache/lucene/search/package-info.java b/lucene/core/src/java/org/apache/lucene/search/package-info.java
index 98b60d94222c..664e8e616938 100644
--- a/lucene/core/src/java/org/apache/lucene/search/package-info.java
+++ b/lucene/core/src/java/org/apache/lucene/search/package-info.java
@@ -278,6 +278,30 @@
* See the {@link org.apache.lucene.search.similarities} package documentation for information on
* the built-in available scoring models and extending or changing Similarity.
*
+ * <h2>Scoring multiple fields</h2>
+ *
+ * <p>In the real world, documents often have multiple fields with different degrees of relevance. A
+ * robust way of scoring across multiple fields is called BM25F, which is implemented via {@link
+ * org.apache.lucene.search.CombinedFieldQuery}. It scores documents with multiple fields as if
+ * their content had been indexed in a single combined field. It supports configuring per-field
+ * boosts where the value of the boost is interpreted as the number of times that the content of the
+ * field exists in the virtual combined field.
+ *
+ * <p>Here is an example that constructs a query for "apache OR lucene" on the field "title" with a
+ * boost of 10, and "body" with a boost of 1:
+ *
+ * <pre class="prettyprint">
+ * BooleanQuery.Builder builder = new BooleanQuery.Builder();
+ * for (String term : new String[] { "apache", "lucene" }) {
+ * Query query = new CombinedFieldQuery.Builder(term)
+ * .addField("title", 10f)
+ * .addField("body", 1f)
+ * .build();
+ * builder.add(query, Occur.SHOULD);
+ * }
+ * Query query = builder.build();
+ * </pre>
+ *
* <h2>Integrating field values into the score</h2>
*
* While similarities help score a document relatively to a query, it is also common for
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestCombinedFieldQuery.java b/lucene/core/src/test/org/apache/lucene/search/TestCombinedFieldQuery.java
new file mode 100644
index 000000000000..ed0c051d0bad
--- /dev/null
+++ b/lucene/core/src/test/org/apache/lucene/search/TestCombinedFieldQuery.java
@@ -0,0 +1,563 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.search;
+
+import static com.carrotsearch.randomizedtesting.RandomizedTest.randomBoolean;
+import static com.carrotsearch.randomizedtesting.RandomizedTest.randomIntBetween;
+
+import com.carrotsearch.randomizedtesting.generators.RandomPicks;
+import java.io.IOException;
+import java.util.Arrays;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.FieldInvertState;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.MultiReader;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.similarities.BM25Similarity;
+import org.apache.lucene.search.similarities.BooleanSimilarity;
+import org.apache.lucene.search.similarities.ClassicSimilarity;
+import org.apache.lucene.search.similarities.LMDirichletSimilarity;
+import org.apache.lucene.search.similarities.LMJelinekMercerSimilarity;
+import org.apache.lucene.search.similarities.Similarity;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.MMapDirectory;
+import org.apache.lucene.tests.index.RandomIndexWriter;
+import org.apache.lucene.tests.search.CheckHits;
+import org.apache.lucene.tests.util.LuceneTestCase;
+
+public class TestCombinedFieldQuery extends LuceneTestCase {
+ public void testInvalid() {
+ CombinedFieldQuery.Builder builder = new CombinedFieldQuery.Builder("foo");
+ IllegalArgumentException exc =
+ expectThrows(IllegalArgumentException.class, () -> builder.addField("foo", 0.5f));
+ assertEquals("weight must be greater than or equal to 1", exc.getMessage());
+ }
+
+ public void testRewrite() throws IOException {
+ CombinedFieldQuery.Builder builder = new CombinedFieldQuery.Builder("foo");
+ IndexReader reader = new MultiReader();
+ IndexSearcher searcher = new IndexSearcher(reader);
+ Query actual = searcher.rewrite(builder.build());
+ assertEquals(new MatchNoDocsQuery(), actual);
+ builder.addField("field", 1f);
+ Query query = builder.build();
+ actual = searcher.rewrite(builder.build());
+ assertEquals(query, actual);
+ }
+
+ public void testEqualsAndHashCode() {
+ CombinedFieldQuery query1 =
+ new CombinedFieldQuery.Builder("value").addField("field1").addField("field2").build();
+
+ CombinedFieldQuery query2 =
+ new CombinedFieldQuery.Builder("value").addField("field1").addField("field2", 1.3f).build();
+ assertNotEquals(query1, query2);
+ assertNotEquals(query1.hashCode(), query2.hashCode());
+
+ CombinedFieldQuery query3 =
+ new CombinedFieldQuery.Builder("value").addField("field3").addField("field4").build();
+ assertNotEquals(query1, query3);
+ assertNotEquals(query1.hashCode(), query3.hashCode());
+
+ CombinedFieldQuery duplicateQuery1 =
+ new CombinedFieldQuery.Builder("value").addField("field1").addField("field2").build();
+ assertEquals(query1, duplicateQuery1);
+ assertEquals(query1.hashCode(), duplicateQuery1.hashCode());
+ }
+
+ public void testToString() {
+ CombinedFieldQuery.Builder builder = new CombinedFieldQuery.Builder("bar");
+ assertEquals("CombinedFieldQuery(()(bar))", builder.build().toString());
+ builder.addField("foo", 1f);
+ assertEquals("CombinedFieldQuery((foo)(bar))", builder.build().toString());
+ builder.addField("title", 3f);
+ assertEquals("CombinedFieldQuery((foo title^3.0)(bar))", builder.build().toString());
+ }
+
+ public void testSameScore() throws IOException {
+ Directory dir = newDirectory();
+ Similarity similarity = randomCompatibleSimilarity();
+
+ IndexWriterConfig iwc = new IndexWriterConfig();
+ iwc.setSimilarity(similarity);
+ RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
+
+ Document doc = new Document();
+ doc.add(new StringField("f", "a", Store.NO));
+ w.addDocument(doc);
+
+ doc = new Document();
+ doc.add(new StringField("g", "a", Store.NO));
+ for (int i = 0; i < 10; ++i) {
+ w.addDocument(doc);
+ }
+
+ IndexReader reader = w.getReader();
+ IndexSearcher searcher = newSearcher(reader);
+ searcher.setSimilarity(similarity);
+ CombinedFieldQuery query =
+ new CombinedFieldQuery.Builder("a").addField("f", 1f).addField("g", 1f).build();
+ TopScoreDocCollectorManager collectorManager =
+ new TopScoreDocCollectorManager(
+ Math.min(reader.numDocs(), Integer.MAX_VALUE), Integer.MAX_VALUE);
+ TopDocs topDocs = searcher.search(query, collectorManager);
+ assertEquals(new TotalHits(11, TotalHits.Relation.EQUAL_TO), topDocs.totalHits);
+ // All docs must have the same score
+ for (int i = 0; i < topDocs.scoreDocs.length; ++i) {
+ assertEquals(topDocs.scoreDocs[0].score, topDocs.scoreDocs[i].score, 0.0f);
+ }
+
+ reader.close();
+ w.close();
+ dir.close();
+ }
+
+ public void testScoringWithMultipleFieldTermsMatch() throws IOException {
+ int numMatchDoc = randomIntBetween(100, 500);
+ int numHits = randomIntBetween(1, 100);
+ int boost1 = Math.max(1, random().nextInt(5));
+ int boost2 = Math.max(1, random().nextInt(5));
+
+ Directory dir = newDirectory();
+ Similarity similarity = randomCompatibleSimilarity();
+
+ IndexWriterConfig iwc = new IndexWriterConfig();
+ iwc.setSimilarity(similarity);
+ RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
+
+ // adding potentially matching doc
+ for (int i = 0; i < numMatchDoc; i++) {
+ Document doc = new Document();
+
+ int freqA = random().nextInt(20) + 1;
+ for (int j = 0; j < freqA; j++) {
+ doc.add(new TextField("a", "foo", Store.NO));
+ }
+
+ freqA = random().nextInt(20) + 1;
+ if (randomBoolean()) {
+ for (int j = 0; j < freqA; j++) {
+ doc.add(new TextField("a", "foo" + j, Store.NO));
+ }
+ }
+
+ freqA = random().nextInt(20) + 1;
+ for (int j = 0; j < freqA; j++) {
+ doc.add(new TextField("a", "zoo", Store.NO));
+ }
+
+ int freqB = random().nextInt(20) + 1;
+ for (int j = 0; j < freqB; j++) {
+ doc.add(new TextField("b", "zoo", Store.NO));
+ }
+
+ freqB = random().nextInt(20) + 1;
+ if (randomBoolean()) {
+ for (int j = 0; j < freqB; j++) {
+ doc.add(new TextField("b", "zoo" + j, Store.NO));
+ }
+ }
+
+ int freqC = random().nextInt(20) + 1;
+ for (int j = 0; j < freqC; j++) {
+ doc.add(new TextField("c", "bla" + j, Store.NO));
+ }
+ w.addDocument(doc);
+ }
+
+ IndexReader reader = w.getReader();
+ IndexSearcher searcher = newSearcher(reader);
+ searcher.setSimilarity(similarity);
+
+ CombinedFieldQuery query =
+ new CombinedFieldQuery.Builder("foo")
+ .addField("a", (float) boost1)
+ .addField("b", (float) boost2)
+ .build();
+
+ CollectorManager<TopScoreDocCollector, TopDocs> completeManager =
+ new TopScoreDocCollectorManager(numHits, Integer.MAX_VALUE);
+
+ searcher.search(query, completeManager);
+
+ reader.close();
+ w.close();
+ dir.close();
+ }
+
+ public void testNormsDisabled() throws IOException {
+ Directory dir = newDirectory();
+ Similarity similarity = randomCompatibleSimilarity();
+
+ IndexWriterConfig iwc = new IndexWriterConfig();
+ iwc.setSimilarity(similarity);
+ RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
+
+ Document doc = new Document();
+ doc.add(new StringField("a", "value", Store.NO));
+ doc.add(new StringField("b", "value", Store.NO));
+ doc.add(new TextField("c", "value", Store.NO));
+ w.addDocument(doc);
+ w.commit();
+
+ doc = new Document();
+ doc.add(new StringField("a", "value", Store.NO));
+ doc.add(new TextField("c", "value", Store.NO));
+ w.addDocument(doc);
+
+ IndexReader reader = w.getReader();
+ IndexSearcher searcher = newSearcher(reader);
+
+ Similarity searchSimilarity = randomCompatibleSimilarity();
+ searcher.setSimilarity(searchSimilarity);
+ TopScoreDocCollectorManager collectorManager = new TopScoreDocCollectorManager(10, 10);
+
+ CombinedFieldQuery query =
+ new CombinedFieldQuery.Builder("value").addField("a", 1.0f).addField("b", 1.0f).build();
+ TopDocs topDocs = searcher.search(query, collectorManager);
+ assertEquals(new TotalHits(2, TotalHits.Relation.EQUAL_TO), topDocs.totalHits);
+
+ CombinedFieldQuery invalidQuery =
+ new CombinedFieldQuery.Builder("value").addField("b", 1.0f).addField("c", 1.0f).build();
+ IllegalArgumentException e =
+ expectThrows(
+ IllegalArgumentException.class, () -> searcher.search(invalidQuery, collectorManager));
+ assertTrue(e.getMessage().contains("requires norms to be consistent across fields"));
+
+ reader.close();
+ w.close();
+ dir.close();
+ }
+
+ public void testCopyField() throws IOException {
+ Directory dir = newDirectory();
+ Similarity similarity = randomCompatibleSimilarity();
+
+ IndexWriterConfig iwc = new IndexWriterConfig();
+ iwc.setSimilarity(similarity);
+ RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
+
+ int numMatch = atLeast(10);
+ int boost1 = Math.max(1, random().nextInt(5));
+ int boost2 = Math.max(1, random().nextInt(5));
+ for (int i = 0; i < numMatch; i++) {
+ Document doc = new Document();
+ if (random().nextBoolean()) {
+ doc.add(new TextField("a", "baz", Store.NO));
+ doc.add(new TextField("b", "baz", Store.NO));
+ for (int k = 0; k < boost1 + boost2; k++) {
+ doc.add(new TextField("ab", "baz", Store.NO));
+ }
+ w.addDocument(doc);
+ doc.clear();
+ }
+ int freqA = random().nextInt(5) + 1;
+ for (int j = 0; j < freqA; j++) {
+ doc.add(new TextField("a", "foo", Store.NO));
+ }
+ int freqB = random().nextInt(5) + 1;
+ for (int j = 0; j < freqB; j++) {
+ doc.add(new TextField("b", "foo", Store.NO));
+ }
+ int freqAB = freqA * boost1 + freqB * boost2;
+ for (int j = 0; j < freqAB; j++) {
+ doc.add(new TextField("ab", "foo", Store.NO));
+ }
+ w.addDocument(doc);
+ }
+ IndexReader reader = w.getReader();
+ IndexSearcher searcher = newSearcher(reader);
+
+ searcher.setSimilarity(similarity);
+ CombinedFieldQuery query =
+ new CombinedFieldQuery.Builder("foo")
+ .addField("a", (float) boost1)
+ .addField("b", (float) boost2)
+ .build();
+
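+ // The combined-field scores must match a TermQuery on "ab", which simulates a copy field
+ // holding boost1 copies of "a" and boost2 copies of "b".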
+ checkExpectedHits(searcher, numMatch, query, new TermQuery(new Term("ab", "foo")));
+
+ reader.close();
+ w.close();
+ dir.close();
+ }
+
+ public void testCopyFieldWithSingleField() throws IOException {
+ Directory dir = new MMapDirectory(createTempDir());
+ Similarity similarity = randomCompatibleSimilarity();
+
+ IndexWriterConfig iwc = new IndexWriterConfig();
+ iwc.setSimilarity(similarity);
+ RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
+
+ int boost = Math.max(1, random().nextInt(5));
+ int numMatch = atLeast(10);
+ for (int i = 0; i < numMatch; i++) {
+ Document doc = new Document();
+ int freqA = random().nextInt(5) + 1;
+ for (int j = 0; j < freqA; j++) {
+ doc.add(new TextField("a", "foo", Store.NO));
+ }
+
+ int freqB = freqA * boost;
+ for (int j = 0; j < freqB; j++) {
+ doc.add(new TextField("b", "foo", Store.NO));
+ }
+
+ w.addDocument(doc);
+ }
+
+ IndexReader reader = w.getReader();
+ IndexSearcher searcher = newSearcher(reader);
+ searcher.setSimilarity(similarity);
+ CombinedFieldQuery query =
+ new CombinedFieldQuery.Builder("foo").addField("a", (float) boost).build();
+
+ checkExpectedHits(searcher, numMatch, query, new TermQuery(new Term("b", "foo")));
+
+ reader.close();
+ w.close();
+ dir.close();
+ }
+
+ public void testCopyFieldWithMissingFields() throws IOException {
+ Directory dir = new MMapDirectory(createTempDir());
+ Similarity similarity = randomCompatibleSimilarity();
+
+ IndexWriterConfig iwc = new IndexWriterConfig();
+ iwc.setSimilarity(similarity);
+ RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
+
+ int boost1 = Math.max(1, random().nextInt(5));
+ int boost2 = Math.max(1, random().nextInt(5));
+ int numMatch = atLeast(10);
+ for (int i = 0; i < numMatch; i++) {
+ Document doc = new Document();
+ int freqA = random().nextInt(5) + 1;
+ for (int j = 0; j < freqA; j++) {
+ doc.add(new TextField("a", "foo", Store.NO));
+ }
+
+ // Choose frequencies such that sometimes we don't add field B
+ int freqB = random().nextInt(3);
+ for (int j = 0; j < freqB; j++) {
+ doc.add(new TextField("b", "foo", Store.NO));
+ }
+
+ int freqAB = freqA * boost1 + freqB * boost2;
+ for (int j = 0; j < freqAB; j++) {
+ doc.add(new TextField("ab", "foo", Store.NO));
+ }
+
+ w.addDocument(doc);
+ }
+
+ IndexReader reader = w.getReader();
+ IndexSearcher searcher = newSearcher(reader);
+ searcher.setSimilarity(similarity);
+ CombinedFieldQuery query =
+ new CombinedFieldQuery.Builder("foo")
+ .addField("a", (float) boost1)
+ .addField("b", (float) boost2)
+ .build();
+
+ checkExpectedHits(searcher, numMatch, query, new TermQuery(new Term("ab", "foo")));
+
+ reader.close();
+ w.close();
+ dir.close();
+ }
+
+ private static Similarity randomCompatibleSimilarity() {
+ return RandomPicks.randomFrom(
+ random(),
+ Arrays.asList(
+ new BM25Similarity(),
+ new BooleanSimilarity(),
+ new ClassicSimilarity(),
+ new LMDirichletSimilarity(),
+ new LMJelinekMercerSimilarity(0.1f)));
+ }
+
+ private void checkExpectedHits(
+ IndexSearcher searcher, int numHits, Query firstQuery, Query secondQuery) throws IOException {
+ TopScoreDocCollectorManager collectorManager =
+ new TopScoreDocCollectorManager(numHits, Integer.MAX_VALUE);
+
+ TopDocs firstTopDocs = searcher.search(firstQuery, collectorManager);
+ assertEquals(numHits, firstTopDocs.totalHits.value());
+
+ collectorManager = new TopScoreDocCollectorManager(numHits, Integer.MAX_VALUE);
+ TopDocs secondTopDocs = searcher.search(secondQuery, collectorManager);
+ CheckHits.checkEqual(firstQuery, secondTopDocs.scoreDocs, firstTopDocs.scoreDocs);
+ }
+
+ public void testDocWithNegativeNorms() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = new IndexWriterConfig();
+ iwc.setSimilarity(new NegativeNormSimilarity());
+ RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
+
+ String queryString = "foo";
+
+ Document doc = new Document();
+ // both fields must contain tokens that match the query string "foo"
+ doc.add(new TextField("f", "foo", Store.NO));
+ doc.add(new TextField("g", "foo baz", Store.NO));
+ w.addDocument(doc);
+
+ IndexReader reader = w.getReader();
+ IndexSearcher searcher = newSearcher(reader);
+ searcher.setSimilarity(new BM25Similarity());
+ CombinedFieldQuery query =
+ new CombinedFieldQuery.Builder(queryString).addField("f").addField("g").build();
+ TopDocs topDocs = searcher.search(query, 10);
+ CheckHits.checkDocIds("queried docs do not match", new int[] {0}, topDocs.scoreDocs);
+
+ reader.close();
+ w.close();
+ dir.close();
+ }
+
+ public void testMultipleDocsNegativeNorms() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = new IndexWriterConfig();
+ iwc.setSimilarity(new NegativeNormSimilarity());
+ RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
+
+ String queryString = "foo";
+
+ Document doc0 = new Document();
+ doc0.add(new TextField("f", "foo", Store.NO));
+ doc0.add(new TextField("g", "foo baz", Store.NO));
+ w.addDocument(doc0);
+
+ Document doc1 = new Document();
+ // add another match on the query string to the second doc
+ doc1.add(new TextField("f", "foo is foo", Store.NO));
+ doc1.add(new TextField("g", "foo baz", Store.NO));
+ w.addDocument(doc1);
+
+ IndexReader reader = w.getReader();
+ IndexSearcher searcher = newSearcher(reader);
+ searcher.setSimilarity(new BM25Similarity());
+ CombinedFieldQuery query =
+ new CombinedFieldQuery.Builder(queryString).addField("f").addField("g").build();
+ TopDocs topDocs = searcher.search(query, 10);
+ // Return doc1 ahead of doc0 since its tf is higher
+ CheckHits.checkDocIds("queried docs do not match", new int[] {1, 0}, topDocs.scoreDocs);
+
+ reader.close();
+ w.close();
+ dir.close();
+ }
+
+ private static final class NegativeNormSimilarity extends Similarity {
+ @Override
+ public long computeNorm(FieldInvertState state) {
+ return -128;
+ }
+
+ @Override
+ public SimScorer scorer(
+ float boost, CollectionStatistics collectionStats, TermStatistics... termStats) {
+ return new BM25Similarity().scorer(boost, collectionStats, termStats);
+ }
+ }
+
+ public void testOverrideCollectionStatistics() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = new IndexWriterConfig();
+ Similarity similarity = randomCompatibleSimilarity();
+ iwc.setSimilarity(similarity);
+ RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
+
+ int numMatch = atLeast(10);
+ for (int i = 0; i < numMatch; i++) {
+ Document doc = new Document();
+ if (random().nextBoolean()) {
+ doc.add(new TextField("a", "baz", Store.NO));
+ doc.add(new TextField("b", "baz", Store.NO));
+ for (int k = 0; k < 2; k++) {
+ doc.add(new TextField("ab", "baz", Store.NO));
+ }
+ w.addDocument(doc);
+ doc.clear();
+ }
+ int freqA = random().nextInt(5) + 1;
+ for (int j = 0; j < freqA; j++) {
+ doc.add(new TextField("a", "foo", Store.NO));
+ }
+ int freqB = random().nextInt(5) + 1;
+ for (int j = 0; j < freqB; j++) {
+ doc.add(new TextField("b", "foo", Store.NO));
+ }
+ int freqAB = freqA + freqB;
+ for (int j = 0; j < freqAB; j++) {
+ doc.add(new TextField("ab", "foo", Store.NO));
+ }
+ w.addDocument(doc);
+ }
+
+ IndexReader reader = w.getReader();
+
+ int extraMaxDoc = randomIntBetween(0, 10);
+ int extraDocCount = randomIntBetween(0, extraMaxDoc);
+ int extraSumDocFreq = extraDocCount + randomIntBetween(0, 10);
+
+ int extraSumTotalTermFreqA = extraSumDocFreq + randomIntBetween(0, 10);
+ int extraSumTotalTermFreqB = extraSumDocFreq + randomIntBetween(0, 10);
+ int extraSumTotalTermFreqAB = extraSumTotalTermFreqA + extraSumTotalTermFreqB;
+
+ IndexSearcher searcher =
+ new IndexSearcher(reader) {
+ @Override
+ public CollectionStatistics collectionStatistics(String field) throws IOException {
+ CollectionStatistics shardStatistics = super.collectionStatistics(field);
+ int extraSumTotalTermFreq;
+ if (field.equals("a")) {
+ extraSumTotalTermFreq = extraSumTotalTermFreqA;
+ } else if (field.equals("b")) {
+ extraSumTotalTermFreq = extraSumTotalTermFreqB;
+ } else if (field.equals("ab")) {
+ extraSumTotalTermFreq = extraSumTotalTermFreqAB;
+ } else {
+ throw new AssertionError("should never be called");
+ }
+ return new CollectionStatistics(
+ field,
+ shardStatistics.maxDoc() + extraMaxDoc,
+ shardStatistics.docCount() + extraDocCount,
+ shardStatistics.sumTotalTermFreq() + extraSumTotalTermFreq,
+ shardStatistics.sumDocFreq() + extraSumDocFreq);
+ }
+ };
+ searcher.setSimilarity(similarity);
+ CombinedFieldQuery query =
+ new CombinedFieldQuery.Builder("foo").addField("a").addField("b").build();
+
+ checkExpectedHits(searcher, numMatch, query, new TermQuery(new Term("ab", "foo")));
+
+ reader.close();
+ w.close();
+ dir.close();
+ }
+}
diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/search/CombinedFieldQuery.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/search/CombinedFieldQuery.java
index 1ca3f790f43f..debd10ea83d9 100644
--- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/search/CombinedFieldQuery.java
+++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/search/CombinedFieldQuery.java
@@ -92,7 +92,9 @@
* org.apache.lucene.search.similarities.BM25Similarity}.
*
* @lucene.experimental
+ * @deprecated Use {@link org.apache.lucene.search.CombinedFieldQuery} instead.
*/
+@Deprecated
public final class CombinedFieldQuery extends Query implements Accountable {
private static final long BASE_RAM_BYTES =
RamUsageEstimator.shallowSizeOfInstance(CombinedFieldQuery.class);
@@ -203,7 +205,7 @@ public String toString(String field) {
if (pos++ != 0) {
builder.append(" ");
}
- builder.append(term.utf8ToString());
+ builder.append(Term.toString(term));
}
builder.append("))");
return builder.toString();