From 0d660d029f7410177ae68b39904fe47e24ae3d98 Mon Sep 17 00:00:00 2001
From: Renato Haeberli <>
Date: Sat, 22 Feb 2025 22:20:56 +0100
Subject: [PATCH 1/5] introducing option to consume characters when words are
 decompounded, in order to prevent matches on sub-words

---
 .../DictionaryCompoundWordTokenFilter.java    | 15 ++++++-
 ...tionaryCompoundWordTokenFilterFactory.java | 10 ++++-
 .../compound/TestCompoundWordTokenFilter.java | 44 ++++++++++++++++++-
 3 files changed, 66 insertions(+), 3 deletions(-)

diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilter.java
index c6278a80a1f3..919594ca91f6 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilter.java
@@ -28,6 +28,8 @@
  */
 public class DictionaryCompoundWordTokenFilter extends CompoundWordTokenFilterBase {
 
+  private boolean consumeChars = false;
+
   /**
    * Creates a new {@link DictionaryCompoundWordTokenFilter}
    *
@@ -50,6 +52,9 @@ public DictionaryCompoundWordTokenFilter(TokenStream input, CharArraySet diction
    * @param minSubwordSize only subwords longer than this get to the output stream
    * @param maxSubwordSize only subwords shorter than this get to the output stream
    * @param onlyLongestMatch Add only the longest matching subword to the stream
+   * @param consumeChars Characters are consumes, if a matching word is found and not used for
+   *     further potential matches (e.g. if the word "schwein" is extracted, the sub-word "wein" is
+   *     not extracted anymore
    */
   public DictionaryCompoundWordTokenFilter(
       TokenStream input,
@@ -57,8 +62,11 @@ public DictionaryCompoundWordTokenFilter(
       int minWordSize,
       int minSubwordSize,
       int maxSubwordSize,
-      boolean onlyLongestMatch) {
+      boolean onlyLongestMatch,
+      boolean consumeChars) {
     super(input, dictionary, minWordSize, minSubwordSize, maxSubwordSize, onlyLongestMatch);
+    this.consumeChars = consumeChars;
+
     if (dictionary == null) {
       throw new IllegalArgumentException("dictionary must not be null");
     }
@@ -87,6 +95,11 @@ protected void decompose() {
           }
         }
       }
+
+      if (longestMatchToken != null && consumeChars) {
+        i += longestMatchToken.txt.length() - 1;
+      }
+
       if (this.onlyLongestMatch && longestMatchToken != null) {
         tokens.add(longestMatchToken);
       }
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilterFactory.java
index 69819736d692..c18d26c120cf 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilterFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilterFactory.java
@@ -51,6 +51,7 @@ public class DictionaryCompoundWordTokenFilterFactory extends TokenFilterFactory
   private final int minSubwordSize;
   private final int maxSubwordSize;
   private final boolean onlyLongestMatch;
+  private final boolean consumeChars;
 
   /** Creates a new DictionaryCompoundWordTokenFilterFactory */
   public DictionaryCompoundWordTokenFilterFactory(Map<String, String> args) {
@@ -62,6 +63,7 @@ public DictionaryCompoundWordTokenFilterFactory(Map<String, String> args) {
     maxSubwordSize =
         getInt(args, "maxSubwordSize", CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE);
     onlyLongestMatch = getBoolean(args, "onlyLongestMatch", true);
+    consumeChars = getBoolean(args, "consumeChars", false);
     if (!args.isEmpty()) {
       throw new IllegalArgumentException("Unknown parameters: " + args);
     }
@@ -84,6 +86,12 @@ public TokenStream create(TokenStream input) {
       return input;
     }
     return new DictionaryCompoundWordTokenFilter(
-        input, dictionary, minWordSize, minSubwordSize, maxSubwordSize, onlyLongestMatch);
+        input,
+        dictionary,
+        minWordSize,
+        minSubwordSize,
+        maxSubwordSize,
+        onlyLongestMatch,
+        consumeChars);
   }
 }
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java
index 1e5ca1417c6d..0271df8fbb6b 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java
@@ -251,7 +251,8 @@ public void testDumbCompoundWordsSELongestMatch() throws Exception {
             CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
             CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE,
             CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE,
-            true);
+            true,
+            false);
 
     assertTokenStreamContents(
         tf,
@@ -275,6 +276,7 @@ public void testTokenEndingWithWordComponentOfMinimumLength() throws Exception {
             CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
             CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE,
             CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE,
+            false,
             false);
 
     assertTokenStreamContents(
@@ -297,6 +299,7 @@ public void testWordComponentWithLessThanMinimumLength() throws Exception {
             CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
             CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE,
             CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE,
+            false,
             false);
 
     // since "d" is shorter than the minimum subword size, it should not be added to the token
@@ -323,6 +326,7 @@ public void testReset() throws Exception {
             CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
             CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE,
             CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE,
+            false,
             false);
 
     CharTermAttribute termAtt = tf.getAttribute(CharTermAttribute.class);
@@ -351,6 +355,7 @@ public void testRetainMockAttribute() throws Exception {
             CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
             CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE,
             CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE,
+            false,
             false);
     MockRetainAttribute retAtt = stream.addAttribute(MockRetainAttribute.class);
     stream.reset();
@@ -682,4 +687,41 @@ protected TokenStreamComponents createComponents(String fieldName) {
     checkOneTerm(b, "", "");
     b.close();
   }
+
+  public void testDecompoundingWithConsumingChars() throws Exception {
+
+    CharArraySet dict = makeDictionary("wein", "schwein", "fleisch");
+
+    Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
+    String searchTerm = "schweinefleisch";
+    DictionaryCompoundWordTokenFilter tf =
+        getDictionaryCompoundWordTokenFilter(tokenizer, searchTerm, dict);
+
+    assertTokenStreamContents(tf, new String[] {searchTerm, "schwein", "fleisch"});
+  }
+
+  public void testDecompoundingWithConsumingChars2() throws Exception {
+    CharArraySet dict = makeDictionary("waffe", "affe", "kampf");
+
+    Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
+    String searchTerm = "nahkampfwaffen";
+
+    DictionaryCompoundWordTokenFilter tf =
+        getDictionaryCompoundWordTokenFilter(tokenizer, searchTerm, dict);
+
+    assertTokenStreamContents(tf, new String[] {searchTerm, "kampf", "waffe"});
+  }
+
+  private DictionaryCompoundWordTokenFilter getDictionaryCompoundWordTokenFilter(
+      Tokenizer tokenizer, String searchTerm, CharArraySet dict) {
+    tokenizer.setReader(new StringReader(searchTerm));
+    return new DictionaryCompoundWordTokenFilter(
+        tokenizer,
+        dict,
+        CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
+        CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE,
+        CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE,
+        true,
+        true);
+  }
 }

From edaea41d5c606d76278fe3600b55fbd731b62d1e Mon Sep 17 00:00:00 2001
From: Renato Haeberli <>
Date: Sat, 22 Feb 2025 22:23:56 +0100
Subject: [PATCH 2/5] fixing typo

---
 .../analysis/compound/DictionaryCompoundWordTokenFilter.java    | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilter.java
index 919594ca91f6..eaebe94c5f3c 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilter.java
@@ -52,7 +52,7 @@ public DictionaryCompoundWordTokenFilter(TokenStream input, CharArraySet diction
    * @param minSubwordSize only subwords longer than this get to the output stream
    * @param maxSubwordSize only subwords shorter than this get to the output stream
    * @param onlyLongestMatch Add only the longest matching subword to the stream
-   * @param consumeChars Characters are consumes, if a matching word is found and not used for
+   * @param consumeChars Characters are consumed, if a matching word is found and not used for
    *     further potential matches (e.g. if the word "schwein" is extracted, the sub-word "wein" is
    *     not extracted anymore
    */

From 3af19241e7ead1fdd02d752271bea378e658ab58 Mon Sep 17 00:00:00 2001
From: Renato Haeberli <>
Date: Sat, 22 Feb 2025 22:30:07 +0100
Subject: [PATCH 3/5] fixing typo

---
 .../analysis/compound/DictionaryCompoundWordTokenFilter.java  | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilter.java
index eaebe94c5f3c..ea25775bbf48 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilter.java
@@ -53,8 +53,8 @@ public DictionaryCompoundWordTokenFilter(TokenStream input, CharArraySet diction
    * @param maxSubwordSize only subwords shorter than this get to the output stream
    * @param onlyLongestMatch Add only the longest matching subword to the stream
    * @param consumeChars Characters are consumed, if a matching word is found and not used for
-   *     further potential matches (e.g. if the word "schwein" is extracted, the sub-word "wein" is
-   *     not extracted anymore
+   *     further potential matches anymore. E.g. if the word "schwein" is extracted, the sub-word "wein" is
+   *     not extracted anymore.
    */
   public DictionaryCompoundWordTokenFilter(
       TokenStream input,

From e292287b08cab2ab8dc1d2318dd4f2c1f4f7f381 Mon Sep 17 00:00:00 2001
From: Renato Haeberli <>
Date: Sat, 22 Feb 2025 22:48:38 +0100
Subject: [PATCH 4/5] fix format of java-doc

---
 .../analysis/compound/DictionaryCompoundWordTokenFilter.java  | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilter.java
index ea25775bbf48..e1f2ce273f06 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilter.java
@@ -53,8 +53,8 @@ public DictionaryCompoundWordTokenFilter(TokenStream input, CharArraySet diction
    * @param maxSubwordSize only subwords shorter than this get to the output stream
    * @param onlyLongestMatch Add only the longest matching subword to the stream
    * @param consumeChars Characters are consumed, if a matching word is found and not used for
-   *     further potential matches anymore. E.g. if the word "schwein" is extracted, the sub-word "wein" is
-   *     not extracted anymore.
+   *     further potential matches anymore. E.g. if the word "schwein" is extracted, the sub-word
+   *     "wein" is not extracted anymore.
    */
   public DictionaryCompoundWordTokenFilter(
       TokenStream input,

From a4ab9ef440a5f887b05eeeeea28afd5140caf0a8 Mon Sep 17 00:00:00 2001
From: Renato Haeberli <>
Date: Mon, 24 Feb 2025 20:26:45 +0100
Subject: [PATCH 5/5] rename consumeChars to reuseChars and validate parameter combination

---
 .../DictionaryCompoundWordTokenFilter.java    | 19 +++++++-----
 ...tionaryCompoundWordTokenFilterFactory.java |  6 ++--
 .../compound/TestCompoundWordTokenFilter.java | 31 +++++++++++++------
 3 files changed, 36 insertions(+), 20 deletions(-)

diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilter.java
index e1f2ce273f06..5a997e375d3b 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilter.java
@@ -28,7 +28,7 @@
  */
 public class DictionaryCompoundWordTokenFilter extends CompoundWordTokenFilterBase {
 
-  private boolean consumeChars = false;
+  private boolean reuseChars = true;
 
   /**
    * Creates a new {@link DictionaryCompoundWordTokenFilter}
@@ -52,9 +52,9 @@ public DictionaryCompoundWordTokenFilter(TokenStream input, CharArraySet diction
    * @param minSubwordSize only subwords longer than this get to the output stream
    * @param maxSubwordSize only subwords shorter than this get to the output stream
    * @param onlyLongestMatch Add only the longest matching subword to the stream
-   * @param consumeChars Characters are consumed, if a matching word is found and not used for
-   *     further potential matches anymore. E.g. if the word "schwein" is extracted, the sub-word
-   *     "wein" is not extracted anymore.
+   * @param reuseChars Characters are reused for multiple matching words, e.g. if a word contains
+   *     'schwein', the words 'schwein' and 'wein' will be extracted. If set to false, only the
+   *     longer word, 'schwein' in this case, will be extracted.
    */
   public DictionaryCompoundWordTokenFilter(
       TokenStream input,
@@ -63,13 +63,18 @@ public DictionaryCompoundWordTokenFilter(
       int minSubwordSize,
       int maxSubwordSize,
       boolean onlyLongestMatch,
-      boolean consumeChars) {
+      boolean reuseChars) {
     super(input, dictionary, minWordSize, minSubwordSize, maxSubwordSize, onlyLongestMatch);
-    this.consumeChars = consumeChars;
+    this.reuseChars = reuseChars;
 
     if (dictionary == null) {
       throw new IllegalArgumentException("dictionary must not be null");
     }
+
+    if (!reuseChars && !onlyLongestMatch) {
+      throw new IllegalArgumentException(
+          "reuseChars can only be set to false if onlyLongestMatch is set to true");
+    }
   }
 
   @Override
@@ -96,7 +101,7 @@ protected void decompose() {
         }
       }
 
-      if (longestMatchToken != null && consumeChars) {
+      if (longestMatchToken != null && !reuseChars) {
         i += longestMatchToken.txt.length() - 1;
       }
 
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilterFactory.java
index c18d26c120cf..effe6e7379db 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilterFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilterFactory.java
@@ -51,7 +51,7 @@ public class DictionaryCompoundWordTokenFilterFactory extends TokenFilterFactory
   private final int minSubwordSize;
   private final int maxSubwordSize;
   private final boolean onlyLongestMatch;
-  private final boolean consumeChars;
+  private final boolean reuseChars;
 
   /** Creates a new DictionaryCompoundWordTokenFilterFactory */
   public DictionaryCompoundWordTokenFilterFactory(Map<String, String> args) {
@@ -63,7 +63,7 @@ public DictionaryCompoundWordTokenFilterFactory(Map<String, String> args) {
     maxSubwordSize =
         getInt(args, "maxSubwordSize", CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE);
     onlyLongestMatch = getBoolean(args, "onlyLongestMatch", true);
-    consumeChars = getBoolean(args, "consumeChars", false);
+    reuseChars = getBoolean(args, "reuseChars", true);
     if (!args.isEmpty()) {
       throw new IllegalArgumentException("Unknown parameters: " + args);
     }
@@ -92,6 +92,6 @@ public TokenStream create(TokenStream input) {
         minSubwordSize,
         maxSubwordSize,
         onlyLongestMatch,
-        consumeChars);
+        reuseChars);
   }
 }
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java
index 0271df8fbb6b..7e5dda0c57a9 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java
@@ -20,6 +20,7 @@
 import java.io.Reader;
 import java.io.StringReader;
 import java.util.Arrays;
+import java.util.List;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.TokenFilter;
@@ -252,7 +253,7 @@ public void testDumbCompoundWordsSELongestMatch() throws Exception {
             CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE,
             CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE,
             true,
-            false);
+            true);
 
     assertTokenStreamContents(
         tf,
@@ -277,7 +278,7 @@ public void testTokenEndingWithWordComponentOfMinimumLength() throws Exception {
             CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE,
             CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE,
             false,
-            false);
+            true);
 
     assertTokenStreamContents(
         tf,
@@ -300,7 +301,7 @@ public void testWordComponentWithLessThanMinimumLength() throws Exception {
             CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE,
             CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE,
             false,
-            false);
+            true);
 
     // since "d" is shorter than the minimum subword size, it should not be added to the token
     // stream
@@ -327,7 +328,7 @@ public void testReset() throws Exception {
             CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE,
             CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE,
             false,
-            false);
+            true);
 
     CharTermAttribute termAtt = tf.getAttribute(CharTermAttribute.class);
     tf.reset();
@@ -356,7 +357,7 @@ public void testRetainMockAttribute() throws Exception {
             CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE,
             CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE,
             false,
-            false);
+            true);
     MockRetainAttribute retAtt = stream.addAttribute(MockRetainAttribute.class);
     stream.reset();
     while (stream.incrementToken()) {
@@ -695,7 +696,7 @@ public void testDecompoundingWithConsumingChars() throws Exception {
     Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
     String searchTerm = "schweinefleisch";
     DictionaryCompoundWordTokenFilter tf =
-        getDictionaryCompoundWordTokenFilter(tokenizer, searchTerm, dict);
+        getDictionaryCompoundWordTokenFilter(tokenizer, searchTerm, dict, true);
 
     assertTokenStreamContents(tf, new String[] {searchTerm, "schwein", "fleisch"});
   }
@@ -707,13 +708,23 @@ public void testDecompoundingWithConsumingChars2() throws Exception {
     String searchTerm = "nahkampfwaffen";
 
     DictionaryCompoundWordTokenFilter tf =
-        getDictionaryCompoundWordTokenFilter(tokenizer, searchTerm, dict);
+        getDictionaryCompoundWordTokenFilter(tokenizer, searchTerm, dict, true);
 
     assertTokenStreamContents(tf, new String[] {searchTerm, "kampf", "waffe"});
   }
 
+  public void testDecompoundingWithInvalidParameterCombination() {
+
+    Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
+    expectThrows(
+        IllegalArgumentException.class,
+        () ->
+            getDictionaryCompoundWordTokenFilter(
+                tokenizer, "", new CharArraySet(List.of(), true), false));
+  }
+
   private DictionaryCompoundWordTokenFilter getDictionaryCompoundWordTokenFilter(
-      Tokenizer tokenizer, String searchTerm, CharArraySet dict) {
+      Tokenizer tokenizer, String searchTerm, CharArraySet dict, boolean onlyLongestMatch) {
     tokenizer.setReader(new StringReader(searchTerm));
     return new DictionaryCompoundWordTokenFilter(
         tokenizer,
@@ -721,7 +732,7 @@ private DictionaryCompoundWordTokenFilter getDictionaryCompoundWordTokenFilter(
         CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
         CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE,
         CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE,
-        true,
-        true);
+        onlyLongestMatch,
+        false);
   }
 }