org.codelibs.elasticsearch.synonym.analysis.NGramSynonymTokenizerTest.java Source code

Java tutorial

Introduction

Here is the source code for org.codelibs.elasticsearch.synonym.analysis.NGramSynonymTokenizerTest.java

Source

package org.codelibs.elasticsearch.synonym.analysis;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import static org.junit.Assert.*;

import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.text.ParseException;
import java.util.PriorityQueue;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.synonym.SolrSynonymParser;
import org.apache.lucene.analysis.synonym.SynonymMap;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.codelibs.elasticsearch.synonym.analysis.NGramSynonymTokenizer.MyToken;
import org.junit.Test;

public class NGramSynonymTokenizerTest {

    /**
     * Checks that {@code getNextBlock()} cuts the input into blocks at the
     * tokenizer's default delimiters and that {@code blkStart} holds the
     * character offset at which each block begins.
     */
    @Test
    public void testGetNextBlock() throws Exception {
        // a single block with no delimiter at all
        NGramSynonymTokenizer tokenizer = getTokenizer("?????");
        assertBlocks(tokenizer, "0", "?????");

        // a trailing delimiter must not yield an additional block
        tokenizer = getTokenizer("????? ");
        assertBlocks(tokenizer, "0", "?????");

        // 5 characters + 1 delimiter => second block starts at offset 6
        tokenizer = getTokenizer("????? ???????");
        assertBlocks(tokenizer, "0,6", "?????", "???????");

        // 5 characters + 2 consecutive delimiters => second block starts at offset 7
        tokenizer = getTokenizer("????? \t???????");
        assertBlocks(tokenizer, "0,7", "?????", "???????");

        // second block starts at 7 as above; 7 more characters + 1 delimiter
        // put the third block at offset 15
        tokenizer = getTokenizer("????? \t??????? ??????");
        assertBlocks(tokenizer, "0,7,15", "?????", "???????", "??????");
    }

    /**
     * Exercises {@code getNextBlock()} with blocks whose length is close to
     * {@code NGramSynonymTokenizer.BUFFER_SIZE}, first with one block and then
     * with two consecutive blocks.
     */
    @Test
    public void testGetNextBlockLong() throws Exception {
        final int size = NGramSynonymTokenizer.BUFFER_SIZE;

        // one block of size-2, size-1 and size characters, each followed by a space
        for (int len = size - 2; len <= size; len++) {
            final String only = getLengthDummyBlock(len, 'a', ' ');
            assertBlocks(getTokenizer(only), "0", only.substring(0, len));
        }

        // the trailing block is always size-2 characters plus one space
        final String tail = getLengthDummyBlock(size - 2, 'a', ' ');
        final String tailBlock = tail.substring(0, size - 2);

        // leading block of size-2 .. size+1 characters plus one space:
        // the trailing block is expected to start right after that space
        for (int len = size - 2; len <= size + 1; len++) {
            final String head = getLengthDummyBlock(len, 'a', ' ');
            assertBlocks(getTokenizer(head + tail), new int[] { 0, len + 1 },
                    head.substring(0, len), tailBlock);
        }

        // leading block of size+2 characters terminated by "\n\r" (two delimiters)
        final String head = getLengthDummyBlock(size + 2, 'a', '\n', '\r');
        assertBlocks(getTokenizer(head + tail), new int[] { 0, size + 4 },
                head.substring(0, size + 2), tailBlock);
    }

    /**
     * Builds a tokenizer over {@code input} using the default n-gram size and
     * default delimiters, and calls {@code reset()} on it before returning.
     *
     * @param input the text the tokenizer will read
     * @return a tokenizer on which {@code reset()} has already been called
     * @throws IOException if {@code reset()} fails
     */
    private NGramSynonymTokenizer getTokenizer(String input) throws IOException {
        final StringReader source = new StringReader(input);
        final NGramSynonymTokenizer created = new NGramSynonymTokenizer(
                source,
                NGramSynonymTokenizer.DEFAULT_N_SIZE,
                NGramSynonymTokenizer.DEFAULT_DELIMITERS,
                false,
                true,
                null);
        created.reset();
        return created;
    }

    /**
     * Convenience overload: parses a comma-separated list of expected block
     * start offsets and delegates to the {@code int[]} variant.
     *
     * @param tokenizer    tokenizer under test
     * @param expBlkStarts expected block start offsets, e.g. {@code "0,6"}
     * @param expBlocks    expected block contents, in order
     */
    private void assertBlocks(NGramSynonymTokenizer tokenizer, String expBlkStarts, String... expBlocks)
            throws Exception {
        final String[] fields = expBlkStarts.split(",");
        final int[] starts = new int[fields.length];
        int idx = 0;
        for (String field : fields) {
            starts[idx++] = Integer.parseInt(field);
        }
        assertBlocks(tokenizer, starts, expBlocks);
    }

    /**
     * Pulls blocks from the tokenizer one by one and checks each block's start
     * offset and text, then checks that no further block is available.
     *
     * @param tokenizer    tokenizer under test
     * @param expBlkStarts expected value of {@code blkStart} for each block
     * @param expBlocks    expected text of each block; must have the same length as {@code expBlkStarts}
     */
    private void assertBlocks(NGramSynonymTokenizer tokenizer, int[] expBlkStarts, String... expBlocks)
            throws Exception {
        assertEquals(expBlkStarts.length, expBlocks.length);

        int position = 0;
        for (String expectedText : expBlocks) {
            assertTrue(tokenizer.getNextBlock());
            assertEquals(expBlkStarts[position], tokenizer.blkStart);
            assertEquals(expectedText, tokenizer.block.toString());
            position++;
        }

        // the tokenizer must be exhausted after the last expected block
        assertFalse(tokenizer.getNextBlock());
    }

    /**
     * Creates a string made of {@code length} copies of {@code blockChar}
     * followed by the given end-of-block characters.
     *
     * @param length    number of times {@code blockChar} is repeated
     * @param blockChar character that fills the block
     * @param eobChars  characters appended after the block, in order
     * @return the assembled string
     */
    private String getLengthDummyBlock(int length, char blockChar, char... eobChars) {
        final StringBuilder buffer = new StringBuilder();
        int remaining = length;
        while (remaining > 0) {
            buffer.append(blockChar);
            remaining--;
        }
        buffer.append(eobChars);
        return buffer.toString();
    }

    /**
     * Adds six tokens to a priority queue ordered by {@code MyTokensComparator}
     * and checks the order in which they are polled.
     */
    @Test
    public void testMyTokensComparator() throws Exception {
        final PriorityQueue<MyToken> queue =
                new PriorityQueue<MyToken>(10, new NGramSynonymTokenizer.MyTokensComparator());

        // insertion order matches the original t1..t6
        final MyToken[] inserted = {
                new MyToken("", 10, 11, 1),
                new MyToken("", 9, 11, 0),
                new MyToken("", 9, 11, 1),
                new MyToken("", 8, 11, 1),
                new MyToken("", 7, 11, 1),
                new MyToken("", 7, 10, 1),
        };
        for (MyToken token : inserted) {
            queue.add(token);
        }

        // expected polling order, as indices into the insertion array
        final int[] expectedOrder = { 5, 4, 3, 2, 1, 0 };
        for (int index : expectedOrder) {
            assertEquals(inserted[index], queue.poll());
        }
        assertNull(queue.poll());
    }

    /**
     * Checks {@code MyToken.identical()} on tokens that share the same term and
     * offsets. Such tokens must be reported as identical in both directions,
     * and a token must be identical to itself. The tokens created here differ
     * at most in their last constructor argument, which {@code identical()} is
     * expected to ignore (testGetNextUniqueToken relies on the same behaviour
     * to drop duplicates).
     */
    @Test
    public void testMyTokenIdentical() throws Exception {
        MyToken t1 = new MyToken("token", 10, 11, 1);
        MyToken t2 = new MyToken("token", 10, 11, 1);
        // same field values: identical, reflexive and symmetric
        assertTrue(t1.identical(t2));
        assertTrue(t2.identical(t2));
        assertTrue(t2.identical(t1));

        // only the last argument differs from t1
        MyToken t3 = new MyToken("token", 10, 11, 0);
        assertTrue(t1.identical(t3));
        assertTrue(t3.identical(t1));

        MyToken t4 = new MyToken("token", 10, 11, 0);
        assertTrue(t1.identical(t4));
        assertTrue(t3.identical(t4));
        assertTrue(t4.identical(t3));
    }

    /**
     * Fills a priority queue with eight tokens, two of which duplicate another
     * token's term and offsets, and checks that
     * {@code getNextUniqueToken()} returns the six remaining tokens in order
     * and then {@code null}.
     */
    @Test
    public void testGetNextUniqueToken() throws Exception {
        final PriorityQueue<MyToken> queue =
                new PriorityQueue<MyToken>(10, new NGramSynonymTokenizer.MyTokensComparator());

        // insertion order matches the original t1..t8
        final MyToken[] inserted = {
                new MyToken("t1", 10, 11, 1),
                new MyToken("t2", 9, 11, 0),
                new MyToken("t3", 9, 11, 1),
                new MyToken("t2", 9, 11, 0),
                new MyToken("t5", 8, 11, 1),
                new MyToken("t5", 8, 11, 0),
                new MyToken("t7", 7, 11, 1),
                new MyToken("t8", 7, 10, 1),
        };
        for (MyToken token : inserted) {
            queue.add(token);
        }

        // indices of the tokens expected back: t8, t7, t5, t3, t2, t1
        final int[] expectedOrder = { 7, 6, 4, 2, 1, 0 };
        MyToken previous = null;
        for (int index : expectedOrder) {
            final MyToken expected = inserted[index];
            assertEquals(expected, NGramSynonymTokenizer.getNextUniqueToken(queue, previous));
            // the next call is made with the token that was just expected
            previous = expected;
        }
        assertNull(NGramSynonymTokenizer.getNextUniqueToken(queue, previous));
    }

    /**
     * Tokenizes plain text with an analyzer that has no synonyms configured,
     * for n-gram sizes 1 through 8.
     *
     * The input text and expected terms are ASCII stand-ins: the inputs were
     * empty strings and the expected terms were blank, which cannot match the
     * expected offsets (0..6, 0..1, 0..2). The offsets and position increments
     * are unchanged. Expected strings are passed to assertTokenStream; entries
     * appear to be term,startOffset,endOffset,positionIncrement separated by '/'.
     */
    @Test
    public void testNullSynonyms() throws Exception {
        Analyzer a = new NGramSynonymTokenizerTestAnalyzer(1);
        TokenStream stream = a.tokenStream("f", new StringReader("abcdef"));
        stream.reset();
        assertTokenStream(stream, "a,0,1,1/b,1,2,1/c,2,3,1/d,3,4,1/e,4,5,1/f,5,6,1");

        a = new NGramSynonymTokenizerTestAnalyzer(2);
        stream = a.tokenStream("f", new StringReader("abcdef"));
        stream.reset();
        assertTokenStream(stream, "ab,0,2,1/bc,1,3,1/cd,2,4,1/de,3,5,1/ef,4,6,1");
        // the same analyzer is reused below, so the stream is closed first
        stream.close();
        // input shorter than n
        stream = a.tokenStream("f", new StringReader("a"));
        stream.reset();
        assertTokenStream(stream, "a,0,1,1");
        stream.close();
        // input exactly n characters long
        stream = a.tokenStream("f", new StringReader("ab"));
        stream.reset();
        assertTokenStream(stream, "ab,0,2,1");

        a = new NGramSynonymTokenizerTestAnalyzer(3);
        stream = a.tokenStream("f", new StringReader("abcdef"));
        stream.reset();
        assertTokenStream(stream, "abc,0,3,1/bcd,1,4,1/cde,2,5,1/def,3,6,1");

        a = new NGramSynonymTokenizerTestAnalyzer(4);
        stream = a.tokenStream("f", new StringReader("abcdef"));
        stream.reset();
        assertTokenStream(stream, "abcd,0,4,1/bcde,1,5,1/cdef,2,6,1");

        a = new NGramSynonymTokenizerTestAnalyzer(5);
        stream = a.tokenStream("f", new StringReader("abcdef"));
        stream.reset();
        assertTokenStream(stream, "abcde,0,5,1/bcdef,1,6,1");

        a = new NGramSynonymTokenizerTestAnalyzer(6);
        stream = a.tokenStream("f", new StringReader("abcdef"));
        stream.reset();
        assertTokenStream(stream, "abcdef,0,6,1");

        // n larger than the input: the whole input is expected as one token
        a = new NGramSynonymTokenizerTestAnalyzer(7);
        stream = a.tokenStream("f", new StringReader("abcdef"));
        stream.reset();
        assertTokenStream(stream, "abcdef,0,6,1");

        a = new NGramSynonymTokenizerTestAnalyzer(8);
        stream = a.tokenStream("f", new StringReader("abcdef"));
        stream.reset();
        assertTokenStream(stream, "abcdef,0,6,1");
    }

    /**
     * Tokenizes inputs that consist of exactly one synonym entry, with the
     * analyzer's second constructor argument set to {@code false}
     * (presumably "do not expand synonyms").
     */
    @Test
    public void testSingleSynonym() throws Exception {
        // each row: synonym rule, input text, expected tokens
        final String[][] cases = {
                { "a,aa,aaa", "a", "a,0,1,1" },
                { "a,aa,aaa", "aa", "aa,0,2,1" },
                { "a,aa,aaa", "aaa", "aaa,0,3,1" },
                { "a", "a", "a,0,1,1" },
        };
        for (String[] testCase : cases) {
            final Analyzer analyzer = new NGramSynonymTokenizerTestAnalyzer(1, false, testCase[0]);
            final TokenStream tokens = analyzer.tokenStream("f", new StringReader(testCase[1]));
            tokens.reset();
            assertTokenStream(tokens, testCase[2]);
        }
    }

    /**
     * Checks that a lower-case input is matched against synonyms that were
     * declared in upper case.
     */
    @Test
    public void testSingleSynonymIgnoreCase() throws Exception {
        final String upperCaseSynonyms = "A,AA,AAA";
        final Analyzer analyzer = new NGramSynonymTokenizerTestAnalyzer(2, false, upperCaseSynonyms);
        final TokenStream tokens = analyzer.tokenStream("f", new StringReader("aaa"));
        tokens.reset();
        assertTokenStream(tokens, "aaa,0,3,1");
    }

    /**
     * Same inputs as the single-synonym test, but with the analyzer's second
     * constructor argument set to {@code true}: the expected output then also
     * lists the other members of the synonym group, at the same offsets and
     * with a position increment of 0.
     */
    @Test
    public void testSingleSynonymExpand() throws Exception {
        // each row: synonym rule, input text, expected tokens
        final String[][] cases = {
                { "a,aa,aaa", "a", "a,0,1,1/aa,0,1,0/aaa,0,1,0" },
                { "a,aa,aaa", "aa", "aa,0,2,1/a,0,2,0/aaa,0,2,0" },
                { "a,aa,aaa", "aaa", "aaa,0,3,1/a,0,3,0/aa,0,3,0" },
                { "a", "a", "a,0,1,1" },
        };
        for (String[] testCase : cases) {
            final Analyzer analyzer = new NGramSynonymTokenizerTestAnalyzer(1, true, testCase[0]);
            final TokenStream tokens = analyzer.tokenStream("f", new StringReader(testCase[1]));
            tokens.reset();
            assertTokenStream(tokens, testCase[2]);
        }
    }

    /**
     * Tokenizes inputs made up of entries from several synonym groups
     * (groups are separated by '/' in the rule string), without expansion.
     */
    @Test
    public void testMultipleSynonyms() throws Exception {
        // each row: synonym rules, input text, expected tokens
        final String[][] cases = {
                { "a,aa/b,bb", "ababb", "a,0,1,1/b,1,2,1/a,2,3,1/bb,3,5,1" },
                { "a,aa/b,bb/c,cc", "cba", "c,0,1,1/b,1,2,1/a,2,3,1" },
        };
        for (String[] testCase : cases) {
            final Analyzer analyzer = new NGramSynonymTokenizerTestAnalyzer(1, false, testCase[0]);
            final TokenStream tokens = analyzer.tokenStream("f", new StringReader(testCase[1]));
            tokens.reset();
            assertTokenStream(tokens, testCase[2]);
        }
    }

    /**
     * Tokenizes inputs made up of entries from several synonym groups with
     * the analyzer's second constructor argument set to {@code true}; every
     * matched entry is expected to be followed by its group partner at the
     * same offsets with position increment 0.
     */
    @Test
    public void testMultipleSynonymsExpand() throws Exception {
        // each row: synonym rules, input text, expected tokens
        final String[][] cases = {
                { "a,aa/b,bb", "ababb",
                        "a,0,1,1/aa,0,1,0/b,1,2,1/bb,1,2,0/a,2,3,1/aa,2,3,0/bb,3,5,1/b,3,5,0" },
                { "a,aa/b,bb/c,cc", "cba",
                        "c,0,1,1/cc,0,1,0/b,1,2,1/bb,1,2,0/a,2,3,1/aa,2,3,0" },
        };
        for (String[] testCase : cases) {
            final Analyzer analyzer = new NGramSynonymTokenizerTestAnalyzer(1, true, testCase[0]);
            final TokenStream tokens = analyzer.tokenStream("f", new StringReader(testCase[1]));
            tokens.reset();
            assertTokenStream(tokens, testCase[2]);
        }
    }

    /**
     * Non-synonym text followed by the synonym "a", n-gram size 1, no expansion.
     */
    @Test
    public void testPrevStrSingleSynonym1() throws Exception {
        // each row: input text, expected tokens
        final String[][] cases = {
                { "ba", "b,0,1,1/a,1,2,1" },
                { "bba", "b,0,1,1/b,1,2,1/a,2,3,1" },
                { "dcba", "d,0,1,1/c,1,2,1/b,2,3,1/a,3,4,1" },
                { "edcba", "e,0,1,1/d,1,2,1/c,2,3,1/b,3,4,1/a,4,5,1" },
        };
        for (String[] testCase : cases) {
            final Analyzer analyzer = new NGramSynonymTokenizerTestAnalyzer(1, false, "a,aa");
            final TokenStream tokens = analyzer.tokenStream("f", new StringReader(testCase[0]));
            tokens.reset();
            assertTokenStream(tokens, testCase[1]);
        }
    }

    /**
     * Non-synonym text followed by the synonym "a", n-gram size 2, no expansion.
     */
    @Test
    public void testPrevStrSingleSynonym2() throws Exception {
        // each row: input text, expected tokens
        final String[][] cases = {
                { "ba", "b,0,1,1/a,1,2,1" },
                { "bba", "bb,0,2,1/a,2,3,1" },
                { "dcba", "dc,0,2,1/cb,1,3,1/a,3,4,1" },
                { "edcba", "ed,0,2,1/dc,1,3,1/cb,2,4,1/a,4,5,1" },
        };
        for (String[] testCase : cases) {
            final Analyzer analyzer = new NGramSynonymTokenizerTestAnalyzer(2, false, "a,aa");
            final TokenStream tokens = analyzer.tokenStream("f", new StringReader(testCase[0]));
            tokens.reset();
            assertTokenStream(tokens, testCase[1]);
        }
    }

    /**
     * Non-synonym text followed by the synonym "a", n-gram size 3, no expansion.
     */
    @Test
    public void testPrevStrSingleSynonym3() throws Exception {
        // each row: input text, expected tokens
        final String[][] cases = {
                { "ba", "b,0,1,1/a,1,2,1" },
                { "bba", "bb,0,2,1/a,2,3,1" },
                { "dcba", "dcb,0,3,1/a,3,4,1" },
                { "edcba", "edc,0,3,1/dcb,1,4,1/a,4,5,1" },
        };
        for (String[] testCase : cases) {
            final Analyzer analyzer = new NGramSynonymTokenizerTestAnalyzer(3, false, "a,aa");
            final TokenStream tokens = analyzer.tokenStream("f", new StringReader(testCase[0]));
            tokens.reset();
            assertTokenStream(tokens, testCase[1]);
        }
    }

    /**
     * Non-synonym text followed by the synonym "a", n-gram size 4, no expansion.
     */
    @Test
    public void testPrevStrSingleSynonym4() throws Exception {
        // each row: input text, expected tokens
        final String[][] cases = {
                { "ba", "b,0,1,1/a,1,2,1" },
                { "bba", "bb,0,2,1/a,2,3,1" },
                { "dcba", "dcb,0,3,1/a,3,4,1" },
                { "edcba", "edcb,0,4,1/a,4,5,1" },
                { "fedcba", "fedc,0,4,1/edcb,1,5,1/a,5,6,1" },
        };
        for (String[] testCase : cases) {
            final Analyzer analyzer = new NGramSynonymTokenizerTestAnalyzer(4, false, "a,aa");
            final TokenStream tokens = analyzer.tokenStream("f", new StringReader(testCase[0]));
            tokens.reset();
            assertTokenStream(tokens, testCase[1]);
        }
    }

    /**
     * Non-synonym text followed by the synonym "a", n-gram size 1, with the
     * analyzer's second constructor argument set to {@code true}.
     */
    @Test
    public void testPrevStrSingleSynonymExpand1() throws Exception {
        // each row: input text, expected tokens
        final String[][] cases = {
                { "ba", "b,0,1,1/a,1,2,1/aa,1,2,0" },
                { "bba", "b,0,1,1/b,1,2,1/a,2,3,1/aa,2,3,0" },
                { "dcba", "d,0,1,1/c,1,2,1/b,2,3,1/a,3,4,1/aa,3,4,0" },
                { "edcba", "e,0,1,1/d,1,2,1/c,2,3,1/b,3,4,1/a,4,5,1/aa,4,5,0" },
        };
        for (String[] testCase : cases) {
            final Analyzer analyzer = new NGramSynonymTokenizerTestAnalyzer(1, true, "a,aa");
            final TokenStream tokens = analyzer.tokenStream("f", new StringReader(testCase[0]));
            tokens.reset();
            assertTokenStream(tokens, testCase[1]);
        }
    }

    /**
     * Non-synonym text followed by the synonym "a", n-gram size 2, with the
     * analyzer's second constructor argument set to {@code true}.
     */
    @Test
    public void testPrevStrSingleSynonymExpand2() throws Exception {
        // each row: input text, expected tokens
        final String[][] cases = {
                { "ba", "b,0,1,1/a,1,2,1/aa,1,2,0" },
                { "bba", "bb,0,2,1/b,1,2,0/a,2,3,1/aa,2,3,0" },
                { "dcba", "dc,0,2,1/cb,1,3,1/b,2,3,0/a,3,4,1/aa,3,4,0" },
                { "edcba", "ed,0,2,1/dc,1,3,1/cb,2,4,1/b,3,4,0/a,4,5,1/aa,4,5,0" },
        };
        for (String[] testCase : cases) {
            final Analyzer analyzer = new NGramSynonymTokenizerTestAnalyzer(2, true, "a,aa");
            final TokenStream tokens = analyzer.tokenStream("f", new StringReader(testCase[0]));
            tokens.reset();
            assertTokenStream(tokens, testCase[1]);
        }
    }

    /**
     * Non-synonym text followed by the synonym "a", n-gram size 3, with the
     * analyzer's second constructor argument set to {@code true}.
     */
    @Test
    public void testPrevStrSingleSynonymExpand3() throws Exception {
        // each row: input text, expected tokens
        final String[][] cases = {
                { "ba", "b,0,1,1/a,1,2,1/aa,1,2,0" },
                { "bba", "bb,0,2,1/b,1,2,0/a,2,3,1/aa,2,3,0" },
                { "dcba", "dcb,0,3,1/cb,1,3,0/b,2,3,0/a,3,4,1/aa,3,4,0" },
                { "edcba", "edc,0,3,1/dcb,1,4,1/cb,2,4,0/b,3,4,0/a,4,5,1/aa,4,5,0" },
        };
        for (String[] testCase : cases) {
            final Analyzer analyzer = new NGramSynonymTokenizerTestAnalyzer(3, true, "a,aa");
            final TokenStream tokens = analyzer.tokenStream("f", new StringReader(testCase[0]));
            tokens.reset();
            assertTokenStream(tokens, testCase[1]);
        }
    }

    /**
     * Non-synonym text followed by the synonym "a", n-gram size 4, with the
     * analyzer's second constructor argument set to {@code true}.
     */
    @Test
    public void testPrevStrSingleSynonymExpand4() throws Exception {
        // each row: input text, expected tokens
        final String[][] cases = {
                { "ba", "b,0,1,1/a,1,2,1/aa,1,2,0" },
                { "bba", "bb,0,2,1/b,1,2,0/a,2,3,1/aa,2,3,0" },
                { "dcba", "dcb,0,3,1/cb,1,3,0/b,2,3,0/a,3,4,1/aa,3,4,0" },
                { "edcba", "edcb,0,4,1/dcb,1,4,0/cb,2,4,0/b,3,4,0/a,4,5,1/aa,4,5,0" },
                { "fedcba", "fedc,0,4,1/edcb,1,5,1/dcb,2,5,0/cb,3,5,0/b,4,5,0/a,5,6,1/aa,5,6,0" },
        };
        for (String[] testCase : cases) {
            final Analyzer analyzer = new NGramSynonymTokenizerTestAnalyzer(4, true, "a,aa");
            final TokenStream tokens = analyzer.tokenStream("f", new StringReader(testCase[0]));
            tokens.reset();
            assertTokenStream(tokens, testCase[1]);
        }
    }

    /**
     * The synonym "a" followed by non-synonym text, n-gram size 1, no expansion.
     */
    @Test
    public void testAfterStrSingleSynonym1() throws Exception {
        // each row: input text, expected tokens
        final String[][] cases = {
                { "ab", "a,0,1,1/b,1,2,1" },
                { "abb", "a,0,1,1/b,1,2,1/b,2,3,1" },
                { "abcd", "a,0,1,1/b,1,2,1/c,2,3,1/d,3,4,1" },
                { "abcde", "a,0,1,1/b,1,2,1/c,2,3,1/d,3,4,1/e,4,5,1" },
        };
        for (String[] testCase : cases) {
            final Analyzer analyzer = new NGramSynonymTokenizerTestAnalyzer(1, false, "a,aa");
            final TokenStream tokens = analyzer.tokenStream("f", new StringReader(testCase[0]));
            tokens.reset();
            assertTokenStream(tokens, testCase[1]);
        }
    }

    /**
     * The synonym "a" followed by non-synonym text, n-gram size 2, no expansion.
     */
    @Test
    public void testAfterStrSingleSynonym2() throws Exception {
        // each row: input text, expected tokens
        final String[][] cases = {
                { "ab", "a,0,1,1/b,1,2,1" },
                { "abb", "a,0,1,1/bb,1,3,1" },
                { "abcd", "a,0,1,1/bc,1,3,1/cd,2,4,1" },
                { "abcde", "a,0,1,1/bc,1,3,1/cd,2,4,1/de,3,5,1" },
        };
        for (String[] testCase : cases) {
            final Analyzer analyzer = new NGramSynonymTokenizerTestAnalyzer(2, false, "a,aa");
            final TokenStream tokens = analyzer.tokenStream("f", new StringReader(testCase[0]));
            tokens.reset();
            assertTokenStream(tokens, testCase[1]);
        }
    }

    /**
     * The synonym "a" followed by non-synonym text, n-gram size 3, no expansion.
     */
    @Test
    public void testAfterStrSingleSynonym3() throws Exception {
        // each row: input text, expected tokens
        final String[][] cases = {
                { "ab", "a,0,1,1/b,1,2,1" },
                { "abb", "a,0,1,1/bb,1,3,1" },
                { "abcd", "a,0,1,1/bcd,1,4,1" },
                { "abcde", "a,0,1,1/bcd,1,4,1/cde,2,5,1" },
        };
        for (String[] testCase : cases) {
            final Analyzer analyzer = new NGramSynonymTokenizerTestAnalyzer(3, false, "a,aa");
            final TokenStream tokens = analyzer.tokenStream("f", new StringReader(testCase[0]));
            tokens.reset();
            assertTokenStream(tokens, testCase[1]);
        }
    }

    /**
     * The synonym "a" followed by non-synonym text, n-gram size 4, no expansion.
     */
    @Test
    public void testAfterStrSingleSynonym4() throws Exception {
        // each row: input text, expected tokens
        final String[][] cases = {
                { "ab", "a,0,1,1/b,1,2,1" },
                { "abb", "a,0,1,1/bb,1,3,1" },
                { "abcd", "a,0,1,1/bcd,1,4,1" },
                { "abcde", "a,0,1,1/bcde,1,5,1" },
                { "abcdef", "a,0,1,1/bcde,1,5,1/cdef,2,6,1" },
        };
        for (String[] testCase : cases) {
            final Analyzer analyzer = new NGramSynonymTokenizerTestAnalyzer(4, false, "a,aa");
            final TokenStream tokens = analyzer.tokenStream("f", new StringReader(testCase[0]));
            tokens.reset();
            assertTokenStream(tokens, testCase[1]);
        }
    }

    /**
     * The synonym "a" followed by non-synonym text, n-gram size 1, with the
     * analyzer's second constructor argument set to {@code true}.
     */
    @Test
    public void testAfterStrSingleSynonymExpand1() throws Exception {
        // each row: input text, expected tokens
        final String[][] cases = {
                { "ab", "a,0,1,1/aa,0,1,0/b,1,2,1" },
                { "abb", "a,0,1,1/aa,0,1,0/b,1,2,1/b,2,3,1" },
                { "abcd", "a,0,1,1/aa,0,1,0/b,1,2,1/c,2,3,1/d,3,4,1" },
                { "abcde", "a,0,1,1/aa,0,1,0/b,1,2,1/c,2,3,1/d,3,4,1/e,4,5,1" },
        };
        for (String[] testCase : cases) {
            final Analyzer analyzer = new NGramSynonymTokenizerTestAnalyzer(1, true, "a,aa");
            final TokenStream tokens = analyzer.tokenStream("f", new StringReader(testCase[0]));
            tokens.reset();
            assertTokenStream(tokens, testCase[1]);
        }
    }

    /**
     * The synonym "a" followed by non-synonym text, n-gram size 2, with the
     * analyzer's second constructor argument set to {@code true}.
     */
    @Test
    public void testAfterStrSingleSynonymExpand2() throws Exception {
        // each row: input text, expected tokens
        final String[][] cases = {
                { "ab", "a,0,1,1/aa,0,1,0/b,1,2,1" },
                { "abb", "a,0,1,1/aa,0,1,0/b,1,2,1/bb,1,3,0" },
                { "abcd", "a,0,1,1/aa,0,1,0/b,1,2,1/bc,1,3,0/cd,2,4,1" },
                { "abcde", "a,0,1,1/aa,0,1,0/b,1,2,1/bc,1,3,0/cd,2,4,1/de,3,5,1" },
        };
        for (String[] testCase : cases) {
            final Analyzer analyzer = new NGramSynonymTokenizerTestAnalyzer(2, true, "a,aa");
            final TokenStream tokens = analyzer.tokenStream("f", new StringReader(testCase[0]));
            tokens.reset();
            assertTokenStream(tokens, testCase[1]);
        }
    }

    /**
     * The synonym "a" followed by non-synonym text, n-gram size 3, with the
     * analyzer's second constructor argument set to {@code true}.
     */
    @Test
    public void testAfterStrSingleSynonymExpand3() throws Exception {
        // each row: input text, expected tokens
        final String[][] cases = {
                { "ab", "a,0,1,1/aa,0,1,0/b,1,2,1" },
                { "abb", "a,0,1,1/aa,0,1,0/b,1,2,1/bb,1,3,0" },
                { "abcd", "a,0,1,1/aa,0,1,0/b,1,2,1/bc,1,3,0/bcd,1,4,0" },
                { "abcde", "a,0,1,1/aa,0,1,0/b,1,2,1/bc,1,3,0/bcd,1,4,0/cde,2,5,1" },
        };
        for (String[] testCase : cases) {
            final Analyzer analyzer = new NGramSynonymTokenizerTestAnalyzer(3, true, "a,aa");
            final TokenStream tokens = analyzer.tokenStream("f", new StringReader(testCase[0]));
            tokens.reset();
            assertTokenStream(tokens, testCase[1]);
        }
    }

    /**
     * The synonym "a" followed by non-synonym text, n-gram size 4, with the
     * analyzer's second constructor argument set to {@code true}.
     */
    @Test
    public void testAfterStrSingleSynonymExpand4() throws Exception {
        // each row: input text, expected tokens
        final String[][] cases = {
                { "ab", "a,0,1,1/aa,0,1,0/b,1,2,1" },
                { "abb", "a,0,1,1/aa,0,1,0/b,1,2,1/bb,1,3,0" },
                { "abcd", "a,0,1,1/aa,0,1,0/b,1,2,1/bc,1,3,0/bcd,1,4,0" },
                { "abcde", "a,0,1,1/aa,0,1,0/b,1,2,1/bc,1,3,0/bcd,1,4,0/bcde,1,5,0" },
                { "abcdef", "a,0,1,1/aa,0,1,0/b,1,2,1/bc,1,3,0/bcd,1,4,0/bcde,1,5,0/cdef,2,6,1" },
        };
        for (String[] testCase : cases) {
            final Analyzer analyzer = new NGramSynonymTokenizerTestAnalyzer(4, true, "a,aa");
            final TokenStream tokens = analyzer.tokenStream("f", new StringReader(testCase[0]));
            tokens.reset();
            assertTokenStream(tokens, testCase[1]);
        }
    }

    /**
     * Analyzes inputs where plain letters sit between two "a" characters, using an analyzer
     * built with n=1, expand=false and the synonym definition "a,aa", and verifies the tokens.
     */
    @Test
    public void testSandwichStr1() throws Exception {
        // Each row: { input text, expected tokens as term,startOffset,endOffset,posInc joined by '/' }
        final String[][] cases = {
            { "aba", "a,0,1,1/b,1,2,1/a,2,3,1" },
            { "abba", "a,0,1,1/b,1,2,1/b,2,3,1/a,3,4,1" },
            { "abcda", "a,0,1,1/b,1,2,1/c,2,3,1/d,3,4,1/a,4,5,1" },
            { "abcdea", "a,0,1,1/b,1,2,1/c,2,3,1/d,3,4,1/e,4,5,1/a,5,6,1" },
        };
        for (final String[] testCase : cases) {
            // A fresh analyzer is created for every input, exactly one stream per analyzer.
            final Analyzer analyzer = new NGramSynonymTokenizerTestAnalyzer(1, false, "a,aa");
            final TokenStream tokens = analyzer.tokenStream("f", new StringReader(testCase[0]));
            tokens.reset();
            assertTokenStream(tokens, testCase[1]);
        }
    }

    /**
     * Analyzes inputs where plain letters sit between two "a" characters, using an analyzer
     * built with n=2, expand=false and the synonym definition "a,aa", and verifies the tokens.
     */
    @Test
    public void testSandwichStr2() throws Exception {
        // Each row: { input text, expected tokens as term,startOffset,endOffset,posInc joined by '/' }
        final String[][] cases = {
            { "aba", "a,0,1,1/b,1,2,1/a,2,3,1" },
            { "abba", "a,0,1,1/bb,1,3,1/a,3,4,1" },
            { "abcda", "a,0,1,1/bc,1,3,1/cd,2,4,1/a,4,5,1" },
            { "abcdea", "a,0,1,1/bc,1,3,1/cd,2,4,1/de,3,5,1/a,5,6,1" },
        };
        for (final String[] testCase : cases) {
            // A fresh analyzer is created for every input, exactly one stream per analyzer.
            final Analyzer analyzer = new NGramSynonymTokenizerTestAnalyzer(2, false, "a,aa");
            final TokenStream tokens = analyzer.tokenStream("f", new StringReader(testCase[0]));
            tokens.reset();
            assertTokenStream(tokens, testCase[1]);
        }
    }

    /**
     * Analyzes inputs where plain letters sit between two "a" characters, using an analyzer
     * built with n=3, expand=false and the synonym definition "a,aa", and verifies the tokens.
     */
    @Test
    public void testSandwichStr3() throws Exception {
        // Each row: { input text, expected tokens as term,startOffset,endOffset,posInc joined by '/' }
        final String[][] cases = {
            { "aba", "a,0,1,1/b,1,2,1/a,2,3,1" },
            { "abba", "a,0,1,1/bb,1,3,1/a,3,4,1" },
            { "abcda", "a,0,1,1/bcd,1,4,1/a,4,5,1" },
            { "abcdea", "a,0,1,1/bcd,1,4,1/cde,2,5,1/a,5,6,1" },
            { "abcdefa", "a,0,1,1/bcd,1,4,1/cde,2,5,1/def,3,6,1/a,6,7,1" },
        };
        for (final String[] testCase : cases) {
            // A fresh analyzer is created for every input, exactly one stream per analyzer.
            final Analyzer analyzer = new NGramSynonymTokenizerTestAnalyzer(3, false, "a,aa");
            final TokenStream tokens = analyzer.tokenStream("f", new StringReader(testCase[0]));
            tokens.reset();
            assertTokenStream(tokens, testCase[1]);
        }
    }

    /**
     * Analyzes inputs where plain letters sit between two "a" characters, using an analyzer
     * built with n=4, expand=false and the synonym definition "a,aa", and verifies the tokens.
     */
    @Test
    public void testSandwichStr4() throws Exception {
        // Each row: { input text, expected tokens as term,startOffset,endOffset,posInc joined by '/' }
        final String[][] cases = {
            { "aba", "a,0,1,1/b,1,2,1/a,2,3,1" },
            { "abba", "a,0,1,1/bb,1,3,1/a,3,4,1" },
            { "abcda", "a,0,1,1/bcd,1,4,1/a,4,5,1" },
            { "abcdea", "a,0,1,1/bcde,1,5,1/a,5,6,1" },
            { "abcdefa", "a,0,1,1/bcde,1,5,1/cdef,2,6,1/a,6,7,1" },
        };
        for (final String[] testCase : cases) {
            // A fresh analyzer is created for every input, exactly one stream per analyzer.
            final Analyzer analyzer = new NGramSynonymTokenizerTestAnalyzer(4, false, "a,aa");
            final TokenStream tokens = analyzer.tokenStream("f", new StringReader(testCase[0]));
            tokens.reset();
            assertTokenStream(tokens, testCase[1]);
        }
    }

    /**
     * Analyzes inputs where plain letters sit between two "a" characters, using an analyzer
     * built with n=1, expand=true and the synonym definition "a,aa", and verifies the tokens.
     */
    @Test
    public void testSandwichStrExpand1() throws Exception {
        // Each row: { input text, expected tokens as term,startOffset,endOffset,posInc joined by '/' }
        final String[][] cases = {
            { "aba", "a,0,1,1/aa,0,1,0/b,1,2,1/a,2,3,1/aa,2,3,0" },
            { "abba", "a,0,1,1/aa,0,1,0/b,1,2,1/b,2,3,1/a,3,4,1/aa,3,4,0" },
            { "abcda", "a,0,1,1/aa,0,1,0/b,1,2,1/c,2,3,1/d,3,4,1/a,4,5,1/aa,4,5,0" },
            { "abcdea", "a,0,1,1/aa,0,1,0/b,1,2,1/c,2,3,1/d,3,4,1/e,4,5,1/a,5,6,1/aa,5,6,0" },
        };
        for (final String[] testCase : cases) {
            // A fresh analyzer is created for every input, exactly one stream per analyzer.
            final Analyzer analyzer = new NGramSynonymTokenizerTestAnalyzer(1, true, "a,aa");
            final TokenStream tokens = analyzer.tokenStream("f", new StringReader(testCase[0]));
            tokens.reset();
            assertTokenStream(tokens, testCase[1]);
        }
    }

    /**
     * Analyzes inputs where plain letters sit between two "a" characters, using an analyzer
     * built with n=2, expand=true and the synonym definition "a,aa", and verifies the tokens.
     */
    @Test
    public void testSandwichStrExpand2() throws Exception {
        // Each row: { input text, expected tokens as term,startOffset,endOffset,posInc joined by '/' }
        final String[][] cases = {
            { "aba", "a,0,1,1/aa,0,1,0/b,1,2,1/a,2,3,1/aa,2,3,0" },
            { "abba", "a,0,1,1/aa,0,1,0/b,1,2,1/bb,1,3,0/b,2,3,0/a,3,4,1/aa,3,4,0" },
            { "abcda", "a,0,1,1/aa,0,1,0/b,1,2,1/bc,1,3,0/cd,2,4,1/d,3,4,0/a,4,5,1/aa,4,5,0" },
            { "abcdea", "a,0,1,1/aa,0,1,0/b,1,2,1/bc,1,3,0/cd,2,4,1/de,3,5,1/e,4,5,0/a,5,6,1/aa,5,6,0" },
        };
        for (final String[] testCase : cases) {
            // A fresh analyzer is created for every input, exactly one stream per analyzer.
            final Analyzer analyzer = new NGramSynonymTokenizerTestAnalyzer(2, true, "a,aa");
            final TokenStream tokens = analyzer.tokenStream("f", new StringReader(testCase[0]));
            tokens.reset();
            assertTokenStream(tokens, testCase[1]);
        }
    }

    /**
     * Analyzes inputs where plain letters sit between two "a" characters, using an analyzer
     * built with n=3, expand=true and the synonym definition "a,aa", and verifies the tokens.
     */
    @Test
    public void testSandwichStrExpand3() throws Exception {
        // Each row: { input text, expected tokens as term,startOffset,endOffset,posInc joined by '/' }
        final String[][] cases = {
            { "aba", "a,0,1,1/aa,0,1,0/b,1,2,1/a,2,3,1/aa,2,3,0" },
            { "abba", "a,0,1,1/aa,0,1,0/b,1,2,1/bb,1,3,0/b,2,3,0/a,3,4,1/aa,3,4,0" },
            { "abcda", "a,0,1,1/aa,0,1,0/b,1,2,1/bc,1,3,0/bcd,1,4,0/cd,2,4,0/d,3,4,0/a,4,5,1/aa,4,5,0" },
            { "abcdea",
                "a,0,1,1/aa,0,1,0/b,1,2,1/bc,1,3,0/bcd,1,4,0/cde,2,5,1/de,3,5,0/e,4,5,0/a,5,6,1/aa,5,6,0" },
            { "abcdefa",
                "a,0,1,1/aa,0,1,0/b,1,2,1/bc,1,3,0/bcd,1,4,0/cde,2,5,1/def,3,6,1/ef,4,6,0/f,5,6,0/a,6,7,1/aa,6,7,0" },
        };
        for (final String[] testCase : cases) {
            // A fresh analyzer is created for every input, exactly one stream per analyzer.
            final Analyzer analyzer = new NGramSynonymTokenizerTestAnalyzer(3, true, "a,aa");
            final TokenStream tokens = analyzer.tokenStream("f", new StringReader(testCase[0]));
            tokens.reset();
            assertTokenStream(tokens, testCase[1]);
        }
    }

    /**
     * Analyzes inputs where plain letters sit between two "a" characters, using an analyzer
     * built with n=4, expand=true and the synonym definition "a,aa", and verifies the tokens.
     */
    @Test
    public void testSandwichStrExpand4() throws Exception {
        // Each row: { input text, expected tokens as term,startOffset,endOffset,posInc joined by '/' }
        final String[][] cases = {
            { "aba", "a,0,1,1/aa,0,1,0/b,1,2,1/a,2,3,1/aa,2,3,0" },
            { "abba", "a,0,1,1/aa,0,1,0/b,1,2,1/bb,1,3,0/b,2,3,0/a,3,4,1/aa,3,4,0" },
            { "abcda", "a,0,1,1/aa,0,1,0/b,1,2,1/bc,1,3,0/bcd,1,4,0/cd,2,4,0/d,3,4,0/a,4,5,1/aa,4,5,0" },
            { "abcdea",
                "a,0,1,1/aa,0,1,0/b,1,2,1/bc,1,3,0/bcd,1,4,0/bcde,1,5,0/cde,2,5,0/de,3,5,0/e,4,5,0/a,5,6,1/aa,5,6,0" },
            { "abcdefa",
                "a,0,1,1/aa,0,1,0/b,1,2,1/bc,1,3,0/bcd,1,4,0/bcde,1,5,0/cdef,2,6,1/def,3,6,0/ef,4,6,0/f,5,6,0/a,6,7,1/aa,6,7,0" },
        };
        for (final String[] testCase : cases) {
            // A fresh analyzer is created for every input, exactly one stream per analyzer.
            final Analyzer analyzer = new NGramSynonymTokenizerTestAnalyzer(4, true, "a,aa");
            final TokenStream tokens = analyzer.tokenStream("f", new StringReader(testCase[0]));
            tokens.reset();
            assertTokenStream(tokens, testCase[1]);
        }
    }

    /**
     * Analyzes inputs where a single "a" sits between runs of other letters, using an analyzer
     * built with n=1, expand=false and the synonym definition "a,aa", and verifies the tokens.
     */
    @Test
    public void testSandwichSynonym1() throws Exception {
        // Each row: { input text, expected tokens as term,startOffset,endOffset,posInc joined by '/' }
        final String[][] cases = {
            { "bab", "b,0,1,1/a,1,2,1/b,2,3,1" },
            { "bbabb", "b,0,1,1/b,1,2,1/a,2,3,1/b,3,4,1/b,4,5,1" },
            { "dcbabcd", "d,0,1,1/c,1,2,1/b,2,3,1/a,3,4,1/b,4,5,1/c,5,6,1/d,6,7,1" },
            { "edcbabcde", "e,0,1,1/d,1,2,1/c,2,3,1/b,3,4,1/a,4,5,1/b,5,6,1/c,6,7,1/d,7,8,1/e,8,9,1" },
        };
        for (final String[] testCase : cases) {
            // A fresh analyzer is created for every input, exactly one stream per analyzer.
            final Analyzer analyzer = new NGramSynonymTokenizerTestAnalyzer(1, false, "a,aa");
            final TokenStream tokens = analyzer.tokenStream("f", new StringReader(testCase[0]));
            tokens.reset();
            assertTokenStream(tokens, testCase[1]);
        }
    }

    /**
     * Analyzes inputs where a single "a" sits between runs of other letters, using an analyzer
     * built with n=2, expand=false and the synonym definition "a,aa", and verifies the tokens.
     */
    @Test
    public void testSandwichSynonym2() throws Exception {
        // Each row: { input text, expected tokens as term,startOffset,endOffset,posInc joined by '/' }
        final String[][] cases = {
            { "bab", "b,0,1,1/a,1,2,1/b,2,3,1" },
            { "bbabb", "bb,0,2,1/a,2,3,1/bb,3,5,1" },
            { "dcbabcd", "dc,0,2,1/cb,1,3,1/a,3,4,1/bc,4,6,1/cd,5,7,1" },
            { "edcbabcde", "ed,0,2,1/dc,1,3,1/cb,2,4,1/a,4,5,1/bc,5,7,1/cd,6,8,1/de,7,9,1" },
        };
        for (final String[] testCase : cases) {
            // A fresh analyzer is created for every input, exactly one stream per analyzer.
            final Analyzer analyzer = new NGramSynonymTokenizerTestAnalyzer(2, false, "a,aa");
            final TokenStream tokens = analyzer.tokenStream("f", new StringReader(testCase[0]));
            tokens.reset();
            assertTokenStream(tokens, testCase[1]);
        }
    }

    /**
     * Analyzes inputs where a single "a" sits between runs of other letters, using an analyzer
     * built with n=3, expand=false and the synonym definition "a,aa", and verifies the tokens.
     */
    @Test
    public void testSandwichSynonym3() throws Exception {
        // Each row: { input text, expected tokens as term,startOffset,endOffset,posInc joined by '/' }
        final String[][] cases = {
            { "bab", "b,0,1,1/a,1,2,1/b,2,3,1" },
            { "bbabb", "bb,0,2,1/a,2,3,1/bb,3,5,1" },
            { "dcbabcd", "dcb,0,3,1/a,3,4,1/bcd,4,7,1" },
            { "edcbabcde", "edc,0,3,1/dcb,1,4,1/a,4,5,1/bcd,5,8,1/cde,6,9,1" },
            { "fedcbabcdef", "fed,0,3,1/edc,1,4,1/dcb,2,5,1/a,5,6,1/bcd,6,9,1/cde,7,10,1/def,8,11,1" },
        };
        for (final String[] testCase : cases) {
            // A fresh analyzer is created for every input, exactly one stream per analyzer.
            final Analyzer analyzer = new NGramSynonymTokenizerTestAnalyzer(3, false, "a,aa");
            final TokenStream tokens = analyzer.tokenStream("f", new StringReader(testCase[0]));
            tokens.reset();
            assertTokenStream(tokens, testCase[1]);
        }
    }

    /**
     * Analyzes inputs where a single "a" sits between runs of other letters, using an analyzer
     * built with n=4, expand=false and the synonym definition "a,aa", and verifies the tokens.
     */
    @Test
    public void testSandwichSynonym4() throws Exception {
        // Each row: { input text, expected tokens as term,startOffset,endOffset,posInc joined by '/' }
        final String[][] cases = {
            { "bab", "b,0,1,1/a,1,2,1/b,2,3,1" },
            { "bbabb", "bb,0,2,1/a,2,3,1/bb,3,5,1" },
            { "dcbabcd", "dcb,0,3,1/a,3,4,1/bcd,4,7,1" },
            { "edcbabcde", "edcb,0,4,1/a,4,5,1/bcde,5,9,1" },
            { "fedcbabcdef", "fedc,0,4,1/edcb,1,5,1/a,5,6,1/bcde,6,10,1/cdef,7,11,1" },
        };
        for (final String[] testCase : cases) {
            // A fresh analyzer is created for every input, exactly one stream per analyzer.
            final Analyzer analyzer = new NGramSynonymTokenizerTestAnalyzer(4, false, "a,aa");
            final TokenStream tokens = analyzer.tokenStream("f", new StringReader(testCase[0]));
            tokens.reset();
            assertTokenStream(tokens, testCase[1]);
        }
    }

    /**
     * Analyzes inputs where a single "a" sits between runs of other letters, using an analyzer
     * built with n=1, expand=true and the synonym definition "a,aa", and verifies the tokens.
     */
    @Test
    public void testSandwichSynonymExpand1() throws Exception {
        // Each row: { input text, expected tokens as term,startOffset,endOffset,posInc joined by '/' }
        final String[][] cases = {
            { "bab", "b,0,1,1/a,1,2,1/aa,1,2,0/b,2,3,1" },
            { "bbabb", "b,0,1,1/b,1,2,1/a,2,3,1/aa,2,3,0/b,3,4,1/b,4,5,1" },
            { "dcbabcd", "d,0,1,1/c,1,2,1/b,2,3,1/a,3,4,1/aa,3,4,0/b,4,5,1/c,5,6,1/d,6,7,1" },
            { "edcbabcde",
                "e,0,1,1/d,1,2,1/c,2,3,1/b,3,4,1/a,4,5,1/aa,4,5,0/b,5,6,1/c,6,7,1/d,7,8,1/e,8,9,1" },
        };
        for (final String[] testCase : cases) {
            // A fresh analyzer is created for every input, exactly one stream per analyzer.
            final Analyzer analyzer = new NGramSynonymTokenizerTestAnalyzer(1, true, "a,aa");
            final TokenStream tokens = analyzer.tokenStream("f", new StringReader(testCase[0]));
            tokens.reset();
            assertTokenStream(tokens, testCase[1]);
        }
    }

    /**
     * Analyzes inputs where a single "a" sits between runs of other letters, using an analyzer
     * built with n=2, expand=true and the synonym definition "a,aa", and verifies the tokens.
     */
    @Test
    public void testSandwichSynonymExpand2() throws Exception {
        // Each row: { input text, expected tokens as term,startOffset,endOffset,posInc joined by '/' }
        final String[][] cases = {
            { "bab", "b,0,1,1/a,1,2,1/aa,1,2,0/b,2,3,1" },
            { "bbabb", "bb,0,2,1/b,1,2,0/a,2,3,1/aa,2,3,0/b,3,4,1/bb,3,5,0" },
            { "dcbabcd", "dc,0,2,1/cb,1,3,1/b,2,3,0/a,3,4,1/aa,3,4,0/b,4,5,1/bc,4,6,0/cd,5,7,1" },
            { "edcbabcde",
                "ed,0,2,1/dc,1,3,1/cb,2,4,1/b,3,4,0/a,4,5,1/aa,4,5,0/b,5,6,1/bc,5,7,0/cd,6,8,1/de,7,9,1" },
        };
        for (final String[] testCase : cases) {
            // A fresh analyzer is created for every input, exactly one stream per analyzer.
            final Analyzer analyzer = new NGramSynonymTokenizerTestAnalyzer(2, true, "a,aa");
            final TokenStream tokens = analyzer.tokenStream("f", new StringReader(testCase[0]));
            tokens.reset();
            assertTokenStream(tokens, testCase[1]);
        }
    }

    /**
     * Analyzes inputs where a single "a" sits between runs of other letters, using an analyzer
     * built with n=3, expand=true and the synonym definition "a,aa", and verifies the tokens.
     */
    @Test
    public void testSandwichSynonymExpand3() throws Exception {
        // Each row: { input text, expected tokens as term,startOffset,endOffset,posInc joined by '/' }
        final String[][] cases = {
            { "bab", "b,0,1,1/a,1,2,1/aa,1,2,0/b,2,3,1" },
            { "bbabb", "bb,0,2,1/b,1,2,0/a,2,3,1/aa,2,3,0/b,3,4,1/bb,3,5,0" },
            { "dcbabcd", "dcb,0,3,1/cb,1,3,0/b,2,3,0/a,3,4,1/aa,3,4,0/b,4,5,1/bc,4,6,0/bcd,4,7,0" },
            { "edcbabcde",
                "edc,0,3,1/dcb,1,4,1/cb,2,4,0/b,3,4,0/a,4,5,1/aa,4,5,0/b,5,6,1/bc,5,7,0/bcd,5,8,0/cde,6,9,1" },
            { "fedcbabcdef",
                "fed,0,3,1/edc,1,4,1/dcb,2,5,1/cb,3,5,0/b,4,5,0/a,5,6,1/aa,5,6,0/b,6,7,1/bc,6,8,0/bcd,6,9,0/cde,7,10,1/def,8,11,1" },
        };
        for (final String[] testCase : cases) {
            // A fresh analyzer is created for every input, exactly one stream per analyzer.
            final Analyzer analyzer = new NGramSynonymTokenizerTestAnalyzer(3, true, "a,aa");
            final TokenStream tokens = analyzer.tokenStream("f", new StringReader(testCase[0]));
            tokens.reset();
            assertTokenStream(tokens, testCase[1]);
        }
    }

    /**
     * Analyzes inputs where a single "a" sits between runs of other letters, using an analyzer
     * built with n=4, expand=true and the synonym definition "a,aa", and verifies the tokens.
     */
    @Test
    public void testSandwichSynonymExpand4() throws Exception {
        // Each row: { input text, expected tokens as term,startOffset,endOffset,posInc joined by '/' }
        final String[][] cases = {
            { "bab", "b,0,1,1/a,1,2,1/aa,1,2,0/b,2,3,1" },
            { "bbabb", "bb,0,2,1/b,1,2,0/a,2,3,1/aa,2,3,0/b,3,4,1/bb,3,5,0" },
            { "dcbabcd", "dcb,0,3,1/cb,1,3,0/b,2,3,0/a,3,4,1/aa,3,4,0/b,4,5,1/bc,4,6,0/bcd,4,7,0" },
            { "edcbabcde",
                "edcb,0,4,1/dcb,1,4,0/cb,2,4,0/b,3,4,0/a,4,5,1/aa,4,5,0/b,5,6,1/bc,5,7,0/bcd,5,8,0/bcde,5,9,0" },
            { "fedcbabcdef",
                "fedc,0,4,1/edcb,1,5,1/dcb,2,5,0/cb,3,5,0/b,4,5,0/a,5,6,1/aa,5,6,0/b,6,7,1/bc,6,8,0/bcd,6,9,0/bcde,6,10,0/cdef,7,11,1" },
        };
        for (final String[] testCase : cases) {
            // A fresh analyzer is created for every input, exactly one stream per analyzer.
            final Analyzer analyzer = new NGramSynonymTokenizerTestAnalyzer(4, true, "a,aa");
            final TokenStream tokens = analyzer.tokenStream("f", new StringReader(testCase[0]));
            tokens.reset();
            assertTokenStream(tokens, testCase[1]);
        }
    }

    /**
     * Analyzes inputs that contain "ab" in the middle and end with "a", using an analyzer
     * built with n=1, expand=false and the synonym definition "a,aa/b,bb", and verifies the tokens.
     */
    @Test
    public void testComplex1() throws Exception {
        // Each row: { input text, expected tokens as term,startOffset,endOffset,posInc joined by '/' }
        final String[][] cases = {
            { "cabca", "c,0,1,1/a,1,2,1/b,2,3,1/c,3,4,1/a,4,5,1" },
            { "ccabcca", "c,0,1,1/c,1,2,1/a,2,3,1/b,3,4,1/c,4,5,1/c,5,6,1/a,6,7,1" },
            { "edcabcdea", "e,0,1,1/d,1,2,1/c,2,3,1/a,3,4,1/b,4,5,1/c,5,6,1/d,6,7,1/e,7,8,1/a,8,9,1" },
            { "fedcabcdefa",
                "f,0,1,1/e,1,2,1/d,2,3,1/c,3,4,1/a,4,5,1/b,5,6,1/c,6,7,1/d,7,8,1/e,8,9,1/f,9,10,1/a,10,11,1" },
        };
        for (final String[] testCase : cases) {
            // A fresh analyzer is created for every input, exactly one stream per analyzer.
            final Analyzer analyzer = new NGramSynonymTokenizerTestAnalyzer(1, false, "a,aa/b,bb");
            final TokenStream tokens = analyzer.tokenStream("f", new StringReader(testCase[0]));
            tokens.reset();
            assertTokenStream(tokens, testCase[1]);
        }
    }

    /**
     * Analyzes inputs that contain "ab" in the middle and end with "a", using an analyzer
     * built with n=2, expand=false and the synonym definition "a,aa/b,bb", and verifies the tokens.
     *
     * <p>Every expected token carries all four fields (term, startOffset, endOffset, posInc), so
     * offsets and position increments are checked for the trailing "a" of each input as well.
     */
    @Test
    public void testComplex2() throws Exception {
        // Each row: { input text, expected tokens as term,startOffset,endOffset,posInc joined by '/' }
        final String[][] cases = {
            { "cabca", "c,0,1,1/a,1,2,1/b,2,3,1/c,3,4,1/a,4,5,1" },
            { "ccabcca", "cc,0,2,1/a,2,3,1/b,3,4,1/cc,4,6,1/a,6,7,1" },
            // The final "a" occupies offsets 8-9 of the 9-character input.
            { "edcabcdea", "ed,0,2,1/dc,1,3,1/a,3,4,1/b,4,5,1/cd,5,7,1/de,6,8,1/a,8,9,1" },
            { "fedcabcdefa",
                "fe,0,2,1/ed,1,3,1/dc,2,4,1/a,4,5,1/b,5,6,1/cd,6,8,1/de,7,9,1/ef,8,10,1/a,10,11,1" },
        };
        for (final String[] testCase : cases) {
            // A fresh analyzer is created for every input, exactly one stream per analyzer.
            final Analyzer analyzer = new NGramSynonymTokenizerTestAnalyzer(2, false, "a,aa/b,bb");
            final TokenStream tokens = analyzer.tokenStream("f", new StringReader(testCase[0]));
            tokens.reset();
            assertTokenStream(tokens, testCase[1]);
        }
    }

    /**
     * Analyzes inputs that contain "ab" in the middle and end with "a", using an analyzer
     * built with n=3, expand=false and the synonym definition "a,aa/b,bb", and verifies the tokens.
     */
    @Test
    public void testComplex3() throws Exception {
        // Each row: { input text, expected tokens as term,startOffset,endOffset,posInc joined by '/' }
        final String[][] cases = {
            { "cabca", "c,0,1,1/a,1,2,1/b,2,3,1/c,3,4,1/a,4,5,1" },
            { "ccabcca", "cc,0,2,1/a,2,3,1/b,3,4,1/cc,4,6,1/a,6,7,1" },
            { "edcabcdea", "edc,0,3,1/a,3,4,1/b,4,5,1/cde,5,8,1/a,8,9,1" },
            { "fedcabcdefa", "fed,0,3,1/edc,1,4,1/a,4,5,1/b,5,6,1/cde,6,9,1/def,7,10,1/a,10,11,1" },
            { "gfedcabcdefga",
                "gfe,0,3,1/fed,1,4,1/edc,2,5,1/a,5,6,1/b,6,7,1/cde,7,10,1/def,8,11,1/efg,9,12,1/a,12,13,1" },
        };
        for (final String[] testCase : cases) {
            // A fresh analyzer is created for every input, exactly one stream per analyzer.
            final Analyzer analyzer = new NGramSynonymTokenizerTestAnalyzer(3, false, "a,aa/b,bb");
            final TokenStream tokens = analyzer.tokenStream("f", new StringReader(testCase[0]));
            tokens.reset();
            assertTokenStream(tokens, testCase[1]);
        }
    }

    /**
     * Analyzes inputs that contain "ab" in the middle and end with "a", using an analyzer
     * built with n=4, expand=false and the synonym definition "a,aa/b,bb", and verifies the tokens.
     */
    @Test
    public void testComplex4() throws Exception {
        // Each row: { input text, expected tokens as term,startOffset,endOffset,posInc joined by '/' }
        final String[][] cases = {
            { "cabca", "c,0,1,1/a,1,2,1/b,2,3,1/c,3,4,1/a,4,5,1" },
            { "ccabcca", "cc,0,2,1/a,2,3,1/b,3,4,1/cc,4,6,1/a,6,7,1" },
            { "edcabcdea", "edc,0,3,1/a,3,4,1/b,4,5,1/cde,5,8,1/a,8,9,1" },
            { "fedcabcdefa", "fedc,0,4,1/a,4,5,1/b,5,6,1/cdef,6,10,1/a,10,11,1" },
            { "gfedcabcdefga", "gfed,0,4,1/fedc,1,5,1/a,5,6,1/b,6,7,1/cdef,7,11,1/defg,8,12,1/a,12,13,1" },
        };
        for (final String[] testCase : cases) {
            // A fresh analyzer is created for every input, exactly one stream per analyzer.
            final Analyzer analyzer = new NGramSynonymTokenizerTestAnalyzer(4, false, "a,aa/b,bb");
            final TokenStream tokens = analyzer.tokenStream("f", new StringReader(testCase[0]));
            tokens.reset();
            assertTokenStream(tokens, testCase[1]);
        }
    }

    /**
     * Analyzes inputs that contain "ab" in the middle and end with "a", using an analyzer
     * built with n=1, expand=true and the synonym definition "a,aa/b,bb", and verifies the tokens.
     */
    @Test
    public void testComplexExpand1() throws Exception {
        // Each row: { input text, expected tokens as term,startOffset,endOffset,posInc joined by '/' }
        final String[][] cases = {
            { "cabca", "c,0,1,1/a,1,2,1/aa,1,2,0/b,2,3,1/bb,2,3,0/c,3,4,1/a,4,5,1/aa,4,5,0" },
            { "ccabcca",
                "c,0,1,1/c,1,2,1/a,2,3,1/aa,2,3,0/b,3,4,1/bb,3,4,0/c,4,5,1/c,5,6,1/a,6,7,1/aa,6,7,0" },
            { "edcabcdea",
                "e,0,1,1/d,1,2,1/c,2,3,1/a,3,4,1/aa,3,4,0/b,4,5,1/bb,4,5,0/c,5,6,1/d,6,7,1/e,7,8,1/a,8,9,1/aa,8,9,0" },
            { "fedcabcdefa",
                "f,0,1,1/e,1,2,1/d,2,3,1/c,3,4,1/a,4,5,1/aa,4,5,0/b,5,6,1/bb,5,6,0/c,6,7,1/d,7,8,1/e,8,9,1/f,9,10,1/a,10,11,1/aa,10,11,0" },
        };
        for (final String[] testCase : cases) {
            // A fresh analyzer is created for every input, exactly one stream per analyzer.
            final Analyzer analyzer = new NGramSynonymTokenizerTestAnalyzer(1, true, "a,aa/b,bb");
            final TokenStream tokens = analyzer.tokenStream("f", new StringReader(testCase[0]));
            tokens.reset();
            assertTokenStream(tokens, testCase[1]);
        }
    }

    /**
     * Analyzes inputs that contain "ab" in the middle and end with "a", using an analyzer
     * built with n=2, expand=true and the synonym definition "a,aa/b,bb", and verifies the tokens.
     */
    @Test
    public void testComplexExpand2() throws Exception {
        // Each row: { input text, expected tokens as term,startOffset,endOffset,posInc joined by '/' }
        final String[][] cases = {
            { "cabca", "c,0,1,1/a,1,2,1/aa,1,2,0/b,2,3,1/bb,2,3,0/c,3,4,1/a,4,5,1/aa,4,5,0" },
            { "ccabcca",
                "cc,0,2,1/c,1,2,0/a,2,3,1/aa,2,3,0/b,3,4,1/bb,3,4,0/c,4,5,1/cc,4,6,0/c,5,6,0/a,6,7,1/aa,6,7,0" },
            { "edcabcdea",
                "ed,0,2,1/dc,1,3,1/c,2,3,0/a,3,4,1/aa,3,4,0/b,4,5,1/bb,4,5,0/c,5,6,1/cd,5,7,0/de,6,8,1/e,7,8,0/a,8,9,1/aa,8,9,0" },
            { "fedcabcdefa",
                "fe,0,2,1/ed,1,3,1/dc,2,4,1/c,3,4,0/a,4,5,1/aa,4,5,0/b,5,6,1/bb,5,6,0/c,6,7,1/cd,6,8,0/de,7,9,1/ef,8,10,1/f,9,10,0/a,10,11,1/aa,10,11,0" },
        };
        for (final String[] testCase : cases) {
            // A fresh analyzer is created for every input, exactly one stream per analyzer.
            final Analyzer analyzer = new NGramSynonymTokenizerTestAnalyzer(2, true, "a,aa/b,bb");
            final TokenStream tokens = analyzer.tokenStream("f", new StringReader(testCase[0]));
            tokens.reset();
            assertTokenStream(tokens, testCase[1]);
        }
    }

    /**
     * Analyzes inputs that contain "ab" in the middle and end with "a", using an analyzer
     * built with n=3, expand=true and the synonym definition "a,aa/b,bb", and verifies the tokens.
     */
    @Test
    public void testComplexExpand3() throws Exception {
        // Each row: { input text, expected tokens as term,startOffset,endOffset,posInc joined by '/' }
        final String[][] cases = {
            { "cabca", "c,0,1,1/a,1,2,1/aa,1,2,0/b,2,3,1/bb,2,3,0/c,3,4,1/a,4,5,1/aa,4,5,0" },
            { "ccabcca",
                "cc,0,2,1/c,1,2,0/a,2,3,1/aa,2,3,0/b,3,4,1/bb,3,4,0/c,4,5,1/cc,4,6,0/c,5,6,0/a,6,7,1/aa,6,7,0" },
            { "edcabcdea",
                "edc,0,3,1/dc,1,3,0/c,2,3,0/a,3,4,1/aa,3,4,0/b,4,5,1/bb,4,5,0/c,5,6,1/cd,5,7,0/cde,5,8,0/de,6,8,0/e,7,8,0/a,8,9,1/aa,8,9,0" },
            { "fedcabcdefa",
                "fed,0,3,1/edc,1,4,1/dc,2,4,0/c,3,4,0/a,4,5,1/aa,4,5,0/b,5,6,1/bb,5,6,0/c,6,7,1/cd,6,8,0/cde,6,9,0/def,7,10,1/ef,8,10,0/f,9,10,0/a,10,11,1/aa,10,11,0" },
            { "gfedcabcdefga",
                "gfe,0,3,1/fed,1,4,1/edc,2,5,1/dc,3,5,0/c,4,5,0/a,5,6,1/aa,5,6,0/b,6,7,1/bb,6,7,0/c,7,8,1/cd,7,9,0/cde,7,10,0/def,8,11,1/efg,9,12,1/fg,10,12,0/g,11,12,0/a,12,13,1/aa,12,13,0" },
        };
        for (final String[] testCase : cases) {
            // A fresh analyzer is created for every input, exactly one stream per analyzer.
            final Analyzer analyzer = new NGramSynonymTokenizerTestAnalyzer(3, true, "a,aa/b,bb");
            final TokenStream tokens = analyzer.tokenStream("f", new StringReader(testCase[0]));
            tokens.reset();
            assertTokenStream(tokens, testCase[1]);
        }
    }

    /**
     * Analyzes inputs that contain "ab" in the middle and end with "a", using an analyzer
     * built with n=4, expand=true and the synonym definition "a,aa/b,bb", and verifies the tokens.
     */
    @Test
    public void testComplexExpand4() throws Exception {
        // Each row: { input text, expected tokens as term,startOffset,endOffset,posInc joined by '/' }
        final String[][] cases = {
            { "cabca", "c,0,1,1/a,1,2,1/aa,1,2,0/b,2,3,1/bb,2,3,0/c,3,4,1/a,4,5,1/aa,4,5,0" },
            { "ccabcca",
                "cc,0,2,1/c,1,2,0/a,2,3,1/aa,2,3,0/b,3,4,1/bb,3,4,0/c,4,5,1/cc,4,6,0/c,5,6,0/a,6,7,1/aa,6,7,0" },
            { "edcabcdea",
                "edc,0,3,1/dc,1,3,0/c,2,3,0/a,3,4,1/aa,3,4,0/b,4,5,1/bb,4,5,0/c,5,6,1/cd,5,7,0/cde,5,8,0/de,6,8,0/e,7,8,0/a,8,9,1/aa,8,9,0" },
            { "fedcabcdefa",
                "fedc,0,4,1/edc,1,4,0/dc,2,4,0/c,3,4,0/a,4,5,1/aa,4,5,0/b,5,6,1/bb,5,6,0/c,6,7,1/cd,6,8,0/cde,6,9,0/cdef,6,10,0/def,7,10,0/ef,8,10,0/f,9,10,0/a,10,11,1/aa,10,11,0" },
            { "gfedcabcdefga",
                "gfed,0,4,1/fedc,1,5,1/edc,2,5,0/dc,3,5,0/c,4,5,0/a,5,6,1/aa,5,6,0/b,6,7,1/bb,6,7,0/c,7,8,1/cd,7,9,0/cde,7,10,0/cdef,7,11,0/defg,8,12,1/efg,9,12,0/fg,10,12,0/g,11,12,0/a,12,13,1/aa,12,13,0" },
        };
        for (final String[] testCase : cases) {
            // A fresh analyzer is created for every input, exactly one stream per analyzer.
            final Analyzer analyzer = new NGramSynonymTokenizerTestAnalyzer(4, true, "a,aa/b,bb");
            final TokenStream tokens = analyzer.tokenStream("f", new StringReader(testCase[0]));
            tokens.reset();
            assertTokenStream(tokens, testCase[1]);
        }
    }

    /**
     * Consumes {@code stream} token by token and compares it with {@code expectedStream}.
     *
     * <p>{@code expectedStream} is a '/'-separated list of tokens; each token is a ','-separated
     * list of {@code term[,startOffset[,endOffset[,posInc]]]}. Only the fields that are present
     * are checked. After the last expected token the stream must report that it is exhausted.
     *
     * @param stream         the token stream to consume; the caller must already have called
     *                       {@code reset()} on it (this method does not)
     * @param expectedStream the expected tokens in the format described above
     * @throws Exception if reading from the stream fails or a numeric field cannot be parsed
     */
    private void assertTokenStream(TokenStream stream, String expectedStream) throws Exception {

        String[] expectedTokens = expectedStream.split("/");
        int count = 0;
        for (String expectedToken : expectedTokens) {
            String[] attrs = expectedToken.split(",");
            // Say which token was missing instead of failing with a bare AssertionError.
            assertTrue(String.format("stream ended before token %d, expected : \"%s\"", count, expectedToken),
                    stream.incrementToken());

            assertAttribute(count, "term", attrs[0], stream.getAttribute(CharTermAttribute.class).toString());

            // Trailing fields are optional; each one is verified only when it was supplied.
            if (attrs.length > 1) {
                assertAttribute(count, "startOffset", Integer.parseInt(attrs[1]),
                        stream.getAttribute(OffsetAttribute.class).startOffset());
            }
            if (attrs.length > 2) {
                assertAttribute(count, "endOffset", Integer.parseInt(attrs[2]),
                        stream.getAttribute(OffsetAttribute.class).endOffset());
            }
            if (attrs.length > 3) {
                assertAttribute(count, "posInc", Integer.parseInt(attrs[3]),
                        stream.getAttribute(PositionIncrementAttribute.class).getPositionIncrement());
            }
            count++;
        }
        assertFalse(String.format("stream produced more than the %d expected tokens", count),
                stream.incrementToken());
    }

    /**
     * Compares a string-valued token attribute against its expected value.
     *
     * <p>The literal expected value {@code "[null]"} means the actual value must be
     * {@code null}; any other expected value must equal the actual value.
     *
     * @param count    zero-based index of the token being checked (used in the failure message)
     * @param type     name of the attribute being checked (used in the failure message)
     * @param expected the expected value, or {@code "[null]"} to require {@code null}
     * @param actual   the value read from the token stream
     */
    private void assertAttribute(int count, String type, String expected, String actual) throws Exception {
        final boolean expectsNull = expected.equals("[null]");
        final String message = String.format("%s is invalid at token %d, expected : \"%s\" != actual : \"%s\"",
                type, count, expected, actual);

        if (expectsNull) {
            assertNull(message, actual);
            return;
        }
        assertEquals(message, expected, actual);
    }

    /**
     * Compares an integer-valued token attribute against its expected value.
     *
     * @param count    zero-based index of the token being checked (used in the failure message)
     * @param type     name of the attribute being checked (used in the failure message)
     * @param expected the expected value
     * @param actual   the value read from the token stream
     */
    private void assertAttribute(int count, String type, int expected, int actual) throws Exception {
        final String message = String.format("%s is invalid at token %d, expected : \"%d\" != actual : \"%d\"",
                type, count, expected, actual);
        assertEquals(message, expected, actual);
    }

    /**
     * Test-only {@link Analyzer} whose token stream consists of a single
     * {@link NGramSynonymTokenizer} configured from the constructor arguments.
     *
     * <p>Synonyms can be given either as a ready-made {@link SynonymMap} or as a string of
     * Solr-format synonym rules in which {@code '/'} separates the individual rules.
     */
    public static final class NGramSynonymTokenizerTestAnalyzer extends Analyzer {

        final int n;
        final String delimiters;
        final boolean expand;
        final SynonymMap synonyms;

        /**
         * Creates an analyzer with the default delimiters, no expansion and no synonyms.
         *
         * @param n the n-gram size handed to the tokenizer
         */
        public NGramSynonymTokenizerTestAnalyzer(int n) {
            this(n, NGramSynonymTokenizer.DEFAULT_DELIMITERS, false);
        }

        /**
         * Creates an analyzer with the default delimiters and no synonyms.
         *
         * @param n      the n-gram size handed to the tokenizer
         * @param expand the expand flag handed to the tokenizer and the synonym loader
         */
        public NGramSynonymTokenizerTestAnalyzer(int n, boolean expand) {
            this(n, NGramSynonymTokenizer.DEFAULT_DELIMITERS, expand);
        }

        /**
         * Creates an analyzer without synonyms.
         *
         * @param n          the n-gram size handed to the tokenizer
         * @param delimiters the delimiter characters handed to the tokenizer
         * @param expand     the expand flag handed to the tokenizer and the synonym loader
         */
        public NGramSynonymTokenizerTestAnalyzer(int n, String delimiters, boolean expand) {
            // The cast selects the String overload; a bare null would be ambiguous.
            this(n, delimiters, expand, (String) null);
        }

        /**
         * Creates an analyzer with the default delimiters and synonyms parsed from a string.
         *
         * @param n        the n-gram size handed to the tokenizer
         * @param expand   the expand flag handed to the tokenizer and the synonym loader
         * @param synonyms synonym rules separated by {@code '/'}, or {@code null} for none
         */
        public NGramSynonymTokenizerTestAnalyzer(int n, boolean expand, String synonyms) {
            this(n, NGramSynonymTokenizer.DEFAULT_DELIMITERS, expand, synonyms);
        }

        /**
         * Creates an analyzer with synonyms parsed from a string.
         *
         * @param n          the n-gram size handed to the tokenizer
         * @param delimiters the delimiter characters handed to the tokenizer
         * @param expand     the expand flag handed to the tokenizer and the synonym loader
         * @param synonyms   synonym rules separated by {@code '/'}, or {@code null} for none
         * @throws RuntimeException if the rules cannot be read or parsed
         */
        public NGramSynonymTokenizerTestAnalyzer(int n, String delimiters, boolean expand, String synonyms) {
            this(n, delimiters, expand, getSynonymMap(synonyms));
        }

        /**
         * Creates an analyzer that uses an already built synonym map.
         *
         * @param n          the n-gram size handed to the tokenizer
         * @param delimiters the delimiter characters handed to the tokenizer
         * @param expand     the expand flag handed to the tokenizer and the synonym loader
         * @param synonyms   the synonym map to expose to the tokenizer; may be {@code null}
         */
        public NGramSynonymTokenizerTestAnalyzer(int n, String delimiters, boolean expand, SynonymMap synonyms) {
            this.n = n;
            this.delimiters = delimiters;
            this.expand = expand;
            this.synonyms = synonyms;
        }

        /**
         * Builds the analysis chain: one {@link NGramSynonymTokenizer} reading from
         * {@code reader}, backed by a stub {@link SynonymLoader} that serves this analyzer's
         * synonym map.
         */
        @Override
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
            final Tokenizer source = new NGramSynonymTokenizer(reader, n, delimiters, expand, true,
                    new SynonymLoader(null, null, expand, null) {
                        @Override
                        public SynonymMap getSynonymMap() {
                            return synonyms;
                        }

                        @Override
                        protected void createSynonymMap(boolean reload) {
                            // Intentionally empty: the map is supplied by the enclosing analyzer.
                        }
                    });
            return new TokenStreamComponents(source);
        }

        /**
         * Parses Solr-format synonym rules into a {@link SynonymMap}.
         *
         * @param synonyms synonym rules separated by {@code '/'}, or {@code null}
         * @return the built map, or {@code null} when {@code synonyms} is {@code null}
         * @throws RuntimeException wrapping the underlying {@link IOException} or
         *                          {@link ParseException} if the rules cannot be read or parsed
         */
        private static SynonymMap getSynonymMap(String synonyms) {
            if (synonyms == null) {
                return null;
            }

            SolrSynonymParser parser = new SolrSynonymParser(true, true, SynonymLoader.getAnalyzer(true));
            try {
                // The parser expects one rule per line; tests write rules separated by '/'.
                parser.parse(new StringReader(synonyms.replace('/', '\n')));
                return parser.build();
            } catch (IOException e) {
                // Keep the cause so a failing test shows why the rules were rejected.
                throw new RuntimeException("Failed to build synonym map from: " + synonyms, e);
            } catch (ParseException e) {
                throw new RuntimeException("Failed to build synonym map from: " + synonyms, e);
            }
        }
    }
}