org.tallison.lucene.search.concordance.TestConcordanceSearcher.java Source code

Introduction

Here is the source code for org.tallison.lucene.search.concordance.TestConcordanceSearcher.java
Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.tallison.lucene.search.concordance;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockTokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanOrQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Ignore;
import org.junit.Test;
import org.tallison.lucene.search.concordance.classic.AbstractConcordanceWindowCollector;
import org.tallison.lucene.search.concordance.classic.ConcordanceSearcher;
import org.tallison.lucene.search.concordance.classic.ConcordanceSortOrder;
import org.tallison.lucene.search.concordance.classic.ConcordanceWindow;
import org.tallison.lucene.search.concordance.classic.DocIdBuilder;
import org.tallison.lucene.search.concordance.classic.DocMetadataExtractor;
import org.tallison.lucene.search.concordance.classic.WindowBuilder;
import org.tallison.lucene.search.concordance.classic.impl.ConcordanceWindowCollector;
import org.tallison.lucene.search.concordance.classic.impl.DedupingConcordanceWindowCollector;
import org.tallison.lucene.search.concordance.classic.impl.DefaultSortKeyBuilder;
import org.tallison.lucene.search.concordance.classic.impl.IndexIdDocIdBuilder;

/**
 * Tests for {@link ConcordanceSearcher}: window contents and sizes, sort orders,
 * multi-valued fields, stop words, conversion of non-span queries, filter queries,
 * the de-duplicating collector and overlapping targets.
 *
 * <p>Every test builds its own small index through the {@code getAnalyzer}/{@code getDirectory}
 * helpers and the {@code FIELD} constant, none of which are defined in this class.
 * Readers and directories are opened in try-with-resources so that a failing
 * assertion does not leave them open.
 */
public class TestConcordanceSearcher extends ConcordanceTestBase {

    /** Stub extractor: selects no fields and always returns the same empty metadata map. */
    private static final DocMetadataExtractor metadataExtractor = new DocMetadataExtractor() {
        private final Set<String> fields = new HashSet<>();
        private final Map<String, String> data = new HashMap<>();

        @Override
        public Set<String> getFieldSelector() {
            return fields;
        }

        @Override
        public Map<String, String> extract(Document d) {
            return data;
        }
    };

    /** Document id builder shared by the tests that construct a fully specified {@link WindowBuilder}. */
    private static final DocIdBuilder docIdBuilder = new IndexIdDocIdBuilder();

    @BeforeClass
    public static void beforeClass() throws Exception {
        // NOOP for now
    }

    @AfterClass
    public static void afterClass() throws Exception {
        // NOOP for now
    }

    /**
     * Searches for the single term "a" in two documents and checks the collector limit,
     * the total number of windows, and the window order/content for both the PRE and the
     * POST sort order.
     */
    @Test
    public void testSimple() throws Exception {
        String[] docs = new String[] { "a b c a b c", "c b a c b a" };
        Analyzer analyzer = getAnalyzer(MockTokenFilter.EMPTY_STOPSET);
        SpanQuery q = new SpanTermQuery(new Term(FIELD, "a"));

        try (Directory directory = getDirectory(analyzer, docs);
                IndexReader reader = DirectoryReader.open(directory)) {
            IndexSearcher indexSearcher = new IndexSearcher(reader);

            WindowBuilder wb = new WindowBuilder(10, 10, analyzer.getOffsetGap(FIELD),
                    new DefaultSortKeyBuilder(ConcordanceSortOrder.PRE), metadataExtractor, docIdBuilder);
            ConcordanceSearcher searcher = new ConcordanceSearcher(wb);

            // a collector capped at 3 must stop at 3 even though 4 hits exist
            ConcordanceWindowCollector collector = new ConcordanceWindowCollector(3);
            searcher.search(indexSearcher, FIELD, q, null, analyzer, collector);
            assertEquals(3, collector.size());

            collector = new ConcordanceWindowCollector(ConcordanceWindowCollector.COLLECT_ALL);
            searcher.search(indexSearcher, FIELD, q, null, analyzer, collector);

            // test result size
            assertEquals(4, collector.size());

            // test result with sort order = pre
            List<ConcordanceWindow> windows = collector.getSortedWindows();
            String[] pres = new String[] { "", "c b", "c b a c b", "a b c" };
            String[] posts = new String[] { " b c a b c", " c b a", "", " b c" };

            // pin the count so that a short result list cannot skip expected values
            assertEquals("window count (pre sort)", pres.length, windows.size());
            for (int i = 0; i < pres.length; i++) {
                ConcordanceWindow w = windows.get(i);
                assertEquals("pres: " + i, pres[i], w.getPre());
                assertEquals("posts: " + i, posts[i], w.getPost());
            }

            // test sort order post
            // sort key is built at search time, so must re-search
            wb = new WindowBuilder(10, 10, analyzer.getOffsetGap(FIELD),
                    new DefaultSortKeyBuilder(ConcordanceSortOrder.POST), metadataExtractor, docIdBuilder);
            searcher = new ConcordanceSearcher(wb);

            collector = new ConcordanceWindowCollector(ConcordanceWindowCollector.COLLECT_ALL);
            searcher.search(indexSearcher, FIELD, q, null, analyzer, collector);

            windows = collector.getSortedWindows();
            posts = new String[] { "", " b c", " b c a b c", " c b a" };

            assertEquals("window count (post sort)", posts.length, windows.size());
            for (int i = 0; i < posts.length; i++) {
                assertEquals("posts: " + i, posts[i], windows.get(i).getPost());
            }
        }
    }

    /**
     * Same expectations as {@link #testSimple()}, but the two strings are indexed as the
     * values of one multi-valued field in a single document.
     */
    @Test
    public void testSimpleMultiValuedField() throws Exception {
        String[] doc = new String[] { "a b c a b c", "c b a c b a" };
        List<String[]> docs = new ArrayList<>();
        docs.add(doc);
        Analyzer analyzer = getAnalyzer(MockTokenFilter.EMPTY_STOPSET);
        SpanQuery q = new SpanTermQuery(new Term(FIELD, "a"));

        try (Directory directory = getDirectory(analyzer, docs);
                IndexReader reader = DirectoryReader.open(directory)) {
            IndexSearcher indexSearcher = new IndexSearcher(reader);
            ConcordanceSearcher searcher = new ConcordanceSearcher(
                    new WindowBuilder(10, 10, analyzer.getOffsetGap(FIELD)));

            ConcordanceWindowCollector collector = new ConcordanceWindowCollector(100);
            searcher.search(indexSearcher, FIELD, q, null, analyzer, collector);

            // test result size
            assertEquals(4, collector.size());

            // test result with sort order = pre
            List<ConcordanceWindow> windows = collector.getSortedWindows();
            String[] pres = new String[] { "", "c b", "c b a c b", "a b c" };
            String[] posts = new String[] { " b c a b c", " c b a", "", " b c" };

            for (int i = 0; i < pres.length; i++) {
                ConcordanceWindow w = windows.get(i);
                assertEquals("pres: " + i, pres[i], w.getPre());
                assertEquals("posts: " + i, posts[i], w.getPost());
            }

            // test sort order post
            // sort key is built at search time, so must re-search
            WindowBuilder wb = new WindowBuilder(10, 10, analyzer.getOffsetGap(FIELD),
                    new DefaultSortKeyBuilder(ConcordanceSortOrder.POST), metadataExtractor, docIdBuilder);
            searcher = new ConcordanceSearcher(wb);

            collector = new ConcordanceWindowCollector(100);
            searcher.search(indexSearcher, FIELD, q, null, analyzer, collector);

            windows = collector.getSortedWindows();
            posts = new String[] { "", " b c", " b c a b c", " c b a" };
            for (int i = 0; i < posts.length; i++) {
                assertEquals(posts[i], windows.get(i).getPost());
            }
        }
    }

    /**
     * Runs the same query with every combination of 0..5 tokens before and after the target
     * "d" and checks that the window grows with the requested size and stops at the ends of
     * the seven-token document.
     */
    @Test
    public void testWindowLengths() throws Exception {
        String[] doc = new String[] { "a b c d e f g" };
        List<String[]> docs = new ArrayList<>();
        docs.add(doc);
        Analyzer analyzer = getAnalyzer(MockTokenFilter.EMPTY_STOPSET);
        SpanQuery q = new SpanTermQuery(new Term(FIELD, "d"));

        // index into these arrays == requested number of tokens on that side
        String[] pres = { "", "c", "b c", "a b c", "a b c", "a b c" };
        String[] posts = { "", " e", " e f", " e f g", " e f g", " e f g" };

        try (Directory directory = getDirectory(analyzer, docs);
                IndexReader reader = DirectoryReader.open(directory)) {
            IndexSearcher indexSearcher = new IndexSearcher(reader);

            for (int tokensBefore = 0; tokensBefore < pres.length; tokensBefore++) {
                for (int tokensAfter = 0; tokensAfter < posts.length; tokensAfter++) {
                    String label = tokensBefore + " : " + tokensAfter;
                    WindowBuilder wb = new WindowBuilder(tokensBefore, tokensAfter,
                            analyzer.getOffsetGap(FIELD));
                    ConcordanceSearcher searcher = new ConcordanceSearcher(wb);
                    ConcordanceWindowCollector collector = new ConcordanceWindowCollector(100);
                    searcher.search(indexSearcher, FIELD, q, null, analyzer, collector);

                    ConcordanceWindow w = collector.getSortedWindows().get(0);
                    assertEquals(label, pres[tokensBefore], w.getPre());
                    assertEquals(label, posts[tokensAfter], w.getPost());
                }
            }
        }
    }

    /**
     * Tests handling of a target that matches (or fails to match) across different values
     * of a multi-valued field. The same three-value document is indexed three times with
     * different gap arguments passed to {@code getAnalyzer}, and searched with an ordered
     * {@link SpanNearQuery} for "the", "clockwork", "orange" using a different slop each time.
     */
    @Test
    public void testClockworkOrangeMultiValuedFieldProblem() throws Exception {
        String[] doc = new String[] { "a b c a b the", "clockwork", "orange b a c b a" };
        List<String[]> docs = new ArrayList<>();
        docs.add(doc);

        SpanQuery q1 = new SpanTermQuery(new Term(FIELD, "the"));
        SpanQuery q2 = new SpanTermQuery(new Term(FIELD, "clockwork"));
        SpanQuery q3 = new SpanTermQuery(new Term(FIELD, "orange"));

        // hit with gap arguments (0, 10) and a slop of 3
        Analyzer analyzer = getAnalyzer(MockTokenFilter.EMPTY_STOPSET, 0, 10);
        try (Directory directory = getDirectory(analyzer, docs);
                IndexReader reader = DirectoryReader.open(directory)) {
            IndexSearcher indexSearcher = new IndexSearcher(reader);
            ConcordanceSearcher searcher = new ConcordanceSearcher(
                    new WindowBuilder(3, 3, analyzer.getOffsetGap(FIELD)));
            SpanQuery q = new SpanNearQuery(new SpanQuery[] { q1, q2, q3 }, 3, true);
            ConcordanceWindowCollector collector = new ConcordanceWindowCollector(3);

            searcher.search(indexSearcher, FIELD, q, null, analyzer, collector);

            assertEquals(1, collector.size());
            assertClockworkWindow(collector.getSortedWindows().iterator().next());
        }

        // test hit even over long inter-field gap
        analyzer = getAnalyzer(MockTokenFilter.EMPTY_STOPSET, 20, 50);
        try (Directory directory = getDirectory(analyzer, docs);
                IndexReader reader = DirectoryReader.open(directory)) {
            IndexSearcher indexSearcher = new IndexSearcher(reader);
            ConcordanceSearcher searcher = new ConcordanceSearcher(
                    new WindowBuilder(3, 3, analyzer.getOffsetGap(FIELD)));
            SpanQuery q = new SpanNearQuery(new SpanQuery[] { q1, q2, q3 }, 120, true);
            ConcordanceWindowCollector collector = new ConcordanceWindowCollector(100);

            searcher.search(indexSearcher, FIELD, q, null, analyzer, collector);

            assertEquals(1, collector.size());
            assertClockworkWindow(collector.getSortedWindows().iterator().next());
        }

        // test miss
        analyzer = getAnalyzer(MockTokenFilter.EMPTY_STOPSET, 100, 100);
        try (Directory directory = getDirectory(analyzer, docs);
                IndexReader reader = DirectoryReader.open(directory)) {
            IndexSearcher indexSearcher = new IndexSearcher(reader);
            ConcordanceSearcher searcher = new ConcordanceSearcher(new WindowBuilder());
            SpanQuery q = new SpanNearQuery(new SpanQuery[] { q1, q2, q3 }, 5, true);
            ConcordanceWindowCollector collector = new ConcordanceWindowCollector(100);

            searcher.search(indexSearcher, FIELD, q, null, analyzer, collector);

            assertEquals(0, collector.size());
        }
    }

    /** Asserts the target, pre and post text expected for the "the clockwork orange" hit. */
    private void assertClockworkWindow(ConcordanceWindow w) {
        assertEquals("target", "the | clockwork | orange", w.getTarget());
        assertEquals("pre", "c a b", w.getPre());
        assertEquals("post", " b a c", w.getPost());
    }

    /**
     * Checks the window text around "d" when the analyzer is built with the English stop set
     * and the surrounding text contains the stop word "the".
     */
    @Test
    public void testWithStops() throws Exception {
        String[] docs = new String[] { "a b the d e the f", "g h the d the j" };
        Analyzer analyzer = getAnalyzer(MockTokenFilter.ENGLISH_STOPSET);
        SpanQuery q = new SpanTermQuery(new Term(FIELD, "d"));

        try (Directory directory = getDirectory(analyzer, docs);
                IndexReader reader = DirectoryReader.open(directory)) {
            IndexSearcher indexSearcher = new IndexSearcher(reader);
            ConcordanceSearcher searcher = new ConcordanceSearcher(
                    new WindowBuilder(2, 2, analyzer.getOffsetGap(FIELD)));
            ConcordanceWindowCollector collector = new ConcordanceWindowCollector(3);

            searcher.search(indexSearcher, FIELD, q, null, analyzer, collector);
            List<ConcordanceWindow> windows = collector.getSortedWindows();
            assertEquals(2, windows.size());

            // the second word after the target is a stop word
            // this post-component of this window should only go to the first word after
            // the target
            ConcordanceWindow first = windows.get(0);
            assertEquals("b the", first.getPre());
            assertEquals("d", first.getTarget());
            assertEquals(" e", first.getPost());

            ConcordanceWindow second = windows.get(1);
            assertEquals("h the", second.getPre());
            assertEquals("d", second.getTarget());
            assertEquals(" the j", second.getPost());
        }
    }

    /**
     * Passes a plain {@link BooleanQuery} (not a span query) to the searcher, with and
     * without a filter query, and checks the number of windows returned.
     */
    @Test
    public void testBasicStandardQueryConversion() throws Exception {
        String[] docs = new String[] { "a b c a b c", "c b a c b a d e a", "c b a c b a e a b c a" };
        Analyzer analyzer = getAnalyzer(MockTokenFilter.EMPTY_STOPSET);
        BooleanQuery q = new BooleanQuery.Builder().add(new TermQuery(new Term(FIELD, "a")), Occur.MUST)
                .add(new TermQuery(new Term(FIELD, "d")), Occur.MUST_NOT).build();

        try (Directory directory = getDirectory(analyzer, docs);
                IndexReader reader = DirectoryReader.open(directory)) {
            IndexSearcher indexSearcher = new IndexSearcher(reader);
            ConcordanceSearcher searcher = new ConcordanceSearcher(
                    new WindowBuilder(10, 10, analyzer.getOffsetGap(FIELD)));

            ConcordanceWindowCollector collector = new ConcordanceWindowCollector(10);
            searcher.search(indexSearcher, FIELD, q, null, analyzer, collector);
            // shouldn't include document with "d"
            assertEquals(6, collector.size());

            // should only include document with "e" and not "d"
            Query filter = new TermQuery(new Term(FIELD, "e"));
            collector = new ConcordanceWindowCollector(10);

            searcher.search(indexSearcher, FIELD, (Query) q, filter, analyzer, collector);
            assertEquals(4, collector.size());
        }
    }

    /**
     * Tests what happens if a Query doesn't contain a term in the "span" field
     * in the searcher...should be no exception and zero documents returned.
     */
    @Test
    public void testMismatchingFieldsInStandardQueryConversion() throws Exception {
        String[] docs = new String[] { "a b c a b c", };
        Analyzer analyzer = getAnalyzer(MockTokenFilter.EMPTY_STOPSET);
        // the query targets a field name that differs from the one being searched
        Query q = new TermQuery(new Term("_" + FIELD, "a"));

        try (Directory directory = getDirectory(analyzer, docs);
                IndexReader reader = DirectoryReader.open(directory)) {
            IndexSearcher indexSearcher = new IndexSearcher(reader);
            ConcordanceSearcher searcher = new ConcordanceSearcher(
                    new WindowBuilder(10, 10, analyzer.getOffsetGap(FIELD)));
            ConcordanceWindowCollector collector = new ConcordanceWindowCollector(10);

            searcher.search(indexSearcher, FIELD, q, null, analyzer, collector);

            assertEquals(0, collector.size());
        }
    }

    /**
     * Checks the {@link DedupingConcordanceWindowCollector}: six documents are indexed, four
     * of which differ only in letter case; the assertions expect a capped collector to hold
     * 2 windows and an uncapped one to hold 3.
     */
    @Test
    public void testUniqueCollector() throws Exception {
        String[] docs = new String[] { "a b c d c b a", "a B C d c b a", "a b C d C B a", "a b c d C B A",
                "e f g d g f e", "h i j d j i h" };
        Analyzer analyzer = getAnalyzer(MockTokenFilter.EMPTY_STOPSET);
        SpanQuery q = new SpanTermQuery(new Term(FIELD, "d"));

        try (Directory directory = getDirectory(analyzer, docs);
                IndexReader reader = DirectoryReader.open(directory)) {
            IndexSearcher indexSearcher = new IndexSearcher(reader);
            ConcordanceSearcher searcher = new ConcordanceSearcher(
                    new WindowBuilder(10, 10, analyzer.getOffsetGap(FIELD)));

            // the cast to Query is kept from the original call sites on purpose
            DedupingConcordanceWindowCollector collector = new DedupingConcordanceWindowCollector(2);
            searcher.search(indexSearcher, FIELD, (Query) q, null, analyzer, collector);
            assertEquals(2, collector.size());

            collector = new DedupingConcordanceWindowCollector(AbstractConcordanceWindowCollector.COLLECT_ALL);
            searcher.search(indexSearcher, FIELD, (Query) q, null, analyzer, collector);
            assertEquals(3, collector.size());
        }
    }

    /**
     * Checks that a {@link DedupingConcordanceWindowCollector} capped at 3 still reports a
     * count of 4 for the window that occurs in four identical documents.
     */
    @Test
    public void testUniqueCollectorWithSameWindowOverflow() throws Exception {
        String[] docs = new String[] { "a b c d c b a", "a b c d c b a", "a b c d c b a", "a b c d c b a",
                "e f g d g f e", "h i j d j i h" };
        Analyzer analyzer = getAnalyzer(MockTokenFilter.EMPTY_STOPSET);
        SpanQuery q = new SpanTermQuery(new Term(FIELD, "d"));

        try (Directory directory = getDirectory(analyzer, docs);
                IndexReader reader = DirectoryReader.open(directory)) {
            IndexSearcher indexSearcher = new IndexSearcher(reader);
            ConcordanceSearcher searcher = new ConcordanceSearcher(
                    new WindowBuilder(10, 10, analyzer.getOffsetGap(FIELD)));

            DedupingConcordanceWindowCollector collector = new DedupingConcordanceWindowCollector(3);
            searcher.search(indexSearcher, FIELD, (Query) q, null, analyzer, collector);

            assertEquals(3, collector.size());
            assertEquals(4, collector.getSortedWindows().get(0).getCount());
        }
    }

    /**
     * Searches with a {@link SpanOrQuery} whose clauses ("a" and the phrase "a b") match
     * overlapping spans, first with the searcher's default setting and then after
     * {@code setAllowTargetOverlaps(true)}.
     */
    @Test
    public void testAllowTargetOverlaps() throws Exception {
        String[] docs = new String[] { "a b c" };
        Analyzer analyzer = getAnalyzer(MockTokenFilter.EMPTY_STOPSET);

        SpanQuery term = new SpanTermQuery(new Term(FIELD, "a"));
        SpanQuery phrase = new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term(FIELD, "a")),
                new SpanTermQuery(new Term(FIELD, "b")) }, 0, true);
        SpanOrQuery q = new SpanOrQuery(new SpanQuery[] { term, phrase });

        try (Directory directory = getDirectory(analyzer, docs);
                IndexReader reader = DirectoryReader.open(directory)) {
            IndexSearcher indexSearcher = new IndexSearcher(reader);
            WindowBuilder wb = new WindowBuilder(10, 10, analyzer.getOffsetGap(FIELD),
                    new DefaultSortKeyBuilder(ConcordanceSortOrder.PRE), metadataExtractor, docIdBuilder);
            ConcordanceSearcher searcher = new ConcordanceSearcher(wb);

            ConcordanceWindowCollector collector = new ConcordanceWindowCollector(10);
            searcher.search(indexSearcher, FIELD, q, null, analyzer, collector);

            //default should be: don't allow target overlaps
            assertEquals(1, collector.size());

            searcher.setAllowTargetOverlaps(true);
            collector = new ConcordanceWindowCollector(10);
            searcher.search(indexSearcher, FIELD, q, null, analyzer, collector);

            //now there should be two windows with allowTargetOverlaps = true
            assertEquals(2, collector.size());
        }
    }

    /**
     * Test to make sure that queries are rewritten: the main query is built from
     * {@link PrefixQuery} clauses, and the filter is given once as a plain prefix query and
     * once wrapped in a {@link SpanMultiTermQueryWrapper}.
     */
    @Test
    public void testRewrites() throws Exception {
        String[] docs = new String[] { "aa ba ca aa ba ca", "ca ba aa ca ba aa da ea za",
                "ca ba aa ca ba aa ea aa ba ca za" };
        Analyzer analyzer = getAnalyzer(MockTokenFilter.EMPTY_STOPSET);
        BooleanQuery q = new BooleanQuery.Builder().add(new PrefixQuery(new Term(FIELD, "a")), Occur.MUST)
                .add(new PrefixQuery(new Term(FIELD, "d")), Occur.MUST_NOT).build();

        try (Directory directory = getDirectory(analyzer, docs);
                IndexReader reader = DirectoryReader.open(directory)) {
            IndexSearcher indexSearcher = new IndexSearcher(reader);
            ConcordanceSearcher searcher = new ConcordanceSearcher(
                    new WindowBuilder(10, 10, analyzer.getOffsetGap(FIELD)));

            // straight prefix query as the filter
            ConcordanceWindowCollector collector = new ConcordanceWindowCollector(10);
            searcher.search(indexSearcher, FIELD, q, new PrefixQuery(new Term(FIELD, "z")), analyzer,
                    collector);
            // shouldn't include document with "da", but must include one with za
            assertEquals(3, collector.size());

            // span-wrapped prefix query as the filter
            collector = new ConcordanceWindowCollector(10);
            searcher.search(indexSearcher, FIELD, q,
                    new SpanMultiTermQueryWrapper<>(new PrefixQuery(new Term(FIELD, "z"))), analyzer, collector);
            // shouldn't include document with "da", but must include one with za
            assertEquals(3, collector.size());
        }
    }

    /**
     * Smoke test for the bigram analyzer: it makes no assertions and passes as long as
     * indexing and searching complete without throwing. Currently ignored.
     */
    @Test
    @Ignore("until we fix bigram filter")
    public void testBigrams() throws Exception {
        String[] docs = new String[] { "a b c z d e f g h z i j z k l m n o p" };
        Analyzer analyzer = getBigramAnalyzer(MockTokenFilter.EMPTY_STOPSET, 10, 10, true);
        Query q = new TermQuery(new Term(FIELD, "z"));

        try (Directory directory = getDirectory(analyzer, docs);
                IndexReader reader = DirectoryReader.open(directory)) {
            IndexSearcher indexSearcher = new IndexSearcher(reader);
            ConcordanceSearcher searcher = new ConcordanceSearcher(
                    new WindowBuilder(2, 2, analyzer.getOffsetGap(FIELD)));
            ConcordanceWindowCollector collector = new ConcordanceWindowCollector(10);

            // the same query serves as both the main query and the filter
            searcher.search(indexSearcher, FIELD, q, q, analyzer, collector);
        }
    }

    /**
     * Smoke test for {@code getCJKBigramAnalyzer(false)}: it makes no assertions and passes
     * as long as analysis, indexing and searching complete without throwing.
     *
     * <p>NOTE(review): the empty document text and empty query term look like non-ASCII
     * (presumably CJK) literals that were lost to an encoding problem - confirm against the
     * upstream file.
     */
    @Test
    public void testCJKNoUnigrams() throws Exception {
        Analyzer analyzer = getCJKBigramAnalyzer(false);

        // Walk the stream through reset/end/close so it is released before the analyzer
        // is reused for indexing; the attribute lookups are kept from the original and
        // their results are deliberately not inspected.
        try (TokenStream ts = analyzer.tokenStream(FIELD, "")) {
            ts.reset();
            ts.getAttribute(CharTermAttribute.class);
            ts.getAttribute(PositionIncrementAttribute.class);
            ts.end();
        }

        String[] docs = new String[] { "" };
        Query q = new TermQuery(new Term(FIELD, ""));

        try (Directory directory = getDirectory(analyzer, docs);
                IndexReader reader = DirectoryReader.open(directory)) {
            IndexSearcher indexSearcher = new IndexSearcher(reader);
            ConcordanceSearcher searcher = new ConcordanceSearcher(
                    new WindowBuilder(2, 2, analyzer.getOffsetGap(FIELD)));
            ConcordanceWindowCollector collector = new ConcordanceWindowCollector(10);

            // the same query serves as both the main query and the filter
            searcher.search(indexSearcher, FIELD, q, q, analyzer, collector);
        }
    }

    /**
     * Smoke test for {@code getCJKBigramAnalyzer(true)}: it makes no assertions and passes
     * as long as analysis, indexing and searching complete without throwing. Currently
     * ignored.
     *
     * <p>NOTE(review): the "?" passed to the token stream is presumably a garbled non-ASCII
     * literal - confirm against the upstream file.
     */
    @Test
    @Ignore("until we fix bigrams")
    public void testCJKUnigrams() throws Exception {
        Analyzer analyzer = getCJKBigramAnalyzer(true);

        // The stream used to be opened and reset but never ended or closed; close it so it
        // is not leaked and the analyzer can be reused for indexing below.
        try (TokenStream ts = analyzer.tokenStream(FIELD, "?")) {
            ts.reset();
            ts.end();
        }

        String[] docs = new String[] { "a b c d e f g" };
        Query q = new TermQuery(new Term(FIELD, "c"));

        try (Directory directory = getDirectory(analyzer, docs);
                IndexReader reader = DirectoryReader.open(directory)) {
            IndexSearcher indexSearcher = new IndexSearcher(reader);
            ConcordanceSearcher searcher = new ConcordanceSearcher(
                    new WindowBuilder(2, 2, analyzer.getOffsetGap(FIELD)));
            ConcordanceWindowCollector collector = new ConcordanceWindowCollector(10);

            // the same query serves as both the main query and the filter
            searcher.search(indexSearcher, FIELD, q, q, analyzer, collector);
        }
    }
}