Example usage for org.apache.lucene.analysis TokenStream reset

List of usage examples for org.apache.lucene.analysis TokenStream reset

Introduction

On this page you can find usage examples for org.apache.lucene.analysis.TokenStream.reset().

Prototype

public void reset() throws IOException 

Document

This method is called by a consumer before it begins consumption using incrementToken().
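
A minimal sketch of the consumer contract described above, assuming a recent Lucene release (5.x or later). The analyzer, field name, and sample text are illustrative and not taken from the examples below:

import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class TokenStreamResetExample {
    public static void main(String[] args) throws IOException {
        try (Analyzer analyzer = new StandardAnalyzer();
                TokenStream stream = analyzer.tokenStream("body", "Hello token stream world")) {
            CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
            stream.reset();                   // required before the first incrementToken()
            while (stream.incrementToken()) { // one token per call
                System.out.println(termAtt.toString());
            }
            stream.end();                     // finalize end-of-stream state (e.g. final offset)
        }                                     // try-with-resources closes the stream and analyzer
    }
}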

Usage

From source file:org.sindice.siren.qparser.keyword.processors.DatatypeAnalyzerProcessor.java

License:Apache License

@Override
protected QueryNode postProcessNode(final QueryNode node) throws QueryNodeException {
    if (node instanceof TextableQueryNode && !(node instanceof WildcardQueryNode)
            && !(node instanceof FuzzyQueryNode) && !(node instanceof RegexpQueryNode)
            && !(node.getParent() instanceof RangeQueryNode)) {

        this.positionIncrementsEnabled = false;
        final Boolean positionIncrementsEnabled = this.getQueryConfigHandler()
                .get(ConfigurationKeys.ENABLE_POSITION_INCREMENTS);
        if (positionIncrementsEnabled != null) {
            this.positionIncrementsEnabled = positionIncrementsEnabled;
        }

        final FieldQueryNode fieldNode = ((FieldQueryNode) node);
        final String text = fieldNode.getTextAsString();
        final String field = fieldNode.getFieldAsString();
        final String datatype = (String) fieldNode.getTag(DatatypeQueryNode.DATATYPE_TAGID);

        if (datatype == null) {
            return node;
        }

        final Analyzer analyzer = this.getQueryConfigHandler().get(KeywordConfigurationKeys.DATATYPES_ANALYZERS)
                .get(datatype);
        if (analyzer == null) {
            throw new QueryNodeException(new MessageImpl(QueryParserMessages.INVALID_SYNTAX,
                    "No analyzer associated with " + datatype));
        }

        PositionIncrementAttribute posIncrAtt = null;
        int numTokens = 0;
        int positionCount = 0;
        boolean severalTokensAtSamePosition = false;

        final TokenStream source;
        try {
            source = analyzer.tokenStream(field, new StringReader(text));
            source.reset();
        } catch (final IOException e1) {
            throw new RuntimeException(e1);
        }
        final CachingTokenFilter buffer = new CachingTokenFilter(source);

        if (buffer.hasAttribute(PositionIncrementAttribute.class)) {
            posIncrAtt = buffer.getAttribute(PositionIncrementAttribute.class);
        }

        try {
            while (buffer.incrementToken()) {
                numTokens++;
                final int positionIncrement = (posIncrAtt != null) ? posIncrAtt.getPositionIncrement() : 1;
                if (positionIncrement != 0) {
                    positionCount += positionIncrement;
                } else {
                    severalTokensAtSamePosition = true;
                }
            }
        } catch (final IOException e) {
            // ignore
        }

        try {
            // rewind the buffer stream
            buffer.reset();
            // close original stream - all tokens buffered
            source.close();
        } catch (final IOException e) {
            // ignore
        }

        if (!buffer.hasAttribute(CharTermAttribute.class)) {
            return new NoTokenFoundQueryNode();
        }
        final CharTermAttribute termAtt = buffer.getAttribute(CharTermAttribute.class);

        if (numTokens == 0) {
            if (nbTwigs != 0) { // Twig special case
                return new WildcardNodeQueryNode();
            }
            return new NoTokenFoundQueryNode();
        } else if (numTokens == 1) {
            String term = null;
            try {
                boolean hasNext;
                hasNext = buffer.incrementToken();
                assert hasNext == true;
                term = termAtt.toString();
            } catch (final IOException e) {
                // safe to ignore, because we know the number of tokens
            }
            fieldNode.setText(term);
            return fieldNode;
        } else {
            // no phrase query:
            final LinkedList<QueryNode> children = new LinkedList<QueryNode>();

            int position = -1;

            for (int i = 0; i < numTokens; i++) {
                String term = null;
                final int positionIncrement = 1;

                try {
                    final boolean hasNext = buffer.incrementToken();
                    assert hasNext == true;
                    term = termAtt.toString();

                } catch (final IOException e) {
                    // safe to ignore, because we know the number of tokens
                }

                final FieldQueryNode newFieldNode = new FieldQueryNode(field, term, -1, -1);

                if (this.positionIncrementsEnabled) {
                    position += positionIncrement;
                    newFieldNode.setPositionIncrement(position);
                } else {
                    newFieldNode.setPositionIncrement(i);
                }

                children.add(newFieldNode);
            }

            if (node.getParent() instanceof TokenizedPhraseQueryNode) {
                throw new QueryNodeException(new MessageImpl("Cannot build a MultiPhraseQuery"));
            }
            // If multiple terms at one single position, this must be a query
            // expansion. Perform a OR between the terms.
            if (severalTokensAtSamePosition && positionCount == 1) {
                return new GroupQueryNode(new OrQueryNode(children));
            }
            // if several tokens at same position && position count > 1, then
            // results can be unexpected
            else {
                final TokenizedPhraseQueryNode pq = new TokenizedPhraseQueryNode();
                for (int i = 0; i < children.size(); i++) {
                    pq.add(children.get(i));
                }
                return pq;
            }
        }
    } else if (node instanceof TwigQueryNode) {
        nbTwigs--;
        assert nbTwigs >= 0;
    }
    return node;
}

From source file:org.sindice.siren.qparser.keyword.processors.PhraseQueryNodeProcessor.java

License:Apache License

@Override
protected QueryNode postProcessNode(final QueryNode node) throws QueryNodeException {
    if (node instanceof TextableQueryNode && !(node instanceof WildcardQueryNode)
            && !(node instanceof FuzzyQueryNode) && !(node instanceof RegexpQueryNode)
            && !(node.getParent() instanceof RangeQueryNode)) {

        final FieldQueryNode fieldNode = ((FieldQueryNode) node);
        final String text = fieldNode.getTextAsString();
        final String field = fieldNode.getFieldAsString();

        final TokenStream source;
        try {
            source = this.analyzer.tokenStream(field, new StringReader(text));
            source.reset();
        } catch (final IOException e1) {
            throw new RuntimeException(e1);
        }
        final CachingTokenFilter buffer = new CachingTokenFilter(source);

        int numTokens = 0;
        try {
            while (buffer.incrementToken()) {
                numTokens++;
            }
        } catch (final IOException e) {
            // ignore
        }

        try {
            // rewind the buffer stream
            buffer.reset();
            // close original stream - all tokens buffered
            source.close();
        } catch (final IOException e) {
            // ignore
        }

        if (!buffer.hasAttribute(CharTermAttribute.class)) {
            return new NoTokenFoundQueryNode();
        }
        final CharTermAttribute termAtt = buffer.getAttribute(CharTermAttribute.class);

        if (numTokens == 0) {
            return new NoTokenFoundQueryNode();
        } else if (numTokens != 1) {
            // phrase query
            final TokenizedPhraseQueryNode pq = new TokenizedPhraseQueryNode();

            for (int i = 0; i < numTokens; i++) {
                String term = null;

                try {
                    final boolean hasNext = buffer.incrementToken();
                    assert hasNext == true;
                    term = termAtt.toString();

                } catch (final IOException e) {
                    // safe to ignore, because we know the number of tokens
                }

                final FieldQueryNode newFieldNode = new FieldQueryNode(field, term, -1, -1);
                newFieldNode.setPositionIncrement(i);
                pq.add(newFieldNode);
            }
            return pq;
        }
    }
    return node;
}

From source file:org.solbase.lucenehbase.IndexWriter.java

License:Apache License

@SuppressWarnings("unchecked")
public ParsedDoc parseDoc(Document doc, Analyzer analyzer, String indexName, int docNumber,
        List<String> sortFieldNames) throws CorruptIndexException, IOException {
    // given doc, what are all of terms we indexed
    List<Term> allIndexedTerms = new ArrayList<Term>();
    Map<String, byte[]> fieldCache = new HashMap<String, byte[]>(1024);

    // need to hold onto TermDocMetaData, so it can return this array
    List<TermDocMetadata> metadatas = new ArrayList<TermDocMetadata>();

    byte[] docId = Bytes.toBytes(docNumber);
    int position = 0;

    for (Fieldable field : (List<Fieldable>) doc.getFields()) {
        // Indexed field
        if (field.isIndexed() && field.isTokenized()) {

            TokenStream tokens = field.tokenStreamValue();

            if (tokens == null) {
                tokens = analyzer.tokenStream(field.name(), new StringReader(field.stringValue()));
            }

            // collect term information per field
            Map<Term, Map<ByteBuffer, List<Number>>> allTermInformation = new ConcurrentSkipListMap<Term, Map<ByteBuffer, List<Number>>>();

            int lastOffset = 0;
            if (position > 0) {
                position += analyzer.getPositionIncrementGap(field.name());
            }

            tokens.reset(); // reset the TokenStream to the first token

            // offsets
            OffsetAttribute offsetAttribute = null;
            if (field.isStoreOffsetWithTermVector())
                offsetAttribute = (OffsetAttribute) tokens.addAttribute(OffsetAttribute.class);

            // positions
            PositionIncrementAttribute posIncrAttribute = null;
            if (field.isStorePositionWithTermVector())
                posIncrAttribute = (PositionIncrementAttribute) tokens
                        .addAttribute(PositionIncrementAttribute.class);

            TermAttribute termAttribute = (TermAttribute) tokens.addAttribute(TermAttribute.class);

            // store normalizations of field per term per document
            // rather
            // than per field.
            // this adds more to write but less to read on other side
            Integer tokensInField = new Integer(0);

            while (tokens.incrementToken()) {
                tokensInField++;
                Term term = new Term(field.name(), termAttribute.term());

                allIndexedTerms.add(term);

                // fetch all collected information for this term
                Map<ByteBuffer, List<Number>> termInfo = allTermInformation.get(term);

                if (termInfo == null) {
                    termInfo = new ConcurrentSkipListMap<ByteBuffer, List<Number>>();
                    allTermInformation.put(term, termInfo);
                }

                // term frequency
                List<Number> termFrequency = termInfo.get(TermDocMetadata.termFrequencyKeyBytes);
                if (termFrequency == null) {
                    termFrequency = new ArrayList<Number>();
                    termFrequency.add(new Integer(0));
                    termInfo.put(TermDocMetadata.termFrequencyKeyBytes, termFrequency);
                }
                // increment
                termFrequency.set(0, termFrequency.get(0).intValue() + 1);

                // position vector
                if (field.isStorePositionWithTermVector()) {
                    position += (posIncrAttribute.getPositionIncrement() - 1);

                    List<Number> positionVector = termInfo.get(TermDocMetadata.positionVectorKeyBytes);

                    if (positionVector == null) {
                        positionVector = new ArrayList<Number>();
                        termInfo.put(TermDocMetadata.positionVectorKeyBytes, positionVector);
                    }

                    positionVector.add(++position);
                }

                // term offsets
                if (field.isStoreOffsetWithTermVector()) {

                    List<Number> offsetVector = termInfo.get(TermDocMetadata.offsetVectorKeyBytes);
                    if (offsetVector == null) {
                        offsetVector = new ArrayList<Number>();
                        termInfo.put(TermDocMetadata.offsetVectorKeyBytes, offsetVector);
                    }

                    offsetVector.add(lastOffset + offsetAttribute.startOffset());
                    offsetVector.add(lastOffset + offsetAttribute.endOffset());

                }

                List<Number> sortValues = new ArrayList<Number>();
                // init sortValues
                for (int i = 0; i < Scorer.numSort; i++) {
                    sortValues.add(new Integer(-1));
                }

                int order = 0;

                // extract sort field value and store it in term doc metadata obj
                for (String fieldName : sortFieldNames) {
                    Fieldable fieldable = doc.getFieldable(fieldName);

                    if (fieldable instanceof EmbeddedSortField) {
                        EmbeddedSortField sortField = (EmbeddedSortField) fieldable;

                        int value = -1;
                        if (sortField.stringValue() != null) {
                            value = Integer.parseInt(sortField.stringValue());
                        }
                        int sortSlot = sortField.getSortSlot();

                        sortValues.set(sortSlot - 1, new Integer(value));
                    } else {
                        // TODO: this logic is used for real time indexing.
                        // hacky. depending on order of sort field names in array
                        int value = -1;
                        if (fieldable.stringValue() != null) {
                            value = Integer.parseInt(fieldable.stringValue());
                        }
                        sortValues.set(order++, new Integer(value));
                    }
                }
                termInfo.put(TermDocMetadata.sortFieldKeyBytes, sortValues);
            }

            List<Number> bnorm = null;
            if (!field.getOmitNorms()) {
                bnorm = new ArrayList<Number>();
                float norm = doc.getBoost();
                norm *= field.getBoost();
                norm *= similarity.lengthNorm(field.name(), tokensInField);
                bnorm.add(Similarity.encodeNorm(norm));
            }

            for (Map.Entry<Term, Map<ByteBuffer, List<Number>>> term : allTermInformation.entrySet()) {
                Term tempTerm = term.getKey();

                byte[] fieldTermKeyBytes = SolbaseUtil.generateTermKey(tempTerm);

                // Mix in the norm for this field alongside each term
                // more writes but faster on read side.
                if (!field.getOmitNorms()) {
                    term.getValue().put(TermDocMetadata.normsKeyBytes, bnorm);
                }

                TermDocMetadata data = new TermDocMetadata(docNumber, term.getValue(), fieldTermKeyBytes,
                        tempTerm);
                metadatas.add(data);
            }
        }

        // Untokenized fields go in without a termPosition
        if (field.isIndexed() && !field.isTokenized()) {
            Term term = new Term(field.name(), field.stringValue());
            allIndexedTerms.add(term);

            byte[] fieldTermKeyBytes = SolbaseUtil.generateTermKey(term);

            Map<ByteBuffer, List<Number>> termMap = new ConcurrentSkipListMap<ByteBuffer, List<Number>>();
            termMap.put(TermDocMetadata.termFrequencyKeyBytes, Arrays.asList(new Number[] {}));
            termMap.put(TermDocMetadata.positionVectorKeyBytes, Arrays.asList(new Number[] {}));

            TermDocMetadata data = new TermDocMetadata(docNumber, termMap, fieldTermKeyBytes, term);
            metadatas.add(data);
        }

        // Stores each field as a column under this doc key
        if (field.isStored()) {

            byte[] _value = field.isBinary() ? field.getBinaryValue() : Bytes.toBytes(field.stringValue());

            // first byte flags if binary or not
            byte[] value = new byte[_value.length + 1];
            System.arraycopy(_value, 0, value, 0, _value.length);

            value[value.length - 1] = (byte) (field.isBinary() ? Byte.MAX_VALUE : Byte.MIN_VALUE);

            // logic to handle multiple fields w/ same name
            byte[] currentValue = fieldCache.get(field.name());
            if (currentValue == null) {
                fieldCache.put(field.name(), value);
            } else {

                // append new data
                byte[] newValue = new byte[currentValue.length + SolbaseUtil.delimiter.length + value.length
                        - 1];
                System.arraycopy(currentValue, 0, newValue, 0, currentValue.length - 1);
                System.arraycopy(SolbaseUtil.delimiter, 0, newValue, currentValue.length - 1,
                        SolbaseUtil.delimiter.length);
                System.arraycopy(value, 0, newValue, currentValue.length + SolbaseUtil.delimiter.length - 1,
                        value.length);

                fieldCache.put(field.name(), newValue);
            }
        }
    }

    Put documentPut = new Put(SolbaseUtil.randomize(docNumber));

    // Store each field as a column under this docId
    for (Map.Entry<String, byte[]> field : fieldCache.entrySet()) {
        documentPut.add(Bytes.toBytes("field"), Bytes.toBytes(field.getKey()), field.getValue());
    }

    // in case of real time update, we need to add back docId field
    if (!documentPut.has(Bytes.toBytes("field"), Bytes.toBytes("docId"))) {

        byte[] docIdStr = Bytes.toBytes(new Integer(docNumber).toString());
        // first byte flags if binary or not
        byte[] value = new byte[docIdStr.length + 1];
        System.arraycopy(docIdStr, 0, value, 0, docIdStr.length);

        value[value.length - 1] = (byte) (Byte.MIN_VALUE);
        documentPut.add(Bytes.toBytes("field"), Bytes.toBytes("docId"), value);
    }

    // Finally, Store meta-data so we can delete this document
    documentPut.add(Bytes.toBytes("allTerms"), Bytes.toBytes("allTerms"),
            SolbaseUtil.toBytes(allIndexedTerms).array());

    ParsedDoc parsedDoc = new ParsedDoc(metadatas, doc, documentPut, fieldCache.entrySet(), allIndexedTerms);
    return parsedDoc;

}

From source file:org.splevo.vpm.analyzer.semantic.lucene.LuceneCodeAnalyzer.java

License:Open Source License

/**
 * Stem a list of words with a configured stemmer.
 *
 * @param words
 *            The list of words to stem.
 * @param stemming
 *            The stemmer to be used.
 * @return The stemmed list of words.
 */
@SuppressWarnings("resource")
public static String[] stemWords(String[] words, Stemming stemming) {
    Set<String> stemmedStopWords = Sets.newHashSet();

    for (String word : words) {
        TokenStream tokenStream = new StandardTokenizer(LUCENE_VERSION, new StringReader(word));
        tokenStream = Stemming.wrapStemmingFilter(tokenStream, stemming);

        CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);
        try {
            tokenStream.reset();
            while (tokenStream.incrementToken()) {
                String term = charTermAttribute.toString();
                stemmedStopWords.add(term);
            }
        } catch (IOException e) {
            logger.error("Failed to stem a list of words", e);
        }
    }
    return stemmedStopWords.toArray(new String[] {});
}

From source file:org.tallison.lucene.contrast.QueryToCorpusContraster.java

License:Apache License

private void processFieldEntry(String fieldName, String s, CharArraySet set) throws IOException {
    TokenStream ts = analyzer.tokenStream(fieldName, s);
    CharTermAttribute cattr = ts.getAttribute(CharTermAttribute.class);
    ts.reset();
    while (ts.incrementToken()) {
        set.add(cattr.toString());
    }
    ts.end();
    ts.close();
}

From source file:org.tallison.lucene.search.concordance.charoffsets.ReanalyzingTokenCharOffsetsReader.java

License:Apache License

private int addFieldValue(String fieldName, int currInd, int charBase, String fieldValue,
        TokenCharOffsetRequests requests, RandomAccessCharOffsetContainer results) throws IOException {
    //Analyzer limitAnalyzer = new LimitTokenCountAnalyzer(baseAnalyzer, 10, true);
    TokenStream stream = baseAnalyzer.tokenStream(fieldName, fieldValue);
    stream.reset();

    int defaultInc = 1;

    CharTermAttribute termAtt = stream
            .getAttribute(org.apache.lucene.analysis.tokenattributes.CharTermAttribute.class);
    OffsetAttribute offsetAtt = stream
            .getAttribute(org.apache.lucene.analysis.tokenattributes.OffsetAttribute.class);
    PositionIncrementAttribute incAtt = null;
    if (stream.hasAttribute(org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute.class)) {
        incAtt = stream
                .getAttribute(org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute.class);
    }

    while (stream.incrementToken()) {

        //Do we need this?
        if (incAtt != null && incAtt.getPositionIncrement() == 0) {
            continue;
        }

        currInd += (incAtt != null) ? incAtt.getPositionIncrement() : defaultInc;
        if (requests.contains(currInd)) {
            results.add(currInd, offsetAtt.startOffset() + charBase, offsetAtt.endOffset() + charBase,
                    termAtt.toString());
        }
        if (currInd > requests.getLast()) {
            // TODO: Is there a way to avoid this? Or, is this
            // an imaginary performance hit?
            while (stream.incrementToken()) {
                //NO-OP
            }
            stream.end();
            stream.close();
            return GOT_ALL_REQUESTS;
        }
    }
    stream.end();
    stream.close();
    return currInd;
}

From source file:org.tallison.lucene.search.concordance.charoffsets.SimpleAnalyzerUtil.java

License:Apache License

/**
 * Allows reuse of the terms list; this method calls terms.clear() before
 * adding new terms.
 *
 * @param s        string to analyze
 * @param field    to use in analysis
 * @param analyzer analyzer
 * @param terms    list for reuse
 * @return list of strings
 * @throws java.io.IOException if there's an IOException during analysis
 */
public static List<String> getTermStrings(String s, String field, Analyzer analyzer, List<String> terms)
        throws IOException {
    if (terms == null) {
        terms = new ArrayList<>();
    }
    terms.clear();
    TokenStream stream = analyzer.tokenStream(field, s);
    stream.reset();
    CharTermAttribute termAtt = stream
            .getAttribute(org.apache.lucene.analysis.tokenattributes.CharTermAttribute.class);

    while (stream.incrementToken()) {
        terms.add(termAtt.toString());
    }
    stream.end();
    stream.close();

    return terms;
}
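
A hypothetical caller for the helper above, reusing one list across several strings. The StandardAnalyzer choice, the field name, and the enclosing method that declares throws IOException are assumptions for illustration:

    // Reuse a single list across calls; getTermStrings() clears it before refilling.
    Analyzer analyzer = new StandardAnalyzer();    // assumed analyzer; any Analyzer works
    List<String> terms = new ArrayList<>();
    for (String text : new String[] { "first sample text", "second sample text" }) {
        terms = SimpleAnalyzerUtil.getTermStrings(text, "content", analyzer, terms);
        System.out.println(terms);
    }
    analyzer.close();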

From source file:org.tallison.lucene.search.concordance.TestBigramFilter.java

License:Apache License

@Test
public void testBasicNoUnigrams() throws Exception {
    Analyzer analyzer = ConcordanceTestBase.getBigramAnalyzer(MockTokenFilter.EMPTY_STOPSET, 10, 10, false);

    String s = "a b c d e f g";
    TokenStream tokenStream = analyzer.tokenStream(ConcordanceTestBase.FIELD, s);
    tokenStream.reset();
    CharTermAttribute charTermAttribute = tokenStream.getAttribute(CharTermAttribute.class);
    PositionIncrementAttribute posIncAttribute = tokenStream.getAttribute(PositionIncrementAttribute.class);

    List<String> expected = Arrays.asList(new String[] { "a_b", "b_c", "c_d", "d_e", "e_f", "f_g", });

    List<String> returned = new ArrayList<>();
    while (tokenStream.incrementToken()) {
        String token = charTermAttribute.toString();
        assertEquals(1, posIncAttribute.getPositionIncrement());
        returned.add(token);
    }
    tokenStream.end();
    tokenStream.close();
    assertEquals(expected, returned);
}

From source file:org.tallison.lucene.search.concordance.TestBigramFilter.java

License:Apache License

@Test
public void testIncludeUnigrams() throws Exception {
    List<String> expected = Arrays.asList(
            new String[] { "a", "a_b", "b", "b_c", "c", "c_d", "d", "d_e", "e", "e_f", "f", "f_g", "g", });
    Analyzer analyzer = ConcordanceTestBase.getBigramAnalyzer(MockTokenFilter.EMPTY_STOPSET, 10, 10, true);

    String s = "a b c d e f g";
    TokenStream tokenStream = analyzer.tokenStream("f", s);
    tokenStream.reset();
    CharTermAttribute charTermAttribute = tokenStream.getAttribute(CharTermAttribute.class);
    PositionIncrementAttribute posIncAttribute = tokenStream.getAttribute(PositionIncrementAttribute.class);

    List<String> returned = new ArrayList<>();
    int i = 0;
    while (tokenStream.incrementToken()) {
        String token = charTermAttribute.toString();
        if (i++ % 2 == 0) {
            assertEquals(1, posIncAttribute.getPositionIncrement());
        } else {
            assertEquals(0, posIncAttribute.getPositionIncrement());
        }
        returned.add(token);
    }
    tokenStream.end();
    tokenStream.close();
    assertEquals(expected, returned);
}

From source file:org.tallison.lucene.search.concordance.TestConcordanceSearcher.java

License:Apache License

@Test
public void testCJKNoUnigrams() throws Exception {

    final CharacterRunAutomaton stops = MockTokenFilter.EMPTY_STOPSET;
    int posIncGap = 10;
    final int charOffsetGap = 10;
    Analyzer analyzer = getCJKBigramAnalyzer(false);
    TokenStream ts = analyzer.tokenStream(FIELD, "");
    ts.reset();
    CharTermAttribute charTermAttribute = ts.getAttribute(CharTermAttribute.class);
    PositionIncrementAttribute positionIncrementAttribute = ts.getAttribute(PositionIncrementAttribute.class);

    ts.end();
    ts.close();
    String[] docs = new String[] { "" };

    Directory directory = getDirectory(analyzer, docs);
    IndexReader reader = DirectoryReader.open(directory);
    IndexSearcher indexSearcher = new IndexSearcher(reader);
    ConcordanceSearcher searcher = new ConcordanceSearcher(
            new WindowBuilder(2, 2, analyzer.getOffsetGap(FIELD)));
    Query q = new TermQuery(new Term(FIELD, ""));
    //now test straight and span wrapper
    ConcordanceWindowCollector collector = new ConcordanceWindowCollector(10);
    searcher.search(indexSearcher, FIELD, q, q, analyzer, collector);
    for (ConcordanceWindow w : collector.getWindows()) {
        //System.out.println(w);
    }
    reader.close();
    directory.close();

}