Example usage for org.apache.lucene.analysis TokenFilter TokenFilter

List of usage examples for org.apache.lucene.analysis TokenFilter TokenFilter

Introduction

On this page you can find example usage of the org.apache.lucene.analysis TokenFilter constructor, TokenFilter(TokenStream).

Prototype

protected TokenFilter(TokenStream input) 

Source Link

Document

Construct a token stream filtering the given input.

Usage

From source file:com.bigdata.search.ConfigurableAnalyzerFactory.java

License:Open Source License

@Override
public Analyzer getAnalyzer(final String languageCode, boolean filterStopwords) {
    // Resolve the real analyzer first; we only wrap it when debug logging is on.
    final Analyzer unlogged = delegate.getAnalyzer(languageCode, filterStopwords);
    if (!log.isDebugEnabled()) {
        return unlogged;
    }
    // Debug mode: wrap the delegate so every token stream logs the raw input
    // text once, and then each token as it is produced. The counter gives each
    // tokenization request a distinct id so interleaved logs can be correlated.
    return new Analyzer() {
        @Override
        public TokenStream tokenStream(final String fieldName, final Reader reader) {
            final int requestId = loggerIdCounter++;
            // NOTE(review): assumes the caller always passes a StringReader — confirm.
            final String contents = TermCompletionAnalyzer.getStringReaderContents((StringReader) reader);
            log.debug(requestId + " " + languageCode + " **" + contents + "**");
            return new TokenFilter(unlogged.tokenStream(fieldName, reader)) {

                TermAttribute attr = addAttribute(TermAttribute.class);

                @Override
                public boolean incrementToken() throws IOException {
                    final boolean advanced = input.incrementToken();
                    if (advanced) {
                        // Log each token text as the stream is consumed.
                        log.debug(requestId + " |" + attr.term() + "|");
                    }
                    return advanced;
                }
            };
        }
    };
}

From source file:magoffin.matt.ma2.lucene.StandardMatteAnalyzer.java

License:Open Source License

/**
 * Builds the common filter chain: StandardTokenizer -> StandardFilter ->
 * a period-splitting filter -> LowerCaseFilter.
 */
private TokenStream standardFilters(Reader reader) {
    TokenStream stream = new StandardFilter(new StandardTokenizer(reader));

    // StandardTokenizer leaves words containing periods as single tokens;
    // this filter breaks such tokens apart at each '.' and replays the
    // pieces one at a time from a pending queue.
    stream = new TokenFilter(stream) {

        Queue<Token> pending = new LinkedList<Token>();

        @SuppressWarnings("deprecation")
        @Override
        public Token next() throws IOException {
            if (!pending.isEmpty()) {
                return pending.poll();
            }
            final Token token = input.next();
            if (token == null) {
                // Upstream exhausted.
                return null;
            }
            final String text = token.term();
            if (!WORD_WITH_PERIOD.matcher(text).find()) {
                // No period — pass the token through untouched.
                return token;
            }
            int offset = token.startOffset();
            for (final String piece : text.split("\\.")) {
                pending.offer(new Token(piece, offset, offset + piece.length()));
                offset += piece.length() + 1; // +1 skips the '.' separator
            }
            return pending.poll();
        }
    };
    return new LowerCaseFilter(stream);
}

From source file:org.apache.solr.analysis.ThrowingMockTokenFilterFactory.java

License:Apache License

@Override
public TokenStream create(TokenStream input) {
    // Mock filter: throws a freshly constructed instance of the configured
    // exception class on the first token, so tests can exercise error paths.
    return new TokenFilter(input) {
        @Override
        public boolean incrementToken() throws IOException {
            if (input.incrementToken()) {
                try {
                    // Class.newInstance() is deprecated and silently rethrows any
                    // checked exception the no-arg constructor declares; the
                    // Constructor-based form wraps those in a checked
                    // ReflectiveOperationException instead.
                    throw exceptionClass.getDeclaredConstructor().newInstance();
                } catch (ReflectiveOperationException e) {
                    // Reflection failure (no accessible no-arg ctor, etc.) is a
                    // test-setup bug — surface it unchecked, preserving the cause.
                    throw new RuntimeException(e);
                }
            }
            return false;
        }
    };
}