Example usage for org.apache.lucene.analysis.util TokenFilterFactory create

List of usage examples for org.apache.lucene.analysis.util TokenFilterFactory create

Introduction

In this page you can find the example usage for org.apache.lucene.analysis.util TokenFilterFactory create.

Prototype

public abstract TokenStream create(TokenStream input);

Source Link

Document

Transforms the specified input TokenStream and returns the resulting, possibly wrapped, TokenStream.

Usage

From source file:com.qwazr.search.analysis.CustomAnalyzer.java

License:Apache License

@Override
protected TokenStreamComponents createComponents(String fieldName) {
    // Build the analysis chain: the tokenizer produces the raw stream, then
    // each configured filter wraps the previous stream in declaration order.
    final Tokenizer source = tokenizerFactory.create();
    if (tokenFilterFactories == null)
        return new TokenStreamComponents(source);
    TokenStream result = source;
    // The original re-checked tokenFilterFactories != null here; that check is
    // redundant after the early return above and has been removed.
    for (TokenFilterFactory tokenFilterFactory : tokenFilterFactories)
        result = tokenFilterFactory.create(result);
    return new TokenStreamComponents(source, result);
}

From source file:org.apache.jackrabbit.oak.plugins.index.lucene.util.TokenizerChain.java

License:Apache License

@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
    // Create the tokenizer for this reader, then layer each configured
    // token filter on top of it, in order.
    final Tokenizer source = tokenizer.create(reader);
    TokenStream stream = source;
    for (final TokenFilterFactory factory : filters) {
        stream = factory.create(stream);
    }
    return new TokenStreamComponents(source, stream);
}

From source file:org.apache.solr.analysis.TokenizerChain.java

License:Apache License

@Override
protected TokenStreamComponents createComponents(String fieldName, Reader aReader) {
    // Tokenize the reader, then successively wrap the stream with every
    // filter factory in the chain.
    final Tokenizer head = tokenizer.create(aReader);
    TokenStream chained = head;
    for (final TokenFilterFactory filterFactory : filters) {
        chained = filterFactory.create(chained);
    }
    return new TokenStreamComponents(head, chained);
}

From source file:org.apache.solr.handler.AnalysisRequestHandlerBase.java

License:Apache License

/**
 * Analyzes the given value using the given Analyzer.
 *
 * <p>If the analyzer is not a {@link TokenizerChain}, the value is analyzed
 * in one shot and a single entry (keyed by the token stream's class name) is
 * returned. For a TokenizerChain the pipeline is unrolled stage by stage —
 * char filters, tokenizer, then each token filter — and one entry is added
 * per stage so callers can inspect the intermediate output of every step.
 *
 * @param value   Value to analyze
 * @param context The {@link AnalysisContext analysis context}.
 *
 * @return NamedList containing the tokens produced by analyzing the given value
 */
protected NamedList<? extends Object> analyzeValue(String value, AnalysisContext context) {

    Analyzer analyzer = context.getAnalyzer();

    // Non-chain analyzers are opaque: analyze end-to-end and report one stage.
    if (!TokenizerChain.class.isInstance(analyzer)) {

        TokenStream tokenStream = null;
        try {
            tokenStream = analyzer.tokenStream(context.getFieldName(), value);
            NamedList<List<NamedList>> namedList = new NamedList<List<NamedList>>();
            namedList.add(tokenStream.getClass().getName(),
                    convertTokensToNamedLists(analyzeTokenStream(tokenStream), context));
            return namedList;
        } catch (IOException e) {
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
        } finally {
            // Always release the stream, even on failure.
            IOUtils.closeWhileHandlingException(tokenStream);
        }
    }

    // TokenizerChain: decompose into its factories so each stage can be run
    // and reported individually.
    TokenizerChain tokenizerChain = (TokenizerChain) analyzer;
    CharFilterFactory[] cfiltfacs = tokenizerChain.getCharFilterFactories();
    TokenizerFactory tfac = tokenizerChain.getTokenizerFactory();
    TokenFilterFactory[] filtfacs = tokenizerChain.getTokenFilterFactories();

    NamedList<Object> namedList = new NamedList<Object>();

    // Stage 1: run the char filters in sequence, capturing the text each one
    // emits. `source` carries the progressively filtered text.
    if (cfiltfacs != null) {
        String source = value;
        for (CharFilterFactory cfiltfac : cfiltfacs) {
            Reader reader = new StringReader(source);
            reader = cfiltfac.create(reader);
            source = writeCharStream(namedList, reader);
        }
    }

    // Stage 2: tokenize the (initReader-wrapped) original value.
    // NOTE(review): this tokenizes `value`, not the char-filtered `source`
    // from above — presumably initReader re-applies the char filters; confirm.
    TokenStream tokenStream = tfac.create(tokenizerChain.initReader(null, new StringReader(value)));
    List<AttributeSource> tokens = analyzeTokenStream(tokenStream);

    namedList.add(tokenStream.getClass().getName(), convertTokensToNamedLists(tokens, context));

    // Replay the captured tokens into each filter rather than re-running the
    // whole chain from scratch for every stage.
    ListBasedTokenStream listBasedTokenStream = new ListBasedTokenStream(tokens);

    // Stage 3: apply each token filter to the previous stage's token list,
    // recording one NamedList entry per filter.
    for (TokenFilterFactory tokenFilterFactory : filtfacs) {
        // Snapshot the current stage on every token so position tracking
        // survives into the next filter's output.
        for (final AttributeSource tok : tokens) {
            tok.getAttribute(TokenTrackingAttribute.class).freezeStage();
        }
        tokenStream = tokenFilterFactory.create(listBasedTokenStream);
        tokens = analyzeTokenStream(tokenStream);
        namedList.add(tokenStream.getClass().getName(), convertTokensToNamedLists(tokens, context));
        // Feed this stage's output into the next filter.
        listBasedTokenStream = new ListBasedTokenStream(tokens);
    }

    return namedList;
}

From source file:org.elasticsearch.plugin.ingest.kuromoji_part_of_speech_extract.KuromojiPartOfSpeechExtractProcessor.java

License:Apache License

/**
 * Builds an Analyzer that tokenizes Japanese text (NORMAL mode, no compound
 * splitting) and keeps only tokens whose part-of-speech tag is in the given set.
 *
 * @param posTags part-of-speech tags to keep
 * @return an anonymous Analyzer wiring the tokenizer and the keep-filter
 */
private Analyzer loadAnalyzer(List<String> posTags) {
    Map<String, String> tokenizerOptions = new HashMap<>();
    tokenizerOptions.put("mode", JapaneseTokenizer.Mode.NORMAL.toString());
    TokenizerFactory tokenizerFactory = new JapaneseTokenizerFactory(tokenizerOptions);
    TokenFilterFactory[] tokenFilterFactories = new TokenFilterFactory[1];
    // Fix: use the posTags parameter instead of the field this.posTags — the
    // parameter was previously ignored, leaving it dead in the signature.
    tokenFilterFactories[0] = new JapanesePartOfSpeechKeepFilterFactory(new HashMap<>(), posTags);

    return new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String s) {
            // Tokenize, then wrap the stream with each configured filter in order.
            Tokenizer tokenizer = tokenizerFactory.create();
            TokenStream tokenStream = tokenizer;
            for (TokenFilterFactory tokenFilterFactory : tokenFilterFactories) {
                tokenStream = tokenFilterFactory.create(tokenStream);
            }
            return new TokenStreamComponents(tokenizer, tokenStream);
        }
    };
}

From source file:org.hibernate.search.engine.impl.TokenizerChain.java

License:LGPL

@Override
protected TokenStreamComponents createComponents(String fieldName) {
    // Start from the bare tokenizer output and fold every filter factory
    // over it to produce the final stream.
    final Tokenizer source = tokenizer.create();
    TokenStream wrapped = source;
    for (final TokenFilterFactory filterFactory : filters) {
        wrapped = filterFactory.create(wrapped);
    }
    return new TokenStreamComponents(source, wrapped);
}

From source file:org.tallison.gramreaper.ingest.schema.MyTokenizerChain.java

License:Apache License

@Override
protected TokenStreamComponents createComponents(String fieldName) {
    // Create the tokenizer, then apply the filter chain one factory at a time.
    final Tokenizer base = tokenizer.create();
    TokenStream current = base;
    for (final TokenFilterFactory f : filters) {
        current = f.create(current);
    }
    return new TokenStreamComponents(base, current);
}