Example usage for org.apache.lucene.analysis.util TokenizerFactory create

List of usage examples for org.apache.lucene.analysis.util TokenizerFactory create

Introduction

This page shows example usages of org.apache.lucene.analysis.util TokenizerFactory create.

Prototype

abstract public Tokenizer create(AttributeFactory factory);

Source Link

Document

Creates a TokenStream of the specified input using the given AttributeFactory

Usage

From source file:org.apache.solr.handler.AnalysisRequestHandlerBase.java

License:Apache License

/**
 * Analyzes the given value using the given Analyzer.
 *
 * @param value   Value to analyze/*  w ww. j ava 2 s . com*/
 * @param context The {@link AnalysisContext analysis context}.
 *
 * @return NamedList containing the tokens produced by analyzing the given value
 */
protected NamedList<? extends Object> analyzeValue(String value, AnalysisContext context) {

    Analyzer analyzer = context.getAnalyzer();

    if (!TokenizerChain.class.isInstance(analyzer)) {

        TokenStream tokenStream = null;
        try {
            tokenStream = analyzer.tokenStream(context.getFieldName(), value);
            NamedList<List<NamedList>> namedList = new NamedList<List<NamedList>>();
            namedList.add(tokenStream.getClass().getName(),
                    convertTokensToNamedLists(analyzeTokenStream(tokenStream), context));
            return namedList;
        } catch (IOException e) {
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
        } finally {
            IOUtils.closeWhileHandlingException(tokenStream);
        }
    }

    TokenizerChain tokenizerChain = (TokenizerChain) analyzer;
    CharFilterFactory[] cfiltfacs = tokenizerChain.getCharFilterFactories();
    TokenizerFactory tfac = tokenizerChain.getTokenizerFactory();
    TokenFilterFactory[] filtfacs = tokenizerChain.getTokenFilterFactories();

    NamedList<Object> namedList = new NamedList<Object>();

    if (cfiltfacs != null) {
        String source = value;
        for (CharFilterFactory cfiltfac : cfiltfacs) {
            Reader reader = new StringReader(source);
            reader = cfiltfac.create(reader);
            source = writeCharStream(namedList, reader);
        }
    }

    TokenStream tokenStream = tfac.create(tokenizerChain.initReader(null, new StringReader(value)));
    List<AttributeSource> tokens = analyzeTokenStream(tokenStream);

    namedList.add(tokenStream.getClass().getName(), convertTokensToNamedLists(tokens, context));

    ListBasedTokenStream listBasedTokenStream = new ListBasedTokenStream(tokens);

    for (TokenFilterFactory tokenFilterFactory : filtfacs) {
        for (final AttributeSource tok : tokens) {
            tok.getAttribute(TokenTrackingAttribute.class).freezeStage();
        }
        tokenStream = tokenFilterFactory.create(listBasedTokenStream);
        tokens = analyzeTokenStream(tokenStream);
        namedList.add(tokenStream.getClass().getName(), convertTokensToNamedLists(tokens, context));
        listBasedTokenStream = new ListBasedTokenStream(tokens);
    }

    return namedList;
}

From source file:org.apache.solr.rest.schema.analysis.FSTSynonymFilterFactory.java

License:Apache License

/**
 * Loads the synonym map using the given resource loader.
 *
 * <p>Synonym entries are analyzed with the configured tokenizer factory when one is set, and
 * with a whitespace tokenizer otherwise; tokens are lower-cased when {@code ignoreCase} is on.
 * A {@code null} or {@code "solr"} format selects {@link SolrSynonymParser},
 * {@code "wordnet"} selects {@link WordnetSynonymParser}, and any other value is passed on
 * unchanged as the parser class name.
 *
 * @param loader resource loader handed to the tokenizer-factory and synonym loading helpers
 * @throws IOException if loading fails, or wrapping a {@link ParseException} raised while
 *                     the synonyms are parsed
 */
@Override
public void inform(ResourceLoader loader) throws IOException {
    final TokenizerFactory tokFactory;
    if (tokenizerFactory == null) {
        tokFactory = null;
    } else {
        tokFactory = loadTokenizerFactory(loader, tokenizerFactory);
    }

    final Analyzer synonymAnalyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
            final Tokenizer source;
            if (tokFactory != null) {
                source = tokFactory.create(reader);
            } else {
                source = new WhitespaceTokenizer(Version.LUCENE_CURRENT, reader);
            }

            TokenStream result = source;
            if (ignoreCase) {
                result = new LowerCaseFilter(Version.LUCENE_CURRENT, source);
            }
            return new TokenStreamComponents(source, result);
        }
    };

    try {
        final String parserClassName;
        if (format == null || format.equals("solr")) {
            parserClassName = SolrSynonymParser.class.getName();
        } else if (format.equals("wordnet")) {
            parserClassName = WordnetSynonymParser.class.getName();
        } else {
            // Unknown format names are handed through as-is.
            parserClassName = format;
        }
        // TODO: expose dedup as a parameter?
        map = loadSynonyms(loader, parserClassName, true, synonymAnalyzer);
    } catch (ParseException e) {
        throw new IOException("Error parsing synonyms file:", e);
    }
}

From source file:pl.litwiniuk.rowicki.modsynonyms.ModificatedFSTSynonymFilterFactory.java

License:Apache License

/**
 * Loads the synonym map using the given resource loader.
 *
 * <p>Synonym entries are analyzed with the configured tokenizer factory when one is set, and
 * with a whitespace tokenizer otherwise; tokens are lower-cased when {@code ignoreCase} is on.
 * Only the {@code "solr"} format (also the default when {@code format} is {@code null}) and
 * the {@code "wordnet"} format are accepted.
 *
 * @param loader resource loader handed to the tokenizer-factory and synonym loading helpers
 * @throws IOException              if loading fails, or wrapping a {@link ParseException}
 *                                  raised while the synonyms are parsed
 * @throws IllegalArgumentException if {@code format} names an unsupported synonym format
 */
@Override
public void inform(ResourceLoader loader) throws IOException {
    final TokenizerFactory tokFactory;
    if (tokenizerFactory == null) {
        tokFactory = null;
    } else {
        tokFactory = loadTokenizerFactory(loader, tokenizerFactory);
    }

    final Analyzer synonymAnalyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
            final Tokenizer source;
            if (tokFactory != null) {
                source = tokFactory.create(reader);
            } else {
                source = new WhitespaceTokenizer(Version.LUCENE_43, reader);
            }

            TokenStream result = source;
            if (ignoreCase) {
                result = new LowerCaseFilter(Version.LUCENE_43, source);
            }
            return new TokenStreamComponents(source, result);
        }
    };

    try {
        final boolean solrFormat = format == null || format.equals("solr");
        if (solrFormat) {
            // TODO: expose dedup as a parameter?
            map = loadSolrSynonyms(loader, true, synonymAnalyzer);
        } else if (format.equals("wordnet")) {
            map = loadWordnetSynonyms(loader, true, synonymAnalyzer);
        } else {
            // TODO: somehow make this more pluggable
            throw new IllegalArgumentException("Unrecognized synonyms format: " + format);
        }
    } catch (ParseException e) {
        throw new IOException("Error parsing synonyms file:", e);
    }
}

From source file:pl.litwiniuk.rowicki.modsynonyms.SlowSynonymFilterFactory.java

License:Apache License

/**
 * Creates a token stream over the given reader by delegating to the tokenizer factory.
 *
 * @param tokFactory factory used to create the tokenizer
 * @param reader     character source passed to the factory
 * @return whatever {@code tokFactory.create(reader)} returns
 */
private static TokenStream loadTokenizer(TokenizerFactory tokFactory, Reader reader) {
    final TokenStream tokenizer = tokFactory.create(reader);
    return tokenizer;
}