Example usage for org.apache.solr.analysis TokenizerChain getCharFilterFactories

List of usage examples for org.apache.solr.analysis TokenizerChain getCharFilterFactories

Introduction

On this page you can find an example usage for org.apache.solr.analysis TokenizerChain getCharFilterFactories.

Prototype

public CharFilterFactory[] getCharFilterFactories() 

Source Link

Usage

From source file: com.sindicetech.siren.solr.schema.ConciseJsonField.java

License:Open Source License

/**
 * Appends the mandatory SIREn filters for the concise model, i.e.,
 * {@link com.sindicetech.siren.solr.analysis.DatatypeAnalyzerFilterFactory},
 * {@link com.sindicetech.siren.solr.analysis.PathEncodingFilterFactory},
 * {@link com.sindicetech.siren.solr.analysis.PositionAttributeFilterFactory} and
 * {@link com.sindicetech.siren.solr.analysis.SirenPayloadFilterFactory}, to the tokenizer chain.
 *
 * @param analyzer the index analyzer to extend; must be a {@link TokenizerChain}
 * @param datatypes the datatypes to register on the datatype analyzer filter factory
 * @return a new {@link TokenizerChain} with the four SIREn filters appended
 * @throws SolrException if {@code analyzer} is not a {@link TokenizerChain}
 * @see ExtendedJsonField#appendSirenFilters(org.apache.lucene.analysis.Analyzer, java.util.Map)
 */
@Override
protected Analyzer appendSirenFilters(final Analyzer analyzer, final Map<String, Datatype> datatypes) {
    if (!(analyzer instanceof TokenizerChain)) {
        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
                "Invalid index analyzer '" + analyzer.getClass() + "' received");
    }

    final TokenizerChain chain = (TokenizerChain) analyzer;
    // copy the existing token filters, reserving four trailing slots for the SIREn filters
    final TokenFilterFactory[] old = chain.getTokenFilterFactories();
    final TokenFilterFactory[] filterFactories = java.util.Arrays.copyOf(old, old.length + 4);
    // append the datatype analyzer filter factory, pre-registered with the field's datatypes
    final DatatypeAnalyzerFilterFactory datatypeFactory = new DatatypeAnalyzerFilterFactory(
            new HashMap<String, String>());
    datatypeFactory.register(datatypes);
    filterFactories[old.length] = datatypeFactory;
    // append the path encoding filter factory
    filterFactories[old.length + 1] = new PathEncodingFilterFactory(new HashMap<String, String>());
    // append the position attribute filter factory
    filterFactories[old.length + 2] = new PositionAttributeFilterFactory(new HashMap<String, String>());
    // append the siren payload filter factory
    filterFactories[old.length + 3] = new SirenPayloadFilterFactory(new HashMap<String, String>());
    // create a new tokenizer chain with the updated list of filter factories
    return new TokenizerChain(chain.getCharFilterFactories(), chain.getTokenizerFactory(), filterFactories);
}

From source file: com.sindicetech.siren.solr.schema.ExtendedJsonField.java

License:Open Source License

/**
 * Appends the mandatory SIREn filters, i.e.,
 * {@link DatatypeAnalyzerFilterFactory},
 * {@link PositionAttributeFilterFactory} and
 * {@link SirenPayloadFilterFactory}, to the tokenizer chain.
 * <br/>
 * The first time this is called, it will create a
 * {@link com.sindicetech.siren.solr.analysis.DatatypeAnalyzerFilterFactory} with no datatype registered. The datatypes
 * will be loaded and registered later, when {@link #inform(org.apache.lucene.analysis.util.ResourceLoader)} is
 * called.
 * <br/>
 * This is necessary to avoid having to call {@link org.apache.solr.schema.IndexSchema#refreshAnalyzers()}.
 * The {@link org.apache.solr.schema.IndexSchema} will have a reference to the SIREn field's analyzer, and
 * to the {@link com.sindicetech.siren.solr.analysis.DatatypeAnalyzerFilterFactory}. When the datatypes will be loaded,
 * we will access this reference, and register the datatypes.
 *
 * @param analyzer the index analyzer to extend; must be a {@link TokenizerChain}
 * @param datatypes the datatypes to register on the datatype analyzer filter factory
 * @return a new {@link TokenizerChain} with the three SIREn filters appended
 * @throws SolrException if {@code analyzer} is not a {@link TokenizerChain}
 */
protected Analyzer appendSirenFilters(final Analyzer analyzer, final Map<String, Datatype> datatypes) {
    if (!(analyzer instanceof TokenizerChain)) {
        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
                "Invalid index analyzer '" + analyzer.getClass() + "' received");
    }

    final TokenizerChain chain = (TokenizerChain) analyzer;
    // copy the existing token filters, reserving three trailing slots for the SIREn filters
    final TokenFilterFactory[] old = chain.getTokenFilterFactories();
    final TokenFilterFactory[] filterFactories = java.util.Arrays.copyOf(old, old.length + 3);
    // append the datatype analyzer filter factory, pre-registered with the field's datatypes
    final DatatypeAnalyzerFilterFactory datatypeFactory = new DatatypeAnalyzerFilterFactory(
            new HashMap<String, String>());
    datatypeFactory.register(datatypes);
    filterFactories[old.length] = datatypeFactory;
    // append the position attribute filter factory
    filterFactories[old.length + 1] = new PositionAttributeFilterFactory(new HashMap<String, String>());
    // append the siren payload filter factory
    filterFactories[old.length + 2] = new SirenPayloadFilterFactory(new HashMap<String, String>());
    // create a new tokenizer chain with the updated list of filter factories
    return new TokenizerChain(chain.getCharFilterFactories(), chain.getTokenizerFactory(), filterFactories);
}

From source file: org.alfresco.solr.AlfrescoFieldType.java

License:Open Source License

/**
 * Builds a multi-term analyzer mirroring the given query analyzer.
 * <p>
 * A {@code null} input yields {@code null}; a non-{@link TokenizerChain} analyzer cannot be
 * decomposed into factories and falls back to a {@link KeywordAnalyzer}. Otherwise the chain's
 * char filters, tokenizer, and token filters are replayed into a {@link MultiTermChainBuilder}.
 */
private Analyzer constructMultiTermAnalyzer(Analyzer queryAnalyzer) {
    if (queryAnalyzer == null) {
        return null;
    }
    if (!(queryAnalyzer instanceof TokenizerChain)) {
        // Opaque analyzer: fall back to keyword analysis for multi-term queries.
        return new KeywordAnalyzer();
    }

    final TokenizerChain chain = (TokenizerChain) queryAnalyzer;
    final MultiTermChainBuilder builder = new MultiTermChainBuilder();

    // Replay the chain's components in order: char filters, tokenizer, then token filters.
    final CharFilterFactory[] charFilters = chain.getCharFilterFactories();
    if (charFilters != null) {
        for (final CharFilterFactory charFilter : charFilters) {
            builder.add(charFilter);
        }
    }

    builder.add(chain.getTokenizerFactory());

    for (final TokenFilterFactory tokenFilter : chain.getTokenFilterFactories()) {
        builder.add(tokenFilter);
    }

    return builder.build();
}