Example usage for org.apache.solr.analysis TokenizerChain getTokenFilterFactories

List of usage examples for org.apache.solr.analysis TokenizerChain getTokenFilterFactories

Introduction

On this page you can find an example usage for org.apache.solr.analysis TokenizerChain getTokenFilterFactories.

Prototype

public TokenFilterFactory[] getTokenFilterFactories() 

Source Link

Usage

From source file:com.sindicetech.siren.solr.schema.ConciseJsonField.java

License:Open Source License

/**
 * Loads the field configuration once the resource loader has been initialised.
 * <p>
 * If an attributeWildcard parameter was defined, the setting is pushed into every
 * {@link PathEncodingFilterFactory} found in the index analyzer's filter chain.
 *
 * @param resourceLoader The resource loader.
 */
@Override
public void inform(final ResourceLoader resourceLoader) {
    super.inform(resourceLoader);

    if (!this.hasAttributeWildcard) {
        return;
    }
    final TokenizerChain chain = (TokenizerChain) this.getIndexAnalyzer();
    final TokenFilterFactory[] factories = chain.getTokenFilterFactories();
    for (int i = 0; i < factories.length; i++) {
        if (factories[i] instanceof PathEncodingFilterFactory) {
            ((PathEncodingFilterFactory) factories[i]).setAttributeWildcard(this.attributeWildcard);
        }
    }
}

From source file:com.sindicetech.siren.solr.schema.ConciseJsonField.java

License:Open Source License

/**
 * Appends the mandatory SIREn filters for the concise model, i.e.,
 * {@link com.sindicetech.siren.solr.analysis.DatatypeAnalyzerFilterFactory},
 * {@link com.sindicetech.siren.solr.analysis.PathEncodingFilterFactory},
 * {@link com.sindicetech.siren.solr.analysis.PositionAttributeFilterFactory} and
 * {@link com.sindicetech.siren.solr.analysis.SirenPayloadFilterFactory}, to the tokenizer chain.
 *
 * @param analyzer  the index analyzer to extend; must be a {@link TokenizerChain}
 * @param datatypes the datatypes registered on the datatype analyzer filter factory
 * @return a new {@link TokenizerChain} with the four SIREn filters appended
 * @throws SolrException if the analyzer is not a {@link TokenizerChain}
 * @see ExtendedJsonField#appendSirenFilters(org.apache.lucene.analysis.Analyzer, java.util.Map)
 */
@Override
protected Analyzer appendSirenFilters(final Analyzer analyzer, final Map<String, Datatype> datatypes) {
    if (!(analyzer instanceof TokenizerChain)) {
        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
                "Invalid index analyzer '" + analyzer.getClass() + "' received");
    }

    final TokenizerChain chain = (TokenizerChain) analyzer;
    // copy the existing filters, leaving room for the four SIREn filters
    final TokenFilterFactory[] existing = chain.getTokenFilterFactories();
    final TokenFilterFactory[] extended = new TokenFilterFactory[existing.length + 4];
    System.arraycopy(existing, 0, extended, 0, existing.length);

    int next = existing.length;
    // the datatype analyzer filter factory, with the datatypes registered up front
    final DatatypeAnalyzerFilterFactory datatypeFactory = new DatatypeAnalyzerFilterFactory(
            new HashMap<String, String>());
    datatypeFactory.register(datatypes);
    extended[next++] = datatypeFactory;
    extended[next++] = new PathEncodingFilterFactory(new HashMap<String, String>());
    extended[next++] = new PositionAttributeFilterFactory(new HashMap<String, String>());
    extended[next] = new SirenPayloadFilterFactory(new HashMap<String, String>());

    // rebuild the chain with the extended filter list
    return new TokenizerChain(chain.getCharFilterFactories(), chain.getTokenizerFactory(), extended);
}

From source file:com.sindicetech.siren.solr.schema.ExtendedJsonField.java

License:Open Source License

/**
 * Loads the datatype configuration once the resource loader has been initialised,
 * then registers the datatypes on every {@link DatatypeAnalyzerFilterFactory}
 * found in the index analyzer's filter chain.
 *
 * @param resourceLoader The resource loader.
 */
@Override
public void inform(final ResourceLoader resourceLoader) {
    // load the datatypes
    this.loadDatatypeConfig((SolrResourceLoader) resourceLoader);

    // register the datatypes in each DatatypeAnalyzerFilterFactory instance
    final TokenizerChain chain = (TokenizerChain) this.getIndexAnalyzer();
    final TokenFilterFactory[] factories = chain.getTokenFilterFactories();
    for (int i = 0; i < factories.length; i++) {
        if (factories[i] instanceof DatatypeAnalyzerFilterFactory) {
            ((DatatypeAnalyzerFilterFactory) factories[i]).register(this.getDatatypes());
        }
    }
}

From source file:com.sindicetech.siren.solr.schema.ExtendedJsonField.java

License:Open Source License

/**
 * Appends the mandatory SIREn filters, i.e.,
 * {@link DatatypeAnalyzerFilterFactory},
 * {@link PositionAttributeFilterFactory} and
 * {@link SirenPayloadFilterFactory}, to the tokenizer chain.
 * <br/>
 * The first time this is called, it will create a
 * {@link com.sindicetech.siren.solr.analysis.DatatypeAnalyzerFilterFactory} with no datatype registered. The datatypes
 * will be loaded and registered later, when {@link #inform(org.apache.lucene.analysis.util.ResourceLoader)} is
 * called.
 * <br/>
 * This is necessary to avoid having to call {@link org.apache.solr.schema.IndexSchema#refreshAnalyzers()}.
 * The {@link org.apache.solr.schema.IndexSchema} will have a reference to the SIREn field's analyzer, and
 * to the {@link com.sindicetech.siren.solr.analysis.DatatypeAnalyzerFilterFactory}. When the datatypes will be loaded,
 * we will access this reference, and register the datatypes.
 *
 * @param analyzer  the index analyzer to extend; must be a {@link TokenizerChain}
 * @param datatypes the datatypes registered on the datatype analyzer filter factory
 * @return a new {@link TokenizerChain} with the three SIREn filters appended
 * @throws SolrException if the analyzer is not a {@link TokenizerChain}
 */
protected Analyzer appendSirenFilters(final Analyzer analyzer, final Map<String, Datatype> datatypes) {
    if (!(analyzer instanceof TokenizerChain)) {
        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
                "Invalid index analyzer '" + analyzer.getClass() + "' received");
    }

    final TokenizerChain chain = (TokenizerChain) analyzer;
    // copy the existing filters, leaving room for the three SIREn filters
    final TokenFilterFactory[] existing = chain.getTokenFilterFactories();
    final TokenFilterFactory[] extended = new TokenFilterFactory[existing.length + 3];
    System.arraycopy(existing, 0, extended, 0, existing.length);

    int next = existing.length;
    // the datatype analyzer filter factory, with the datatypes registered up front
    final DatatypeAnalyzerFilterFactory datatypeFactory = new DatatypeAnalyzerFilterFactory(
            new HashMap<String, String>());
    datatypeFactory.register(datatypes);
    extended[next++] = datatypeFactory;
    extended[next++] = new PositionAttributeFilterFactory(new HashMap<String, String>());
    extended[next] = new SirenPayloadFilterFactory(new HashMap<String, String>());

    // rebuild the chain with the extended filter list
    return new TokenizerChain(chain.getCharFilterFactories(), chain.getTokenizerFactory(), extended);
}

From source file:com.sindicetech.siren.solr.schema.TestConciseJsonField.java

License:Open Source License

@Test
public void testConciseSirenFieldAnalyzer() throws Exception {
    // resolve the field type of the 'concise' field from the latest schema
    final IndexSchema schema = h.getCore().getLatestSchema();
    final SchemaField json = schema.getField("concise");
    final FieldType tmp = json.getType();

    assertTrue(tmp.getAnalyzer() instanceof TokenizerChain);
    final TokenizerChain ts = (TokenizerChain) tmp.getAnalyzer();
    assertNotNull(ts.getTokenizerFactory());
    assertTrue(ts.getTokenizerFactory() instanceof ConciseJsonTokenizerFactory);

    // the index analyzer must carry exactly the 4 concise-model SIREn filters, in order
    final TokenFilterFactory[] filters = ts.getTokenFilterFactories();
    assertNotNull(filters);
    assertEquals(4, filters.length);
    assertTrue(filters[0] instanceof DatatypeAnalyzerFilterFactory);
    assertTrue(filters[1] instanceof PathEncodingFilterFactory);
    assertTrue(filters[2] instanceof PositionAttributeFilterFactory);
    assertTrue(filters[3] instanceof SirenPayloadFilterFactory);
}

From source file:com.sindicetech.siren.solr.schema.TestExtendedJsonField.java

License:Open Source License

@Test
public void testSirenFieldAnalyzer() throws Exception {
    // resolve the field type of the JSON field from the latest schema
    final IndexSchema schema = h.getCore().getLatestSchema();
    final SchemaField ntriple = schema.getField(JSON_FIELD);
    final FieldType tmp = ntriple.getType();

    assertTrue(tmp.getAnalyzer() instanceof TokenizerChain);
    final TokenizerChain ts = (TokenizerChain) tmp.getAnalyzer();
    assertNotNull(ts.getTokenizerFactory());
    assertTrue(ts.getTokenizerFactory() instanceof ExtendedJsonTokenizerFactory);

    // the index analyzer must carry exactly the 3 SIREn filters, in order
    final TokenFilterFactory[] filters = ts.getTokenFilterFactories();
    assertNotNull(filters);
    assertEquals(3, filters.length);
    assertTrue(filters[0] instanceof DatatypeAnalyzerFilterFactory);
    assertTrue(filters[1] instanceof PositionAttributeFilterFactory);
    assertTrue(filters[2] instanceof SirenPayloadFilterFactory);
}

From source file:com.sindicetech.siren.solr.schema.TestExtendedJsonField.java

License:Open Source License

@Test
public void testSirenFieldDatatypeAnalyzer() throws Exception {
    // resolve the field type of the JSON field from the latest schema
    final IndexSchema schema = h.getCore().getLatestSchema();
    final SchemaField ntriple = schema.getField(JSON_FIELD);
    final FieldType tmp = ntriple.getType();

    final TokenizerChain indexChain = (TokenizerChain) tmp.getAnalyzer();

    // the first filter must be the datatype analyzer filter factory with 9 registered datatypes
    assertTrue(indexChain.getTokenFilterFactories()[0] instanceof DatatypeAnalyzerFilterFactory);
    final DatatypeAnalyzerFilterFactory f = (DatatypeAnalyzerFilterFactory) indexChain.getTokenFilterFactories()[0];
    assertNotNull(f.getDatatypeAnalyzers());
    assertEquals(9, f.getDatatypeAnalyzers().size());

    // http://json.org/field is analyzed with a whitespace tokenizer
    assertNotNull(f.getDatatypeAnalyzers().get("http://json.org/field"));
    final TokenizerChain fieldChain = (TokenizerChain) f.getDatatypeAnalyzers().get("http://json.org/field");
    assertNotNull(fieldChain.getTokenizerFactory());
    assertTrue(fieldChain.getTokenizerFactory() instanceof WhitespaceTokenizerFactory);

    // xsd:string is analyzed with a UAX29 URL/email tokenizer
    assertNotNull(f.getDatatypeAnalyzers().get("http://www.w3.org/2001/XMLSchema#string"));
    final TokenizerChain stringChain = (TokenizerChain) f
            .getDatatypeAnalyzers().get("http://www.w3.org/2001/XMLSchema#string");
    assertNotNull(stringChain.getTokenizerFactory());
    assertTrue(stringChain.getTokenizerFactory() instanceof UAX29URLEmailTokenizerFactory);

    // xsd:int is analyzed with an int numeric analyzer (precision step 8, 32-bit int)
    assertNotNull(f.getDatatypeAnalyzers().get("http://www.w3.org/2001/XMLSchema#int"));
    assertTrue(
            f.getDatatypeAnalyzers().get("http://www.w3.org/2001/XMLSchema#int") instanceof IntNumericAnalyzer);
    final IntNumericAnalyzer a = (IntNumericAnalyzer) f.getDatatypeAnalyzers()
            .get("http://www.w3.org/2001/XMLSchema#int");
    assertEquals(8, a.getPrecisionStep());
    assertEquals(32, a.getNumericParser().getValueSize());
    assertEquals(NumericType.INT, a.getNumericParser().getNumericType());
}

From source file:NomusSolrPlugins.NomusDismaxQParserPlugin.java

License:Apache License

/**
 * Returns the first {@link StopFilterFactory} found in the query tokenizer chain
 * of the given field, or {@code null} if the field has no query tokenizer chain
 * or its chain contains no stop filter.
 */
public static StopFilterFactory getQueryStopFilter(QParser parser, String fieldName) {
    final TokenizerChain chain = getQueryTokenizerChain(parser, fieldName);
    if (chain == null) {
        return null;
    }
    for (final TokenFilterFactory factory : chain.getTokenFilterFactories()) {
        if (factory instanceof StopFilterFactory) {
            return (StopFilterFactory) factory;
        }
    }
    return null;
}

From source file:NomusSolrPlugins.NomusDismaxQParserPlugin.java

License:Apache License

/**
 * Produces a token stream for the field, optionally with the query-side stop
 * filter removed. The (possibly rewritten) analyzer chosen for a field is cached
 * in {@code map} so the rewrite happens at most once per field name.
 */
public TokenStream tokenStream(String fieldName, Reader reader) {
    if (!removeStopFilter) {
        return queryAnalyzer.tokenStream(fieldName, reader);
    }

    // reuse the analyzer decided on a previous call for this field
    final Analyzer cached = map.get(fieldName);
    if (cached != null) {
        return cached.tokenStream(fieldName, reader);
    }

    final FieldType fieldType = parser.getReq().getSchema().getFieldType(fieldName);
    final Analyzer fieldQueryAnalyzer = fieldType.getQueryAnalyzer();
    if (!(fieldQueryAnalyzer instanceof TokenizerChain)) {
        // not a chain: nothing to rewrite, cache as-is
        map.put(fieldName, fieldQueryAnalyzer);
        return fieldQueryAnalyzer.tokenStream(fieldName, reader);
    }
    final TokenizerChain queryChain = (TokenizerChain) fieldQueryAnalyzer;

    final Analyzer fieldIndexAnalyzer = fieldType.getAnalyzer();
    if (fieldIndexAnalyzer == fieldQueryAnalyzer || !(fieldIndexAnalyzer instanceof TokenizerChain)) {
        map.put(fieldName, fieldQueryAnalyzer);
        return fieldQueryAnalyzer.tokenStream(fieldName, reader);
    }
    final TokenizerChain indexChain = (TokenizerChain) fieldIndexAnalyzer;

    // if the index analyzer itself contains a stop filter, keep the query analyzer untouched
    for (final TokenFilterFactory factory : indexChain.getTokenFilterFactories()) {
        if (factory instanceof StopFilterFactory) {
            map.put(fieldName, fieldQueryAnalyzer);
            return fieldQueryAnalyzer.tokenStream(fieldName, reader);
        }
    }

    // locate the first stop filter in the query analyzer's chain
    final TokenFilterFactory[] queryFilters = queryChain.getTokenFilterFactories();
    int stopIndex = -1;
    for (int i = 0; i < queryFilters.length && stopIndex == -1; i++) {
        if (queryFilters[i] instanceof StopFilterFactory) {
            stopIndex = i;
        }
    }

    if (stopIndex == -1) {
        // no stop filter to remove
        map.put(fieldName, fieldQueryAnalyzer);
        return fieldQueryAnalyzer.tokenStream(fieldName, reader);
    }

    // rebuild the filter list without the stop filter
    final TokenFilterFactory[] remaining = new TokenFilterFactory[queryFilters.length - 1];
    int target = 0;
    for (int i = 0; i < queryFilters.length; i++) {
        if (i != stopIndex) {
            remaining[target++] = queryFilters[i];
        }
    }

    final TokenizerChain rebuilt = new TokenizerChain(queryChain.getTokenizerFactory(), remaining);
    rebuilt.setPositionIncrementGap(queryChain.getPositionIncrementGap(fieldName));

    map.put(fieldName, rebuilt);
    return rebuilt.tokenStream(fieldName, reader);
}

From source file:org.alfresco.solr.AlfrescoFieldType.java

License:Open Source License

/**
 * Builds the multi-term analyzer from a query analyzer by replaying its char
 * filters, tokenizer and token filters through a {@link MultiTermChainBuilder}.
 * Returns {@code null} for a null input; falls back to a {@link KeywordAnalyzer}
 * when the query analyzer is not a {@link TokenizerChain}.
 */
private Analyzer constructMultiTermAnalyzer(Analyzer queryAnalyzer) {
    if (queryAnalyzer == null) {
        return null;
    }
    if (!(queryAnalyzer instanceof TokenizerChain)) {
        return new KeywordAnalyzer();
    }

    final TokenizerChain chain = (TokenizerChain) queryAnalyzer;
    final MultiTermChainBuilder builder = new MultiTermChainBuilder();

    final CharFilterFactory[] charFilterFactories = chain.getCharFilterFactories();
    if (charFilterFactories != null) {
        for (final CharFilterFactory factory : charFilterFactories) {
            builder.add(factory);
        }
    }

    builder.add(chain.getTokenizerFactory());

    for (final TokenFilterFactory factory : chain.getTokenFilterFactories()) {
        builder.add(factory);
    }

    return builder.build();
}