Example usage for org.apache.solr.analysis TokenizerChain getTokenizerFactory

List of usage examples for org.apache.solr.analysis TokenizerChain getTokenizerFactory

Introduction

On this page you can find an example of usage for org.apache.solr.analysis TokenizerChain getTokenizerFactory.

Prototype

public TokenizerFactory getTokenizerFactory() 

Source Link

Usage

From source file:com.sindicetech.siren.solr.schema.ConciseJsonField.java

License:Open Source License

/**
 * Adds the four SIREn filters required by the concise model to the end of the
 * tokenizer chain:
 * {@link com.sindicetech.siren.solr.analysis.DatatypeAnalyzerFilterFactory},
 * {@link com.sindicetech.siren.solr.analysis.PathEncodingFilterFactory},
 * {@link com.sindicetech.siren.solr.analysis.PositionAttributeFilterFactory} and
 * {@link com.sindicetech.siren.solr.analysis.SirenPayloadFilterFactory}.
 *
 * @see ExtendedJsonField#appendSirenFilters(org.apache.lucene.analysis.Analyzer, java.util.Map)
 */
@Override
protected Analyzer appendSirenFilters(final Analyzer analyzer, final Map<String, Datatype> datatypes) {
    if (!(analyzer instanceof TokenizerChain)) {
        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
                "Invalid index analyzer '" + analyzer.getClass() + "' received");
    }

    final TokenizerChain chain = (TokenizerChain) analyzer;
    final TokenFilterFactory[] existing = chain.getTokenFilterFactories();

    // Grow the filter array by four slots and carry over the current filters.
    final TokenFilterFactory[] extended = new TokenFilterFactory[existing.length + 4];
    int next = 0;
    for (final TokenFilterFactory factory : existing) {
        extended[next++] = factory;
    }

    // Datatype analyzer filter, with the given datatypes registered immediately.
    final DatatypeAnalyzerFilterFactory datatypeFactory = new DatatypeAnalyzerFilterFactory(
            new HashMap<String, String>());
    datatypeFactory.register(datatypes);
    extended[next++] = datatypeFactory;
    // Remaining mandatory SIREn filters, in chain order.
    extended[next++] = new PathEncodingFilterFactory(new HashMap<String, String>());
    extended[next++] = new PositionAttributeFilterFactory(new HashMap<String, String>());
    extended[next] = new SirenPayloadFilterFactory(new HashMap<String, String>());

    // Rebuild the chain, keeping the original char filters and tokenizer.
    return new TokenizerChain(chain.getCharFilterFactories(), chain.getTokenizerFactory(), extended);
}

From source file:com.sindicetech.siren.solr.schema.ExtendedJsonField.java

License:Open Source License

/**
 * Append the mandatory SIREn filters, i.e.,
 * {@link DatatypeAnalyzerFilterFactory},
 * {@link PositionAttributeFilterFactory} and
 * {@link SirenPayloadFilterFactory}, to the tokenizer chain.
 * <br/>/*from  w ww .  j ava  2  s.  c o  m*/
 * The first time this is called, it will create a
 * {@link com.sindicetech.siren.solr.analysis.DatatypeAnalyzerFilterFactory} with no datatype registered. The datatypes
 * will be loaded and registered later, when {@link #inform(org.apache.lucene.analysis.util.ResourceLoader)} is
 * called.
 * <br/>
 * This is necessary to avoid having to call {@link org.apache.solr.schema.IndexSchema#refreshAnalyzers()}.
 * The {@link org.apache.solr.schema.IndexSchema} will have a reference to the SIREn field's analyzer, and
 * to the {@link com.sindicetech.siren.solr.analysis.DatatypeAnalyzerFilterFactory}. When the datatypes will be loaded,
 * we will access this reference, and register the datatypes.
 */
protected Analyzer appendSirenFilters(final Analyzer analyzer, final Map<String, Datatype> datatypes) {
    if (!(analyzer instanceof TokenizerChain)) {
        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
                "Invalid index analyzer '" + analyzer.getClass() + "' received");
    }

    final TokenizerChain chain = (TokenizerChain) analyzer;
    // copy the existing list of token filters
    final TokenFilterFactory[] old = chain.getTokenFilterFactories();
    final TokenFilterFactory[] filterFactories = new TokenFilterFactory[old.length + 3];
    System.arraycopy(old, 0, filterFactories, 0, old.length);
    // append the datatype analyzer filter factory
    final DatatypeAnalyzerFilterFactory datatypeFactory = new DatatypeAnalyzerFilterFactory(
            new HashMap<String, String>());
    datatypeFactory.register(datatypes);
    filterFactories[old.length] = datatypeFactory;
    // append the position attribute filter factory
    filterFactories[old.length + 1] = new PositionAttributeFilterFactory(new HashMap<String, String>());
    // append the siren payload filter factory
    filterFactories[old.length + 2] = new SirenPayloadFilterFactory(new HashMap<String, String>());
    // create a new tokenizer chain with the updated list of filter factories
    return new TokenizerChain(chain.getCharFilterFactories(), chain.getTokenizerFactory(), filterFactories);
}

From source file:com.sindicetech.siren.solr.schema.TestConciseJsonField.java

License:Open Source License

@Test
public void testConciseSirenFieldAnalyzer() throws Exception {
    // Resolve the field type of the "concise" field from the live schema.
    final IndexSchema schema = h.getCore().getLatestSchema();
    final FieldType fieldType = schema.getField("concise").getType();

    // The index analyzer must be a tokenizer chain rooted at the concise JSON tokenizer.
    assertTrue(fieldType.getAnalyzer() instanceof TokenizerChain);
    final TokenizerChain chain = (TokenizerChain) fieldType.getAnalyzer();
    assertNotNull(chain.getTokenizerFactory());
    assertTrue(chain.getTokenizerFactory() instanceof ConciseJsonTokenizerFactory);

    // The four mandatory SIREn filters must be appended, in order.
    assertNotNull(chain.getTokenFilterFactories());
    assertEquals(4, chain.getTokenFilterFactories().length);
    assertTrue(chain.getTokenFilterFactories()[0] instanceof DatatypeAnalyzerFilterFactory);
    assertTrue(chain.getTokenFilterFactories()[1] instanceof PathEncodingFilterFactory);
    assertTrue(chain.getTokenFilterFactories()[2] instanceof PositionAttributeFilterFactory);
    assertTrue(chain.getTokenFilterFactories()[3] instanceof SirenPayloadFilterFactory);
}

From source file:com.sindicetech.siren.solr.schema.TestExtendedJsonField.java

License:Open Source License

@Test
public void testSirenFieldAnalyzer() throws Exception {
    // Resolve the field type of the JSON field from the live schema.
    final IndexSchema schema = h.getCore().getLatestSchema();
    final FieldType fieldType = schema.getField(JSON_FIELD).getType();

    // The index analyzer must be a tokenizer chain rooted at the extended JSON tokenizer.
    assertTrue(fieldType.getAnalyzer() instanceof TokenizerChain);
    final TokenizerChain chain = (TokenizerChain) fieldType.getAnalyzer();
    assertNotNull(chain.getTokenizerFactory());
    assertTrue(chain.getTokenizerFactory() instanceof ExtendedJsonTokenizerFactory);

    // The three mandatory SIREn filters must be appended, in order.
    assertNotNull(chain.getTokenFilterFactories());
    assertEquals(3, chain.getTokenFilterFactories().length);
    assertTrue(chain.getTokenFilterFactories()[0] instanceof DatatypeAnalyzerFilterFactory);
    assertTrue(chain.getTokenFilterFactories()[1] instanceof PositionAttributeFilterFactory);
    assertTrue(chain.getTokenFilterFactories()[2] instanceof SirenPayloadFilterFactory);
}

From source file:com.sindicetech.siren.solr.schema.TestExtendedJsonField.java

License:Open Source License

@Test
public void testSirenFieldDatatypeAnalyzer() throws Exception {
    // Resolve the field type of the JSON field from the live schema.
    final IndexSchema schema = h.getCore().getLatestSchema();
    final FieldType fieldType = schema.getField(JSON_FIELD).getType();

    TokenizerChain chain = (TokenizerChain) fieldType.getAnalyzer();

    // The first filter of the index chain is the datatype analyzer filter,
    // which holds one sub-analyzer per registered datatype (9 for this field).
    assertTrue(chain.getTokenFilterFactories()[0] instanceof DatatypeAnalyzerFilterFactory);
    final DatatypeAnalyzerFilterFactory datatypeFactory = (DatatypeAnalyzerFilterFactory) chain
            .getTokenFilterFactories()[0];
    assertNotNull(datatypeFactory.getDatatypeAnalyzers());
    assertEquals(9, datatypeFactory.getDatatypeAnalyzers().size());

    // http://json.org/field -> whitespace tokenizer.
    assertNotNull(datatypeFactory.getDatatypeAnalyzers().get("http://json.org/field"));
    chain = (TokenizerChain) datatypeFactory.getDatatypeAnalyzers().get("http://json.org/field");
    assertNotNull(chain.getTokenizerFactory());
    assertTrue(chain.getTokenizerFactory() instanceof WhitespaceTokenizerFactory);

    // xsd:string -> UAX29 URL/email tokenizer.
    assertNotNull(datatypeFactory.getDatatypeAnalyzers().get("http://www.w3.org/2001/XMLSchema#string"));
    chain = (TokenizerChain) datatypeFactory.getDatatypeAnalyzers().get("http://www.w3.org/2001/XMLSchema#string");
    assertNotNull(chain.getTokenizerFactory());
    assertTrue(chain.getTokenizerFactory() instanceof UAX29URLEmailTokenizerFactory);

    // xsd:int -> 32-bit int numeric analyzer with precision step 8.
    assertNotNull(datatypeFactory.getDatatypeAnalyzers().get("http://www.w3.org/2001/XMLSchema#int"));
    assertTrue(
            datatypeFactory.getDatatypeAnalyzers().get("http://www.w3.org/2001/XMLSchema#int") instanceof IntNumericAnalyzer);
    final IntNumericAnalyzer intAnalyzer = (IntNumericAnalyzer) datatypeFactory.getDatatypeAnalyzers()
            .get("http://www.w3.org/2001/XMLSchema#int");
    assertEquals(8, intAnalyzer.getPrecisionStep());
    assertEquals(32, intAnalyzer.getNumericParser().getValueSize());
    assertEquals(NumericType.INT, intAnalyzer.getNumericParser().getNumericType());
}

From source file:NomusSolrPlugins.NomusDismaxQParserPlugin.java

License:Apache License

/**
 * Returns a token stream for the given field, using a per-field cached analyzer.
 * When {@code removeStopFilter} is enabled, the query analyzer is rewritten to
 * drop its first {@link StopFilterFactory} — but only if the index analyzer has
 * no stop filter of its own (otherwise index- and query-time analysis would
 * disagree on stopwords). The rewritten analyzer is cached in {@code map}.
 *
 * @param fieldName the schema field whose analyzer is used
 * @param reader    the character source to tokenize
 * @return a token stream over {@code reader}
 */
public TokenStream tokenStream(String fieldName, Reader reader) {
    // Fast path: stop-filter removal disabled, use the configured query analyzer.
    if (!removeStopFilter) {
        return queryAnalyzer.tokenStream(fieldName, reader);
    }

    // Per-field cache of the (possibly rewritten) analyzer.
    Analyzer a = map.get(fieldName);
    if (a != null) {
        return a.tokenStream(fieldName, reader);
    }

    FieldType ft = parser.getReq().getSchema().getFieldType(fieldName);
    Analyzer qa = ft.getQueryAnalyzer();
    // Can only rewrite TokenizerChain-based analyzers.
    if (!(qa instanceof TokenizerChain)) {
        map.put(fieldName, qa);
        return qa.tokenStream(fieldName, reader);
    }
    TokenizerChain tcq = (TokenizerChain) qa;
    Analyzer ia = ft.getAnalyzer();
    if (ia == qa || !(ia instanceof TokenizerChain)) {
        map.put(fieldName, qa);
        return qa.tokenStream(fieldName, reader);
    }
    TokenizerChain tci = (TokenizerChain) ia;

    // If the index analyzer itself uses a stop filter, do not rewrite.
    for (TokenFilterFactory tf : tci.getTokenFilterFactories()) {
        if (tf instanceof StopFilterFactory) {
            map.put(fieldName, qa);
            return qa.tokenStream(fieldName, reader);
        }
    }

    // Locate the first stop filter in the query analyzer chain.
    int stopIdx = -1;
    TokenFilterFactory[] facs = tcq.getTokenFilterFactories();
    for (int i = 0; i < facs.length; i++) {
        if (facs[i] instanceof StopFilterFactory) {
            stopIdx = i;
            break;
        }
    }

    if (stopIdx == -1) {
        // No stop filter to remove; cache the analyzer unchanged.
        map.put(fieldName, qa);
        return qa.tokenStream(fieldName, reader);
    }

    // Copy every filter except the stop filter.
    TokenFilterFactory[] newtf = new TokenFilterFactory[facs.length - 1];
    for (int i = 0, j = 0; i < facs.length; i++) {
        if (i == stopIdx)
            continue;
        newtf[j++] = facs[i];
    }

    // BUGFIX: carry the query analyzer's char filter factories into the new
    // chain — the previous two-argument constructor silently dropped them,
    // changing analysis for fields configured with char filters.
    TokenizerChain newa = new TokenizerChain(tcq.getCharFilterFactories(), tcq.getTokenizerFactory(), newtf);
    newa.setPositionIncrementGap(tcq.getPositionIncrementGap(fieldName));

    map.put(fieldName, newa);
    return newa.tokenStream(fieldName, reader);
}

From source file:org.alfresco.solr.AlfrescoFieldType.java

License:Open Source License

/**
 * Builds the multi-term variant of the given query analyzer. Non-chain
 * analyzers fall back to a {@link KeywordAnalyzer}; for a
 * {@link TokenizerChain}, every char filter, the tokenizer and every token
 * filter are fed through a {@link MultiTermChainBuilder}.
 */
private Analyzer constructMultiTermAnalyzer(Analyzer queryAnalyzer) {
    if (queryAnalyzer == null) {
        return null;
    }
    if (!(queryAnalyzer instanceof TokenizerChain)) {
        // Not a chain we can decompose: analyze multi-terms as single keywords.
        return new KeywordAnalyzer();
    }

    final TokenizerChain chain = (TokenizerChain) queryAnalyzer;
    final MultiTermChainBuilder builder = new MultiTermChainBuilder();

    // Char filters may be absent (null array) on older chains.
    final CharFilterFactory[] charFilters = chain.getCharFilterFactories();
    if (charFilters != null) {
        for (int i = 0; i < charFilters.length; i++) {
            builder.add(charFilters[i]);
        }
    }

    builder.add(chain.getTokenizerFactory());

    final TokenFilterFactory[] tokenFilters = chain.getTokenFilterFactories();
    for (int i = 0; i < tokenFilters.length; i++) {
        builder.add(tokenFilters[i]);
    }

    return builder.build();
}

From source file:org.sindice.siren.solr.schema.TestSirenField.java

License:Open Source License

@Test
public void testSirenFieldTopLevelAnalyzer() throws Exception {
    // Resolve the field type of the "ntriple" field from the schema.
    final IndexSchema schema = h.getCore().getSchema();
    final FieldType fieldType = schema.getField("ntriple").getType();

    // Index-time analyzer: tuple tokenizer followed by 3 filters.
    assertTrue(fieldType.getAnalyzer() instanceof TokenizerChain);
    TokenizerChain chain = (TokenizerChain) fieldType.getAnalyzer();
    assertNotNull(chain.getTokenizerFactory());
    assertTrue(chain.getTokenizerFactory() instanceof TupleTokenizerFactory);
    assertNotNull(chain.getTokenFilterFactories());
    assertEquals(3, chain.getTokenFilterFactories().length);

    // Query-time (keyword) analyzer: whitespace tokenizer followed by 6 filters.
    assertTrue(fieldType.getQueryAnalyzer() instanceof TokenizerChain);
    chain = (TokenizerChain) fieldType.getQueryAnalyzer();
    assertNotNull(chain.getTokenizerFactory());
    assertTrue(chain.getTokenizerFactory() instanceof WhitespaceTokenizerFactory);
    assertNotNull(chain.getTokenFilterFactories());
    assertEquals(6, chain.getTokenFilterFactories().length);
}

From source file:org.sindice.siren.solr.schema.TestSirenField.java

License:Open Source License

@Test
public void testSirenFieldDatatypeAnalyzer() throws Exception {
    // Resolve the field type of the "ntriple" field from the schema.
    final IndexSchema schema = h.getCore().getSchema();
    final FieldType fieldType = schema.getField("ntriple").getType();

    TokenizerChain chain = (TokenizerChain) fieldType.getAnalyzer();

    // The second filter of the index chain is the datatype analyzer filter,
    // which holds one sub-analyzer per registered datatype (6 for this field).
    assertTrue(chain.getTokenFilterFactories()[1] instanceof DatatypeAnalyzerFilterFactory);
    final DatatypeAnalyzerFilterFactory datatypeFactory = (DatatypeAnalyzerFilterFactory) chain
            .getTokenFilterFactories()[1];
    assertNotNull(datatypeFactory.getDatatypeAnalyzers());
    assertEquals(6, datatypeFactory.getDatatypeAnalyzers().size());

    // xsd:anyURI -> whitespace tokenizer.
    assertNotNull(datatypeFactory.getDatatypeAnalyzers().get("http://www.w3.org/2001/XMLSchema#anyURI"));
    chain = (TokenizerChain) datatypeFactory.getDatatypeAnalyzers().get("http://www.w3.org/2001/XMLSchema#anyURI");
    assertNotNull(chain.getTokenizerFactory());
    assertTrue(chain.getTokenizerFactory() instanceof WhitespaceTokenizerFactory);

    // xsd:string -> UAX29 URL/email tokenizer.
    assertNotNull(datatypeFactory.getDatatypeAnalyzers().get("http://www.w3.org/2001/XMLSchema#string"));
    chain = (TokenizerChain) datatypeFactory.getDatatypeAnalyzers().get("http://www.w3.org/2001/XMLSchema#string");
    assertNotNull(chain.getTokenizerFactory());
    assertTrue(chain.getTokenizerFactory() instanceof UAX29URLEmailTokenizerFactory);

    // xsd:int -> numeric analyzer with precision step 8.
    assertNotNull(datatypeFactory.getDatatypeAnalyzers().get("xsd:int"));
    assertTrue(datatypeFactory.getDatatypeAnalyzers().get("xsd:int") instanceof IntNumericAnalyzer);
    final NumericAnalyzer numericAnalyzer = (NumericAnalyzer) datatypeFactory.getDatatypeAnalyzers().get("xsd:int");
    assertEquals(8, numericAnalyzer.getPrecisionStep());
    assertEquals(DataType.INT, numericAnalyzer.getNumericType());
}