List of usage examples for org.apache.solr.analysis.TokenizerChain#getTokenFilterFactories()
public TokenFilterFactory[] getTokenFilterFactories()
From source file:com.sindicetech.siren.solr.schema.ConciseJsonField.java
License:Open Source License
/** * Load the config when resource loader initialized. * * @param resourceLoader The resource loader. *///from w ww.ja v a 2 s. c o m @Override public void inform(final ResourceLoader resourceLoader) { super.inform(resourceLoader); // if there was a attributeWildcard parameter defined, updates the configuration of the PathEncodingFilterFactory if (this.hasAttributeWildcard) { final TokenizerChain chain = (TokenizerChain) this.getIndexAnalyzer(); for (TokenFilterFactory tokenFilterFactory : chain.getTokenFilterFactories()) { if (tokenFilterFactory instanceof PathEncodingFilterFactory) { ((PathEncodingFilterFactory) tokenFilterFactory).setAttributeWildcard(this.attributeWildcard); } } } }
From source file:com.sindicetech.siren.solr.schema.ConciseJsonField.java
License:Open Source License
/** * Append the mandatory SIREn filters for the concise model, i.e., * {@link com.sindicetech.siren.solr.analysis.DatatypeAnalyzerFilterFactory}, * {@link com.sindicetech.siren.solr.analysis.PathEncodingFilterFactory}, * {@link com.sindicetech.siren.solr.analysis.PositionAttributeFilterFactory} and * {@link com.sindicetech.siren.solr.analysis.SirenPayloadFilterFactory}, to the tokenizer chain. * * @see ExtendedJsonField#appendSirenFilters(org.apache.lucene.analysis.Analyzer, java.util.Map) *//*from w w w .ja v a 2s. c o m*/ @Override protected Analyzer appendSirenFilters(final Analyzer analyzer, final Map<String, Datatype> datatypes) { if (!(analyzer instanceof TokenizerChain)) { throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Invalid index analyzer '" + analyzer.getClass() + "' received"); } final TokenizerChain chain = (TokenizerChain) analyzer; // copy the existing list of token filters final TokenFilterFactory[] old = chain.getTokenFilterFactories(); final TokenFilterFactory[] filterFactories = new TokenFilterFactory[old.length + 4]; System.arraycopy(old, 0, filterFactories, 0, old.length); // append the datatype analyzer filter factory final DatatypeAnalyzerFilterFactory datatypeFactory = new DatatypeAnalyzerFilterFactory( new HashMap<String, String>()); datatypeFactory.register(datatypes); filterFactories[old.length] = datatypeFactory; // append the path encoding filter factory filterFactories[old.length + 1] = new PathEncodingFilterFactory(new HashMap<String, String>()); // append the position attribute filter factory filterFactories[old.length + 2] = new PositionAttributeFilterFactory(new HashMap<String, String>()); // append the siren payload filter factory filterFactories[old.length + 3] = new SirenPayloadFilterFactory(new HashMap<String, String>()); // create a new tokenizer chain with the updated list of filter factories return new TokenizerChain(chain.getCharFilterFactories(), chain.getTokenizerFactory(), filterFactories); }
From source file:com.sindicetech.siren.solr.schema.ExtendedJsonField.java
License:Open Source License
/** * Load the datatype config when resource loader initialized. * * @param resourceLoader The resource loader. */// w ww .j a v a2 s .co m @Override public void inform(final ResourceLoader resourceLoader) { // load the datatypes this.loadDatatypeConfig((SolrResourceLoader) resourceLoader); // Register the datatypes in the DatatypeAnalyzerFilterFactory instance final TokenizerChain chain = (TokenizerChain) this.getIndexAnalyzer(); for (TokenFilterFactory tokenFilterFactory : chain.getTokenFilterFactories()) { if (tokenFilterFactory instanceof DatatypeAnalyzerFilterFactory) { ((DatatypeAnalyzerFilterFactory) tokenFilterFactory).register(this.getDatatypes()); } } }
From source file:com.sindicetech.siren.solr.schema.ExtendedJsonField.java
License:Open Source License
/** * Append the mandatory SIREn filters, i.e., * {@link DatatypeAnalyzerFilterFactory}, * {@link PositionAttributeFilterFactory} and * {@link SirenPayloadFilterFactory}, to the tokenizer chain. * <br/>// w w w. j a v a2 s. c om * The first time this is called, it will create a * {@link com.sindicetech.siren.solr.analysis.DatatypeAnalyzerFilterFactory} with no datatype registered. The datatypes * will be loaded and registered later, when {@link #inform(org.apache.lucene.analysis.util.ResourceLoader)} is * called. * <br/> * This is necessary to avoid having to call {@link org.apache.solr.schema.IndexSchema#refreshAnalyzers()}. * The {@link org.apache.solr.schema.IndexSchema} will have a reference to the SIREn field's analyzer, and * to the {@link com.sindicetech.siren.solr.analysis.DatatypeAnalyzerFilterFactory}. When the datatypes will be loaded, * we will access this reference, and register the datatypes. */ protected Analyzer appendSirenFilters(final Analyzer analyzer, final Map<String, Datatype> datatypes) { if (!(analyzer instanceof TokenizerChain)) { throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Invalid index analyzer '" + analyzer.getClass() + "' received"); } final TokenizerChain chain = (TokenizerChain) analyzer; // copy the existing list of token filters final TokenFilterFactory[] old = chain.getTokenFilterFactories(); final TokenFilterFactory[] filterFactories = new TokenFilterFactory[old.length + 3]; System.arraycopy(old, 0, filterFactories, 0, old.length); // append the datatype analyzer filter factory final DatatypeAnalyzerFilterFactory datatypeFactory = new DatatypeAnalyzerFilterFactory( new HashMap<String, String>()); datatypeFactory.register(datatypes); filterFactories[old.length] = datatypeFactory; // append the position attribute filter factory filterFactories[old.length + 1] = new PositionAttributeFilterFactory(new HashMap<String, String>()); // append the siren payload filter factory filterFactories[old.length + 2] = new 
SirenPayloadFilterFactory(new HashMap<String, String>()); // create a new tokenizer chain with the updated list of filter factories return new TokenizerChain(chain.getCharFilterFactories(), chain.getTokenizerFactory(), filterFactories); }
From source file:com.sindicetech.siren.solr.schema.TestConciseJsonField.java
License:Open Source License
@Test public void testConciseSirenFieldAnalyzer() throws Exception { final IndexSchema schema = h.getCore().getLatestSchema(); final SchemaField json = schema.getField("concise"); final FieldType tmp = json.getType(); assertTrue(tmp.getAnalyzer() instanceof TokenizerChain); final TokenizerChain ts = (TokenizerChain) tmp.getAnalyzer(); assertNotNull(ts.getTokenizerFactory()); assertTrue(ts.getTokenizerFactory() instanceof ConciseJsonTokenizerFactory); // 4 filters for index analyzer assertNotNull(ts.getTokenFilterFactories()); assertEquals(4, ts.getTokenFilterFactories().length); assertTrue(ts.getTokenFilterFactories()[0] instanceof DatatypeAnalyzerFilterFactory); assertTrue(ts.getTokenFilterFactories()[1] instanceof PathEncodingFilterFactory); assertTrue(ts.getTokenFilterFactories()[2] instanceof PositionAttributeFilterFactory); assertTrue(ts.getTokenFilterFactories()[3] instanceof SirenPayloadFilterFactory); }
From source file:com.sindicetech.siren.solr.schema.TestExtendedJsonField.java
License:Open Source License
@Test public void testSirenFieldAnalyzer() throws Exception { final IndexSchema schema = h.getCore().getLatestSchema(); final SchemaField ntriple = schema.getField(JSON_FIELD); final FieldType tmp = ntriple.getType(); assertTrue(tmp.getAnalyzer() instanceof TokenizerChain); final TokenizerChain ts = (TokenizerChain) tmp.getAnalyzer(); assertNotNull(ts.getTokenizerFactory()); assertTrue(ts.getTokenizerFactory() instanceof ExtendedJsonTokenizerFactory); // 3 filters for index analyzer assertNotNull(ts.getTokenFilterFactories()); assertEquals(3, ts.getTokenFilterFactories().length); assertTrue(ts.getTokenFilterFactories()[0] instanceof DatatypeAnalyzerFilterFactory); assertTrue(ts.getTokenFilterFactories()[1] instanceof PositionAttributeFilterFactory); assertTrue(ts.getTokenFilterFactories()[2] instanceof SirenPayloadFilterFactory); }
From source file:com.sindicetech.siren.solr.schema.TestExtendedJsonField.java
License:Open Source License
@Test public void testSirenFieldDatatypeAnalyzer() throws Exception { final IndexSchema schema = h.getCore().getLatestSchema(); final SchemaField ntriple = schema.getField(JSON_FIELD); final FieldType tmp = ntriple.getType(); TokenizerChain ts = (TokenizerChain) tmp.getAnalyzer(); assertTrue(ts.getTokenFilterFactories()[0] instanceof DatatypeAnalyzerFilterFactory); final DatatypeAnalyzerFilterFactory f = (DatatypeAnalyzerFilterFactory) ts.getTokenFilterFactories()[0]; assertNotNull(f.getDatatypeAnalyzers()); assertEquals(9, f.getDatatypeAnalyzers().size()); assertNotNull(f.getDatatypeAnalyzers().get("http://json.org/field")); ts = (TokenizerChain) f.getDatatypeAnalyzers().get("http://json.org/field"); assertNotNull(ts.getTokenizerFactory()); assertTrue(ts.getTokenizerFactory() instanceof WhitespaceTokenizerFactory); assertNotNull(f.getDatatypeAnalyzers().get("http://www.w3.org/2001/XMLSchema#string")); ts = (TokenizerChain) f.getDatatypeAnalyzers().get("http://www.w3.org/2001/XMLSchema#string"); assertNotNull(ts.getTokenizerFactory()); assertTrue(ts.getTokenizerFactory() instanceof UAX29URLEmailTokenizerFactory); assertNotNull(f.getDatatypeAnalyzers().get("http://www.w3.org/2001/XMLSchema#int")); assertTrue(//from w w w. j a va 2 s . co m f.getDatatypeAnalyzers().get("http://www.w3.org/2001/XMLSchema#int") instanceof IntNumericAnalyzer); final IntNumericAnalyzer a = (IntNumericAnalyzer) f.getDatatypeAnalyzers() .get("http://www.w3.org/2001/XMLSchema#int"); assertEquals(8, a.getPrecisionStep()); assertEquals(32, a.getNumericParser().getValueSize()); assertEquals(NumericType.INT, a.getNumericParser().getNumericType()); }
From source file:NomusSolrPlugins.NomusDismaxQParserPlugin.java
License:Apache License
/**
 * Returns the first {@link StopFilterFactory} found in the query-time tokenizer
 * chain of the given field, or {@code null} if the field has no such chain or
 * the chain contains no stop filter.
 *
 * @param parser    the query parser providing schema access
 * @param fieldName the field whose query analyzer is inspected
 * @return the stop filter factory, or {@code null} if none is present
 */
public static StopFilterFactory getQueryStopFilter(QParser parser, String fieldName) {
    final TokenizerChain chain = getQueryTokenizerChain(parser, fieldName);
    if (chain == null) {
        return null;
    }
    for (final TokenFilterFactory factory : chain.getTokenFilterFactories()) {
        if (factory instanceof StopFilterFactory) {
            return (StopFilterFactory) factory;
        }
    }
    return null;
}
From source file:NomusSolrPlugins.NomusDismaxQParserPlugin.java
License:Apache License
public TokenStream tokenStream(String fieldName, Reader reader) { if (!removeStopFilter) { return queryAnalyzer.tokenStream(fieldName, reader); }//from w ww . j a va 2 s .c om Analyzer a = map.get(fieldName); if (a != null) { return a.tokenStream(fieldName, reader); } FieldType ft = parser.getReq().getSchema().getFieldType(fieldName); Analyzer qa = ft.getQueryAnalyzer(); if (!(qa instanceof TokenizerChain)) { map.put(fieldName, qa); return qa.tokenStream(fieldName, reader); } TokenizerChain tcq = (TokenizerChain) qa; Analyzer ia = ft.getAnalyzer(); if (ia == qa || !(ia instanceof TokenizerChain)) { map.put(fieldName, qa); return qa.tokenStream(fieldName, reader); } TokenizerChain tci = (TokenizerChain) ia; // make sure that there isn't a stop filter in the indexer for (TokenFilterFactory tf : tci.getTokenFilterFactories()) { if (tf instanceof StopFilterFactory) { map.put(fieldName, qa); return qa.tokenStream(fieldName, reader); } } // now if there is a stop filter in the query analyzer, remove it int stopIdx = -1; TokenFilterFactory[] facs = tcq.getTokenFilterFactories(); for (int i = 0; i < facs.length; i++) { TokenFilterFactory tf = facs[i]; if (tf instanceof StopFilterFactory) { stopIdx = i; break; } } if (stopIdx == -1) { // no stop filter exists map.put(fieldName, qa); return qa.tokenStream(fieldName, reader); } TokenFilterFactory[] newtf = new TokenFilterFactory[facs.length - 1]; for (int i = 0, j = 0; i < facs.length; i++) { if (i == stopIdx) continue; newtf[j++] = facs[i]; } TokenizerChain newa = new TokenizerChain(tcq.getTokenizerFactory(), newtf); newa.setPositionIncrementGap(tcq.getPositionIncrementGap(fieldName)); map.put(fieldName, newa); return newa.tokenStream(fieldName, reader); }
From source file:org.alfresco.solr.AlfrescoFieldType.java
License:Open Source License
/**
 * Builds a multi-term analyzer from the given query analyzer by replaying its
 * char filters, tokenizer and token filters through a {@link MultiTermChainBuilder}.
 * Returns {@code null} for a {@code null} input, and a {@link KeywordAnalyzer}
 * when the query analyzer is not a {@link TokenizerChain} that can be inspected.
 *
 * @param queryAnalyzer the query-time analyzer to derive from; may be {@code null}
 * @return the multi-term analyzer, a keyword fallback, or {@code null}
 */
private Analyzer constructMultiTermAnalyzer(Analyzer queryAnalyzer) {
    if (queryAnalyzer == null) {
        return null;
    }
    if (!(queryAnalyzer instanceof TokenizerChain)) {
        // Opaque analyzer: fall back to keyword analysis for multi-term queries.
        return new KeywordAnalyzer();
    }
    final TokenizerChain chain = (TokenizerChain) queryAnalyzer;
    final MultiTermChainBuilder builder = new MultiTermChainBuilder();
    final CharFilterFactory[] charFilters = chain.getCharFilterFactories();
    if (charFilters != null) {
        for (final CharFilterFactory charFilter : charFilters) {
            builder.add(charFilter);
        }
    }
    builder.add(chain.getTokenizerFactory());
    for (final TokenFilterFactory tokenFilter : chain.getTokenFilterFactories()) {
        builder.add(tokenFilter);
    }
    return builder.build();
}