Usage examples for org.apache.solr.analysis.TokenizerChain#getTokenizerFactory()
public TokenizerFactory getTokenizerFactory()
From source file:com.sindicetech.siren.solr.schema.ConciseJsonField.java
License:Open Source License
/** * Append the mandatory SIREn filters for the concise model, i.e., * {@link com.sindicetech.siren.solr.analysis.DatatypeAnalyzerFilterFactory}, * {@link com.sindicetech.siren.solr.analysis.PathEncodingFilterFactory}, * {@link com.sindicetech.siren.solr.analysis.PositionAttributeFilterFactory} and * {@link com.sindicetech.siren.solr.analysis.SirenPayloadFilterFactory}, to the tokenizer chain. * * @see ExtendedJsonField#appendSirenFilters(org.apache.lucene.analysis.Analyzer, java.util.Map) *//*from www . j a va 2 s . c o m*/ @Override protected Analyzer appendSirenFilters(final Analyzer analyzer, final Map<String, Datatype> datatypes) { if (!(analyzer instanceof TokenizerChain)) { throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Invalid index analyzer '" + analyzer.getClass() + "' received"); } final TokenizerChain chain = (TokenizerChain) analyzer; // copy the existing list of token filters final TokenFilterFactory[] old = chain.getTokenFilterFactories(); final TokenFilterFactory[] filterFactories = new TokenFilterFactory[old.length + 4]; System.arraycopy(old, 0, filterFactories, 0, old.length); // append the datatype analyzer filter factory final DatatypeAnalyzerFilterFactory datatypeFactory = new DatatypeAnalyzerFilterFactory( new HashMap<String, String>()); datatypeFactory.register(datatypes); filterFactories[old.length] = datatypeFactory; // append the path encoding filter factory filterFactories[old.length + 1] = new PathEncodingFilterFactory(new HashMap<String, String>()); // append the position attribute filter factory filterFactories[old.length + 2] = new PositionAttributeFilterFactory(new HashMap<String, String>()); // append the siren payload filter factory filterFactories[old.length + 3] = new SirenPayloadFilterFactory(new HashMap<String, String>()); // create a new tokenizer chain with the updated list of filter factories return new TokenizerChain(chain.getCharFilterFactories(), chain.getTokenizerFactory(), filterFactories); }
From source file:com.sindicetech.siren.solr.schema.ExtendedJsonField.java
License:Open Source License
/** * Append the mandatory SIREn filters, i.e., * {@link DatatypeAnalyzerFilterFactory}, * {@link PositionAttributeFilterFactory} and * {@link SirenPayloadFilterFactory}, to the tokenizer chain. * <br/>/*from w ww . j ava 2 s. c o m*/ * The first time this is called, it will create a * {@link com.sindicetech.siren.solr.analysis.DatatypeAnalyzerFilterFactory} with no datatype registered. The datatypes * will be loaded and registered later, when {@link #inform(org.apache.lucene.analysis.util.ResourceLoader)} is * called. * <br/> * This is necessary to avoid having to call {@link org.apache.solr.schema.IndexSchema#refreshAnalyzers()}. * The {@link org.apache.solr.schema.IndexSchema} will have a reference to the SIREn field's analyzer, and * to the {@link com.sindicetech.siren.solr.analysis.DatatypeAnalyzerFilterFactory}. When the datatypes will be loaded, * we will access this reference, and register the datatypes. */ protected Analyzer appendSirenFilters(final Analyzer analyzer, final Map<String, Datatype> datatypes) { if (!(analyzer instanceof TokenizerChain)) { throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Invalid index analyzer '" + analyzer.getClass() + "' received"); } final TokenizerChain chain = (TokenizerChain) analyzer; // copy the existing list of token filters final TokenFilterFactory[] old = chain.getTokenFilterFactories(); final TokenFilterFactory[] filterFactories = new TokenFilterFactory[old.length + 3]; System.arraycopy(old, 0, filterFactories, 0, old.length); // append the datatype analyzer filter factory final DatatypeAnalyzerFilterFactory datatypeFactory = new DatatypeAnalyzerFilterFactory( new HashMap<String, String>()); datatypeFactory.register(datatypes); filterFactories[old.length] = datatypeFactory; // append the position attribute filter factory filterFactories[old.length + 1] = new PositionAttributeFilterFactory(new HashMap<String, String>()); // append the siren payload filter factory filterFactories[old.length + 2] = new 
SirenPayloadFilterFactory(new HashMap<String, String>()); // create a new tokenizer chain with the updated list of filter factories return new TokenizerChain(chain.getCharFilterFactories(), chain.getTokenizerFactory(), filterFactories); }
From source file:com.sindicetech.siren.solr.schema.TestConciseJsonField.java
License:Open Source License
@Test public void testConciseSirenFieldAnalyzer() throws Exception { final IndexSchema schema = h.getCore().getLatestSchema(); final SchemaField json = schema.getField("concise"); final FieldType tmp = json.getType(); assertTrue(tmp.getAnalyzer() instanceof TokenizerChain); final TokenizerChain ts = (TokenizerChain) tmp.getAnalyzer(); assertNotNull(ts.getTokenizerFactory()); assertTrue(ts.getTokenizerFactory() instanceof ConciseJsonTokenizerFactory); // 4 filters for index analyzer assertNotNull(ts.getTokenFilterFactories()); assertEquals(4, ts.getTokenFilterFactories().length); assertTrue(ts.getTokenFilterFactories()[0] instanceof DatatypeAnalyzerFilterFactory); assertTrue(ts.getTokenFilterFactories()[1] instanceof PathEncodingFilterFactory); assertTrue(ts.getTokenFilterFactories()[2] instanceof PositionAttributeFilterFactory); assertTrue(ts.getTokenFilterFactories()[3] instanceof SirenPayloadFilterFactory); }
From source file:com.sindicetech.siren.solr.schema.TestExtendedJsonField.java
License:Open Source License
@Test public void testSirenFieldAnalyzer() throws Exception { final IndexSchema schema = h.getCore().getLatestSchema(); final SchemaField ntriple = schema.getField(JSON_FIELD); final FieldType tmp = ntriple.getType(); assertTrue(tmp.getAnalyzer() instanceof TokenizerChain); final TokenizerChain ts = (TokenizerChain) tmp.getAnalyzer(); assertNotNull(ts.getTokenizerFactory()); assertTrue(ts.getTokenizerFactory() instanceof ExtendedJsonTokenizerFactory); // 3 filters for index analyzer assertNotNull(ts.getTokenFilterFactories()); assertEquals(3, ts.getTokenFilterFactories().length); assertTrue(ts.getTokenFilterFactories()[0] instanceof DatatypeAnalyzerFilterFactory); assertTrue(ts.getTokenFilterFactories()[1] instanceof PositionAttributeFilterFactory); assertTrue(ts.getTokenFilterFactories()[2] instanceof SirenPayloadFilterFactory); }
From source file:com.sindicetech.siren.solr.schema.TestExtendedJsonField.java
License:Open Source License
@Test public void testSirenFieldDatatypeAnalyzer() throws Exception { final IndexSchema schema = h.getCore().getLatestSchema(); final SchemaField ntriple = schema.getField(JSON_FIELD); final FieldType tmp = ntriple.getType(); TokenizerChain ts = (TokenizerChain) tmp.getAnalyzer(); assertTrue(ts.getTokenFilterFactories()[0] instanceof DatatypeAnalyzerFilterFactory); final DatatypeAnalyzerFilterFactory f = (DatatypeAnalyzerFilterFactory) ts.getTokenFilterFactories()[0]; assertNotNull(f.getDatatypeAnalyzers()); assertEquals(9, f.getDatatypeAnalyzers().size()); assertNotNull(f.getDatatypeAnalyzers().get("http://json.org/field")); ts = (TokenizerChain) f.getDatatypeAnalyzers().get("http://json.org/field"); assertNotNull(ts.getTokenizerFactory()); assertTrue(ts.getTokenizerFactory() instanceof WhitespaceTokenizerFactory); assertNotNull(f.getDatatypeAnalyzers().get("http://www.w3.org/2001/XMLSchema#string")); ts = (TokenizerChain) f.getDatatypeAnalyzers().get("http://www.w3.org/2001/XMLSchema#string"); assertNotNull(ts.getTokenizerFactory()); assertTrue(ts.getTokenizerFactory() instanceof UAX29URLEmailTokenizerFactory); assertNotNull(f.getDatatypeAnalyzers().get("http://www.w3.org/2001/XMLSchema#int")); assertTrue(//from ww w .ja v a 2s . com f.getDatatypeAnalyzers().get("http://www.w3.org/2001/XMLSchema#int") instanceof IntNumericAnalyzer); final IntNumericAnalyzer a = (IntNumericAnalyzer) f.getDatatypeAnalyzers() .get("http://www.w3.org/2001/XMLSchema#int"); assertEquals(8, a.getPrecisionStep()); assertEquals(32, a.getNumericParser().getValueSize()); assertEquals(NumericType.INT, a.getNumericParser().getNumericType()); }
From source file:NomusSolrPlugins.NomusDismaxQParserPlugin.java
License:Apache License
public TokenStream tokenStream(String fieldName, Reader reader) { if (!removeStopFilter) { return queryAnalyzer.tokenStream(fieldName, reader); }// w ww. j av a2 s .co m Analyzer a = map.get(fieldName); if (a != null) { return a.tokenStream(fieldName, reader); } FieldType ft = parser.getReq().getSchema().getFieldType(fieldName); Analyzer qa = ft.getQueryAnalyzer(); if (!(qa instanceof TokenizerChain)) { map.put(fieldName, qa); return qa.tokenStream(fieldName, reader); } TokenizerChain tcq = (TokenizerChain) qa; Analyzer ia = ft.getAnalyzer(); if (ia == qa || !(ia instanceof TokenizerChain)) { map.put(fieldName, qa); return qa.tokenStream(fieldName, reader); } TokenizerChain tci = (TokenizerChain) ia; // make sure that there isn't a stop filter in the indexer for (TokenFilterFactory tf : tci.getTokenFilterFactories()) { if (tf instanceof StopFilterFactory) { map.put(fieldName, qa); return qa.tokenStream(fieldName, reader); } } // now if there is a stop filter in the query analyzer, remove it int stopIdx = -1; TokenFilterFactory[] facs = tcq.getTokenFilterFactories(); for (int i = 0; i < facs.length; i++) { TokenFilterFactory tf = facs[i]; if (tf instanceof StopFilterFactory) { stopIdx = i; break; } } if (stopIdx == -1) { // no stop filter exists map.put(fieldName, qa); return qa.tokenStream(fieldName, reader); } TokenFilterFactory[] newtf = new TokenFilterFactory[facs.length - 1]; for (int i = 0, j = 0; i < facs.length; i++) { if (i == stopIdx) continue; newtf[j++] = facs[i]; } TokenizerChain newa = new TokenizerChain(tcq.getTokenizerFactory(), newtf); newa.setPositionIncrementGap(tcq.getPositionIncrementGap(fieldName)); map.put(fieldName, newa); return newa.tokenStream(fieldName, reader); }
From source file:org.alfresco.solr.AlfrescoFieldType.java
License:Open Source License
private Analyzer constructMultiTermAnalyzer(Analyzer queryAnalyzer) { if (queryAnalyzer == null) return null; if (!(queryAnalyzer instanceof TokenizerChain)) { return new KeywordAnalyzer(); }//from w ww .j a va2 s .c o m TokenizerChain tc = (TokenizerChain) queryAnalyzer; MultiTermChainBuilder builder = new MultiTermChainBuilder(); CharFilterFactory[] charFactories = tc.getCharFilterFactories(); if (charFactories != null) { for (CharFilterFactory fact : charFactories) { builder.add(fact); } } builder.add(tc.getTokenizerFactory()); for (TokenFilterFactory fact : tc.getTokenFilterFactories()) { builder.add(fact); } return builder.build(); }
From source file:org.sindice.siren.solr.schema.TestSirenField.java
License:Open Source License
@Test public void testSirenFieldTopLevelAnalyzer() throws Exception { final IndexSchema schema = h.getCore().getSchema(); final SchemaField ntriple = schema.getField("ntriple"); final FieldType tmp = ntriple.getType(); assertTrue(tmp.getAnalyzer() instanceof TokenizerChain); TokenizerChain ts = (TokenizerChain) tmp.getAnalyzer(); assertNotNull(ts.getTokenizerFactory()); assertTrue(ts.getTokenizerFactory() instanceof TupleTokenizerFactory); // 3 filters for index assertNotNull(ts.getTokenFilterFactories()); assertEquals(3, ts.getTokenFilterFactories().length); assertTrue(tmp.getQueryAnalyzer() instanceof TokenizerChain); ts = (TokenizerChain) tmp.getQueryAnalyzer(); assertNotNull(ts.getTokenizerFactory()); assertTrue(ts.getTokenizerFactory() instanceof WhitespaceTokenizerFactory); // 6 filters for keyword-query assertNotNull(ts.getTokenFilterFactories()); assertEquals(6, ts.getTokenFilterFactories().length); }
From source file:org.sindice.siren.solr.schema.TestSirenField.java
License:Open Source License
@Test public void testSirenFieldDatatypeAnalyzer() throws Exception { final IndexSchema schema = h.getCore().getSchema(); final SchemaField ntriple = schema.getField("ntriple"); final FieldType tmp = ntriple.getType(); TokenizerChain ts = (TokenizerChain) tmp.getAnalyzer(); assertTrue(ts.getTokenFilterFactories()[1] instanceof DatatypeAnalyzerFilterFactory); final DatatypeAnalyzerFilterFactory f = (DatatypeAnalyzerFilterFactory) ts.getTokenFilterFactories()[1]; assertNotNull(f.getDatatypeAnalyzers()); // three datatypes are defined assertEquals(6, f.getDatatypeAnalyzers().size()); assertNotNull(f.getDatatypeAnalyzers().get("http://www.w3.org/2001/XMLSchema#anyURI")); ts = (TokenizerChain) f.getDatatypeAnalyzers().get("http://www.w3.org/2001/XMLSchema#anyURI"); assertNotNull(ts.getTokenizerFactory()); assertTrue(ts.getTokenizerFactory() instanceof WhitespaceTokenizerFactory); assertNotNull(f.getDatatypeAnalyzers().get("http://www.w3.org/2001/XMLSchema#string")); ts = (TokenizerChain) f.getDatatypeAnalyzers().get("http://www.w3.org/2001/XMLSchema#string"); assertNotNull(ts.getTokenizerFactory()); assertTrue(ts.getTokenizerFactory() instanceof UAX29URLEmailTokenizerFactory); assertNotNull(f.getDatatypeAnalyzers().get("xsd:int")); assertTrue(f.getDatatypeAnalyzers().get("xsd:int") instanceof IntNumericAnalyzer); final NumericAnalyzer a = (NumericAnalyzer) f.getDatatypeAnalyzers().get("xsd:int"); assertEquals(8, a.getPrecisionStep()); assertEquals(DataType.INT, a.getNumericType()); }