Usage examples for the org.apache.solr.analysis.TokenizerChain constructor:
public TokenizerChain(CharFilterFactory[] charFilters, TokenizerFactory tokenizer,
TokenFilterFactory[] filters)
From source file:com.sindicetech.siren.solr.schema.AnalyzerConfigReader.java
License:Open Source License
/**
 * Reads an analyzer definition from a DOM node and instantiates an {@link Analyzer}.
 *
 * <p> Code adapted from
 * {@link org.apache.solr.schema.FieldTypePluginLoader#readAnalyzer(org.w3c.dom.Node)}.
 *
 * <p> Two mutually exclusive configurations are supported: either a {@code class}
 * attribute naming an Analyzer implementation, or nested
 * {@code <charFilter>}/{@code <tokenizer>}/{@code <filter>} factory elements that are
 * assembled into a {@code TokenizerChain}.
 *
 * @param node An analyzer node from the config file (may be null)
 * @param loader Resource loader used to resolve factory/analyzer class names
 * @param luceneMatchVersion Default Lucene version used when the config does not set one
 * @return An analyzer, or null if {@code node} is null
 * @throws XPathExpressionException If an XPath expression cannot be evaluated
 */
protected static Analyzer readAnalyzer(final Node node, final SolrResourceLoader loader,
    final Version luceneMatchVersion) throws XPathExpressionException {
  if (node == null)
    return null;
  final NamedNodeMap attrs = node.getAttributes();
  final String analyzerName = DOMUtil.getAttr(attrs, "class");

  // check for all of these up front, so we can error if used in
  // conjunction with an explicit analyzer class.
  final XPath xpath = XPathFactory.newInstance().newXPath();
  final NodeList charFilterNodes = (NodeList) xpath.evaluate("./charFilter", node, XPathConstants.NODESET);
  final NodeList tokenizerNodes = (NodeList) xpath.evaluate("./tokenizer", node, XPathConstants.NODESET);
  final NodeList tokenFilterNodes = (NodeList) xpath.evaluate("./filter", node, XPathConstants.NODESET);

  if (analyzerName != null) {

    // explicitly check for child analysis factories instead of
    // just any child nodes, because the user might have their
    // own custom nodes (ie: <description> or something like that)
    if (0 != charFilterNodes.getLength() || 0 != tokenizerNodes.getLength()
        || 0 != tokenFilterNodes.getLength()) {
      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
          "Configuration Error: Analyzer class='" + analyzerName
              + "' can not be combined with nested analysis factories");
    }

    try {
      // No need to be core-aware as Analyzers are not in the core-aware list
      final Class<? extends Analyzer> clazz = loader.findClass(analyzerName, Analyzer.class);

      try {
        // first try to use a ctor with version parameter (needed for many new Analyzers that have no default one anymore)
        final Constructor<? extends Analyzer> cnstr = clazz.getConstructor(Version.class);
        final String matchVersionStr = DOMUtil.getAttr(attrs, LUCENE_MATCH_VERSION_PARAM);
        final Version matchVersion = (matchVersionStr == null) ? luceneMatchVersion
            : Config.parseLuceneVersionString(matchVersionStr);
        if (matchVersion == null) {
          throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Configuration Error: Analyzer '"
              + clazz.getName() + "' needs a 'luceneMatchVersion' parameter");
        }
        return cnstr.newInstance(matchVersion);
      } catch (final NoSuchMethodException nsme) {
        // otherwise use default ctor
        return clazz.newInstance();
      }
    } catch (final Exception e) {
      logger.error("Cannot load analyzer: " + analyzerName, e);
      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Cannot load analyzer: " + analyzerName,
          e);
    }
  }

  // Load the CharFilters
  // --------------------------------------------------------------------------------
  final ArrayList<CharFilterFactory> charFilters = new ArrayList<CharFilterFactory>();
  final AbstractPluginLoader<CharFilterFactory> charFilterLoader = new AbstractPluginLoader<CharFilterFactory>(
      "[analyzerConfig] analyzer/charFilter", CharFilterFactory.class, false, false) {

    @Override
    protected CharFilterFactory create(final SolrResourceLoader loader, final String name,
        final String className, final Node node) throws Exception {
      final Map<String, String> params = DOMUtil.toMap(node.getAttributes());
      // the element-level luceneMatchVersion overrides the schema default
      String configuredVersion = params.remove(LUCENE_MATCH_VERSION_PARAM);
      params.put(LUCENE_MATCH_VERSION_PARAM, parseConfiguredVersion(configuredVersion,
          CharFilterFactory.class.getSimpleName(), luceneMatchVersion).toString());
      CharFilterFactory factory = loader.newInstance(className, CharFilterFactory.class,
          getDefaultPackages(), new Class[] { Map.class }, new Object[] { params });
      factory.setExplicitLuceneMatchVersion(null != configuredVersion);
      return factory;
    }

    @Override
    protected void init(final CharFilterFactory plugin, final Node node) throws Exception {
      if (plugin != null) {
        charFilters.add(plugin);
      }
    }

    @Override
    protected CharFilterFactory register(final String name, final CharFilterFactory plugin) {
      return null; // used for map registration
    }
  };
  charFilterLoader.load(loader, charFilterNodes);

  // Load the Tokenizer
  // Although an analyzer only allows a single Tokenizer, we load a list to make sure
  // the configuration is ok
  // --------------------------------------------------------------------------------
  final ArrayList<TokenizerFactory> tokenizers = new ArrayList<TokenizerFactory>(1);
  final AbstractPluginLoader<TokenizerFactory> tokenizerLoader = new AbstractPluginLoader<TokenizerFactory>(
      "[analyzerConfig] analyzer/tokenizer", TokenizerFactory.class, false, false) {

    @Override
    protected TokenizerFactory create(final SolrResourceLoader loader, final String name,
        final String className, final Node node) throws Exception {
      final Map<String, String> params = DOMUtil.toMap(node.getAttributes());
      // the element-level luceneMatchVersion overrides the schema default
      String configuredVersion = params.remove(LUCENE_MATCH_VERSION_PARAM);
      params.put(LUCENE_MATCH_VERSION_PARAM, parseConfiguredVersion(configuredVersion,
          TokenizerFactory.class.getSimpleName(), luceneMatchVersion).toString());
      TokenizerFactory factory = loader.newInstance(className, TokenizerFactory.class,
          getDefaultPackages(), new Class[] { Map.class }, new Object[] { params });
      factory.setExplicitLuceneMatchVersion(null != configuredVersion);
      return factory;
    }

    @Override
    protected void init(final TokenizerFactory plugin, final Node node) throws Exception {
      if (!tokenizers.isEmpty()) {
        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
            "Multiple tokenizers defined for: " + node);
      }
      tokenizers.add(plugin);
    }

    @Override
    protected TokenizerFactory register(final String name, final TokenizerFactory plugin) {
      return null; // used for map registration
    }
  };
  tokenizerLoader.load(loader, tokenizerNodes);

  // Make sure something was loaded
  if (tokenizers.isEmpty()) {
    throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
        "analyzer without class or tokenizer & filter list");
  }

  // Load the Filters
  // --------------------------------------------------------------------------------
  final ArrayList<TokenFilterFactory> filters = new ArrayList<TokenFilterFactory>();
  final AbstractPluginLoader<TokenFilterFactory> filterLoader = new AbstractPluginLoader<TokenFilterFactory>(
      "[analyzerConfig] analyzer/filter", TokenFilterFactory.class, false, false) {

    @Override
    protected TokenFilterFactory create(final SolrResourceLoader loader, final String name,
        final String className, final Node node) throws Exception {
      final Map<String, String> params = DOMUtil.toMap(node.getAttributes());
      // the element-level luceneMatchVersion overrides the schema default
      String configuredVersion = params.remove(LUCENE_MATCH_VERSION_PARAM);
      params.put(LUCENE_MATCH_VERSION_PARAM, parseConfiguredVersion(configuredVersion,
          TokenFilterFactory.class.getSimpleName(), luceneMatchVersion).toString());
      TokenFilterFactory factory = loader.newInstance(className, TokenFilterFactory.class,
          getDefaultPackages(), new Class[] { Map.class }, new Object[] { params });
      factory.setExplicitLuceneMatchVersion(null != configuredVersion);
      return factory;
    }

    @Override
    protected void init(final TokenFilterFactory plugin, final Node node) throws Exception {
      if (plugin != null) {
        filters.add(plugin);
      }
    }

    @Override
    protected TokenFilterFactory register(final String name, final TokenFilterFactory plugin)
        throws Exception {
      return null; // used for map registration
    }
  };
  filterLoader.load(loader, tokenFilterNodes);

  return new TokenizerChain(charFilters.toArray(new CharFilterFactory[charFilters.size()]),
      tokenizers.get(0), filters.toArray(new TokenFilterFactory[filters.size()]));
}
From source file:com.sindicetech.siren.solr.schema.ConciseJsonField.java
License:Open Source License
/** * Append the mandatory SIREn filters for the concise model, i.e., * {@link com.sindicetech.siren.solr.analysis.DatatypeAnalyzerFilterFactory}, * {@link com.sindicetech.siren.solr.analysis.PathEncodingFilterFactory}, * {@link com.sindicetech.siren.solr.analysis.PositionAttributeFilterFactory} and * {@link com.sindicetech.siren.solr.analysis.SirenPayloadFilterFactory}, to the tokenizer chain. * * @see ExtendedJsonField#appendSirenFilters(org.apache.lucene.analysis.Analyzer, java.util.Map) *///from www.j av a 2 s.co m @Override protected Analyzer appendSirenFilters(final Analyzer analyzer, final Map<String, Datatype> datatypes) { if (!(analyzer instanceof TokenizerChain)) { throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Invalid index analyzer '" + analyzer.getClass() + "' received"); } final TokenizerChain chain = (TokenizerChain) analyzer; // copy the existing list of token filters final TokenFilterFactory[] old = chain.getTokenFilterFactories(); final TokenFilterFactory[] filterFactories = new TokenFilterFactory[old.length + 4]; System.arraycopy(old, 0, filterFactories, 0, old.length); // append the datatype analyzer filter factory final DatatypeAnalyzerFilterFactory datatypeFactory = new DatatypeAnalyzerFilterFactory( new HashMap<String, String>()); datatypeFactory.register(datatypes); filterFactories[old.length] = datatypeFactory; // append the path encoding filter factory filterFactories[old.length + 1] = new PathEncodingFilterFactory(new HashMap<String, String>()); // append the position attribute filter factory filterFactories[old.length + 2] = new PositionAttributeFilterFactory(new HashMap<String, String>()); // append the siren payload filter factory filterFactories[old.length + 3] = new SirenPayloadFilterFactory(new HashMap<String, String>()); // create a new tokenizer chain with the updated list of filter factories return new TokenizerChain(chain.getCharFilterFactories(), chain.getTokenizerFactory(), filterFactories); }
From source file:com.sindicetech.siren.solr.schema.ExtendedJsonField.java
License:Open Source License
@Override protected void init(final IndexSchema schema, final Map<String, String> args) { // first call TextField.init to set omitTermFreqAndPositions to false super.init(schema, args); this.checkFieldTypeProperties(); // initialise specific SIREn's properties this.datatypeAnalyzerConfigPath = args.remove(DATATYPECONFIG_KEY); if (datatypeAnalyzerConfigPath == null) { throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "ExtendedJsonField types require a '" + DATATYPECONFIG_KEY + "' parameter: " + this.typeName); }/*from ww w .j av a 2s .com*/ // set the posting format args.put("postingsFormat", Siren10AForPostingsFormat.NAME); this.luceneDefaultVersion = schema.getDefaultLuceneMatchVersion(); // instantiate the index analyzer associated to the field Analyzer indexAnalyzer = new TokenizerChain(new CharFilterFactory[0], this.getTokenizerFactory(args), new TokenFilterFactory[0]); indexAnalyzer = this.appendSirenFilters(indexAnalyzer, this.getDatatypes()); this.setIndexAnalyzer(indexAnalyzer); super.init(schema, args); }
From source file:com.sindicetech.siren.solr.schema.ExtendedJsonField.java
License:Open Source License
/** * Append the mandatory SIREn filters, i.e., * {@link DatatypeAnalyzerFilterFactory}, * {@link PositionAttributeFilterFactory} and * {@link SirenPayloadFilterFactory}, to the tokenizer chain. * <br/>/*w w w . j a v a2s . co m*/ * The first time this is called, it will create a * {@link com.sindicetech.siren.solr.analysis.DatatypeAnalyzerFilterFactory} with no datatype registered. The datatypes * will be loaded and registered later, when {@link #inform(org.apache.lucene.analysis.util.ResourceLoader)} is * called. * <br/> * This is necessary to avoid having to call {@link org.apache.solr.schema.IndexSchema#refreshAnalyzers()}. * The {@link org.apache.solr.schema.IndexSchema} will have a reference to the SIREn field's analyzer, and * to the {@link com.sindicetech.siren.solr.analysis.DatatypeAnalyzerFilterFactory}. When the datatypes will be loaded, * we will access this reference, and register the datatypes. */ protected Analyzer appendSirenFilters(final Analyzer analyzer, final Map<String, Datatype> datatypes) { if (!(analyzer instanceof TokenizerChain)) { throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Invalid index analyzer '" + analyzer.getClass() + "' received"); } final TokenizerChain chain = (TokenizerChain) analyzer; // copy the existing list of token filters final TokenFilterFactory[] old = chain.getTokenFilterFactories(); final TokenFilterFactory[] filterFactories = new TokenFilterFactory[old.length + 3]; System.arraycopy(old, 0, filterFactories, 0, old.length); // append the datatype analyzer filter factory final DatatypeAnalyzerFilterFactory datatypeFactory = new DatatypeAnalyzerFilterFactory( new HashMap<String, String>()); datatypeFactory.register(datatypes); filterFactories[old.length] = datatypeFactory; // append the position attribute filter factory filterFactories[old.length + 1] = new PositionAttributeFilterFactory(new HashMap<String, String>()); // append the siren payload filter factory filterFactories[old.length + 2] = new 
SirenPayloadFilterFactory(new HashMap<String, String>()); // create a new tokenizer chain with the updated list of filter factories return new TokenizerChain(chain.getCharFilterFactories(), chain.getTokenizerFactory(), filterFactories); }
From source file:org.hibernate.search.impl.SolrAnalyzerBuilder.java
License:Open Source License
/**
 * Builds a Lucene <code>Analyzer</code> from the specified <code>AnalyzerDef</code> annotation.
 *
 * @param analyzerDef The <code>AnalyzerDef</code> annotation as found in the annotated domain class.
 * @param luceneMatchVersion The lucene version (required since Lucene 3.x)
 *
 * @return a Lucene <code>Analyzer</code>
 */
public static Analyzer buildAnalyzer(AnalyzerDef analyzerDef, Version luceneMatchVersion) {
  ResourceLoader resourceLoader = new HibernateSearchResourceLoader();

  // instantiate and configure the tokenizer factory
  TokenizerDef tokenizerDef = analyzerDef.tokenizer();
  TokenizerFactory tokenizerFactory = instanceFromClass(TokenizerFactory.class,
      tokenizerDef.factory(), "Tokenizer factory");
  final Map<String, String> tokenizerParams = getMapOfParameters(tokenizerDef.params(),
      luceneMatchVersion);
  tokenizerFactory.init(tokenizerParams);
  injectResourceLoader(tokenizerFactory, resourceLoader, tokenizerParams);

  TokenFilterFactory[] tokenFilterFactories = new TokenFilterFactory[analyzerDef.filters().length];
  CharFilterFactory[] charFilterFactories = new CharFilterFactory[analyzerDef.charFilters().length];

  // instantiate and configure each token filter factory, in declaration order
  for (int i = 0; i < tokenFilterFactories.length; i++) {
    TokenFilterDef filterDef = analyzerDef.filters()[i];
    TokenFilterFactory factory = instanceFromClass(TokenFilterFactory.class, filterDef.factory(),
        "Token filter factory");
    final Map<String, String> filterParams = getMapOfParameters(filterDef.params(),
        luceneMatchVersion);
    factory.init(filterParams);
    injectResourceLoader(factory, resourceLoader, filterParams);
    tokenFilterFactories[i] = factory;
  }

  // instantiate and configure each char filter factory, in declaration order
  for (int i = 0; i < charFilterFactories.length; i++) {
    CharFilterDef charFilterDef = analyzerDef.charFilters()[i];
    CharFilterFactory factory = instanceFromClass(CharFilterFactory.class, charFilterDef.factory(),
        "Character filter factory");
    final Map<String, String> charFilterParams = getMapOfParameters(charFilterDef.params(),
        luceneMatchVersion);
    factory.init(charFilterParams);
    injectResourceLoader(factory, resourceLoader, charFilterParams);
    charFilterFactories[i] = factory;
  }

  return new TokenizerChain(charFilterFactories, tokenizerFactory, tokenFilterFactories);
}
From source file:org.sindice.siren.solr.schema.AnalyzerConfigReader.java
License:Open Source License
/** * Read an analyzer definition and instantiate an {@link Analyzer} object. * * <p> Code taken from {@link IndexSchema#readAnalyzer()} * * @param node An analyzer node from the config file * @return An analyzer/*ww w. j a v a2 s.c om*/ * @throws XPathExpressionException If an XPath expression cannot be evaluated */ protected static Analyzer readAnalyzer(final Node node, final SolrResourceLoader loader, final Version luceneMatchVersion) throws XPathExpressionException { if (node == null) return null; final NamedNodeMap attrs = node.getAttributes(); final String analyzerName = DOMUtil.getAttr(attrs, "class"); if (analyzerName != null) { // No need to be core-aware as Analyzers are not in the core-aware list final Class<? extends Analyzer> clazz = loader.findClass(analyzerName).asSubclass(Analyzer.class); try { try { // first try to use a ctor with version parameter (needed for many new Analyzers that have no default one anymore) final Constructor<? extends Analyzer> cnstr = clazz.getConstructor(Version.class); final String matchVersionStr = DOMUtil.getAttr(attrs, LUCENE_MATCH_VERSION_PARAM); final Version matchVersion = (matchVersionStr == null) ? 
luceneMatchVersion : Config.parseLuceneVersionString(matchVersionStr); if (matchVersion == null) { throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Configuration Error: Analyzer '" + clazz.getName() + "' needs a 'luceneMatchVersion' parameter"); } return cnstr.newInstance(matchVersion); } catch (final NoSuchMethodException nsme) { // otherwise use default ctor return clazz.newInstance(); } } catch (final Exception e) { throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Cannot load analyzer: " + analyzerName); } } final XPath xpath = XPathFactory.newInstance().newXPath(); // Load the CharFilters // -------------------------------------------------------------------------------- final ArrayList<CharFilterFactory> charFilters = new ArrayList<CharFilterFactory>(); final AbstractPluginLoader<CharFilterFactory> charFilterLoader = new AbstractPluginLoader<CharFilterFactory>( "[analyzerConfig] analyzer/charFilter", false, false) { @Override protected void init(final CharFilterFactory plugin, final Node node) throws Exception { if (plugin != null) { final Map<String, String> params = DOMUtil.toMapExcept(node.getAttributes(), "class"); // copy the luceneMatchVersion from config, if not set if (!params.containsKey(LUCENE_MATCH_VERSION_PARAM)) params.put(LUCENE_MATCH_VERSION_PARAM, luceneMatchVersion.toString()); plugin.init(params); charFilters.add(plugin); } } @Override protected CharFilterFactory register(final String name, final CharFilterFactory plugin) throws Exception { return null; // used for map registration } }; charFilterLoader.load(loader, (NodeList) xpath.evaluate("./charFilter", node, XPathConstants.NODESET)); // Load the Tokenizer // Although an analyzer only allows a single Tokenizer, we load a list to make sure // the configuration is ok // -------------------------------------------------------------------------------- final ArrayList<TokenizerFactory> tokenizers = new ArrayList<TokenizerFactory>(1); final 
AbstractPluginLoader<TokenizerFactory> tokenizerLoader = new AbstractPluginLoader<TokenizerFactory>( "[analyzerConfig] analyzer/tokenizer", false, false) { @Override protected void init(final TokenizerFactory plugin, final Node node) throws Exception { if (!tokenizers.isEmpty()) { throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Multiple tokenizers defined for: " + node); } final Map<String, String> params = DOMUtil.toMapExcept(node.getAttributes(), "class"); // copy the luceneMatchVersion from config, if not set if (!params.containsKey(LUCENE_MATCH_VERSION_PARAM)) params.put(LUCENE_MATCH_VERSION_PARAM, luceneMatchVersion.toString()); plugin.init(params); tokenizers.add(plugin); } @Override protected TokenizerFactory register(final String name, final TokenizerFactory plugin) throws Exception { return null; // used for map registration } }; tokenizerLoader.load(loader, (NodeList) xpath.evaluate("./tokenizer", node, XPathConstants.NODESET)); // Make sure something was loaded if (tokenizers.isEmpty()) { throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "analyzer without class or tokenizer & filter list"); } // Load the Filters // -------------------------------------------------------------------------------- final ArrayList<TokenFilterFactory> filters = new ArrayList<TokenFilterFactory>(); final AbstractPluginLoader<TokenFilterFactory> filterLoader = new AbstractPluginLoader<TokenFilterFactory>( "[analyzerConfig] analyzer/filter", false, false) { @Override protected void init(final TokenFilterFactory plugin, final Node node) throws Exception { if (plugin != null) { final Map<String, String> params = DOMUtil.toMapExcept(node.getAttributes(), "class"); // copy the luceneMatchVersion from config, if not set if (!params.containsKey(LUCENE_MATCH_VERSION_PARAM)) params.put(LUCENE_MATCH_VERSION_PARAM, luceneMatchVersion.toString()); plugin.init(params); filters.add(plugin); } } @Override protected TokenFilterFactory register(final String name, 
final TokenFilterFactory plugin) throws Exception { return null; // used for map registration } }; filterLoader.load(loader, (NodeList) xpath.evaluate("./filter", node, XPathConstants.NODESET)); return new TokenizerChain(charFilters.toArray(new CharFilterFactory[charFilters.size()]), tokenizers.get(0), filters.toArray(new TokenFilterFactory[filters.size()])); }
From source file:solr2155.solr.schema.GeoHashField.java
License:Apache License
@Override protected void init(IndexSchema schema, Map<String, String> args) { String len = args.remove("length"); gridReferenceSystem = new GridNode.GridReferenceSystem( len != null ? Integer.parseInt(len) : DEFAULT_LENGTH); CharFilterFactory[] filterFactories = new CharFilterFactory[0]; TokenFilterFactory[] tokenFilterFactories = new TokenFilterFactory[0]; analyzer = new TokenizerChain(filterFactories, new BaseTokenizerFactory() { public Tokenizer create(Reader input) { return new EdgeNGramTokenizer(input, EdgeNGramTokenizer.Side.FRONT, 1, Integer.MAX_VALUE); }//from w w w.j ava 2s . com }, tokenFilterFactories); //(leave default queryAnalyzer -- single token) //properties |= OMIT_NORMS; //can't do this since properties isn't public/protected }