Example usage for org.apache.lucene.analysis.util TokenizerFactory lookupClass

List of usage examples for org.apache.lucene.analysis.util TokenizerFactory lookupClass

Introduction

This page collects example usages of org.apache.lucene.analysis.util.TokenizerFactory.lookupClass.

Prototype

public static Class<? extends TokenizerFactory> lookupClass(String name) 

Source Link

Document

Looks up a tokenizer factory class by name from the context classpath.

Usage

From source file:com.grantingersoll.opengrok.analysis.BaseTokenStreamFactoryTestCase.java

License:Apache License

/**
 * Builds a {@link TokenizerFactory} for the given name.
 *
 * <p>The factory class is resolved with {@code TokenizerFactory.lookupClass(name)} and then handed,
 * together with the remaining arguments, to {@code analysisFactory}; the result is cast to
 * {@code TokenizerFactory}.
 *
 * @param name          name used to look up the tokenizer factory class
 * @param matchVersion  version forwarded unchanged to {@code analysisFactory}
 * @param loader        resource loader forwarded unchanged to {@code analysisFactory}
 * @param keysAndValues key-value arguments forwarded unchanged to {@code analysisFactory}
 * @return the object produced by {@code analysisFactory}, as a {@code TokenizerFactory}
 * @throws Exception if the lookup or {@code analysisFactory} fails
 */
protected TokenizerFactory tokenizerFactory(String name, Version matchVersion, ResourceLoader loader,
        String... keysAndValues) throws Exception {
    final Class<? extends TokenizerFactory> factoryClass = TokenizerFactory.lookupClass(name);
    final Object factory = analysisFactory(factoryClass, matchVersion, loader, keysAndValues);
    return (TokenizerFactory) factory;
}

From source file:org.apache.solr.core.SolrResourceLoader.java

License:Apache License

/**
 * This method loads a class either with it's FQN or a short-name (solr.class-simplename or class-simplename).
 * It tries to load the class with the name that is given first and if it fails, it tries all the known
 * solr packages. This method caches the FQN of a short-name in a static map in-order to make subsequent lookups
 * for the same class faster. The caching is done only if the class is loaded by the webapp classloader and it
 * is loaded using a shortname./*from  www .  j av  a  2s  . c om*/
 *
 * @param cname The name or the short name of the class.
 * @param subpackages the packages to be tried if the cname starts with solr.
 * @return the loaded class. An exception is thrown if it fails
 */
public <T> Class<? extends T> findClass(String cname, Class<T> expectedType, String... subpackages) {
    if (subpackages == null || subpackages.length == 0 || subpackages == packages) {
        subpackages = packages;
        String c = classNameCache.get(cname);
        if (c != null) {
            try {
                return Class.forName(c, true, classLoader).asSubclass(expectedType);
            } catch (ClassNotFoundException e) {
                //this is unlikely
                log.error("Unable to load cached class-name :  " + c + " for shortname : " + cname + e);
            }

        }
    }

    Class<? extends T> clazz = null;
    try {
        // first try legacy analysis patterns, now replaced by Lucene's Analysis package:
        final Matcher m = legacyAnalysisPattern.matcher(cname);
        if (m.matches()) {
            final String name = m.group(4);
            log.trace("Trying to load class from analysis SPI using name='{}'", name);
            try {
                if (CharFilterFactory.class.isAssignableFrom(expectedType)) {
                    return clazz = CharFilterFactory.lookupClass(name).asSubclass(expectedType);
                } else if (TokenizerFactory.class.isAssignableFrom(expectedType)) {
                    return clazz = TokenizerFactory.lookupClass(name).asSubclass(expectedType);
                } else if (TokenFilterFactory.class.isAssignableFrom(expectedType)) {
                    return clazz = TokenFilterFactory.lookupClass(name).asSubclass(expectedType);
                } else {
                    log.warn(
                            "'{}' looks like an analysis factory, but caller requested different class type: {}",
                            cname, expectedType.getName());
                }
            } catch (IllegalArgumentException ex) {
                // ok, we fall back to legacy loading
            }
        }

        // first try cname == full name
        try {
            return clazz = Class.forName(cname, true, classLoader).asSubclass(expectedType);
        } catch (ClassNotFoundException e) {
            String newName = cname;
            if (newName.startsWith(project)) {
                newName = cname.substring(project.length() + 1);
            }
            for (String subpackage : subpackages) {
                try {
                    String name = base + '.' + subpackage + newName;
                    log.trace("Trying class name " + name);
                    return clazz = Class.forName(name, true, classLoader).asSubclass(expectedType);
                } catch (ClassNotFoundException e1) {
                    // ignore... assume first exception is best.
                }
            }

            throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Error loading class '" + cname + "'",
                    e);
        }

    } finally {
        if (clazz != null) {
            //cache the shortname vs FQN if it is loaded by the webapp classloader  and it is loaded
            // using a shortname
            if (clazz.getClassLoader() == SolrResourceLoader.class.getClassLoader()
                    && !cname.equals(clazz.getName()) && (subpackages.length == 0 || subpackages == packages)) {
                //store in the cache
                classNameCache.put(cname, clazz.getName());
            }

            // print warning if class is deprecated
            if (clazz.isAnnotationPresent(Deprecated.class)) {
                log.warn(
                        "Solr loaded a deprecated plugin/analysis class [{}]. Please consult documentation how to replace it accordingly.",
                        cname);
            }
        }
    }
}

From source file:org.apache.tika.eval.tokens.AnalyzerDeserializer.java

License:Apache License

/**
 * Builds a {@link TokenizerFactory} from its JSON description.
 *
 * <p>The description must be a JSON object holding the factory class name under {@code FACTORY}
 * and the factory parameters under {@code PARAMS}. A class name starting with {@code "oala."} is
 * expanded to {@code "org.apache.lucene.analysis."}. The class name is then matched against the
 * classes of all available tokenizer SPI names to find the name to instantiate.
 *
 * @param map          JSON element describing the tokenizer factory
 * @param analyzerName name of the analyzer being built; used only in error messages
 * @return the instantiated factory, already informed of a classpath resource loader when it is
 *         {@code ResourceLoaderAware}
 * @throws IllegalArgumentException if the description is malformed, names a class that is not a
 *         registered tokenizer factory, or the factory rejects its parameters
 * @throws IOException declared for the {@code inform} call on resource-loader-aware factories
 */
private static TokenizerFactory buildTokenizerFactory(JsonElement map, String analyzerName) throws IOException {
    if (!(map instanceof JsonObject)) {
        // Concatenate map directly rather than calling map.toString(): a null element then yields
        // the intended IllegalArgumentException instead of a NullPointerException.
        throw new IllegalArgumentException("Expecting a map with \"factory\" string and "
                + "\"params\" map in tokenizer factory;" + " not: " + map + " in " + analyzerName);
    }
    JsonObject factoryDef = (JsonObject) map;
    JsonElement factoryEl = factoryDef.get(FACTORY);
    if (factoryEl == null || !factoryEl.isJsonPrimitive()) {
        throw new IllegalArgumentException(
                "Expecting value for factory in tokenizer factory builder in:" + analyzerName);
    }
    String factoryName = factoryEl.getAsString();
    if (factoryName.startsWith("oala.")) {
        // Literal prefix replacement; no regex needed.
        factoryName = "org.apache.lucene.analysis." + factoryName.substring("oala.".length());
    }

    JsonElement paramsEl = factoryDef.get(PARAMS);
    Map<String, String> params = mapify(paramsEl);

    // The SPI is keyed by short name, so search for the short name whose class matches factoryName.
    String spiName = null;
    for (String candidate : TokenizerFactory.availableTokenizers()) {
        Class<? extends TokenizerFactory> clazz = TokenizerFactory.lookupClass(candidate);
        if (clazz.getName().equals(factoryName)) {
            spiName = candidate;
            break;
        }
    }
    if (spiName == null) {
        throw new IllegalArgumentException(
                "A SPI class of type org.apache.lucene.analysis.util.TokenizerFactory with name '"
                        + factoryName + "' does not exist.");
    }
    try {
        TokenizerFactory tokenizerFactory = TokenizerFactory.forName(spiName, params);
        if (tokenizerFactory instanceof ResourceLoaderAware) {
            ((ResourceLoaderAware) tokenizerFactory)
                    .inform(new ClasspathResourceLoader(AnalyzerDeserializer.class));
        }

        return tokenizerFactory;
    } catch (IllegalArgumentException e) {
        // Add the analyzer name for context while keeping the original failure as the cause.
        throw new IllegalArgumentException("While working on " + analyzerName, e);
    }
}

From source file:org.elasticsearch.indices.analysis.AnalysisFactoryTestCase.java

License:Apache License

/**
 * Checks that every pre-configured token filter, tokenizer and char filter declares multi-term
 * support exactly when its Lucene factory class is a {@code MultiTermAwareComponent}.
 *
 * <p>For each component kind the test walks the names the test claims are pre-configured, asserts
 * the component is really registered, resolves the Lucene factory class (via the analysis SPI when
 * the map holds {@code null}; entries mapped to {@code Void.class} are skipped), and records the
 * component in {@code actual} if it reports multi-term support and in {@code expected} if the
 * Lucene factory is multi-term aware. At the end both sets must be equal.
 */
public void testPreBuiltMultiTermAware() {
    Collection<Object> expected = new HashSet<>();
    Collection<Object> actual = new HashSet<>();

    Map<String, PreConfiguredTokenFilter> preConfiguredTokenFilters = new HashMap<>(
            AnalysisModule.setupPreConfiguredTokenFilters(singletonList(plugin)));
    for (Map.Entry<String, Class<?>> entry : getPreConfiguredTokenFilters().entrySet()) {
        String name = entry.getKey();
        Class<?> luceneFactory = entry.getValue();
        PreConfiguredTokenFilter filter = preConfiguredTokenFilters.remove(name);
        assertNotNull("test claims pre built token filter [" + name + "] should be available but it wasn't",
                filter);
        if (luceneFactory == Void.class) {
            continue;
        }
        if (luceneFactory == null) {
            luceneFactory = TokenFilterFactory.lookupClass(toCamelCase(name));
        }
        assertThat(luceneFactory, typeCompatibleWith(TokenFilterFactory.class));
        if (filter.shouldUseFilterForMultitermQueries()) {
            actual.add("token filter [" + name + "]");
        }
        if (org.apache.lucene.analysis.util.MultiTermAwareComponent.class.isAssignableFrom(luceneFactory)) {
            expected.add("token filter [" + name + "]");
        }
    }
    // Every registered component was removed above when visited; leftovers are unknown to the test.
    assertThat("pre configured token filter not registered with test", preConfiguredTokenFilters.keySet(),
            empty());

    Map<String, PreConfiguredTokenizer> preConfiguredTokenizers = new HashMap<>(
            AnalysisModule.setupPreConfiguredTokenizers(singletonList(plugin)));
    for (Map.Entry<String, Class<?>> entry : getPreConfiguredTokenizers().entrySet()) {
        String name = entry.getKey();
        Class<?> luceneFactory = entry.getValue();
        PreConfiguredTokenizer tokenizer = preConfiguredTokenizers.remove(name);
        assertNotNull("test claims pre built tokenizer [" + name + "] should be available but it wasn't",
                tokenizer);
        if (luceneFactory == Void.class) {
            continue;
        }
        if (luceneFactory == null) {
            luceneFactory = TokenizerFactory.lookupClass(toCamelCase(name));
        }
        assertThat(luceneFactory, typeCompatibleWith(TokenizerFactory.class));
        if (tokenizer.hasMultiTermComponent()) {
            actual.add(tokenizer);
        }
        if (org.apache.lucene.analysis.util.MultiTermAwareComponent.class.isAssignableFrom(luceneFactory)) {
            expected.add(tokenizer);
        }
    }
    assertThat("pre configured tokenizer not registered with test", preConfiguredTokenizers.keySet(), empty());

    Map<String, PreConfiguredCharFilter> preConfiguredCharFilters = new HashMap<>(
            AnalysisModule.setupPreConfiguredCharFilters(singletonList(plugin)));
    for (Map.Entry<String, Class<?>> entry : getPreConfiguredCharFilters().entrySet()) {
        String name = entry.getKey();
        Class<?> luceneFactory = entry.getValue();
        PreConfiguredCharFilter filter = preConfiguredCharFilters.remove(name);
        assertNotNull("test claims pre built char filter [" + name + "] should be available but it wasn't",
                filter);
        if (luceneFactory == Void.class) {
            continue;
        }
        if (luceneFactory == null) {
            // Char filters must be resolved through the char filter SPI; the assertion below
            // requires a CharFilterFactory.
            luceneFactory = CharFilterFactory.lookupClass(toCamelCase(name));
        }
        assertThat(luceneFactory, typeCompatibleWith(CharFilterFactory.class));
        // Use the same label on both sides so that matching entries cancel out in the set
        // differences computed below.
        if (filter.shouldUseFilterForMultitermQueries()) {
            actual.add("char filter [" + name + "]");
        }
        if (org.apache.lucene.analysis.util.MultiTermAwareComponent.class.isAssignableFrom(luceneFactory)) {
            expected.add("char filter [" + name + "]");
        }
    }
    assertThat("pre configured char filter not registered with test", preConfiguredCharFilters.keySet(),
            empty());

    // expected \ actual: Lucene says multi-term aware, but the pre-built component does not.
    Set<Object> classesMissingMultiTermSupport = new HashSet<>(expected);
    classesMissingMultiTermSupport.removeAll(actual);
    assertTrue("Pre-built components are missing multi-term support: " + classesMissingMultiTermSupport,
            classesMissingMultiTermSupport.isEmpty());

    // actual \ expected: the pre-built component claims support that Lucene's factory lacks.
    Set<Object> classesThatShouldNotHaveMultiTermSupport = new HashSet<>(actual);
    classesThatShouldNotHaveMultiTermSupport.removeAll(expected);
    assertTrue(
            "Pre-built components should not have multi-term support: "
                    + classesThatShouldNotHaveMultiTermSupport,
            classesThatShouldNotHaveMultiTermSupport.isEmpty());
}

From source file:org.liveSense.service.solr.api.OverridedSolrResourceLoader.java

License:Apache License

/**
 * This method loads a class either with it's FQN or a short-name (solr.class-simplename or class-simplename).
 * It tries to load the class with the name that is given first and if it fails, it tries all the known
 * solr packages. This method caches the FQN of a short-name in a static map in-order to make subsequent lookups
 * for the same class faster. The caching is done only if the class is loaded by the webapp classloader and it
 * is loaded using a shortname./*from   www  .  ja v  a 2 s .  c om*/
 *
 * @param cname The name or the short name of the class.
 * @param subpackages the packages to be tried if the cnams starts with solr.
 * @return the loaded class. An exception is thrown if it fails
 */
public <T> Class<? extends T> findClass(String cname, Class<T> expectedType, String... subpackages) {
    if (subpackages == null || subpackages.length == 0 || subpackages == packages) {
        subpackages = packages;
        String c = classNameCache.get(cname);
        if (c != null) {
            try {
                return Class.forName(c, true, classLoader).asSubclass(expectedType);
            } catch (ClassNotFoundException e) {
                //this is unlikely
                log.error("Unable to load cached class-name :  " + c + " for shortname : " + cname + e);
            }

        }
    }
    Class<? extends T> clazz = null;

    // first try legacy analysis patterns, now replaced by Lucene's Analysis package:
    final Matcher m = legacyAnalysisPattern.matcher(cname);
    if (m.matches()) {
        final String name = m.group(4);
        log.trace("Trying to load class from analysis SPI using name='{}'", name);
        try {
            if (CharFilterFactory.class.isAssignableFrom(expectedType)) {
                return clazz = CharFilterFactory.lookupClass(name).asSubclass(expectedType);
            } else if (TokenizerFactory.class.isAssignableFrom(expectedType)) {
                return clazz = TokenizerFactory.lookupClass(name).asSubclass(expectedType);
            } else if (TokenFilterFactory.class.isAssignableFrom(expectedType)) {
                return clazz = TokenFilterFactory.lookupClass(name).asSubclass(expectedType);
            } else {
                log.warn("'{}' looks like an analysis factory, but caller requested different class type: {}",
                        cname, expectedType.getName());
            }
        } catch (IllegalArgumentException ex) {
            // ok, we fall back to legacy loading
        }
    }

    // first try cname == full name
    try {
        return Class.forName(cname, true, classLoader).asSubclass(expectedType);
    } catch (ClassNotFoundException e) {
        String newName = cname;
        if (newName.startsWith(project)) {
            newName = cname.substring(project.length() + 1);
        }
        for (String subpackage : subpackages) {
            try {
                String name = base + '.' + subpackage + newName;
                log.trace("Trying class name " + name);
                return clazz = Class.forName(name, true, classLoader).asSubclass(expectedType);
            } catch (ClassNotFoundException e1) {
                // ignore... assume first exception is best.
            }
        }

        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Error loading class '" + cname + "'", e);
    } finally {
        //cache the shortname vs FQN if it is loaded by the webapp classloader  and it is loaded
        // using a shortname
        if (clazz != null && clazz.getClassLoader() == OverridedSolrResourceLoader.class.getClassLoader()
                && !cname.equals(clazz.getName()) && (subpackages.length == 0 || subpackages == packages)) {
            //store in the cache
            classNameCache.put(cname, clazz.getName());
        }
    }
}

From source file:org.tallison.gramreaper.ingest.schema.AnalyzerDeserializer.java

License:Apache License

/**
 * Builds a {@link TokenizerFactory} from its JSON description.
 *
 * <p>The description must be a JSON object holding the factory class name under {@code FACTORY}
 * and the factory parameters under {@code PARAMS}. A class name starting with {@code "oala."} is
 * expanded to {@code "org.apache.lucene.analysis."}. The class name is then matched against the
 * classes of all available tokenizer SPI names to find the name to instantiate.
 *
 * @param map          JSON element describing the tokenizer factory
 * @param analyzerName name of the analyzer being built; used only in error messages
 * @return the instantiated factory, already informed of a classpath resource loader when it is
 *         {@code ResourceLoaderAware}
 * @throws IllegalArgumentException if the description is malformed, names a class that is not a
 *         registered tokenizer factory, or the factory rejects its parameters
 * @throws IOException declared for the {@code inform} call on resource-loader-aware factories
 */
private static TokenizerFactory buildTokenizerFactory(JsonElement map, String analyzerName) throws IOException {
    if (!(map instanceof JsonObject)) {
        // Concatenate map directly rather than calling map.toString(): a null element then yields
        // the intended IllegalArgumentException instead of a NullPointerException.
        throw new IllegalArgumentException("Expecting a map with \"factory\" string and "
                + "\"params\" map in tokenizer factory;" + " not: " + map + " in " + analyzerName);
    }
    JsonObject factoryDef = (JsonObject) map;
    JsonElement factoryEl = factoryDef.get(FACTORY);
    if (factoryEl == null || !factoryEl.isJsonPrimitive()) {
        throw new IllegalArgumentException(
                "Expecting value for factory in tokenizer factory builder in:" + analyzerName);
    }
    String factoryName = factoryEl.getAsString();
    if (factoryName.startsWith("oala.")) {
        // Literal prefix replacement; no regex needed.
        factoryName = "org.apache.lucene.analysis." + factoryName.substring("oala.".length());
    }

    JsonElement paramsEl = factoryDef.get(PARAMS);
    Map<String, String> params = mapify(paramsEl);

    // The SPI is keyed by short name, so search for the short name whose class matches factoryName.
    String spiName = null;
    for (String candidate : TokenizerFactory.availableTokenizers()) {
        Class<? extends TokenizerFactory> clazz = TokenizerFactory.lookupClass(candidate);
        if (clazz.getName().equals(factoryName)) {
            spiName = candidate;
            break;
        }
    }
    if (spiName == null) {
        // Fail here with the requested class name; otherwise forName would be asked for an empty
        // SPI name and the resulting error would not say which factory was missing.
        throw new IllegalArgumentException(
                "A SPI class of type org.apache.lucene.analysis.util.TokenizerFactory with name '"
                        + factoryName + "' does not exist.");
    }
    try {
        TokenizerFactory tokenizerFactory = TokenizerFactory.forName(spiName, params);
        if (tokenizerFactory instanceof ResourceLoaderAware) {
            ((ResourceLoaderAware) tokenizerFactory)
                    .inform(new ClasspathResourceLoader(AnalyzerDeserializer.class));
        }

        return tokenizerFactory;
    } catch (IllegalArgumentException e) {
        // Add the analyzer name for context while keeping the original failure as the cause.
        throw new IllegalArgumentException("While working on " + analyzerName, e);
    }
}