org.apache.lucene.analysis.util.AbstractAnalysisFactory.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.lucene.analysis.util.AbstractAnalysisFactory.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.analysis.util;

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.lang.reflect.Field;
import java.lang.reflect.Modifier;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;
import java.nio.charset.StandardCharsets;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.Version;

/**
 * Abstract parent class for analysis factories {@link TokenizerFactory},
 * {@link TokenFilterFactory} and {@link CharFilterFactory}.
 * <p>
 * The typical lifecycle for a factory consumer is:
 * <ol>
 *   <li>Create factory via its constructor (or via XXXFactory.forName)
 *   <li>(Optional) If the factory uses resources such as files, {@link ResourceLoaderAware#inform(ResourceLoader)} is called to initialize those resources.
 *   <li>Consumer calls create() to obtain instances.
 * </ol>
 */
public abstract class AbstractAnalysisFactory {
    public static final String LUCENE_MATCH_VERSION_PARAM = "luceneMatchVersion";

    /** The original args, before any processing */
    private final Map<String, String> originalArgs;

    /** the luceneVersion arg */
    protected final Version luceneMatchVersion;
    /** whether the luceneMatchVersion arg is explicitly specified in the serialized schema */
    private boolean isExplicitLuceneMatchVersion = false;

    /**
     * Initialize this factory via a set of key-value pairs.
     */
    protected AbstractAnalysisFactory(Map<String, String> args) {
        originalArgs = Collections.unmodifiableMap(new HashMap<>(args));
        String version = get(args, LUCENE_MATCH_VERSION_PARAM);
        if (version == null) {
            luceneMatchVersion = Version.LATEST;
        } else {
            try {
                luceneMatchVersion = Version.parseLeniently(version);
            } catch (ParseException pe) {
                throw new IllegalArgumentException(pe);
            }
        }
        args.remove(CLASS_NAME); // consume the class arg
    }

    public final Map<String, String> getOriginalArgs() {
        return originalArgs;
    }

    public final Version getLuceneMatchVersion() {
        return this.luceneMatchVersion;
    }

    public String require(Map<String, String> args, String name) {
        String s = args.remove(name);
        if (s == null) {
            throw new IllegalArgumentException("Configuration Error: missing parameter '" + name + "'");
        }
        return s;
    }

    public String require(Map<String, String> args, String name, Collection<String> allowedValues) {
        return require(args, name, allowedValues, true);
    }

    public String require(Map<String, String> args, String name, Collection<String> allowedValues,
            boolean caseSensitive) {
        String s = args.remove(name);
        if (s == null) {
            throw new IllegalArgumentException("Configuration Error: missing parameter '" + name + "'");
        } else {
            for (String allowedValue : allowedValues) {
                if (caseSensitive) {
                    if (s.equals(allowedValue)) {
                        return s;
                    }
                } else {
                    if (s.equalsIgnoreCase(allowedValue)) {
                        return s;
                    }
                }
            }
            throw new IllegalArgumentException(
                    "Configuration Error: '" + name + "' value must be one of " + allowedValues);
        }
    }

    public String get(Map<String, String> args, String name) {
        return args.remove(name); // defaultVal = null
    }

    public String get(Map<String, String> args, String name, String defaultVal) {
        String s = args.remove(name);
        return s == null ? defaultVal : s;
    }

    public String get(Map<String, String> args, String name, Collection<String> allowedValues) {
        return get(args, name, allowedValues, null); // defaultVal = null
    }

    public String get(Map<String, String> args, String name, Collection<String> allowedValues, String defaultVal) {
        return get(args, name, allowedValues, defaultVal, true);
    }

    public String get(Map<String, String> args, String name, Collection<String> allowedValues, String defaultVal,
            boolean caseSensitive) {
        String s = args.remove(name);
        if (s == null) {
            return defaultVal;
        } else {
            for (String allowedValue : allowedValues) {
                if (caseSensitive) {
                    if (s.equals(allowedValue)) {
                        return s;
                    }
                } else {
                    if (s.equalsIgnoreCase(allowedValue)) {
                        return s;
                    }
                }
            }
            throw new IllegalArgumentException(
                    "Configuration Error: '" + name + "' value must be one of " + allowedValues);
        }
    }

    protected final int requireInt(Map<String, String> args, String name) {
        return Integer.parseInt(require(args, name));
    }

    protected final int getInt(Map<String, String> args, String name, int defaultVal) {
        String s = args.remove(name);
        return s == null ? defaultVal : Integer.parseInt(s);
    }

    protected final boolean requireBoolean(Map<String, String> args, String name) {
        return Boolean.parseBoolean(require(args, name));
    }

    protected final boolean getBoolean(Map<String, String> args, String name, boolean defaultVal) {
        String s = args.remove(name);
        return s == null ? defaultVal : Boolean.parseBoolean(s);
    }

    protected final float requireFloat(Map<String, String> args, String name) {
        return Float.parseFloat(require(args, name));
    }

    protected final float getFloat(Map<String, String> args, String name, float defaultVal) {
        String s = args.remove(name);
        return s == null ? defaultVal : Float.parseFloat(s);
    }

    public char requireChar(Map<String, String> args, String name) {
        return require(args, name).charAt(0);
    }

    public char getChar(Map<String, String> args, String name, char defaultValue) {
        String s = args.remove(name);
        if (s == null) {
            return defaultValue;
        } else {
            if (s.length() != 1) {
                throw new IllegalArgumentException(name + " should be a char. \"" + s + "\" is invalid");
            } else {
                return s.charAt(0);
            }
        }
    }

    private static final Pattern ITEM_PATTERN = Pattern.compile("[^,\\s]+");

    /** Returns whitespace- and/or comma-separated set of values, or null if none are found */
    public Set<String> getSet(Map<String, String> args, String name) {
        String s = args.remove(name);
        if (s == null) {
            return null;
        } else {
            Set<String> set = null;
            Matcher matcher = ITEM_PATTERN.matcher(s);
            if (matcher.find()) {
                set = new HashSet<>();
                set.add(matcher.group(0));
                while (matcher.find()) {
                    set.add(matcher.group(0));
                }
            }
            return set;
        }
    }

    /**
     * Compiles a pattern for the value of the specified argument key <code>name</code> 
     */
    protected final Pattern getPattern(Map<String, String> args, String name) {
        try {
            return Pattern.compile(require(args, name));
        } catch (PatternSyntaxException e) {
            throw new IllegalArgumentException(
                    "Configuration Error: '" + name + "' can not be parsed in " + this.getClass().getSimpleName(),
                    e);
        }
    }

    /**
     * Returns as {@link CharArraySet} from wordFiles, which
     * can be a comma-separated list of filenames
     */
    protected final CharArraySet getWordSet(ResourceLoader loader, String wordFiles, boolean ignoreCase)
            throws IOException {
        List<String> files = splitFileNames(wordFiles);
        CharArraySet words = null;
        if (files.size() > 0) {
            // default stopwords list has 35 or so words, but maybe don't make it that
            // big to start
            words = new CharArraySet(files.size() * 10, ignoreCase);
            for (String file : files) {
                List<String> wlist = getLines(loader, file.trim());
                words.addAll(StopFilter.makeStopSet(wlist, ignoreCase));
            }
        }
        return words;
    }

    /**
     * Returns the resource's lines (with content treated as UTF-8)
     */
    protected final List<String> getLines(ResourceLoader loader, String resource) throws IOException {
        return WordlistLoader.getLines(loader.openResource(resource), StandardCharsets.UTF_8);
    }

    /** same as {@link #getWordSet(ResourceLoader, String, boolean)},
     * except the input is in snowball format. */
    protected final CharArraySet getSnowballWordSet(ResourceLoader loader, String wordFiles, boolean ignoreCase)
            throws IOException {
        List<String> files = splitFileNames(wordFiles);
        CharArraySet words = null;
        if (files.size() > 0) {
            // default stopwords list has 35 or so words, but maybe don't make it that
            // big to start
            words = new CharArraySet(files.size() * 10, ignoreCase);
            for (String file : files) {
                InputStream stream = null;
                Reader reader = null;
                try {
                    stream = loader.openResource(file.trim());
                    CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder()
                            .onMalformedInput(CodingErrorAction.REPORT)
                            .onUnmappableCharacter(CodingErrorAction.REPORT);
                    reader = new InputStreamReader(stream, decoder);
                    WordlistLoader.getSnowballWordSet(reader, words);
                } finally {
                    IOUtils.closeWhileHandlingException(reader, stream);
                }
            }
        }
        return words;
    }

    /**
     * Splits file names separated by comma character.
     * File names can contain comma characters escaped by backslash '\'
     *
     * @param fileNames the string containing file names
     * @return a list of file names with the escaping backslashed removed
     */
    protected final List<String> splitFileNames(String fileNames) {
        return splitAt(',', fileNames);
    }

    /**
     * Splits a list separated by zero or more given separator characters.
     * List items can contain comma characters escaped by backslash '\'.
     * Whitespace is NOT trimmed from the returned list items.
     *
     * @param list the string containing the split list items
     * @return a list of items with the escaping backslashes removed
     */
    protected final List<String> splitAt(char separator, String list) {
        if (list == null)
            return Collections.emptyList();

        List<String> result = new ArrayList<>();
        for (String item : list.split("(?<!\\\\)[" + separator + "]")) {
            result.add(item.replaceAll("\\\\(?=[" + separator + "])", ""));
        }

        return result;
    }

    private static final String CLASS_NAME = "class";

    /**
     * @return the string used to specify the concrete class name in a serialized representation: the class arg.  
     *         If the concrete class name was not specified via a class arg, returns {@code getClass().getName()}.
     */
    public String getClassArg() {
        if (null != originalArgs) {
            String className = originalArgs.get(CLASS_NAME);
            if (null != className) {
                return className;
            }
        }
        return getClass().getName();
    }

    public boolean isExplicitLuceneMatchVersion() {
        return isExplicitLuceneMatchVersion;
    }

    public void setExplicitLuceneMatchVersion(boolean isExplicitLuceneMatchVersion) {
        this.isExplicitLuceneMatchVersion = isExplicitLuceneMatchVersion;
    }

    /**
     * Looks up SPI name (static "NAME" field) with appropriate modifiers.
     * Also it must be a String class and declared in the concrete class.
     * @return the SPI name
     * @throws NoSuchFieldException - if the "NAME" field is not defined.
     * @throws IllegalAccessException - if the "NAME" field is inaccessible.
     * @throws IllegalStateException - if the "NAME" field does not have appropriate modifiers or isn't a String field.
     */
    static String lookupSPIName(Class<? extends AbstractAnalysisFactory> service)
            throws NoSuchFieldException, IllegalAccessException, IllegalStateException {
        final Field field = service.getField("NAME");
        int modifier = field.getModifiers();
        if (Modifier.isStatic(modifier) && Modifier.isFinal(modifier) && field.getType().equals(String.class)
                && Objects.equals(field.getDeclaringClass(), service)) {
            return ((String) field.get(null));
        }
        throw new IllegalStateException("No SPI name defined.");
    }

    /**
     * Generate legacy SPI name derived from the class name.
     * @return the SPI name
     */
    @Deprecated
    static String generateLegacySPIName(Class<? extends AbstractAnalysisFactory> service, String[] suffixes) {
        final String clazzName = service.getSimpleName();
        String name = null;
        for (String suffix : suffixes) {
            if (clazzName.endsWith(suffix)) {
                name = clazzName.substring(0, clazzName.length() - suffix.length()).toLowerCase(Locale.ROOT);
                break;
            }
        }
        return name;
    }
}