Example usage for the org.apache.lucene.analysis.CharArraySet constructor CharArraySet(Collection<?>, boolean)

Introduction

On this page you can find example usages of the org.apache.lucene.analysis.CharArraySet constructor CharArraySet(Collection<?>, boolean).

Prototype

public CharArraySet(Collection<?> c, boolean ignoreCase)

Source Link

Document

Creates a set from a Collection of objects.

Usage

From source file:CommonResources.RanksNL.java

/**
 * Builds the stop-word set from the {@code RanksNLStpWrd} array.
 *
 * <p>The array is copied into a list and handed to {@link CharArraySet} with
 * {@code ignoreCase = true}.
 */
RanksNL() {
    // Diamond operator instead of the raw ArrayList type, so the copy is type-checked.
    // NOTE(review): assumes RanksNLStpWrd is a String[] - confirm against its declaration.
    List<String> stopWordsList = new ArrayList<>(Arrays.asList(RanksNLStpWrd));
    stopWords = new CharArraySet(stopWordsList, true);
}

From source file:de.dlr.knowledgefinder.dataimport.utils.transformer.ExcludeValuesTransformer.java

License:Apache License

/**
 * Returns the exclude-value set registered under {@code filename}, loading it on first use.
 *
 * <p>If {@code loadedFiles} has no entry for {@code filename}, the lines of that resource are
 * read through the Solr core's resource loader, wrapped in a {@link CharArraySet} created with
 * {@code ignoreCase = true}, and stored in {@code loadedFiles} before being returned.
 *
 * @param filename name of the resource passed to the resource loader; also the cache key
 * @return the cached or freshly loaded set
 * @throws IOException declared by this method; presumably raised when the resource cannot be read
 */
private CharArraySet initializeExcludeValues(String filename) throws IOException {
    CharArraySet excludeValues = loadedFiles.get(filename);
    if (excludeValues == null) {
        SolrCore core = context.getSolrCore();
        SolrResourceLoader loader = core.getResourceLoader();

        // Read the lines directly; no placeholder list is needed before this call.
        List<String> lines = loader.getLines(filename);

        excludeValues = new CharArraySet(lines, true);
        loadedFiles.put(filename, excludeValues);
    }
    // Return the instance we already hold instead of looking it up a second time.
    return excludeValues;
}

From source file:org.apache.jena.query.text.assembler.GenericAnalyzerAssembler.java

License:Apache License

/**
 * Builds a {@code ParamSpec} from the parameter description attached to {@code node}.
 *
 * <p>The name, type and value are read from the {@code TextVocab.pParamName},
 * {@code TextVocab.pParamType} and {@code TextVocab.pParamValue} properties of the node.
 * The local name of the type resource selects how the value is interpreted: string, file
 * reader, {@code CharArraySet}, int, boolean or nested analyzer.
 *
 * @param node resource carrying the parameter properties
 * @return the parameter specification, or {@code null} when the type is not one of the
 *         handled cases (an error is logged in that situation)
 * @throws TextIndexException if the type property is missing, a required value is missing or
 *         has the wrong kind, or the file named by a file parameter cannot be found
 */
private ParamSpec getParamSpec(Resource node) {
    Statement nameStmt = node.getProperty(TextVocab.pParamName);
    Statement typeStmt = node.getProperty(TextVocab.pParamType);
    Statement valueStmt = node.getProperty(TextVocab.pParamValue);

    if (typeStmt == null) {
        throw new TextIndexException("Parameter specification must have a text:paramType: " + node);
    }
    Resource typeRes = typeStmt.getResource();
    String type = typeRes.getLocalName();

    String name = getStringValue(nameStmt);
    String value = getStringValue(valueStmt);

    switch (type) {

    // String
    case TYPE_STRING: {
        if (value == null) {
            throw new TextIndexException("Value for string param: " + name + " must not be empty!");
        }

        return new ParamSpec(name, value, String.class);
    }

    // java.io.FileReader
    case TYPE_FILE: {
        if (value == null) {
            throw new TextIndexException("Value for file param must exist and must contain a file name.");
        }

        try {
            // The analyzer is responsible for closing the file
            Reader fileReader = new java.io.FileReader(value);
            return new ParamSpec(name, fileReader, Reader.class);
        } catch (java.io.FileNotFoundException ex) {
            // Keep the original exception as the cause so the underlying reason is not lost.
            throw new TextIndexException("File " + value + " for param " + name + " not found!", ex);
        }
    }

    // CharArraySet
    case TYPE_SET: {
        if (valueStmt == null) {
            throw new TextIndexException("A set param spec must have a text:paramValue:" + node);
        }

        RDFNode valueNode = valueStmt.getObject();
        if (!valueNode.isResource()) {
            throw new TextIndexException(
                    "A set param spec text:paramValue must be a list of strings: " + valueNode);
        }

        List<String> values = toStrings((Resource) valueNode);

        // ignoreCase = false: the set is built case-sensitive.
        return new ParamSpec(name, new CharArraySet(values, false), CharArraySet.class);
    }

    // int
    case TYPE_INT: {
        if (value == null) {
            throw new TextIndexException("Value for int param: " + name + " must not be empty!");
        }

        int n = ((Literal) valueStmt.getObject()).getInt();
        return new ParamSpec(name, n, int.class);
    }

    // boolean
    case TYPE_BOOL: {
        if (value == null) {
            throw new TextIndexException("Value for boolean param: " + name + " must not be empty!");
        }

        boolean b = ((Literal) valueStmt.getObject()).getBoolean();
        return new ParamSpec(name, b, boolean.class);
    }

    // org.apache.lucene.analysis.Analyzer
    case TYPE_ANALYZER: {
        if (valueStmt == null) {
            throw new TextIndexException("Analyzer param spec must have a text:paramValue:" + node);
        }

        RDFNode valueNode = valueStmt.getObject();
        if (!valueNode.isResource()) {
            throw new TextIndexException(
                    "Analyzer param spec text:paramValue must be an analyzer spec resource: " + valueNode);
        }

        Analyzer analyzer = (Analyzer) Assembler.general.open((Resource) valueNode);
        return new ParamSpec(name, analyzer, Analyzer.class);
    }

    default:
        // there was no match
        Log.error(this, "Unknown parameter type: " + type + " for param: " + name + " with value: " + value);
        break;
    }

    return null;
}

From source file:org.apache.jena.query.text.assembler.Params.java

License:Apache License

/**
 * Builds a {@code ParamSpec} from the parameter description attached to {@code node}.
 *
 * <p>The name and value are read from the {@code TextVocab.pParamName} and
 * {@code TextVocab.pParamValue} properties of the node; the type comes from
 * {@code getType(node)}. The type selects how the value is interpreted: string, file reader,
 * {@code CharArraySet}, int, boolean or nested analyzer.
 *
 * @param node resource carrying the parameter properties
 * @return the parameter specification, or {@code null} when the type is not one of the
 *         handled cases (an error is logged in that situation)
 * @throws TextIndexException if a required value is missing or has the wrong kind, or the file
 *         named by a file parameter cannot be found
 */
protected static ParamSpec getParamSpec(Resource node) {
    Statement nameStmt = node.getProperty(TextVocab.pParamName);
    Statement valueStmt = node.getProperty(TextVocab.pParamValue);

    String name = getStringValue(nameStmt);
    // NOTE(review): a null result from getType would make the switch below throw
    // NullPointerException - confirm getType never returns null.
    String type = getType(node);
    String value = getStringValue(valueStmt);

    switch (type) {

    // String
    case TYPE_STRING: {
        if (value == null) {
            throw new TextIndexException("Value for string param: " + name + " must not be empty!");
        }

        return new ParamSpec(name, value, String.class);
    }

    // java.io.FileReader
    case TYPE_FILE: {
        if (value == null) {
            throw new TextIndexException("Value for file param must exist and must contain a file name.");
        }

        try {
            // The analyzer is responsible for closing the file
            Reader fileReader = new java.io.FileReader(value);
            return new ParamSpec(name, fileReader, Reader.class);
        } catch (java.io.FileNotFoundException ex) {
            // Keep the original exception as the cause so the underlying reason is not lost.
            throw new TextIndexException("File " + value + " for param " + name + " not found!", ex);
        }
    }

    // CharArraySet
    case TYPE_SET: {
        if (valueStmt == null) {
            throw new TextIndexException("A set param spec must have a text:paramValue:" + node);
        }

        RDFNode valueNode = valueStmt.getObject();
        if (!valueNode.isResource()) {
            throw new TextIndexException(
                    "A set param spec text:paramValue must be a list of strings: " + valueNode);
        }

        List<String> values = toStrings((Resource) valueNode);

        // ignoreCase = false: the set is built case-sensitive.
        return new ParamSpec(name, new CharArraySet(values, false), CharArraySet.class);
    }

    // int
    case TYPE_INT: {
        if (value == null) {
            throw new TextIndexException("Value for int param: " + name + " must not be empty!");
        }

        int n = ((Literal) valueStmt.getObject()).getInt();
        return new ParamSpec(name, n, int.class);
    }

    // boolean
    case TYPE_BOOL: {
        if (value == null) {
            throw new TextIndexException("Value for boolean param: " + name + " must not be empty!");
        }

        boolean b = ((Literal) valueStmt.getObject()).getBoolean();
        return new ParamSpec(name, b, boolean.class);
    }

    // org.apache.lucene.analysis.Analyzer
    case TYPE_ANALYZER: {
        if (valueStmt == null) {
            throw new TextIndexException("Analyzer param spec must have a text:paramValue:" + node);
        }

        RDFNode valueNode = valueStmt.getObject();
        if (!valueNode.isResource()) {
            throw new TextIndexException(
                    "Analyzer param spec text:paramValue must be an analyzer spec resource: " + valueNode);
        }

        Analyzer analyzer = (Analyzer) Assembler.general.open((Resource) valueNode);
        return new ParamSpec(name, analyzer, Analyzer.class);
    }

    default:
        // there was no match
        Log.error("org.apache.jena.query.text.assembler.Params",
                "Unknown parameter type: " + type + " for param: " + name + " with value: " + value);
        break;
    }

    return null;
}

From source file:org.apache.jena.query.text.assembler.StandardAnalyzerAssembler.java

License:Apache License

/**
 * Wraps the result of {@code toList(list)} in a {@link CharArraySet}.
 *
 * @param list resource handed to {@code toList}
 * @return a new set created with {@code ignoreCase = false}
 */
private CharArraySet toCharArraySet(Resource list) {
    final boolean ignoreCase = false;
    CharArraySet result = new CharArraySet(toList(list), ignoreCase);
    return result;
}

From source file:org.apache.jena.query.text.filter.TestSelectiveFoldingFilter.java

License:Apache License

/**
 * With an empty white list every token is expected in its fully ASCII-folded form,
 * i.e. the default behaviour of Lucene's ASCIIFoldingFilter applies.
 *
 * @throws IOException from Lucene API
 */
@Test
public void testEmptyWhiteListIsOkay() throws IOException {
    List<String> expected = Arrays.asList("Senora", "Siobhan", "look", "at", "that", "facade");

    // Nothing is white-listed.
    whitelisted = new CharArraySet(Collections.emptyList(), false);
    List<String> actual = collectTokens(inputText, whitelisted);

    assertTrue(actual.equals(expected));
}

From source file:org.apache.jena.query.text.filter.TestSelectiveFoldingFilter.java

License:Apache License

/**
 * White-listing a single character: only that character is expected to survive folding,
 * all other accented characters are expected in their ASCII form.
 *
 * @throws IOException from Lucene API
 */
@Test
public void testSingleCharacterWhiteListed() throws IOException {
    // The non-ASCII literals had been stripped from this test (empty white-list entry,
    // "faade"). They are restored as Unicode escapes so they survive re-encoding:
    // U+00E7 is LATIN SMALL LETTER C WITH CEDILLA.
    // NOTE(review): character inferred from the expected tokens - confirm against upstream.
    whitelisted = new CharArraySet(Arrays.asList("\u00e7"), false);
    List<String> tokens = collectTokens(inputText, whitelisted);
    List<String> expected = Arrays.asList("Senora", "Siobhan", "look", "at", "that", "fa\u00e7ade");
    assertTrue(tokens.equals(expected));
}

From source file:org.apache.jena.query.text.filter.TestSelectiveFoldingFilter.java

License:Apache License

/**
 * White-listing every accented character of the input: the tokens are expected unchanged.
 *
 * @throws IOException from Lucene API
 */
@Test
public void testCompleteWhiteListed() throws IOException {
    // The non-ASCII literals had been stripped from this test ("Seora", "Siobhn", "faade",
    // empty white-list entries). They are restored as Unicode escapes so they survive
    // re-encoding: U+00F1 n with tilde, U+00E1 a with acute, U+00E7 c with cedilla.
    // NOTE(review): characters inferred from the damaged expected tokens - confirm against upstream.
    whitelisted = new CharArraySet(Arrays.asList("\u00f1", "\u00e1", "\u00e7"), false);
    List<String> tokens = collectTokens(inputText, whitelisted);
    // here we should have the complete input
    List<String> expected = Arrays.asList("Se\u00f1ora", "Siobh\u00e1n", "look", "at", "that", "fa\u00e7ade");
    assertTrue(tokens.equals(expected));
}

From source file:org.apache.jena.query.text.filter.TestSelectiveFoldingFilter.java

License:Apache License

/**
 * The white list is case-sensitive: an upper-case entry is not expected to protect the
 * lower-case character in the input, so that character is still folded.
 *
 * @throws IOException from Lucene API
 */
@Test
public void testCaseMatters() throws IOException {
    // The non-ASCII literals had been stripped from this test. They are restored as Unicode
    // escapes so they survive re-encoding.
    // note the first capital letter: U+00D1 is the upper-case N with tilde, followed by
    // U+00E1 a with acute and U+00E7 c with cedilla.
    // NOTE(review): characters inferred from the comment and expected tokens - confirm against upstream.
    whitelisted = new CharArraySet(Arrays.asList("\u00d1", "\u00e1", "\u00e7"), false);
    List<String> tokens = collectTokens(inputText, whitelisted);
    List<String> expected = Arrays.asList("Senora", "Siobh\u00e1n", "look", "at", "that", "fa\u00e7ade");
    assertTrue(tokens.equals(expected));
}

From source file:org.apache.jena.query.text.filter.TestSelectiveFoldingFilter.java

License:Apache License

/**
 * White-listing characters that do not occur in the input: every token is expected in its
 * fully ASCII-folded form.
 *
 * @throws IOException from Lucene API
 */
@Test
public void testMismatchWhiteList() throws IOException {
    // The three white-list entries had been stripped to empty strings, which collapse into a
    // single "" entry and no longer exercise a non-matching white list.
    // NOTE(review): the original characters could not be recovered; U+00FA (u with acute),
    // U+0107 (c with acute) and U+017E (z with caron) are substitutes chosen because none of
    // them appears in the expected tokens of this class - confirm against upstream.
    whitelisted = new CharArraySet(Arrays.asList("\u00fa", "\u0107", "\u017e"), false);
    List<String> tokens = collectTokens(inputText, whitelisted);
    List<String> expected = Arrays.asList("Senora", "Siobhan", "look", "at", "that", "facade");
    assertTrue(tokens.equals(expected));
}