Example usage for the org.apache.lucene.analysis.CharArraySet constructor CharArraySet(Collection, boolean)

List of usage examples for the org.apache.lucene.analysis.CharArraySet constructor CharArraySet(Collection, boolean)

Introduction

On this page you can find example usages of the org.apache.lucene.analysis.CharArraySet constructor CharArraySet(Collection<?> c, boolean ignoreCase).

Prototype

public CharArraySet(Collection<?> c, boolean ignoreCase) 

Source Link

Document

Creates a set from a Collection of objects.

Usage

From source file:CommonResources.RanksNL.java

/**
 * Builds the stop-word set from the {@code RanksNLStpWrd} word array.
 *
 * <p>The words are copied into a list and handed to {@link CharArraySet}
 * with {@code ignoreCase = true}, so lookups in {@code stopWords} ignore case.
 * NOTE(review): assumes {@code RanksNLStpWrd} is a {@code String[]} declared
 * elsewhere in this class - confirm.
 */
RanksNL() {
    // Diamond instead of the raw ArrayList type: keeps the assignment type-checked.
    List<String> stopWordsList = new ArrayList<>(Arrays.asList(RanksNLStpWrd));
    stopWords = new CharArraySet(stopWordsList, true);
}

From source file:de.dlr.knowledgefinder.dataimport.utils.transformer.ExcludeValuesTransformer.java

License:Apache License

/**
 * Returns the exclude-value set for {@code filename}, loading it on first use.
 *
 * <p>If {@code loadedFiles} has no entry for the file, its lines are read
 * through the Solr core's resource loader, wrapped in a case-insensitive
 * {@link CharArraySet}, and stored in {@code loadedFiles} for later calls.
 *
 * @param filename name of the resource holding one exclude value per line
 * @return the set of exclude values for {@code filename}
 * @throws IOException if the resource loader fails to read the file
 */
private CharArraySet initializeExcludeValues(String filename) throws IOException {
    CharArraySet excludeValues = loadedFiles.get(filename);
    if (excludeValues == null) {
        SolrCore core = context.getSolrCore();
        SolrResourceLoader loader = core.getResourceLoader();

        List<String> lines = loader.getLines(filename);
        excludeValues = new CharArraySet(lines, true);
        loadedFiles.put(filename, excludeValues);
    }
    // Return the instance we already hold instead of looking it up a second time.
    return excludeValues;
}

From source file:org.apache.jena.query.text.assembler.GenericAnalyzerAssembler.java

License:Apache License

/**
 * Builds a {@link ParamSpec} from the parameter description attached to {@code node}.
 *
 * <p>The name, type and value are read from the {@code TextVocab.pParamName},
 * {@code TextVocab.pParamType} and {@code TextVocab.pParamValue} properties.
 * The local name of the type resource selects how the value is converted:
 * string, file reader, string set, int, boolean or nested analyzer.
 *
 * @param node resource carrying the parameter specification
 * @return the parameter spec, or {@code null} (after logging an error) when
 *         the type is none of the handled types
 * @throws TextIndexException if the type statement is missing, a required
 *         value is missing or has the wrong kind, or a file parameter names
 *         a file that cannot be found
 */
private ParamSpec getParamSpec(Resource node) {
    Statement nameStmt = node.getProperty(TextVocab.pParamName);
    Statement typeStmt = node.getProperty(TextVocab.pParamType);
    Statement valueStmt = node.getProperty(TextVocab.pParamValue);

    if (typeStmt == null) {
        throw new TextIndexException("Parameter specification must have a text:paramType: " + node);
    }
    Resource typeRes = typeStmt.getResource();
    String type = typeRes.getLocalName();

    String name = getStringValue(nameStmt);
    String value = getStringValue(valueStmt);

    switch (type) {

    // String
    case TYPE_STRING: {
        if (value == null) {
            throw new TextIndexException("Value for string param: " + name + " must not be empty!");
        }

        return new ParamSpec(name, value, String.class);
    }

    // java.io.FileReader
    case TYPE_FILE: {
        if (value == null) {
            throw new TextIndexException("Value for file param must exist and must contain a file name.");
        }

        try {
            // The analyzer is responsible for closing the file
            Reader fileReader = new java.io.FileReader(value);
            return new ParamSpec(name, fileReader, Reader.class);

        } catch (java.io.FileNotFoundException ex) {
            // Keep the original exception as the cause so the stack trace is not lost.
            throw new TextIndexException("File " + value + " for param " + name + " not found!", ex);
        }
    }

    // CharArraySet built from a list of strings
    case TYPE_SET: {
        if (valueStmt == null) {
            throw new TextIndexException("A set param spec must have a text:paramValue:" + node);
        }

        RDFNode valueNode = valueStmt.getObject();
        if (!valueNode.isResource()) {
            throw new TextIndexException(
                    "A set param spec text:paramValue must be a list of strings: " + valueNode);
        }

        List<String> values = toStrings((Resource) valueNode);

        // ignoreCase = false: the set matches entries case-sensitively
        return new ParamSpec(name, new CharArraySet(values, false), CharArraySet.class);
    }

    // int
    case TYPE_INT: {
        if (value == null) {
            throw new TextIndexException("Value for int param: " + name + " must not be empty!");
        }

        int n = ((Literal) valueStmt.getObject()).getInt();
        return new ParamSpec(name, n, int.class);
    }

    // boolean
    case TYPE_BOOL: {
        if (value == null) {
            throw new TextIndexException("Value for boolean param: " + name + " must not be empty!");
        }

        boolean b = ((Literal) valueStmt.getObject()).getBoolean();
        return new ParamSpec(name, b, boolean.class);
    }

    // org.apache.lucene.analysis.Analyzer
    case TYPE_ANALYZER: {
        if (valueStmt == null) {
            throw new TextIndexException("Analyzer param spec must have a text:paramValue:" + node);
        }

        RDFNode valueNode = valueStmt.getObject();
        if (!valueNode.isResource()) {
            throw new TextIndexException(
                    "Analyzer param spec text:paramValue must be an analyzer spec resource: " + valueNode);
        }

        // The nested analyzer is assembled from its own spec resource.
        Analyzer analyzer = (Analyzer) Assembler.general.open((Resource) valueNode);
        return new ParamSpec(name, analyzer, Analyzer.class);
    }

    default:
        // there was no match
        Log.error(this, "Unknown parameter type: " + type + " for param: " + name + " with value: " + value);
        break;
    }

    return null;
}

From source file:org.apache.jena.query.text.assembler.Params.java

License:Apache License

/**
 * Builds a {@link ParamSpec} from the parameter description attached to {@code node}.
 *
 * <p>The name and value are read from the {@code TextVocab.pParamName} and
 * {@code TextVocab.pParamValue} properties; the type comes from
 * {@code getType(node)}. The type selects how the value is converted:
 * string, file reader, string set, int, boolean or nested analyzer.
 *
 * @param node resource carrying the parameter specification
 * @return the parameter spec, or {@code null} (after logging an error) when
 *         the type is none of the handled types
 * @throws TextIndexException if a required value is missing or has the wrong
 *         kind, or a file parameter names a file that cannot be found
 */
protected static ParamSpec getParamSpec(Resource node) {
    Statement nameStmt = node.getProperty(TextVocab.pParamName);
    Statement valueStmt = node.getProperty(TextVocab.pParamValue);

    String name = getStringValue(nameStmt);
    String type = getType(node);
    String value = getStringValue(valueStmt);

    switch (type) {

    // String
    case TYPE_STRING: {
        if (value == null) {
            throw new TextIndexException("Value for string param: " + name + " must not be empty!");
        }

        return new ParamSpec(name, value, String.class);
    }

    // java.io.FileReader
    case TYPE_FILE: {
        if (value == null) {
            throw new TextIndexException("Value for file param must exist and must contain a file name.");
        }

        try {
            // The analyzer is responsible for closing the file
            Reader fileReader = new java.io.FileReader(value);
            return new ParamSpec(name, fileReader, Reader.class);

        } catch (java.io.FileNotFoundException ex) {
            // Keep the original exception as the cause so the stack trace is not lost.
            throw new TextIndexException("File " + value + " for param " + name + " not found!", ex);
        }
    }

    // CharArraySet built from a list of strings
    case TYPE_SET: {
        if (valueStmt == null) {
            throw new TextIndexException("A set param spec must have a text:paramValue:" + node);
        }

        RDFNode valueNode = valueStmt.getObject();
        if (!valueNode.isResource()) {
            throw new TextIndexException(
                    "A set param spec text:paramValue must be a list of strings: " + valueNode);
        }

        List<String> values = toStrings((Resource) valueNode);

        // ignoreCase = false: the set matches entries case-sensitively
        return new ParamSpec(name, new CharArraySet(values, false), CharArraySet.class);
    }

    // int
    case TYPE_INT: {
        if (value == null) {
            throw new TextIndexException("Value for int param: " + name + " must not be empty!");
        }

        int n = ((Literal) valueStmt.getObject()).getInt();
        return new ParamSpec(name, n, int.class);
    }

    // boolean
    case TYPE_BOOL: {
        if (value == null) {
            throw new TextIndexException("Value for boolean param: " + name + " must not be empty!");
        }

        boolean b = ((Literal) valueStmt.getObject()).getBoolean();
        return new ParamSpec(name, b, boolean.class);
    }

    // org.apache.lucene.analysis.Analyzer
    case TYPE_ANALYZER: {
        if (valueStmt == null) {
            throw new TextIndexException("Analyzer param spec must have a text:paramValue:" + node);
        }

        RDFNode valueNode = valueStmt.getObject();
        if (!valueNode.isResource()) {
            throw new TextIndexException(
                    "Analyzer param spec text:paramValue must be an analyzer spec resource: " + valueNode);
        }

        // The nested analyzer is assembled from its own spec resource.
        Analyzer analyzer = (Analyzer) Assembler.general.open((Resource) valueNode);
        return new ParamSpec(name, analyzer, Analyzer.class);
    }

    default:
        // there was no match
        Log.error("org.apache.jena.query.text.assembler.Params",
                "Unknown parameter type: " + type + " for param: " + name + " with value: " + value);
        break;
    }

    return null;
}

From source file:org.apache.jena.query.text.assembler.StandardAnalyzerAssembler.java

License:Apache License

/**
 * Converts the given list resource into a {@link CharArraySet}.
 *
 * @param list resource passed on to {@code toList} to obtain the entries
 * @return a set of those entries that does not ignore case
 */
private CharArraySet toCharArraySet(Resource list) {
    final boolean ignoreCase = false;
    return new CharArraySet(toList(list), ignoreCase);
}

From source file:org.apache.jena.query.text.filter.TestSelectiveFoldingFilter.java

License:Apache License

/**
 * With an empty white list the default behaviour of Lucene's
 * ASCIIFoldingFilter applies, so every token is expected in folded form.
 *
 * @throws IOException from Lucene API
 */
@Test
public void testEmptyWhiteListIsOkay() throws IOException {
    whitelisted = new CharArraySet(Collections.emptyList(), false);
    List<String> expectedTokens = Arrays.asList("Senora", "Siobhan", "look", "at", "that", "facade");
    List<String> actualTokens = collectTokens(inputText, whitelisted);
    assertTrue(actualTokens.equals(expectedTokens));
}

From source file:org.apache.jena.query.text.filter.TestSelectiveFoldingFilter.java

License:Apache License

/**
 * White-listing a single character keeps that character unfolded while all
 * other tokens are still folded to ASCII.
 *
 * <p>The non-ASCII characters are written as Unicode escapes so the literals
 * survive any source-encoding mishap.
 * NOTE(review): {@code inputText} is declared outside this block; presumably it
 * contains the c-cedilla spelling of "facade" - confirm.
 *
 * @throws IOException from Lucene API
 */
@Test
public void testSingleCharacterWhiteListed() throws IOException {
    // U+00E7 = c with cedilla
    whitelisted = new CharArraySet(Arrays.asList("\u00e7"), false);
    List<String> tokens = collectTokens(inputText, whitelisted);
    List<String> expected = Arrays.asList("Senora", "Siobhan", "look", "at", "that", "fa\u00e7ade");
    assertTrue(tokens.equals(expected));
}

From source file:org.apache.jena.query.text.filter.TestSelectiveFoldingFilter.java

License:Apache License

/**
 * White-listing every accented character of the input leaves the input
 * completely unfolded.
 *
 * <p>The non-ASCII characters are written as Unicode escapes so the literals
 * survive any source-encoding mishap.
 * NOTE(review): {@code inputText} is declared outside this block; the accented
 * letters below (n-tilde, a-acute, c-cedilla) are presumably the ones it
 * contains - confirm.
 *
 * @throws IOException from Lucene API
 */
@Test
public void testCompleteWhiteListed() throws IOException {
    // U+00F1 = n with tilde, U+00E1 = a with acute, U+00E7 = c with cedilla
    whitelisted = new CharArraySet(Arrays.asList("\u00f1", "\u00e1", "\u00e7"), false);
    List<String> tokens = collectTokens(inputText, whitelisted);
    // here we should have the complete input
    List<String> expected = Arrays.asList("Se\u00f1ora", "Siobh\u00e1n", "look", "at", "that", "fa\u00e7ade");
    assertTrue(tokens.equals(expected));
}

From source file:org.apache.jena.query.text.filter.TestSelectiveFoldingFilter.java

License:Apache License

/**
 * White-list matching is case-sensitive: an upper-case entry does not protect
 * the lower-case character in the input, so that token is still folded.
 *
 * <p>The non-ASCII characters are written as Unicode escapes so the literals
 * survive any source-encoding mishap.
 * NOTE(review): {@code inputText} is declared outside this block; the accented
 * letters below are presumably the ones it contains - confirm.
 *
 * @throws IOException from Lucene API
 */
@Test
public void testCaseMatters() throws IOException {
    // note the first capital letter
    // U+00D1 = capital N with tilde, U+00E1 = a with acute, U+00E7 = c with cedilla
    whitelisted = new CharArraySet(Arrays.asList("\u00d1", "\u00e1", "\u00e7"), false);
    List<String> tokens = collectTokens(inputText, whitelisted);
    List<String> expected = Arrays.asList("Senora", "Siobh\u00e1n", "look", "at", "that", "fa\u00e7ade");
    assertTrue(tokens.equals(expected));
}

From source file:org.apache.jena.query.text.filter.TestSelectiveFoldingFilter.java

License:Apache License

/**
 * A white list whose entries match nothing in the input must leave the
 * folding untouched: all tokens are expected in their ASCII-folded form.
 *
 * NOTE(review): the three white-list entries are empty strings here; given the
 * test name they were presumably non-matching accented characters lost to an
 * encoding problem - confirm against the upstream source.
 *
 * @throws IOException from Lucene API
 */
@Test
public void testMismatchWhiteList() throws IOException {
    whitelisted = new CharArraySet(Arrays.asList("", "", ""), false);
    List<String> expectedTokens = Arrays.asList("Senora", "Siobhan", "look", "at", "that", "facade");
    List<String> actualTokens = collectTokens(inputText, whitelisted);
    assertTrue(actualTokens.equals(expectedTokens));
}