List of usage examples for the org.apache.lucene.analysis.CharArraySet constructor
public CharArraySet(Collection<?> c, boolean ignoreCase)
From source file:CommonResources.RanksNL.java
/**
 * Builds the {@code stopWords} set from the entries of {@code RanksNLStpWrd}.
 * The set is created with {@code ignoreCase = true}.
 */
RanksNL() {
    // The set is populated from the collection handed to the constructor, so the
    // former raw-typed ArrayList copy (an unchecked conversion) is not needed.
    stopWords = new CharArraySet(Arrays.asList(RanksNLStpWrd), true);
}
From source file:de.dlr.knowledgefinder.dataimport.utils.transformer.ExcludeValuesTransformer.java
License:Apache License
/**
 * Returns the set built from the lines of {@code filename}, reading the resource
 * through the Solr core's resource loader the first time it is requested and
 * keeping the result in {@code loadedFiles} for later calls.
 *
 * @param filename name of the resource to read, also used as the cache key
 * @return the set (created with {@code ignoreCase = true}) for {@code filename}
 * @throws IOException if reading the lines of {@code filename} fails
 */
private CharArraySet initializeExcludeValues(String filename) throws IOException {
    CharArraySet loadedFile = loadedFiles.get(filename);
    if (loadedFile == null) {
        SolrCore core = context.getSolrCore();
        SolrResourceLoader loader = core.getResourceLoader();
        List<String> lines = loader.getLines(filename);
        loadedFile = new CharArraySet(lines, true);
        loadedFiles.put(filename, loadedFile);
    }
    // Return the instance we already hold instead of looking it up a second time.
    return loadedFile;
}
From source file:org.apache.jena.query.text.assembler.GenericAnalyzerAssembler.java
License:Apache License
private ParamSpec getParamSpec(Resource node) { Statement nameStmt = node.getProperty(TextVocab.pParamName); Statement typeStmt = node.getProperty(TextVocab.pParamType); Statement valueStmt = node.getProperty(TextVocab.pParamValue); if (typeStmt == null) { throw new TextIndexException("Parameter specification must have a text:paramType: " + node); }//from w ww . j a v a2s . c o m Resource typeRes = typeStmt.getResource(); String type = typeRes.getLocalName(); String name = getStringValue(nameStmt); String value = getStringValue(valueStmt); switch (type) { // String case TYPE_STRING: { if (value == null) { throw new TextIndexException("Value for string param: " + name + " must not be empty!"); } return new ParamSpec(name, value, String.class); } // java.io.FileReader case TYPE_FILE: { if (value == null) { throw new TextIndexException("Value for file param must exist and must contain a file name."); } try { // The analyzer is responsible for closing the file Reader fileReader = new java.io.FileReader(value); return new ParamSpec(name, fileReader, Reader.class); } catch (java.io.FileNotFoundException ex) { throw new TextIndexException("File " + value + " for param " + name + " not found!"); } } // org.apache.lucene.analysis.util.CharArraySet case TYPE_SET: { if (valueStmt == null) { throw new TextIndexException("A set param spec must have a text:paramValue:" + node); } RDFNode valueNode = valueStmt.getObject(); if (!valueNode.isResource()) { throw new TextIndexException( "A set param spec text:paramValue must be a list of strings: " + valueNode); } List<String> values = toStrings((Resource) valueNode); return new ParamSpec(name, new CharArraySet(values, false), CharArraySet.class); } // int case TYPE_INT: if (value == null) { throw new TextIndexException("Value for int param: " + name + " must not be empty!"); } int n = ((Literal) valueStmt.getObject()).getInt(); return new ParamSpec(name, n, int.class); // boolean case TYPE_BOOL: if (value == null) { throw new 
TextIndexException("Value for boolean param: " + name + " must not be empty!"); } boolean b = ((Literal) valueStmt.getObject()).getBoolean(); return new ParamSpec(name, b, boolean.class); // org.apache.lucene.analysis.Analyzer case TYPE_ANALYZER: if (valueStmt == null) { throw new TextIndexException("Analyzer param spec must have a text:paramValue:" + node); } RDFNode valueNode = valueStmt.getObject(); if (!valueNode.isResource()) { throw new TextIndexException( "Analyzer param spec text:paramValue must be an analyzer spec resource: " + valueNode); } Analyzer analyzer = (Analyzer) Assembler.general.open((Resource) valueNode); return new ParamSpec(name, analyzer, Analyzer.class); default: // there was no match Log.error(this, "Unknown parameter type: " + type + " for param: " + name + " with value: " + value); break; } return null; }
From source file:org.apache.jena.query.text.assembler.Params.java
License:Apache License
protected static ParamSpec getParamSpec(Resource node) { Statement nameStmt = node.getProperty(TextVocab.pParamName); Statement valueStmt = node.getProperty(TextVocab.pParamValue); String name = getStringValue(nameStmt); String type = getType(node);/*from w ww . j a v a 2 s . com*/ String value = getStringValue(valueStmt); switch (type) { // String case TYPE_STRING: { if (value == null) { throw new TextIndexException("Value for string param: " + name + " must not be empty!"); } return new ParamSpec(name, value, String.class); } // java.io.FileReader case TYPE_FILE: { if (value == null) { throw new TextIndexException("Value for file param must exist and must contain a file name."); } try { // The analyzer is responsible for closing the file Reader fileReader = new java.io.FileReader(value); return new ParamSpec(name, fileReader, Reader.class); } catch (java.io.FileNotFoundException ex) { throw new TextIndexException("File " + value + " for param " + name + " not found!"); } } // org.apache.lucene.analysis.util.CharArraySet case TYPE_SET: { if (valueStmt == null) { throw new TextIndexException("A set param spec must have a text:paramValue:" + node); } RDFNode valueNode = valueStmt.getObject(); if (!valueNode.isResource()) { throw new TextIndexException( "A set param spec text:paramValue must be a list of strings: " + valueNode); } List<String> values = toStrings((Resource) valueNode); return new ParamSpec(name, new CharArraySet(values, false), CharArraySet.class); } // int case TYPE_INT: if (value == null) { throw new TextIndexException("Value for int param: " + name + " must not be empty!"); } int n = ((Literal) valueStmt.getObject()).getInt(); return new ParamSpec(name, n, int.class); // boolean case TYPE_BOOL: if (value == null) { throw new TextIndexException("Value for boolean param: " + name + " must not be empty!"); } boolean b = ((Literal) valueStmt.getObject()).getBoolean(); return new ParamSpec(name, b, boolean.class); // org.apache.lucene.analysis.Analyzer 
case TYPE_ANALYZER: if (valueStmt == null) { throw new TextIndexException("Analyzer param spec must have a text:paramValue:" + node); } RDFNode valueNode = valueStmt.getObject(); if (!valueNode.isResource()) { throw new TextIndexException( "Analyzer param spec text:paramValue must be an analyzer spec resource: " + valueNode); } Analyzer analyzer = (Analyzer) Assembler.general.open((Resource) valueNode); return new ParamSpec(name, analyzer, Analyzer.class); default: // there was no match Log.error("org.apache.jena.query.text.assembler.Params", "Unknown parameter type: " + type + " for param: " + name + " with value: " + value); break; } return null; }
From source file:org.apache.jena.query.text.assembler.StandardAnalyzerAssembler.java
License:Apache License
/**
 * Converts {@code list} with {@code toList} and wraps the result in a
 * {@code CharArraySet} that does not ignore case.
 *
 * @param list resource handed to {@code toList}
 * @return a new set holding the converted entries
 */
private CharArraySet toCharArraySet(Resource list) {
    final boolean ignoreCase = false;
    return new CharArraySet(toList(list), ignoreCase);
}
From source file:org.apache.jena.query.text.filter.TestSelectiveFoldingFilter.java
License:Apache License
/**
 * With an empty white list the default behaviour of Lucene's ASCIIFoldingFilter
 * is expected, so every expected token below is plain ASCII.
 *
 * @throws IOException from Lucene API
 */
@Test
public void testEmptyWhiteListIsOkay() throws IOException {
    final List<String> expected = Arrays.asList("Senora", "Siobhan", "look", "at", "that", "facade");

    whitelisted = new CharArraySet(Collections.emptyList(), false);
    final List<String> actual = collectTokens(inputText, whitelisted);

    assertTrue(actual.equals(expected));
}
From source file:org.apache.jena.query.text.filter.TestSelectiveFoldingFilter.java
License:Apache License
/**
 * Whitelisting one accented character: that character is expected to survive in the
 * output while the remaining tokens are folded to ASCII.
 *
 * @throws IOException from Lucene API
 */
@Test
public void testSingleCharacterWhiteListed() throws IOException {
    // NOTE(review): the non-ASCII literals had been stripped from this snippet (empty
    // whitelist entry, "faade"). Restored as c-cedilla (U+00E7), inferred from the folded
    // form "facade" used by the sibling tests - confirm it matches inputText.
    // Written as Unicode escapes so the source no longer depends on file encoding.
    whitelisted = new CharArraySet(Arrays.asList("\u00E7"), false);
    List<String> tokens = collectTokens(inputText, whitelisted);
    List<String> expected = Arrays.asList("Senora", "Siobhan", "look", "at", "that", "fa\u00E7ade");
    assertTrue(tokens.equals(expected));
}
From source file:org.apache.jena.query.text.filter.TestSelectiveFoldingFilter.java
License:Apache License
@Test public void testCompleteWhiteListed() throws IOException { whitelisted = new CharArraySet(Arrays.asList("", "", ""), false); List<String> tokens = collectTokens(inputText, whitelisted); // here we should have the complete input List<String> expected = Arrays.asList("Seora", "Siobhn", "look", "at", "that", "faade"); assertTrue(tokens.equals(expected)); }
From source file:org.apache.jena.query.text.filter.TestSelectiveFoldingFilter.java
License:Apache License
@Test public void testCaseMatters() throws IOException { // note the first capital letter whitelisted = new CharArraySet(Arrays.asList("", "", ""), false); List<String> tokens = collectTokens(inputText, whitelisted); List<String> expected = Arrays.asList("Senora", "Siobhn", "look", "at", "that", "faade"); assertTrue(tokens.equals(expected)); }
From source file:org.apache.jena.query.text.filter.TestSelectiveFoldingFilter.java
License:Apache License
/**
 * A white list whose entries do not occur in the input: every token is expected to be
 * folded to ASCII, the same result as with an empty white list.
 *
 * @throws IOException from Lucene API
 */
@Test
public void testMismatchWhiteList() throws IOException {
    // NOTE(review): the three whitelist literals had been stripped to empty strings and
    // the original characters cannot be recovered from this snippet. u-acute (U+00FA),
    // c-acute (U+0107) and z-caron (U+017E) are stand-ins, presumably absent from
    // inputText - confirm against the upstream test.
    whitelisted = new CharArraySet(Arrays.asList("\u00FA", "\u0107", "\u017E"), false);
    List<String> tokens = collectTokens(inputText, whitelisted);
    List<String> expected = Arrays.asList("Senora", "Siobhan", "look", "at", "that", "facade");
    assertTrue(tokens.equals(expected));
}