List of usage examples for org.apache.lucene.util.IOUtils.getDecodingReader
public static Reader getDecodingReader(InputStream stream, Charset charSet)
From source file:dk.defxws.fgslucene.PhaidraAnalyzer.java
License:Apache License
/**
 * Builds an analyzer with the stop words from the given file.
 *
 * <p>The file is opened through {@code IOUtils.getDecodingReader} with the UTF-8 charset,
 * the word list is loaded with {@link WordlistLoader#getWordSet(Reader, Version)}, and the
 * result is handed to the two-argument constructor of this class.
 *
 * @see WordlistLoader#getWordSet(Reader, Version)
 * @param matchVersion Lucene version to match; see <a href="#version">above</a>
 * @param stopwords file to read stop words from
 * @throws IOException propagated from opening or reading the stop word file
 */
public PhaidraAnalyzer(Version matchVersion, File stopwords) throws IOException {
  this(
      matchVersion,
      WordlistLoader.getWordSet(
          IOUtils.getDecodingReader(stopwords, IOUtils.CHARSET_UTF_8), matchVersion));
}
From source file:edu.ur.lucene.analysis.StandardWithACIIFoldingFilter.java
License:Apache License
/**
 * Creates an analyzer whose stop words are read from a file.
 *
 * <p>The file is opened through {@code IOUtils.getDecodingReader} with the UTF-8 charset and
 * the loaded word list is passed on to the two-argument constructor of this class.
 *
 * @see WordlistLoader#getWordSet(Reader, Version)
 * @param matchVersion Lucene version to match; see <a href="#version">above</a>
 * @param stopwords file to read stop words from
 * @throws IOException propagated from opening or reading the stop word file
 */
public StandardWithACIIFoldingFilter(Version matchVersion, File stopwords)
    throws IOException {
  this(
      matchVersion,
      WordlistLoader.getWordSet(
          IOUtils.getDecodingReader(stopwords, IOUtils.CHARSET_UTF_8),
          matchVersion));
}
From source file:lucenejavafx.CustomAnalyzer.java
License:Apache License
/** Builds an analyzer with the stop words from the given file. * @see WordlistLoader#getWordSet(Reader, Version) * @param matchVersion Lucene version to match See {@link * <a href="#version">above</a>} * @param stopwords File to read stop words from * @deprecated Use {@link #MyAnalyzerPrtSt(Version, Reader)} instead. *///from w ww. j a v a 2 s . co m @Deprecated public CustomAnalyzer(Version matchVersion, File stopwords) throws IOException { this(matchVersion, WordlistLoader.getWordSet(IOUtils.getDecodingReader(stopwords, IOUtils.CHARSET_UTF_8), matchVersion)); }
From source file:mj.ocraptor.database.StandardAnalyzer.java
License:Apache License
/**
 * Creates an analyzer whose stop words are read from a file.
 *
 * <p>The file is opened through {@code IOUtils.getDecodingReader} with the UTF-8 charset and
 * the loaded word list is passed on to the two-argument constructor of this class.
 *
 * @see WordlistLoader#getWordSet(Reader, Version)
 * @param matchVersion Lucene version to match; see <a href="#version">above</a>
 * @param stopwords file to read stop words from
 * @throws IOException propagated from opening or reading the stop word file
 * @deprecated Use {@link #StandardAnalyzer(Version, Reader)} instead.
 */
@Deprecated
public StandardAnalyzer(Version matchVersion, File stopwords)
    throws IOException {
  this(
      matchVersion,
      WordlistLoader.getWordSet(
          IOUtils.getDecodingReader(stopwords, IOUtils.CHARSET_UTF_8),
          matchVersion));
}
From source file:org.apache.solr.handler.extraction.RegexRulesPasswordProvider.java
License:Apache License
/** * Parses rule file from stream and returns a Map of all rules found * @param is input stream for the file// ww w . j a v a 2 s .c om */ public static LinkedHashMap<Pattern, String> parseRulesFile(InputStream is) { LinkedHashMap<Pattern, String> rules = new LinkedHashMap<Pattern, String>(); BufferedReader br = new BufferedReader(IOUtils.getDecodingReader(is, IOUtils.CHARSET_UTF_8)); String line; try { int linenum = 0; while ((line = br.readLine()) != null) { linenum++; // Remove comments String[] arr = line.split("#"); if (arr.length > 0) line = arr[0].trim(); if (line.length() == 0) continue; int sep = line.indexOf("="); if (sep <= 0) { log.warn("Wrong format of password line " + linenum); continue; } String pass = line.substring(sep + 1).trim(); String regex = line.substring(0, sep).trim(); try { Pattern pattern = Pattern.compile(regex); rules.put(pattern, pass); } catch (PatternSyntaxException pse) { log.warn("Key of line " + linenum + " was not a valid regex pattern", pse); continue; } } is.close(); } catch (IOException e) { throw new RuntimeException(); } return rules; }
From source file:org.elasticsearch.analysis.hunspell.TestStemming.java
License:Apache License
public void test() throws Exception { LineNumberReader reader = new LineNumberReader(IOUtils.getDecodingReader( getClass().getResourceAsStream("/stemming-data/" + language + ".txt"), StandardCharsets.UTF_8)); dictionaryStream = getClass().getResourceAsStream("/" + language + "/" + language + ".dic"); affixStream = getClass().getResourceAsStream("/" + language + "/" + language + ".aff"); final Dictionary dictionary = new Dictionary(affixStream, dictionaryStream); Analyzer analyzer = new Analyzer() { @Override//from w w w . j a v a 2 s . c om protected TokenStreamComponents createComponents(String field) { MockTokenizer tokenizer = new MockTokenizer(MockTokenizer.KEYWORD, false); HunspellStemFilter filter = new HunspellStemFilter(tokenizer, dictionary, false); return new TokenStreamComponents(tokenizer, filter); } }; String line = null; while ((line = reader.readLine()) != null) { int comment = line.indexOf('#'); if (comment >= 0) { line = line.substring(0, comment); } line = line.trim(); if (line.isEmpty()) { continue; } String elements[] = line.split("\\s+"); if (elements.length != 2) { throw new RuntimeException("Illegal number of elements in line: " + reader.getLineNumber()); } String input = elements[0]; String outputs[] = elements[1].split(","); compareStems(analyzer, input, outputs, reader.getLineNumber()); } analyzer.close(); reader.close(); }
From source file:org.elasticsearch.index.analysis.SkroutzGreekStemmer.java
License:Apache License
/** * Creates a CharArraySet from a file. */* w ww .j a v a 2s . com*/ * @param stopwords * Input stream from the stopwords file * * @param matchVersion * the Lucene version for cross version compatibility * @return a CharArraySet containing the distinct stopwords from the given * file * @throws IOException * if loading the stopwords throws an {@link IOException} */ private static CharArraySet loadStopwordSet(InputStream stopwords, Version matchVersion) throws IOException { Reader reader = null; try { reader = IOUtils.getDecodingReader(stopwords, IOUtils.CHARSET_UTF_8); return WordlistLoader.getWordSet(reader, matchVersion); } finally { IOUtils.close(reader); } }
From source file:ru.uiiiii.ssearchm.indexing.StandardAnalyzerJava.java
License:Apache License
/** Builds an analyzer with the stop words from the given file. * @see WordlistLoader#getWordSet(Reader, Version) * @param matchVersion Lucene version to match See {@link * <a href="#version">above</a>} * @param stopwords File to read stop words from * @deprecated Use {@link #StandardAnalyzer(Version, Reader)} instead. *///from w w w . j a v a 2 s.c om @Deprecated public StandardAnalyzerJava(Version matchVersion, File stopwords) throws IOException { this(matchVersion, WordlistLoader.getWordSet(IOUtils.getDecodingReader(stopwords, IOUtils.CHARSET_UTF_8), matchVersion)); }