Example usage for org.apache.lucene.util IOUtils getDecodingReader

List of usage examples for org.apache.lucene.util IOUtils getDecodingReader

Introduction

In this page you can find the example usage for org.apache.lucene.util IOUtils getDecodingReader.

Prototype

public static Reader getDecodingReader(InputStream stream, Charset charSet) 

Source Link

Document

Wraps the given InputStream in a Reader that decodes it using a CharsetDecoder.

Usage

From source file:dk.defxws.fgslucene.PhaidraAnalyzer.java

License:Apache License

/**
 * Builds an analyzer with the stop words read (as UTF-8) from the given file.
 *
 * @see WordlistLoader#getWordSet(Reader, Version)
 * @param matchVersion Lucene version to match; see the class-level version note
 * @param stopwords file to read stop words from
 * @throws IOException if the stopwords file cannot be read
 */
public PhaidraAnalyzer(Version matchVersion, File stopwords) throws IOException {
    this(matchVersion, WordlistLoader.getWordSet(IOUtils.getDecodingReader(stopwords, IOUtils.CHARSET_UTF_8),
            matchVersion));
}

From source file:edu.ur.lucene.analysis.StandardWithACIIFoldingFilter.java

License:Apache License

/**
 * Builds an analyzer with the stop words read (as UTF-8) from the given file.
 *
 * @see WordlistLoader#getWordSet(Reader, Version)
 * @param matchVersion Lucene version to match; see the class-level version note
 * @param stopwords file to read stop words from
 * @throws IOException if the stopwords file cannot be read
 */
public StandardWithACIIFoldingFilter(Version matchVersion, File stopwords) throws IOException {
    this(matchVersion, WordlistLoader.getWordSet(IOUtils.getDecodingReader(stopwords, IOUtils.CHARSET_UTF_8),
            matchVersion));
}

From source file:lucenejavafx.CustomAnalyzer.java

License:Apache License

/**
 * Builds an analyzer with the stop words read (as UTF-8) from the given file.
 *
 * @see WordlistLoader#getWordSet(Reader, Version)
 * @param matchVersion Lucene version to match; see the class-level version note
 * @param stopwords file to read stop words from
 * @throws IOException if the stopwords file cannot be read
 * @deprecated Use the {@link Reader}-based constructor instead.
 */
@Deprecated
public CustomAnalyzer(Version matchVersion, File stopwords) throws IOException {
    this(matchVersion, WordlistLoader.getWordSet(IOUtils.getDecodingReader(stopwords, IOUtils.CHARSET_UTF_8),
            matchVersion));
}

From source file:mj.ocraptor.database.StandardAnalyzer.java

License:Apache License

/**
 * Builds an analyzer with the stop words read (as UTF-8) from the given file.
 *
 * @see WordlistLoader#getWordSet(Reader, Version)
 * @param matchVersion
 *          Lucene version to match; see the class-level version note
 * @param stopwords
 *          file to read stop words from
 * @throws IOException if the stopwords file cannot be read
 * @deprecated Use {@link #StandardAnalyzer(Version, Reader)} instead.
 */
@Deprecated
public StandardAnalyzer(Version matchVersion, File stopwords) throws IOException {
    this(matchVersion, WordlistLoader.getWordSet(IOUtils.getDecodingReader(stopwords, IOUtils.CHARSET_UTF_8),
            matchVersion));
}

From source file:org.apache.solr.handler.extraction.RegexRulesPasswordProvider.java

License:Apache License

/**
 * Parses a password-rule file from the given stream and returns all rules found.
 * <p>
 * Each non-empty line has the form {@code regex=password}; text after a
 * {@code #} is treated as a comment. Lines without a {@code =} separator (or
 * with an empty regex part) and lines whose regex does not compile are skipped
 * with a warning.
 *
 * @param is input stream for the rule file, decoded as UTF-8 and closed on return
 * @return an insertion-ordered map from compiled pattern to password
 */
public static LinkedHashMap<Pattern, String> parseRulesFile(InputStream is) {
    LinkedHashMap<Pattern, String> rules = new LinkedHashMap<Pattern, String>();
    // try-with-resources closes the reader (and the underlying stream) on every
    // exit path; the original only closed the stream on the success path.
    try (BufferedReader br = new BufferedReader(IOUtils.getDecodingReader(is, IOUtils.CHARSET_UTF_8))) {
        String line;
        int linenum = 0;
        while ((line = br.readLine()) != null) {
            linenum++;
            // Strip trailing comments introduced by '#'
            String[] arr = line.split("#");
            if (arr.length > 0)
                line = arr[0].trim();
            if (line.length() == 0)
                continue;
            int sep = line.indexOf('=');
            // sep <= 0 also rejects lines that start with '=' (empty regex key)
            if (sep <= 0) {
                log.warn("Wrong format of password line " + linenum);
                continue;
            }
            String pass = line.substring(sep + 1).trim();
            String regex = line.substring(0, sep).trim();
            try {
                rules.put(Pattern.compile(regex), pass);
            } catch (PatternSyntaxException pse) {
                log.warn("Key of line " + linenum + " was not a valid regex pattern", pse);
            }
        }
    } catch (IOException e) {
        // Preserve the cause; the original threw a bare RuntimeException()
        throw new RuntimeException("Error parsing password rules file", e);
    }
    return rules;
}

From source file:org.elasticsearch.analysis.hunspell.TestStemming.java

License:Apache License

/**
 * Reads {@code /stemming-data/<language>.txt}, and for each non-comment line of
 * the form {@code input expected1,expected2,...} checks the Hunspell stemmer
 * output via {@link #compareStems}.
 *
 * @throws Exception if the data file is malformed or a stem comparison fails
 */
public void test() throws Exception {
    // try-with-resources ensures the data reader is closed even when an
    // assertion or parse error aborts the loop (the original leaked it).
    try (LineNumberReader reader = new LineNumberReader(IOUtils.getDecodingReader(
            getClass().getResourceAsStream("/stemming-data/" + language + ".txt"), StandardCharsets.UTF_8))) {
        dictionaryStream = getClass().getResourceAsStream("/" + language + "/" + language + ".dic");
        affixStream = getClass().getResourceAsStream("/" + language + "/" + language + ".aff");
        final Dictionary dictionary = new Dictionary(affixStream, dictionaryStream);
        Analyzer analyzer = new Analyzer() {
            @Override
            protected TokenStreamComponents createComponents(String field) {
                MockTokenizer tokenizer = new MockTokenizer(MockTokenizer.KEYWORD, false);
                HunspellStemFilter filter = new HunspellStemFilter(tokenizer, dictionary, false);
                return new TokenStreamComponents(tokenizer, filter);
            }
        };
        try {
            String line = null;
            while ((line = reader.readLine()) != null) {
                // Drop '#' comments, then skip blank lines
                int comment = line.indexOf('#');
                if (comment >= 0) {
                    line = line.substring(0, comment);
                }
                line = line.trim();
                if (line.isEmpty()) {
                    continue;
                }
                String elements[] = line.split("\\s+");
                if (elements.length != 2) {
                    throw new RuntimeException("Illegal number of elements in line: " + reader.getLineNumber());
                }
                String input = elements[0];
                String outputs[] = elements[1].split(",");
                compareStems(analyzer, input, outputs, reader.getLineNumber());
            }
        } finally {
            // Close the analyzer on every exit path, not just on success
            analyzer.close();
        }
    }
}

From source file:org.elasticsearch.index.analysis.SkroutzGreekStemmer.java

License:Apache License

/**
   * Creates a CharArraySet from a file.
   */*  w  ww  .j  a v a 2s  .  com*/
   * @param stopwords
   *          Input stream from the stopwords file
   *
   * @param matchVersion
   *          the Lucene version for cross version compatibility
   * @return a CharArraySet containing the distinct stopwords from the given
   *         file
   * @throws IOException
   *           if loading the stopwords throws an {@link IOException}
   */
  private static CharArraySet loadStopwordSet(InputStream stopwords, Version matchVersion) throws IOException {
      Reader reader = null;
      try {
          reader = IOUtils.getDecodingReader(stopwords, IOUtils.CHARSET_UTF_8);
          return WordlistLoader.getWordSet(reader, matchVersion);
      } finally {
          IOUtils.close(reader);
      }
  }

From source file:ru.uiiiii.ssearchm.indexing.StandardAnalyzerJava.java

License:Apache License

/**
 * Builds an analyzer with the stop words read (as UTF-8) from the given file.
 *
 * @see WordlistLoader#getWordSet(Reader, Version)
 * @param matchVersion Lucene version to match; see the class-level version note
 * @param stopwords file to read stop words from
 * @throws IOException if the stopwords file cannot be read
 * @deprecated Use the {@link Reader}-based constructor instead.
 */
@Deprecated
public StandardAnalyzerJava(Version matchVersion, File stopwords) throws IOException {
    this(matchVersion, WordlistLoader.getWordSet(IOUtils.getDecodingReader(stopwords, IOUtils.CHARSET_UTF_8),
            matchVersion));
}