Example usage for org.apache.lucene.analysis TokenStream close

List of usage examples for org.apache.lucene.analysis TokenStream close

Introduction

In this page you can find the example usage for org.apache.lucene.analysis TokenStream close.

Prototype

@Override
public void close() throws IOException 

Source Link

Document

Releases resources associated with this stream.

Usage

From source file:org.LexGrid.LexBIG.Impl.Extensions.GenericExtensions.search.SearchExtensionImpl.java

License:Open Source License

/**
 * Tokenizes the given keywords with the supplied analyzer.
 *
 * @param analyzer the analyzer used to produce the token stream
 * @param field    the field name handed to the analyzer (may select per-field analysis)
 * @param keywords the raw text to tokenize
 * @return the analyzed terms, in stream order
 * @throws IOException if the token stream cannot be read
 */
public List<String> tokenize(Analyzer analyzer, String field, String keywords) throws IOException {
    List<String> result = new ArrayList<String>();
    StringReader reader = new StringReader(keywords);
    TokenStream stream = analyzer.tokenStream(field, reader);
    CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
    try {
        stream.reset();
        while (stream.incrementToken()) {
            result.add(termAtt.toString());
        }
        // Signal end-of-stream so end-of-stream attribute state is applied.
        stream.end();
    } finally {
        // Close exactly once, in finally. The original also closed inside the
        // try, which double-closed the stream on the success path.
        stream.close();
    }
    return result;
}

From source file:org.meresco.lucene.analysis.MerescoStandardAnalyzer.java

License:Open Source License

/**
 * Drains a token stream and collects every term it emits.
 *
 * @param tok the stream to consume; it is always closed before returning
 * @return the emitted terms, in stream order
 * @throws IOException if reading the stream fails
 */
public static List<String> readTokenStream(TokenStream tok) throws IOException {
    CharTermAttribute term = tok.addAttribute(CharTermAttribute.class);
    List<String> collected = new ArrayList<String>();
    try {
        tok.reset();
        while (tok.incrementToken()) {
            collected.add(term.toString());
        }
        tok.end();
    } finally {
        tok.close();
    }
    return collected;
}

From source file:org.meresco.lucene.suggestion.SuggestionIndex.java

License:Open Source License

/**
 * Produces shingles (word n-grams) for the given text using this index's
 * shingle analyzer.
 *
 * @param s the text to analyze
 * @return the shingle terms, in stream order
 * @throws IOException if the token stream cannot be read
 */
public List<String> shingles(String s) throws IOException {
    List<String> shingles = new ArrayList<String>();
    TokenStream stream = this.shingleAnalyzer.tokenStream("ignored", s);
    try {
        stream.reset();
        CharTermAttribute termAttribute = stream.getAttribute(CharTermAttribute.class);
        while (stream.incrementToken()) {
            shingles.add(termAttribute.toString());
        }
        // Finish the stream before closing so end-of-stream state is applied.
        stream.end();
    } finally {
        // Close in finally: the original leaked the stream if reset() or
        // incrementToken() threw.
        stream.close();
    }
    return shingles;
}

From source file:org.meresco.lucene.suggestion.SuggestionNGramIndex.java

License:Open Source License

/**
 * Produces character n-grams for the given text.
 *
 * @param s       the text to analyze
 * @param trigram if {@code true} use the trigram analyzer, otherwise the bigram analyzer
 * @return the n-gram terms, in stream order
 * @throws IOException if the token stream cannot be read
 */
public static List<String> ngrams(String s, Boolean trigram) throws IOException {
    List<String> ngram = new ArrayList<String>();
    Analyzer ngramAnalyzer = trigram ? TRIGRAM_ANALYZER : BIGRAM_ANALYZER;
    TokenStream stream = ngramAnalyzer.tokenStream("ignored", s);
    try {
        stream.reset();
        CharTermAttribute termAttribute = stream.getAttribute(CharTermAttribute.class);
        while (stream.incrementToken()) {
            ngram.add(termAttribute.toString());
        }
        // Finish the stream before closing so end-of-stream state is applied.
        stream.end();
    } finally {
        // Close in finally: the original leaked the stream if reset() or
        // incrementToken() threw.
        stream.close();
    }
    return ngram;
}

From source file:org.nuxeo.ecm.platform.categorization.categorizer.tfidf.TfIdfCategorizer.java

License:Open Source License

/**
 * Tokenizes the given text content with this categorizer's analyzer.
 *
 * @param textContent the text to tokenize
 * @return the analyzed terms, in stream order
 * @throws IllegalStateException if the underlying token stream throws an
 *         {@link IOException} (not expected for in-memory input)
 */
public List<String> tokenize(String textContent) {
    try {
        List<String> terms = new ArrayList<String>();
        TokenStream tokenStream = getAnalyzer().tokenStream(null, textContent);
        CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);
        try {
            tokenStream.reset();
            while (tokenStream.incrementToken()) {
                terms.add(charTermAttribute.toString());
            }
            tokenStream.end();
        } finally {
            // Close in finally: the original only closed on the success path
            // and leaked the stream when reading threw.
            tokenStream.close();
        }
        return terms;
    } catch (IOException e) {
        throw new IllegalStateException(e);
    }
}

From source file:org.omegat.tokenizer.BaseTokenizer.java

License:Open Source License

/**
 * Tokenizes the given string into {@link Token}s carrying text and offsets.
 *
 * @param strOrig          the text to tokenize; empty input short-circuits
 * @param stemsAllowed     whether stemming is enabled for the stream
 * @param stopWordsAllowed whether stop words are kept in the stream
 * @param filterDigits     whether digit tokens are rejected by {@code acceptToken}
 * @param filterWhitespace whether whitespace tokens are rejected by {@code acceptToken}
 * @return the accepted tokens; never {@code null}
 */
protected Token[] tokenize(final String strOrig, final boolean stemsAllowed, final boolean stopWordsAllowed,
        final boolean filterDigits, final boolean filterWhitespace) {
    if (StringUtil.isEmpty(strOrig)) {
        return EMPTY_TOKENS_LIST;
    }

    List<Token> result = new ArrayList<Token>(64);

    final TokenStream in = getTokenStream(strOrig, stemsAllowed, stopWordsAllowed);
    in.addAttribute(CharTermAttribute.class);
    in.addAttribute(OffsetAttribute.class);

    CharTermAttribute cattr = in.getAttribute(CharTermAttribute.class);
    OffsetAttribute off = in.getAttribute(OffsetAttribute.class);

    try {
        in.reset();
        while (in.incrementToken()) {
            String tokenText = cattr.toString();
            if (acceptToken(tokenText, filterDigits, filterWhitespace)) {
                result.add(new Token(tokenText, off.startOffset(), off.endOffset() - off.startOffset()));
            }
        }
        in.end();
    } catch (IOException ex) {
        // shouldn't happen: the stream is backed by an in-memory source
    } finally {
        // Close in finally: the original closed inside the try, leaking the
        // stream when an IOException was thrown mid-stream.
        try {
            in.close();
        } catch (IOException ex) {
            // best-effort close; nothing useful to do here
        }
    }
    return result.toArray(new Token[result.size()]);
}

From source file:org.omegat.tokenizer.BaseTokenizer.java

License:Open Source License

/**
 * Tokenizes the given string into plain token strings. When stemming is
 * enabled, the original (unstemmed) surface form is also added whenever it
 * differs from the stemmed token (compared case-insensitively in the
 * tokenizer's locale).
 *
 * @param str              the text to tokenize; empty input short-circuits
 * @param stemsAllowed     whether stemming is enabled for the stream
 * @param stopWordsAllowed whether stop words are kept in the stream
 * @param filterDigits     whether digit tokens are rejected by {@code acceptToken}
 * @param filterWhitespace whether whitespace tokens are rejected by {@code acceptToken}
 * @return the accepted token strings; never {@code null}
 */
protected String[] tokenizeToStrings(String str, boolean stemsAllowed, boolean stopWordsAllowed,
        boolean filterDigits, boolean filterWhitespace) {
    if (StringUtil.isEmpty(str)) {
        return EMPTY_STRING_LIST;
    }

    List<String> result = new ArrayList<String>(64);

    final TokenStream in = getTokenStream(str, stemsAllowed, stopWordsAllowed);
    in.addAttribute(CharTermAttribute.class);
    in.addAttribute(OffsetAttribute.class);

    CharTermAttribute cattr = in.getAttribute(CharTermAttribute.class);
    OffsetAttribute off = in.getAttribute(OffsetAttribute.class);

    Locale loc = stemsAllowed ? getLanguage().getLocale() : null;

    try {
        in.reset();
        while (in.incrementToken()) {
            String tokenText = cattr.toString();
            if (acceptToken(tokenText, filterDigits, filterWhitespace)) {
                result.add(tokenText);
                if (stemsAllowed) {
                    String origText = str.substring(off.startOffset(), off.endOffset());
                    if (!origText.toLowerCase(loc).equals(tokenText.toLowerCase(loc))) {
                        result.add(origText);
                    }
                }
            }
        }
        in.end();
    } catch (IOException ex) {
        // shouldn't happen: the stream is backed by an in-memory source
    } finally {
        // Close in finally: the original closed inside the try, leaking the
        // stream when an IOException was thrown mid-stream.
        try {
            in.close();
        } catch (IOException ex) {
            // best-effort close; nothing useful to do here
        }
    }
    return result.toArray(new String[result.size()]);
}

From source file:org.opengrok.web.api.v1.suggester.query.SuggesterQueryParser.java

License:Open Source License

/**
 * Runs the analyzer over the text and returns every token it produces.
 * Analysis errors are logged and result in a (possibly partial) token list
 * rather than an exception.
 *
 * @param analyzer the analyzer to use
 * @param field    the field name passed to the analyzer
 * @param text     the text to analyze
 * @return the collected tokens; possibly empty, never {@code null}
 */
private static List<String> getAllTokens(final Analyzer analyzer, final String field, final String text) {
    List<String> tokens = new LinkedList<>();

    TokenStream tokenStream = null;
    try {
        tokenStream = analyzer.tokenStream(field, text);
        CharTermAttribute term = tokenStream.addAttribute(CharTermAttribute.class);
        tokenStream.reset();
        while (tokenStream.incrementToken()) {
            tokens.add(term.toString());
        }
    } catch (IOException e) {
        logger.log(Level.WARNING, "Could not analyze query text", e);
    } finally {
        if (tokenStream != null) {
            try {
                tokenStream.end();
                tokenStream.close();
            } catch (IOException e) {
                logger.log(Level.WARNING, "Could not close token stream", e);
            }
        }
    }

    return tokens;
}

From source file:org.pageseeder.flint.lucene.query.Queries.java

License:Apache License

/**
 * Returns the terms for a field/*w  w w. j  a  v a  2  s . c  o m*/
 *
 * @param field    The field
 * @param text     The text to analyze
 * @param analyzer The analyzer
 *
 * @return the corresponding list of terms produced by the analyzer.
 *
 * @throws IOException
 */
private static void addTermsToPhrase(String field, String text, Analyzer analyzer, PhraseQuery phrase) {
    try {
        TokenStream stream = analyzer.tokenStream(field, text);
        PositionIncrementAttribute increment = stream.addAttribute(PositionIncrementAttribute.class);
        CharTermAttribute attribute = stream.addAttribute(CharTermAttribute.class);
        int position = -1;
        stream.reset();
        while (stream.incrementToken()) {
            position += increment.getPositionIncrement();
            Term term = new Term(field, attribute.toString());
            phrase.add(term, position);
        }
        stream.end();
        stream.close();
    } catch (IOException ex) {
        // Should not occur since we use a StringReader
        ex.printStackTrace();
    }
}

From source file:org.pageseeder.flint.lucene.query.Queries.java

License:Apache License

/**
 * Determines whether the analyzer splits the given field into multiple terms,
 * by analyzing a two-word probe string and checking for a second token.
 *
 * @param field    the field to probe
 * @param analyzer the analyzer to test
 * @return {@code true} if the probe yields more than one term
 */
private static boolean isTokenized(String field, Analyzer analyzer) {
    // try to load terms for a phrase and return true if more than one term
    TokenStream probe = null;
    try {
        probe = analyzer.tokenStream(field, "word1 word2");
        probe.reset();
        boolean hasFirst = probe.incrementToken();
        if (hasFirst) {
            return probe.incrementToken();
        }
    } catch (IOException ex) {
        // Should not occur since we use a StringReader
        ex.printStackTrace();
    } finally {
        if (probe != null) {
            try {
                probe.end();
                probe.close();
            } catch (IOException ex) {
                // Should not occur since we use a StringReader
                ex.printStackTrace();
            }
        }
    }
    return false;
}