Example usage for org.apache.lucene.analysis TokenStream reset

Introduction

On this page you can find example usages of org.apache.lucene.analysis.TokenStream#reset().

Prototype

public void reset() throws IOException 

Document

This method is called by a consumer before it begins consumption via incrementToken().
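
The required workflow is: obtain the stream, add the attributes you need, call reset(), consume with incrementToken() until it returns false, then call end() and close(). A minimal sketch of that lifecycle (the field name and text here are illustrative, not taken from any of the sources below):

try (TokenStream ts = analyzer.tokenStream("body", "some text to analyze")) {
    CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
    ts.reset();                        // must precede the first incrementToken()
    while (ts.incrementToken()) {
        System.out.println(termAtt.toString());
    }
    ts.end();                          // records end-of-stream state (e.g. final offset)
}                                      // try-with-resources calls close()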

Usage

From source file: org.opengrok.web.api.v1.suggester.query.SuggesterQueryParser.java

License: Open Source License

private static List<String> getAllTokens(final Analyzer analyzer, final String field, final String text) {
    List<String> tokens = new LinkedList<>();

    TokenStream ts = null;
    try {
        ts = analyzer.tokenStream(field, text);

        CharTermAttribute attr = ts.addAttribute(CharTermAttribute.class);

        ts.reset();
        while (ts.incrementToken()) {
            tokens.add(attr.toString());
        }
    } catch (IOException e) {
        logger.log(Level.WARNING, "Could not analyze query text", e);
    } finally {
        try {
            if (ts != null) {
                ts.end();
                ts.close();
            }
        } catch (IOException e) {
            logger.log(Level.WARNING, "Could not close token stream", e);
        }
    }

    return tokens;
}
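
Since TokenStream implements Closeable (Lucene 4.x and later), the null check and the explicit close() in the finally block above can be folded into try-with-resources. A sketch of the same method in that style, assuming a modern Lucene:

private static List<String> getAllTokens(final Analyzer analyzer, final String field, final String text) {
    List<String> tokens = new LinkedList<>();
    try (TokenStream ts = analyzer.tokenStream(field, text)) {
        CharTermAttribute attr = ts.addAttribute(CharTermAttribute.class);
        ts.reset();
        while (ts.incrementToken()) {
            tokens.add(attr.toString());
        }
        ts.end();
    } catch (IOException e) {
        logger.log(Level.WARNING, "Could not analyze query text", e);
    }
    return tokens;
}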

From source file: org.opensextant.solrtexttagger.Tagger.java

License: Open Source License

public Tagger(Terms terms, Bits liveDocs, TokenStream tokenStream, TagClusterReducer tagClusterReducer,
        boolean skipAltTokens, boolean ignoreStopWords) throws IOException {
    this.terms = terms;
    this.liveDocs = liveDocs;
    this.tokenStream = tokenStream;
    this.skipAltTokens = skipAltTokens;
    this.ignoreStopWords = ignoreStopWords;
    //    termAtt = tokenStream.addAttribute(CharTermAttribute.class);
    byteRefAtt = tokenStream.addAttribute(TermToBytesRefAttribute.class);
    posIncAtt = tokenStream.addAttribute(PositionIncrementAttribute.class);
    offsetAtt = tokenStream.addAttribute(OffsetAttribute.class);
    lookupAtt = tokenStream.addAttribute(TaggingAttribute.class);
    tokenStream.reset();

    this.tagClusterReducer = tagClusterReducer;
}
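
Note that this constructor calls reset() itself and stores the stream for later consumption. That is valid because the contract only requires reset() to happen after the attributes are added and before the first incrementToken() call, wherever that call eventually occurs.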

From source file: org.pageseeder.flint.lucene.query.Queries.java

License: Apache License

/**
 * Adds the terms produced by analyzing the given text to a phrase query.
 *
 * @param field    The field
 * @param text     The text to analyze
 * @param analyzer The analyzer
 * @param phrase   The phrase query the terms are added to
 */
private static void addTermsToPhrase(String field, String text, Analyzer analyzer, PhraseQuery phrase) {
    try {
        TokenStream stream = analyzer.tokenStream(field, text);
        PositionIncrementAttribute increment = stream.addAttribute(PositionIncrementAttribute.class);
        CharTermAttribute attribute = stream.addAttribute(CharTermAttribute.class);
        int position = -1;
        stream.reset();
        while (stream.incrementToken()) {
            position += increment.getPositionIncrement();
            Term term = new Term(field, attribute.toString());
            phrase.add(term, position);
        }
        stream.end();
        stream.close();
    } catch (IOException ex) {
        // Should not occur since we use a StringReader
        ex.printStackTrace();
    }
}
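
The mutable PhraseQuery.add(Term, int) API used above was removed in Lucene 6, where PhraseQuery became immutable. On a modern Lucene the same loop would feed a PhraseQuery.Builder instead; a sketch under that assumption (toPhrase is a hypothetical helper, not part of the source):

private static PhraseQuery toPhrase(String field, String text, Analyzer analyzer) throws IOException {
    PhraseQuery.Builder builder = new PhraseQuery.Builder();
    try (TokenStream stream = analyzer.tokenStream(field, text)) {
        PositionIncrementAttribute increment = stream.addAttribute(PositionIncrementAttribute.class);
        CharTermAttribute attribute = stream.addAttribute(CharTermAttribute.class);
        int position = -1;
        stream.reset();
        while (stream.incrementToken()) {
            position += increment.getPositionIncrement();
            builder.add(new Term(field, attribute.toString()), position);
        }
        stream.end();
    }
    return builder.build();
}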

From source file: org.pageseeder.flint.lucene.query.Queries.java

License: Apache License

private static boolean isTokenized(String field, Analyzer analyzer) {
    // try to load terms for a phrase and return true if more than one term
    TokenStream stream = null;
    try {
        stream = analyzer.tokenStream(field, "word1 word2");
        stream.reset();
        if (stream.incrementToken()) {
            return stream.incrementToken();
        }
    } catch (IOException ex) {
        // Should not occur since we use a StringReader
        ex.printStackTrace();
    } finally {
        if (stream != null)
            try {
                stream.end();
                stream.close();
            } catch (IOException ex) {
                // Should not occur since we use a StringReader
                ex.printStackTrace();
            }
    }
    return false;
}
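
The probe string "word1 word2" is the whole trick here: if the analyzer produces more than one token for it, the field is tokenized; an analyzer that treats the field as a single keyword yields only one token and the method returns false.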

From source file: org.pageseeder.flint.lucene.search.Fields.java

License: Apache License

/**
 * Returns the terms for a field.
 *
 * @param field    The field
 * @param text     The text to analyze
 * @param analyzer The analyzer
 *
 * @return the corresponding list of terms produced by the analyzer.
 */
public static List<String> toTerms(String field, String text, Analyzer analyzer) {
    List<String> terms = new ArrayList<String>();
    try {
        TokenStream stream = analyzer.tokenStream(field, new StringReader(text));
        CharTermAttribute attribute = stream.addAttribute(CharTermAttribute.class);
        stream.reset();
        while (stream.incrementToken()) {
            String term = attribute.toString();
            terms.add(term);
        }
        stream.end();
        stream.close();
    } catch (IOException ex) {
        // Should not occur since we use a StringReader
        ex.printStackTrace();
    }
    return terms;
}

From source file: org.sc.probro.lucene.BiothesaurusSearcher.java

License: Apache License

public Query createQuery(String phrase) {

    phrase = phrase.trim();

    TermQuery idQuery = new TermQuery(new Term("protein-id", phrase.toUpperCase()));
    TermQuery accQuery = new TermQuery(new Term("accession", phrase.toUpperCase()));

    ArrayList<TermQuery> descQueries = new ArrayList<TermQuery>();
    ArrayList<Term> descTerms = new ArrayList<Term>();

    TokenStream stream = analyzer.tokenStream("description", new StringReader(phrase));
    TermAttribute attr = (TermAttribute) stream.getAttribute(TermAttribute.class);
    try {
        stream.reset();
        Term lastTerm = null;

        while (stream.incrementToken()) {
            Term t = new Term("description", attr.term());
            descQueries.add(new TermQuery(t));
            descTerms.add(t);

            if (lastTerm != null) {
                Term hyph = new Term("description", lastTerm.text() + "-" + t.text());
                descQueries.add(new TermQuery(hyph));
                //descTerms.add(hyph);
            }
            lastTerm = t;
        }
    } catch (IOException e) {
        e.printStackTrace();
    }

    return createDisjunction(2.0f, createMust(idQuery), createMust(accQuery),
            //createShould(descQueries.toArray(new Query[0])));
            createDescendingQuery(descTerms.toArray(new Term[0])));
}

From source file: org.sc.probro.lucene.BiothesaurusSearcher.java

License: Apache License

public String[] tokenize(String input) {
    ArrayList<String> tokens = new ArrayList<String>();
    try {
        TokenStream stream = analyzer.tokenStream(null, new StringReader(input));
        TermAttribute termattr = (TermAttribute) stream.getAttribute(TermAttribute.class);
        //stream = new LowerCaseFilter(stream);

        stream.reset();

        while (stream.incrementToken()) {
            if (stream.hasAttribute(TermAttribute.class)) {
                String term = termattr.term();
                tokens.add(term);
            }
        }

        stream.end();
        stream.close();

    } catch (IllegalArgumentException e) {
        System.err.println(String.format("Phrase: \"%s\"", input));
        e.printStackTrace(System.err);
    } catch (IOException e) {
        System.err.println(String.format("Phrase: \"%s\"", input));
        e.printStackTrace();
    }

    return tokens.toArray(new String[0]);
}
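
The TermAttribute used throughout this file is the pre-4.0 Lucene API; it was removed in Lucene 4.0 in favor of CharTermAttribute. On a modern Lucene the token-reading loop would look roughly as follows (a sketch, not taken from the source). Note also that getAttribute() throws IllegalArgumentException when the attribute is absent, which is why this method catches that exception; addAttribute() would create the attribute instead.

CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
stream.reset();
while (stream.incrementToken()) {
    tokens.add(termAtt.toString());
}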

From source file: org.sc.probro.lucene.BiothesaurusSearcher.java

License: Apache License

public Query createPhraseQuery(String field, String phrase) throws IOException {
    PhraseQuery query = new PhraseQuery();
    /*
    String[] array = phrase.split("\\s+");
    for(int i = 0; i < array.length; i++) { 
       query.add(new Term(field, array[i]));
    }
    */

    try {
        TokenStream stream = analyzer.tokenStream(field, new StringReader(phrase));
        //stream = new LowerCaseFilter(stream);

        stream.reset();

        while (stream.incrementToken()) {
            if (stream.hasAttribute(TermAttribute.class)) {
                TermAttribute termattr = (TermAttribute) stream.getAttribute(TermAttribute.class);
                Term t = new Term(field, termattr.term());
                query.add(t);
            }
        }

        stream.end();
        stream.close();

    } catch (IllegalArgumentException e) {
        e.printStackTrace(System.err);
        System.err.println(String.format("Phrase: \"%s\"", phrase));
    }

    return query;
}

From source file: org.sc.probro.lucene.ProteinSearcher.java

License: Apache License

public String[] tokenize(String input) {
    ArrayList<String> tokens = new ArrayList<String>();
    try {
        TokenStream stream = analyzer.tokenStream(null, new StringReader(input));
        stream = new LowerCaseFilter(stream);

        stream.reset();

        while (stream.incrementToken()) {
            if (stream.hasAttribute(TermAttribute.class)) {
                TermAttribute termattr = (TermAttribute) stream.getAttribute(TermAttribute.class);
                String term = termattr.term();
                tokens.add(term);
            }
        }

        stream.end();
        stream.close();

    } catch (IllegalArgumentException e) {
        System.err.println(String.format("Phrase: \"%s\"", input));
        e.printStackTrace(System.err);
    } catch (IOException e) {
        System.err.println(String.format("Phrase: \"%s\"", input));
        e.printStackTrace();
    }

    return tokens.toArray(new String[0]);
}

From source file: org.sc.probro.lucene.ProteinSearcher.java

License: Apache License

public Query createPhraseQuery(String field, String phrase) throws IOException {
    PhraseQuery query = new PhraseQuery();
    /*
    String[] array = phrase.split("\\s+");
    for(int i = 0; i < array.length; i++) { 
       query.add(new Term(field, array[i]));
    }
    */

    try {
        TokenStream stream = analyzer.tokenStream(field, new StringReader(phrase));
        stream = new LowerCaseFilter(stream);

        stream.reset();

        while (stream.incrementToken()) {
            if (stream.hasAttribute(TermAttribute.class)) {
                TermAttribute termattr = (TermAttribute) stream.getAttribute(TermAttribute.class);
                Term t = new Term(field, termattr.term());
                query.add(t);
            }
        }

        stream.end();
        stream.close();

    } catch (IllegalArgumentException e) {
        e.printStackTrace(System.err);
        System.err.println(String.format("Phrase: \"%s\"", phrase));
    }

    return query;
}