Example usage for org.apache.lucene.analysis TokenStream close

List of usage examples for org.apache.lucene.analysis TokenStream close

Introduction

In this page you can find the example usage for org.apache.lucene.analysis TokenStream close.

Prototype

@Override
public void close() throws IOException 

Source Link

Document

Releases resources associated with this stream.

Usage

From source file:servlets.TermStatsComparator.java

/**
 * Analyzes {@code query} with the retriever's Analyzer and returns the first
 * token produced, or the original query string if analysis fails.
 *
 * @param query raw query text to analyze
 * @return the first analyzed term (empty string if the analyzer emits no
 *         tokens), or {@code query} unchanged when analysis throws
 */
String analyze(String query) {
    StringBuilder buff = new StringBuilder();
    try {
        Analyzer analyzer = retriever.getAnalyzer();
        // try-with-resources guarantees the stream is closed even when
        // reset()/incrementToken() throws; the original leaked it on error.
        try (TokenStream stream = analyzer.tokenStream("dummy", new StringReader(query))) {
            CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
            stream.reset();
            // Only the first token is kept (the original loop broke after one
            // iteration); an if expresses that directly.
            if (stream.incrementToken()) {
                buff.append(termAtt.toString());
            }
            stream.end();
        }
    } catch (Exception ex) {
        ex.printStackTrace();
        return query;
    }
    return buff.toString();
}

From source file:sh.isaac.provider.query.lucene.LuceneIndexer.java

License:Apache License

/**
 * Builds a prefix query: every complete analyzed term becomes a required
 * {@link TermQuery}, and the final term becomes a required {@link PrefixQuery}
 * unless the search string ends with a space (i.e. the last word is complete).
 *
 * @param searchString the search string
 * @param field the field to query against
 * @param analyzer the analyzer used to tokenize {@code searchString}
 * @return the assembled boolean query
 * @throws IOException Signals that an I/O exception has occurred.
 */
protected Query buildPrefixQuery(String searchString, String field, Analyzer analyzer) throws IOException {
    final List<String> terms = new ArrayList<>();
    // try-with-resources closes the TokenStream even when reset() or
    // incrementToken() throws (the original only closed it on the happy
    // path), and end() is invoked as the TokenStream contract requires
    // before close().
    try (StringReader textReader = new StringReader(searchString);
            TokenStream tokenStream = analyzer.tokenStream(field, textReader)) {
        final CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);
        tokenStream.reset();
        while (tokenStream.incrementToken()) {
            terms.add(charTermAttribute.toString());
        }
        tokenStream.end();
    }
    // NOTE(review): closing the caller-supplied Analyzer makes it unusable
    // for subsequent calls; preserved for behavioral compatibility — confirm
    // whether callers rely on this, otherwise remove.
    analyzer.close();

    final BooleanQuery.Builder bq = new BooleanQuery.Builder();

    if (!terms.isEmpty() && !searchString.endsWith(" ")) {
        // The user is mid-word: match the last term as a prefix.
        final String last = terms.remove(terms.size() - 1);
        bq.add(new PrefixQuery(new Term(field, last)), Occur.MUST);
    }

    for (final String s : terms) {
        bq.add(new TermQuery(new Term(field, s)), Occur.MUST);
    }
    return bq.build();
}

From source file:stackoverflow.lucene.modified.MoreLikeThis.java

License:Apache License

/**
 * Adds term frequencies found by tokenizing text from the reader into the map.
 *
 * @param r a source of text to be tokenized
 * @param termFreqMap a Map of terms and their frequencies
 * @param fieldName Used by analyzer for any special per-field analysis
 * @throws IOException if tokenization fails
 * @throws UnsupportedOperationException if no analyzer is configured
 */
private void addTermFrequencies(Reader r, Map<String, Int> termFreqMap, String fieldName) throws IOException {
    if (analyzer == null) {
        throw new UnsupportedOperationException(
                "To use MoreLikeThis without " + "term vectors, you must provide an Analyzer");
    }
    // try-with-resources closes the stream even when reset() or
    // incrementToken() throws; the original leaked it on exception.
    try (TokenStream ts = analyzer.tokenStream(fieldName, r)) {
        CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
        ts.reset();
        int tokenCount = 0;
        while (ts.incrementToken()) {
            String word = termAtt.toString();
            tokenCount++;
            // Cap the amount of text processed per document.
            if (tokenCount > maxNumTokensParsed) {
                break;
            }
            if (isNoiseWord(word)) {
                continue;
            }

            // Increment the frequency, inserting a fresh counter on first sight.
            Int cnt = termFreqMap.get(word);
            if (cnt == null) {
                termFreqMap.put(word, new Int());
            } else {
                cnt.x++;
            }
        }
        ts.end();
    }
}

From source file:test.analysis.AnalyzerUtils.java

License:Apache License

/**
 * Consumes the given TokenStream and prints each term to stdout as
 * {@code [term] }. The stream is always closed, even if iteration throws,
 * and end() is called per the TokenStream contract.
 *
 * @param stream the token stream to display; consumed and closed by this call
 * @throws IOException if the stream fails during iteration
 */
public static void displayTokens(TokenStream stream) throws IOException {
    // try-with-resources fixes the leak when incrementToken() throws.
    try (TokenStream ts = stream) {
        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        ts.reset();
        while (ts.incrementToken()) {
            System.out.print("[" + term + "] "); //B
        }
        ts.end(); // contract: end() before close()
    }
}

From source file:test.analysis.AnalyzerUtils.java

License:Apache License

/**
 * Tokenizes {@code text} with the analyzer and prints each term grouped by
 * token position, e.g. {@code 1: [quick] } — terms with a zero position
 * increment (synonyms) appear on the same line as the previous term.
 *
 * @param analyzer the analyzer to tokenize with
 * @param text the text to analyze
 * @throws IOException if tokenization fails
 */
public static void displayTokensWithPositions(Analyzer analyzer, String text) throws IOException {

    // try-with-resources closes the stream on all paths; the original
    // leaked it when incrementToken() threw, and never called end().
    try (TokenStream stream = analyzer.tokenStream("contents", new StringReader(text))) {
        CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
        PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class);
        stream.reset();
        int position = 0;
        while (stream.incrementToken()) {
            int increment = posIncr.getPositionIncrement();
            if (increment > 0) {
                // New position: start a fresh line labeled with it.
                position = position + increment;
                System.out.println();
                System.out.print(position + ": ");
            }

            System.out.print("[" + term + "] ");
        }
        stream.end();
    }
    System.out.println();
}

From source file:test.analysis.AnalyzerUtils.java

License:Apache License

/**
 * Tokenizes {@code text} and prints every token with full details:
 * position, term text, start/end offsets, and token type, e.g.
 * {@code 2: [quick:4->9:word] }.
 *
 * @param analyzer the analyzer to tokenize with
 * @param text the text to analyze
 * @throws IOException if tokenization fails
 */
public static void displayTokensWithFullDetails(Analyzer analyzer, String text) throws IOException {

    // try-with-resources closes the stream on all paths (the original
    // leaked it on exception) and end() is called per the contract.
    try (TokenStream stream = analyzer.tokenStream("contents", // #A
            new StringReader(text))) {

        CharTermAttribute term = stream.addAttribute(CharTermAttribute.class); // #B
        PositionIncrementAttribute posIncr = // #B
                stream.addAttribute(PositionIncrementAttribute.class); // #B
        OffsetAttribute offset = stream.addAttribute(OffsetAttribute.class); // #B
        TypeAttribute type = stream.addAttribute(TypeAttribute.class); // #B

        stream.reset();
        int position = 0;
        while (stream.incrementToken()) { // #C

            int increment = posIncr.getPositionIncrement(); // #D
            if (increment > 0) { // #D
                position = position + increment; // #D
                System.out.println(); // #D
                System.out.print(position + ": "); // #D
            }

            System.out.print("[" + // #E
                    term + ":" + // #E
                    offset.startOffset() + "->" + // #E
                    offset.endOffset() + ":" + // #E
                    type.type() + "] "); // #E
        }
        stream.end();
    }
    System.out.println();
}

From source file:test.analysis.AnalyzerUtils.java

License:Apache License

/**
 * Asserts that {@code analyzer} tokenizes {@code input} into exactly the
 * terms of {@code output}, in order, with no extra tokens.
 *
 * @param analyzer the analyzer under test
 * @param input the text to tokenize
 * @param output the expected term sequence
 * @throws Exception if tokenization fails or an assertion trips
 */
public static void assertAnalyzesTo(Analyzer analyzer, String input, String[] output) throws Exception {
    // try-with-resources closes the stream even when an assertion fails
    // (the original leaked it on any thrown AssertionError).
    try (TokenStream stream = analyzer.tokenStream("field", new StringReader(input))) {
        CharTermAttribute termAttr = stream.addAttribute(CharTermAttribute.class);
        // reset() is mandatory before incrementToken(); the original omitted
        // it, which modern Lucene rejects with IllegalStateException.
        stream.reset();
        for (String expected : output) {
            assertTrue(stream.incrementToken());
            assertEquals(expected, termAttr.toString());
        }
        assertFalse(stream.incrementToken());
        stream.end();
    }
}

From source file:test.AnalzyerDemo.java

License:Apache License

/**
 * Demo entry point: tokenizes a mixed CJK/English sample string with
 * {@code BaseAnalyzer} and prints each token's offsets, text, and type.
 * Both the TokenStream and the Analyzer are released on every path
 * (the original closed the Analyzer only on the error path).
 */
public static void main(String[] args) {
    Analyzer analyzer = new BaseAnalyzer();
    // Analyzer analyzer = new org.apache.lucene.analysis.cjk.CJKAnalyzer();
    // Obtain a Lucene TokenStream for the sample text.
    TokenStream ts = null;
    try {
        ts = analyzer.tokenStream("myfield", new StringReader(
                "????????????????2?3noneok???BaseAnalyer can analysis english text too"));
        // Start/end character offsets of each token.
        OffsetAttribute offset = ts.addAttribute(OffsetAttribute.class);
        // Token text.
        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        // Token type (word, number, ...).
        TypeAttribute type = ts.addAttribute(TypeAttribute.class);
        // Reset the TokenStream before consuming it.
        ts.reset();
        // Print every token.
        while (ts.incrementToken()) {
            System.out.println(offset.startOffset() + " - " + offset.endOffset() + " : " + term.toString()
                    + " | " + type.type());
        }
        ts.end(); // Perform end-of-stream operations, e.g. set the final offset.
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        // Release the TokenStream.
        if (ts != null) {
            try {
                ts.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        // Release the Analyzer on every path, not just after an error.
        analyzer.close();
    }
}

From source file:text_analyzer.AnalyzerUtils.java

License:Apache License

/**
 * Intended to assert that {@code analyzer} tokenizes {@code input} into
 * exactly the terms of {@code output}; every assertion is currently
 * commented out, so this method only opens and closes a TokenStream and
 * performs no checks.
 *
 * NOTE(review): the loop body is empty because the asserts are disabled —
 * presumably to silence failures during an API migration (this uses the
 * legacy TermAttribute API); confirm intent before re-enabling. Also note
 * the stream is never reset() or end()'d, which modern Lucene requires.
 *
 * @param analyzer the analyzer under test
 * @param input the text to tokenize
 * @param output the expected term sequence (currently unchecked)
 * @throws Exception if opening or closing the stream fails
 */
public static void assertAnalyzesTo(Analyzer analyzer, String input, String[] output) throws Exception {
    TokenStream stream = analyzer.tokenStream("field", new StringReader(input));

    TermAttribute termAttr = stream.addAttribute(TermAttribute.class);
    for (String expected : output) {
        //Assert.assertTrue(stream.incrementToken());
        //Assert.assertEquals(expected, termAttr.term());
    }
    //Assert.assertFalse(stream.incrementToken());
    stream.close();
}

From source file:tweetembeding.AnalyzerClass.java

/**
 * Analyzes {@code txt} and returns the space-joined analyzed terms,
 * dropping literal "nbsp" tokens (HTML non-breaking-space residue).
 *
 * @param FIELD the field name passed to the analyzer
 * @param txt the text to tokenize
 * @return the analyzed terms joined by single spaces (with a trailing
 *         space when at least one term is emitted)
 * @throws IOException if tokenization fails
 */
public String analizeString(String FIELD, String txt) throws IOException {
    this.analyzer = setAnalyzer();
    // Unsynchronized StringBuilder suffices for this local accumulator.
    StringBuilder tokenizedContentBuff = new StringBuilder();
    // try-with-resources closes the stream even when incrementToken()
    // throws; the original leaked it on exception.
    try (TokenStream stream = analyzer.tokenStream(FIELD, new StringReader(txt))) {
        CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
        stream.reset();
        while (stream.incrementToken()) {
            String term = termAtt.toString();
            if (!term.equals("nbsp")) {
                tokenizedContentBuff.append(term).append(" ");
            }
        }
        stream.end();
    }
    return tokenizedContentBuff.toString();
}