Example usage for org.apache.lucene.analysis TokenStream close

List of usage examples for org.apache.lucene.analysis TokenStream close

Introduction

In this page you can find the example usage for org.apache.lucene.analysis TokenStream close.

Prototype

@Override
public void close() throws IOException 

Source Link

Document

Releases resources associated with this stream.

Usage

From source file:org.pageseeder.flint.lucene.search.Fields.java

License:Apache License

/**
 * Returns the terms for a field/*  w  w w .  j  a va2  s. co m*/
 *
 * @param field    The field
 * @param text     The text to analyze
 * @param analyzer The analyzer
 *
 * @return the corresponding list of terms produced by the analyzer.
 *
 * @throws IOException
 */
public static List<String> toTerms(String field, String text, Analyzer analyzer) {
    List<String> terms = new ArrayList<String>();
    try {
        TokenStream stream = analyzer.tokenStream(field, new StringReader(text));
        CharTermAttribute attribute = stream.addAttribute(CharTermAttribute.class);
        stream.reset();
        while (stream.incrementToken()) {
            String term = attribute.toString();
            terms.add(term);
        }
        stream.end();
        stream.close();
    } catch (IOException ex) {
        // Should not occur since we use a StringReader
        ex.printStackTrace();
    }
    return terms;
}

From source file:org.riotfamily.search.SimpleSearchQueryParser.java

License:Apache License

/**
 * Tokenizes the given text with this parser's analyzer.
 *
 * @param text the text to tokenize
 * @return the tokens produced by the analyzer; on I/O failure, whatever tokens
 *         were collected before the failure (best-effort, never null)
 */
protected List<Token> getTokens(String text) {
    ArrayList<Token> tokens = Generics.newArrayList();
    TokenStream source = null;
    try {
        source = analyzer.tokenStream(null, new StringReader(text));
        Token token;
        while ((token = source.next()) != null) {
            tokens.add(token);
        }
    } catch (IOException e) {
        // Deliberately best-effort: an IOException is not expected from a
        // StringReader-backed stream, so return the tokens collected so far.
    } finally {
        // Close in finally so the stream is released even if next() throws.
        if (source != null) {
            try {
                source.close();
            } catch (IOException e) {
                // Ignore close failure; tokens were already collected.
            }
        }
    }
    return tokens;
}

From source file:org.sc.probro.lucene.BiothesaurusSearcher.java

License:Apache License

/**
 * Tokenizes the input with this searcher's analyzer and returns the raw terms.
 *
 * @param input the phrase to tokenize
 * @return the terms produced by the analyzer (possibly partial if an error occurred)
 */
public String[] tokenize(String input) {
    ArrayList<String> tokens = new ArrayList<String>();
    TokenStream stream = null;
    try {
        stream = analyzer.tokenStream(null, new StringReader(input));
        // addAttribute (rather than getAttribute) never throws IllegalArgumentException:
        // it registers the attribute if the stream does not already provide it, which
        // also makes the per-token hasAttribute check unnecessary.
        TermAttribute termattr = stream.addAttribute(TermAttribute.class);

        stream.reset();

        while (stream.incrementToken()) {
            tokens.add(termattr.term());
        }

        stream.end();

    } catch (IllegalArgumentException e) {
        System.err.println(String.format("Phrase: \"%s\"", input));
        e.printStackTrace(System.err);
    } catch (IOException e) {
        System.err.println(String.format("Phrase: \"%s\"", input));
        e.printStackTrace();
    } finally {
        // Close in finally so the stream is released even on failure mid-iteration.
        if (stream != null) {
            try {
                stream.close();
            } catch (IOException e) {
                // Ignore close failure; tokens were already collected.
            }
        }
    }

    return tokens.toArray(new String[0]);
}

From source file:org.sc.probro.lucene.BiothesaurusSearcher.java

License:Apache License

public Query createPhraseQuery(String field, String phrase) throws IOException {
    PhraseQuery query = new PhraseQuery();
    /*/*from  w ww . ja  va 2 s.c o  m*/
    String[] array = phrase.split("\\s+");
    for(int i = 0; i < array.length; i++) { 
       query.add(new Term(field, array[i]));
    }
    */

    try {
        TokenStream stream = analyzer.tokenStream(field, new StringReader(phrase));
        //stream = new LowerCaseFilter(stream);

        stream.reset();

        while (stream.incrementToken()) {
            if (stream.hasAttribute(TermAttribute.class)) {
                TermAttribute termattr = (TermAttribute) stream.getAttribute(TermAttribute.class);
                Term t = new Term(field, termattr.term());
                query.add(t);
            }
        }

        stream.end();
        stream.close();

    } catch (IllegalArgumentException e) {
        e.printStackTrace(System.err);
        System.err.println(String.format("Phrase: \"%s\"", phrase));
    }

    return query;
}

From source file:org.sc.probro.lucene.ProteinSearcher.java

License:Apache License

/**
 * Tokenizes the input with this searcher's analyzer (lower-casing each token)
 * and returns the resulting terms.
 *
 * @param input the phrase to tokenize
 * @return the lower-cased terms produced by the analyzer (possibly partial on error)
 */
public String[] tokenize(String input) {
    ArrayList<String> tokens = new ArrayList<String>();
    TokenStream stream = null;
    try {
        stream = analyzer.tokenStream(null, new StringReader(input));
        stream = new LowerCaseFilter(stream);
        // addAttribute registers the attribute if absent, so it cannot throw
        // IllegalArgumentException and needs no per-token hasAttribute check.
        TermAttribute termattr = stream.addAttribute(TermAttribute.class);

        stream.reset();

        while (stream.incrementToken()) {
            tokens.add(termattr.term());
        }

        stream.end();

    } catch (IllegalArgumentException e) {
        System.err.println(String.format("Phrase: \"%s\"", input));
        e.printStackTrace(System.err);
    } catch (IOException e) {
        System.err.println(String.format("Phrase: \"%s\"", input));
        e.printStackTrace();
    } finally {
        // Close in finally so the stream is released even on failure mid-iteration.
        // Closing the LowerCaseFilter also closes the wrapped stream.
        if (stream != null) {
            try {
                stream.close();
            } catch (IOException e) {
                // Ignore close failure; tokens were already collected.
            }
        }
    }

    return tokens.toArray(new String[0]);
}

From source file:org.sc.probro.lucene.ProteinSearcher.java

License:Apache License

public Query createPhraseQuery(String field, String phrase) throws IOException {
    PhraseQuery query = new PhraseQuery();
    /*/*from  ww  w . j  av  a2 s.  c  om*/
    String[] array = phrase.split("\\s+");
    for(int i = 0; i < array.length; i++) { 
       query.add(new Term(field, array[i]));
    }
    */

    try {
        TokenStream stream = analyzer.tokenStream(field, new StringReader(phrase));
        stream = new LowerCaseFilter(stream);

        stream.reset();

        while (stream.incrementToken()) {
            if (stream.hasAttribute(TermAttribute.class)) {
                TermAttribute termattr = (TermAttribute) stream.getAttribute(TermAttribute.class);
                Term t = new Term(field, termattr.term());
                query.add(t);
            }
        }

        stream.end();
        stream.close();

    } catch (IllegalArgumentException e) {
        e.printStackTrace(System.err);
        System.err.println(String.format("Phrase: \"%s\"", phrase));
    }

    return query;
}

From source file:org.sd.text.lucene.LuceneUtils.java

License:Open Source License

/**
 * Split the string into tokens using the given analyzer.
 *
 * @param analyzer  the analyzer to tokenize with; if null, the whole string is
 *                  returned as a single "token"
 * @param fieldName the field name passed to the analyzer (may affect tokenization)
 * @param string    the text to split; if null, null is returned
 * @return the token texts, or null when {@code string} is null
 * @throws IllegalStateException if tokenization fails with an IOException
 */
public static final List<String> getTokenTexts(Analyzer analyzer, String fieldName, String string) {
    if (string == null)
        return null;

    final List<String> result = new ArrayList<String>();

    if (analyzer != null) {
        final TokenStream tokenStream = analyzer.tokenStream(fieldName, new StringReader(string));

        try {
            // NOTE(review): depending on the Lucene version in use, tokenStream.reset()
            // may be required before the first incrementToken() — confirm against the
            // version this project builds with.
            while (tokenStream.incrementToken()) {
                if (tokenStream.hasAttribute(TermAttribute.class)) {
                    final TermAttribute termAttribute = (TermAttribute) tokenStream
                            .getAttribute(TermAttribute.class);
                    result.add(termAttribute.term());
                }
            }
        } catch (IOException e) {
            throw new IllegalStateException(e);
        } finally {
            // Close in finally so the stream is released even on the exception path.
            try {
                tokenStream.close();
            } catch (IOException e) {
                // Ignore close failure; the tokens were already collected.
            }
        }
    } else {
        result.add(string);
    }

    return result;
}

From source file:org.sd.text.lucene.LuceneUtils.java

License:Open Source License

/**
 * Splits the string into phrases of consecutive tokens using the given analyzer.
 * Tokens whose start offset equals the previous token's end offset are grouped
 * into the same phrase; a gap in offsets starts a new phrase.
 *
 * @param analyzer  the analyzer to tokenize with; if null, the whole string
 *                  becomes a single one-element phrase
 * @param fieldName the field name passed to the analyzer (may affect tokenization)
 * @param string    the text to split; if null, null is returned
 * @return the phrases (each a list of token texts), or null when {@code string} is null
 * @throws IllegalStateException if tokenization fails with an IOException
 */
public static final List<List<String>> getPhraseTexts(Analyzer analyzer, String fieldName, String string) {
    if (string == null)
        return null;

    final List<List<String>> result = new LinkedList<List<String>>();
    List<String> curPhrase = new ArrayList<String>();
    result.add(curPhrase);

    if (analyzer != null) {
        final TokenStream tokenStream = analyzer.tokenStream(fieldName, new StringReader(string));
        int lastEndOffset = 0;

        try {
            while (tokenStream.incrementToken()) {
                // A token adjacent to the previous one (no offset gap) stays in
                // the current phrase; otherwise a new phrase begins.
                boolean incPhrase = true;
                if (tokenStream.hasAttribute(OffsetAttribute.class)) {
                    final OffsetAttribute offsetAttribute = (OffsetAttribute) tokenStream
                            .getAttribute(OffsetAttribute.class);
                    if (offsetAttribute.startOffset() == lastEndOffset) {
                        incPhrase = false;
                    }
                    lastEndOffset = offsetAttribute.endOffset();
                }

                if (tokenStream.hasAttribute(TermAttribute.class)) {
                    final TermAttribute termAttribute = (TermAttribute) tokenStream
                            .getAttribute(TermAttribute.class);
                    if (incPhrase && curPhrase.size() > 0) {
                        curPhrase = new ArrayList<String>();
                        result.add(curPhrase);
                    }

                    curPhrase.add(termAttribute.term());
                }
            }
        } catch (IOException e) {
            throw new IllegalStateException(e);
        } finally {
            // Close in finally so the stream is released even on the exception path.
            try {
                tokenStream.close();
            } catch (IOException e) {
                // Ignore close failure; the phrases were already collected.
            }
        }
    } else {
        curPhrase.add(string);
    }

    return result;
}

From source file:org.sindice.siren.analysis.TestTupleAnalyzer.java

License:Apache License

/**
 * Asserts that analyzing {@code input} produces exactly the expected token
 * images, and optionally the expected types, position increments, tuple IDs
 * and cell IDs (each check is skipped when its expected array is null).
 *
 * @param input the text to analyze
 * @throws Exception if analysis fails or an assertion does not hold
 */
public void assertAnalyzesTo(final Analyzer a, final String input, final String[] expectedImages,
        final String[] expectedTypes, final int[] expectedPosIncrs, final int[] expectedTupleID,
        final int[] expectedCellID) throws Exception {
    final TokenStream t = a.reusableTokenStream("", new StringReader(input));
    // close() in finally so the stream is released even when an assertion fails.
    try {
        assertTrue("has TermAttribute", t.hasAttribute(TermAttribute.class));
        final TermAttribute termAtt = t.getAttribute(TermAttribute.class);

        TypeAttribute typeAtt = null;
        if (expectedTypes != null) {
            assertTrue("has TypeAttribute", t.hasAttribute(TypeAttribute.class));
            typeAtt = t.getAttribute(TypeAttribute.class);
        }

        PositionIncrementAttribute posIncrAtt = null;
        if (expectedPosIncrs != null) {
            assertTrue("has PositionIncrementAttribute", t.hasAttribute(PositionIncrementAttribute.class));
            posIncrAtt = t.getAttribute(PositionIncrementAttribute.class);
        }

        TupleAttribute tupleAtt = null;
        if (expectedTupleID != null) {
            assertTrue("has TupleAttribute", t.hasAttribute(TupleAttribute.class));
            tupleAtt = t.getAttribute(TupleAttribute.class);
        }

        CellAttribute cellAtt = null;
        if (expectedCellID != null) {
            assertTrue("has CellAttribute", t.hasAttribute(CellAttribute.class));
            cellAtt = t.getAttribute(CellAttribute.class);
        }

        for (int i = 0; i < expectedImages.length; i++) {

            assertTrue("token " + i + " exists", t.incrementToken());

            assertEquals(expectedImages[i], termAtt.term());

            if (expectedTypes != null) {
                assertEquals(expectedTypes[i], typeAtt.type());
            }

            if (expectedPosIncrs != null) {
                assertEquals(expectedPosIncrs[i], posIncrAtt.getPositionIncrement());
            }

            if (expectedTupleID != null) {
                assertEquals(expectedTupleID[i], tupleAtt.tuple());
            }

            if (expectedCellID != null) {
                assertEquals(expectedCellID[i], cellAtt.cell());
            }
        }

        assertFalse("end of stream", t.incrementToken());
        t.end();
    } finally {
        t.close();
    }
}

From source file:org.sindice.siren.qparser.analysis.QNamesFilterTest.java

License:Apache License

@Test
public void testInvalidQName() throws Exception {
    // QNames that cannot be expanded must pass through the filter unchanged.
    final String query = "<http:> <foaf:2> <foaf:-qw>";
    final NTripleQueryAnalyzer analyzer = new NTripleQueryAnalyzer();
    final TokenStream stream = analyzer.tokenStream(null, new StringReader(query));
    final TokenFilter filter = new QNamesFilter(stream, "./src/test/resources/conf/qnames");

    // close() in finally so the stream is released even when an assertion fails;
    // closing the filter also closes the wrapped stream.
    try {
        final CupScannerWrapper wrapper = new CupScannerWrapper(filter);
        Symbol symbol = wrapper.next_token();
        assertTrue(symbol != null);
        assertTrue(symbol.value.toString().equals("http:"));
        symbol = wrapper.next_token();
        assertTrue(symbol != null);
        assertTrue(symbol.value.toString().equals("foaf:2"));
        symbol = wrapper.next_token();
        assertTrue(symbol != null);
        assertTrue(symbol.value.toString().equals("foaf:-qw"));
        symbol = wrapper.next_token();
        assertTrue(symbol == null);
    } finally {
        filter.close();
    }
}