Example usage for org.apache.lucene.analysis TokenStream close

List of usage examples for org.apache.lucene.analysis TokenStream close

Introduction

On this page you can find example usage of org.apache.lucene.analysis TokenStream close.

Prototype

@Override
public void close() throws IOException 

Source Link

Document

Releases resources associated with this stream.

Usage

From source file:org.elasticsearch.index.analysis.PatternTokenizerTests.java

License:Apache License

/**
 * Renders a token stream as a single space-separated string of its terms.
 * TODO: rewrite tests not to use string comparison.
 *
 * @param in the stream to consume; it is reset, fully consumed, ended and closed here
 * @return the stream's terms joined by single spaces (empty string for an empty stream)
 * @throws IOException if the underlying stream throws it
 */
private static String tsToString(TokenStream in) throws IOException {
    StringBuilder out = new StringBuilder();
    CharTermAttribute termAtt = in.addAttribute(CharTermAttribute.class);
    // extra safety to enforce, that the state is not preserved and also
    // assign bogus values
    in.clearAttributes();
    termAtt.setEmpty().append("bogusTerm");
    try {
        in.reset();
        while (in.incrementToken()) {
            if (out.length() > 0) {
                out.append(' ');
            }
            out.append(termAtt.toString());
            // re-poison the attribute so a filter that fails to set the term is caught
            in.clearAttributes();
            termAtt.setEmpty().append("bogusTerm");
        }
        // complete the TokenStream lifecycle contract before closing
        in.end();
    } finally {
        // close even when incrementToken() throws, so the stream never leaks
        in.close();
    }
    return out.toString();
}

From source file:org.elasticsearch.index.analysis.PreBuiltAnalyzerTests.java

License:Apache License

@Test
public void testThatDefaultAndStandardAnalyzerChangedIn10Beta1() throws IOException {
    Analyzer currentStandardAnalyzer = PreBuiltAnalyzers.STANDARD.getAnalyzer(Version.V_1_0_0_Beta1);
    Analyzer currentDefaultAnalyzer = PreBuiltAnalyzers.DEFAULT.getAnalyzer(Version.V_1_0_0_Beta1);

    // special case, these two are the same instance
    assertThat(currentDefaultAnalyzer, is(currentStandardAnalyzer));
    PreBuiltAnalyzers.DEFAULT.getAnalyzer(Version.V_1_0_0_Beta1);
    final int n = atLeast(10);
    Version version = Version.CURRENT;
    for (int i = 0; i < n; i++) {
        if (version.equals(Version.V_1_0_0_Beta1)) {
            assertThat(currentDefaultAnalyzer, is(PreBuiltAnalyzers.DEFAULT.getAnalyzer(version)));
        } else {
            assertThat(currentDefaultAnalyzer, not(is(PreBuiltAnalyzers.DEFAULT.getAnalyzer(version))));
        }
        Analyzer analyzer = PreBuiltAnalyzers.DEFAULT.getAnalyzer(version);
        // try-with-resources guarantees the stream is closed even when an assertion fails
        try (TokenStream ts = analyzer.tokenStream("foo", "This is it Dude")) {
            CharTermAttribute charTermAttribute = ts.addAttribute(CharTermAttribute.class);
            ts.reset();
            List<String> list = new ArrayList<>();
            while (ts.incrementToken()) {
                list.add(charTermAttribute.toString());
            }
            // complete the TokenStream lifecycle contract
            ts.end();
            if (version.onOrAfter(Version.V_1_0_0_Beta1)) {
                // standard analyzer: lowercases, keeps stopwords from 1.0.0.Beta1 on
                assertThat(list.size(), is(4));
                assertThat(list, contains("this", "is", "it", "dude"));
            } else {
                // older default analyzer removed stopwords, leaving a single token
                assertThat(list.size(), is(1));
                assertThat(list, contains("dude"));
            }
        }
        version = randomVersion();
    }
}

From source file:org.elasticsearch.index.analysis.PreBuiltAnalyzerTests.java

License:Apache License

@Test
public void testAnalyzerChangedIn10RC1() throws IOException {
    Analyzer pattern = PreBuiltAnalyzers.PATTERN.getAnalyzer(Version.V_1_0_0_RC1);
    Analyzer standardHtml = PreBuiltAnalyzers.STANDARD_HTML_STRIP.getAnalyzer(Version.V_1_0_0_RC1);
    final int n = atLeast(10);
    Version version = Version.CURRENT;
    for (int i = 0; i < n; i++) {
        if (version.equals(Version.V_1_0_0_RC1)) {
            assertThat(pattern, is(PreBuiltAnalyzers.PATTERN.getAnalyzer(version)));
            assertThat(standardHtml, is(PreBuiltAnalyzers.STANDARD_HTML_STRIP.getAnalyzer(version)));
        } else {
            // compare each analyzer against its OWN registry entry; the original
            // compared both against DEFAULT (a copy-paste slip), which made the
            // negative assertions trivially true
            assertThat(pattern, not(is(PreBuiltAnalyzers.PATTERN.getAnalyzer(version))));
            assertThat(standardHtml, not(is(PreBuiltAnalyzers.STANDARD_HTML_STRIP.getAnalyzer(version))));
        }
        Analyzer analyzer = randomBoolean() ? PreBuiltAnalyzers.PATTERN.getAnalyzer(version)
                : PreBuiltAnalyzers.STANDARD_HTML_STRIP.getAnalyzer(version);
        // try-with-resources guarantees the stream is closed even when an assertion fails
        try (TokenStream ts = analyzer.tokenStream("foo", "This is it Dude")) {
            CharTermAttribute charTermAttribute = ts.addAttribute(CharTermAttribute.class);
            ts.reset();
            List<String> list = new ArrayList<>();
            while (ts.incrementToken()) {
                list.add(charTermAttribute.toString());
            }
            // complete the TokenStream lifecycle contract
            ts.end();
            if (version.onOrAfter(Version.V_1_0_0_RC1)) {
                // from 1.0.0.RC1 on these analyzers keep stopwords
                assertThat(list.toString(), list.size(), is(4));
                assertThat(list, contains("this", "is", "it", "dude"));
            } else {
                // older versions removed stopwords, leaving a single token
                assertThat(list.size(), is(1));
                assertThat(list, contains("dude"));
            }
        }
        version = randomVersion();
    }
}

From source file:org.elasticsearch.index.analysis.RSLPTokenFilterTests.java

License:Apache License

@Test
public void testRSLPRules() throws Exception {
    Index index = new Index("test");
    Settings settings = Settings.settingsBuilder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
            .put("path.home", createTempDir()).put("index.analysis.filter.myStemmer.type", "br_rslp").build();
    AnalysisService analysisService = createAnalysisService(index, settings);

    TokenFilterFactory filterFactory = analysisService.tokenFilter("myStemmer");

    // one tokenizer instance is reused across words via setReader()
    Tokenizer tokenizer = new KeywordTokenizer();

    // map of input word -> expected stemmed form
    Map<String, String> words = buildWordList();

    Set<String> inputWords = words.keySet();
    for (String word : inputWords) {
        tokenizer.setReader(new StringReader(word));
        // try-with-resources closes the filtered stream even when an assertion fails
        try (TokenStream ts = filterFactory.create(tokenizer)) {
            CharTermAttribute term1 = ts.addAttribute(CharTermAttribute.class);
            ts.reset();
            // KeywordTokenizer emits exactly one token per input word
            assertThat(ts.incrementToken(), equalTo(true));
            assertThat(term1.toString(), equalTo(words.get(word)));
            // complete the TokenStream lifecycle contract
            ts.end();
        }
    }
}

From source file:org.elasticsearch.index.analysis.RSLPTokenFilterTests.java

License:Apache License

@Test
public void testRSLPPhrases() throws Exception {
    Index index = new Index("test");
    Settings settings = Settings.settingsBuilder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
            .put("path.home", createTempDir()).put("index.analysis.analyzer.myAnalyzer.type", "custom")
            .put("index.analysis.analyzer.myAnalyzer.tokenizer", "standard")
            .put("index.analysis.analyzer.myAnalyzer.filter", "br_rslp").build();
    AnalysisService analysisService = createAnalysisService(index, settings);

    Analyzer analyzer = analysisService.analyzer("myAnalyzer");

    // map of input phrase -> expected stemmed tokens, in order
    Map<String, List<String>> phrases = buildPhraseList();

    for (String phrase : phrases.keySet()) {
        List<String> outputWords = phrases.get(phrase);

        // try-with-resources closes the stream even when an assertion fails
        try (TokenStream ts = analyzer.tokenStream("test", phrase)) {
            CharTermAttribute term1 = ts.addAttribute(CharTermAttribute.class);
            ts.reset();

            for (String expected : outputWords) {
                assertThat(ts.incrementToken(), equalTo(true));
                assertThat(term1.toString(), equalTo(expected));
            }
            // complete the TokenStream lifecycle contract
            ts.end();
        }
    }
}

From source file:org.elasticsearch.index.analysis.SimpleIcuCollationTokenFilterTests.java

License:Apache License

/**
 * Asserts that each stream yields exactly one token (its collation key) and
 * that the two keys compare with the expected sign.
 *
 * @param stream1    first stream; reset, consumed, ended and closed here
 * @param stream2    second stream; reset, consumed, ended and closed here
 * @param comparison expected comparison result; only its sign is checked
 * @throws IOException if either stream throws it
 */
private void assertCollation(TokenStream stream1, TokenStream stream2, int comparison) throws IOException {
    try {
        CharTermAttribute term1 = stream1.addAttribute(CharTermAttribute.class);
        CharTermAttribute term2 = stream2.addAttribute(CharTermAttribute.class);

        stream1.reset();
        stream2.reset();

        assertThat(stream1.incrementToken(), equalTo(true));
        assertThat(stream2.incrementToken(), equalTo(true));
        // compare only the sign, since compareTo's magnitude is unspecified
        assertThat(Integer.signum(term1.toString().compareTo(term2.toString())),
                equalTo(Integer.signum(comparison)));
        assertThat(stream1.incrementToken(), equalTo(false));
        assertThat(stream2.incrementToken(), equalTo(false));

        stream1.end();
        stream2.end();
    } finally {
        // close both streams even when an assertion fails, so they never leak
        stream1.close();
        stream2.close();
    }
}

From source file:org.elasticsearch.index.mapper.core.TokenCountFieldMapper.java

License:Apache License

/**
 * Count position increments in a token stream.  Package private for testing.
 * @param tokenStream token stream to count
 * @return number of position increments in a token stream
 * @throws IOException if tokenStream throws it
 *//*  w w w .j  a v a 2  s. com*/
/**
 * Count position increments in a token stream.  Package private for testing.
 *
 * @param tokenStream token stream to count; it is reset, consumed, ended and closed here
 * @return number of position increments in a token stream
 * @throws IOException if tokenStream throws it
 */
static int countPositions(TokenStream tokenStream) throws IOException {
    PositionIncrementAttribute position = tokenStream.addAttribute(PositionIncrementAttribute.class);
    try {
        int total = 0;
        tokenStream.reset();
        while (tokenStream.incrementToken()) {
            total += position.getPositionIncrement();
        }
        tokenStream.end();
        // end() can report one final increment (e.g. for trailing holes)
        total += position.getPositionIncrement();
        return total;
    } finally {
        // release stream resources regardless of how counting ended
        tokenStream.close();
    }
}

From source file:org.elasticsearch.index.mapper.hashsplitter.HashSplitterFieldMapper.java

License:Apache License

/**
 * Builds a conjunction of term queries from the analyzed chunks of {@code value}.
 * Uses HashSplitterSearch* analysis and post-processes it to create the real query.
 *
 * @param value   the field value to analyze into chunk terms
 * @param context query parse context (unused here); may be null
 * @return a BooleanQuery with one MUST TermQuery per token, or null on analysis failure
 */
@Override
public Query fieldQuery(String value, @Nullable QueryParseContext context) {
    TokenStream tok = null;
    try {
        tok = indexAnalyzer.reusableTokenStream(names().indexNameClean(), new FastStringReader(value));
        tok.reset();
    } catch (IOException e) {
        // analysis could not even start; close whatever was opened and give up
        if (tok != null) {
            try {
                tok.close();
            } catch (IOException ignored) {
                // best-effort close only
            }
        }
        return null;
    }
    CharTermAttribute termAtt = tok.getAttribute(CharTermAttribute.class);
    BooleanQuery q = new BooleanQuery();
    try {
        while (tok.incrementToken()) {
            Term term = names().createIndexNameTerm(termAtt.toString());
            q.add(new TermQuery(term), BooleanClause.Occur.MUST);
        }
        tok.end();
    } catch (IOException e) {
        e.printStackTrace();
        q = null;
    } finally {
        // close in finally so the stream never leaks, even when incrementToken() throws
        try {
            tok.close();
        } catch (IOException ignored) {
            // the query outcome is already decided; a close failure changes nothing
        }
    }
    return q;
}

From source file:org.elasticsearch.index.mapper.hashsplitter.HashSplitterFieldMapper.java

License:Apache License

/**
 * Builds a conjunction of term filters from the analyzed chunks of {@code value}.
 * Uses HashSplitterSearch* analysis and post-processes it to create the real filter.
 *
 * @param value   the field value to analyze into chunk terms
 * @param context query parse context (unused here); may be null
 * @return a BooleanFilter with one MUST TermFilter per token, or null on analysis failure
 */
@Override
public Filter fieldFilter(String value, @Nullable QueryParseContext context) {
    TokenStream tok = null;
    try {
        tok = indexAnalyzer.reusableTokenStream(names().indexNameClean(), new FastStringReader(value));
        tok.reset();
    } catch (IOException e) {
        // analysis could not even start; close whatever was opened and give up
        if (tok != null) {
            try {
                tok.close();
            } catch (IOException ignored) {
                // best-effort close only
            }
        }
        return null;
    }
    CharTermAttribute termAtt = tok.getAttribute(CharTermAttribute.class);
    BooleanFilter f = new BooleanFilter();
    try {
        while (tok.incrementToken()) {
            Term term = names().createIndexNameTerm(termAtt.toString());
            f.add(new TermFilter(term), BooleanClause.Occur.MUST);
        }
        tok.end();
    } catch (IOException e) {
        e.printStackTrace();
        f = null;
    } finally {
        // close in finally so the stream never leaks, even when incrementToken() throws
        try {
            tok.close();
        } catch (IOException ignored) {
            // the filter outcome is already decided; a close failure changes nothing
        }
    }
    return f;
}

From source file:org.elasticsearch.index.mapper.hashsplitter.HashSplitterFieldMapper.java

License:Apache License

/**
 * Builds a prefix query over the analyzed chunks of {@code value}.
 * Uses HashSplitterSearch* analysis and post-processes it to create the real query:
 * full chunks become exact TermQuerys, while a trailing partial chunk becomes a
 * length-bounded PrefixLengthQuery.
 *
 * @param value   the prefix value to analyze into chunk terms
 * @param method  rewrite method (unused here); may be null
 * @param context query parse context (unused here); may be null
 * @return a BooleanQuery combining the chunk queries, or null on analysis failure
 */
@Override
public Query prefixQuery(String value, @Nullable MultiTermQuery.RewriteMethod method,
        @Nullable QueryParseContext context) {
    TokenStream tok = null;
    try {
        tok = indexAnalyzer.reusableTokenStream(names().indexNameClean(), new FastStringReader(value));
        tok.reset();
    } catch (IOException e) {
        // analysis could not even start; close whatever was opened and give up
        if (tok != null) {
            try {
                tok.close();
            } catch (IOException ignored) {
                // best-effort close only
            }
        }
        return null;
    }
    CharTermAttribute termAtt = tok.getAttribute(CharTermAttribute.class);
    BooleanQuery q = new BooleanQuery();
    try {
        int remainingSize = sizeIsVariable ? 0 : sizeValue; // note: prefixes are not included
        while (tok.incrementToken()) {
            Term term = names().createIndexNameTerm(termAtt.toString());
            if (termAtt.length() < 1 + chunkLength) {
                // partial (last) chunk: constrain by length rather than exact term
                if (remainingSize > 0) { // implies size is fixed
                    if (remainingSize < chunkLength)
                        q.add(new PrefixLengthQuery(term, 1 + remainingSize, 1 + remainingSize),
                                BooleanClause.Occur.MUST);
                    else
                        q.add(new PrefixLengthQuery(term, 1 + chunkLength, 1 + chunkLength),
                                BooleanClause.Occur.MUST);
                } else { // varying size: only limit to the chunkLength
                    q.add(new PrefixLengthQuery(term, 0, 1 + chunkLength), BooleanClause.Occur.MUST);
                }
            } else {
                // full chunk: must match exactly
                q.add(new TermQuery(term), BooleanClause.Occur.MUST);
            }
            remainingSize -= termAtt.length() - 1; // termAtt contains the prefix, remainingSize doesn't take it into account
        }
        tok.end();
    } catch (IOException e) {
        e.printStackTrace();
        q = null;
    } finally {
        // close in finally so the stream never leaks, even when incrementToken() throws
        try {
            tok.close();
        } catch (IOException ignored) {
            // the query outcome is already decided; a close failure changes nothing
        }
    }
    return q;
}