List of usage examples for org.apache.lucene.analysis TokenStream close
@Override public void close() throws IOException
From source file:org.elasticsearch.index.analysis.PatternTokenizerTests.java
License:Apache License
/** * TODO: rewrite tests not to use string comparison. *///w w w .j a v a2 s . c o m private static String tsToString(TokenStream in) throws IOException { StringBuilder out = new StringBuilder(); CharTermAttribute termAtt = in.addAttribute(CharTermAttribute.class); // extra safety to enforce, that the state is not preserved and also // assign bogus values in.clearAttributes(); termAtt.setEmpty().append("bogusTerm"); in.reset(); while (in.incrementToken()) { if (out.length() > 0) out.append(' '); out.append(termAtt.toString()); in.clearAttributes(); termAtt.setEmpty().append("bogusTerm"); } in.close(); return out.toString(); }
From source file:org.elasticsearch.index.analysis.PreBuiltAnalyzerTests.java
License:Apache License
@Test public void testThatDefaultAndStandardAnalyzerChangedIn10Beta1() throws IOException { Analyzer currentStandardAnalyzer = PreBuiltAnalyzers.STANDARD.getAnalyzer(Version.V_1_0_0_Beta1); Analyzer currentDefaultAnalyzer = PreBuiltAnalyzers.DEFAULT.getAnalyzer(Version.V_1_0_0_Beta1); // special case, these two are the same instance assertThat(currentDefaultAnalyzer, is(currentStandardAnalyzer)); PreBuiltAnalyzers.DEFAULT.getAnalyzer(Version.V_1_0_0_Beta1); final int n = atLeast(10); Version version = Version.CURRENT;//from w w w . ja va2 s . c om for (int i = 0; i < n; i++) { if (version.equals(Version.V_1_0_0_Beta1)) { assertThat(currentDefaultAnalyzer, is(PreBuiltAnalyzers.DEFAULT.getAnalyzer(version))); } else { assertThat(currentDefaultAnalyzer, not(is(PreBuiltAnalyzers.DEFAULT.getAnalyzer(version)))); } Analyzer analyzer = PreBuiltAnalyzers.DEFAULT.getAnalyzer(version); TokenStream ts = analyzer.tokenStream("foo", "This is it Dude"); ts.reset(); CharTermAttribute charTermAttribute = ts.addAttribute(CharTermAttribute.class); List<String> list = new ArrayList<String>(); while (ts.incrementToken()) { list.add(charTermAttribute.toString()); } if (version.onOrAfter(Version.V_1_0_0_Beta1)) { assertThat(list.size(), is(4)); assertThat(list, contains("this", "is", "it", "dude")); } else { assertThat(list.size(), is(1)); assertThat(list, contains("dude")); } ts.close(); version = randomVersion(); } }
From source file:org.elasticsearch.index.analysis.PreBuiltAnalyzerTests.java
License:Apache License
/**
 * Verifies that the PATTERN and STANDARD_HTML_STRIP pre-built analyzers changed in
 * 1.0.0.RC1: the RC1 instances are cached/shared, while other versions get distinct
 * instances. Also checks the tokens produced for a sample phrase across versions.
 */
@Test
public void testAnalyzerChangedIn10RC1() throws IOException {
    Analyzer pattern = PreBuiltAnalyzers.PATTERN.getAnalyzer(Version.V_1_0_0_RC1);
    Analyzer standardHtml = PreBuiltAnalyzers.STANDARD_HTML_STRIP.getAnalyzer(Version.V_1_0_0_RC1);

    final int n = atLeast(10);
    Version version = Version.CURRENT;
    for (int i = 0; i < n; i++) {
        if (version.equals(Version.V_1_0_0_RC1)) {
            assertThat(pattern, is(PreBuiltAnalyzers.PATTERN.getAnalyzer(version)));
            assertThat(standardHtml, is(PreBuiltAnalyzers.STANDARD_HTML_STRIP.getAnalyzer(version)));
        } else {
            // BUGFIX: the original compared against PreBuiltAnalyzers.DEFAULT here,
            // which made these assertions near-vacuous (a PATTERN analyzer is
            // trivially not a DEFAULT analyzer). Compare against the same kinds.
            assertThat(pattern, not(is(PreBuiltAnalyzers.PATTERN.getAnalyzer(version))));
            assertThat(standardHtml, not(is(PreBuiltAnalyzers.STANDARD_HTML_STRIP.getAnalyzer(version))));
        }

        Analyzer analyzer = randomBoolean() ? PreBuiltAnalyzers.PATTERN.getAnalyzer(version)
                : PreBuiltAnalyzers.STANDARD_HTML_STRIP.getAnalyzer(version);
        TokenStream ts = analyzer.tokenStream("foo", "This is it Dude");
        List<String> list = new ArrayList<String>();
        try {
            ts.reset();
            CharTermAttribute charTermAttribute = ts.addAttribute(CharTermAttribute.class);
            while (ts.incrementToken()) {
                list.add(charTermAttribute.toString());
            }
        } finally {
            // Previously close() was skipped when an assertion or IOException
            // aborted consumption, leaking the stream; always close it.
            ts.close();
        }

        if (version.onOrAfter(Version.V_1_0_0_RC1)) {
            assertThat(list.toString(), list.size(), is(4));
            assertThat(list, contains("this", "is", "it", "dude"));
        } else {
            assertThat(list.size(), is(1));
            assertThat(list, contains("dude"));
        }
        version = randomVersion();
    }
}
From source file:org.elasticsearch.index.analysis.RSLPTokenFilterTests.java
License:Apache License
/**
 * Feeds each word from {@code buildWordList()} through the "br_rslp" token filter
 * (built on a KeywordTokenizer, so one token per word) and asserts the filter emits
 * exactly the expected stemmed form.
 */
@Test
public void testRSLPRules() throws Exception {
    Index index = new Index("test");
    Settings settings = Settings.settingsBuilder()
            .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
            .put("path.home", createTempDir())
            .put("index.analysis.filter.myStemmer.type", "br_rslp")
            .build();
    AnalysisService analysisService = createAnalysisService(index, settings);

    TokenFilterFactory filterFactory = analysisService.tokenFilter("myStemmer");

    Tokenizer tokenizer = new KeywordTokenizer();

    Map<String, String> words = buildWordList();
    Set<String> inputWords = words.keySet();
    for (String word : inputWords) {
        tokenizer.setReader(new StringReader(word));
        TokenStream ts = filterFactory.create(tokenizer);
        try {
            CharTermAttribute term1 = ts.addAttribute(CharTermAttribute.class);
            ts.reset();
            assertThat(ts.incrementToken(), equalTo(true));
            assertThat(term1.toString(), equalTo(words.get(word)));
        } finally {
            // Previously close() was skipped when an assertion failed,
            // leaking the stream; always close it.
            ts.close();
        }
    }
}
From source file:org.elasticsearch.index.analysis.RSLPTokenFilterTests.java
License:Apache License
/**
 * Feeds each phrase from {@code buildPhraseList()} through a custom analyzer
 * (standard tokenizer + "br_rslp" filter) and asserts the stream emits the
 * expected stemmed words in order.
 */
@Test
public void testRSLPPhrases() throws Exception {
    Index index = new Index("test");
    Settings settings = Settings.settingsBuilder()
            .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
            .put("path.home", createTempDir())
            .put("index.analysis.analyzer.myAnalyzer.type", "custom")
            .put("index.analysis.analyzer.myAnalyzer.tokenizer", "standard")
            .put("index.analysis.analyzer.myAnalyzer.filter", "br_rslp")
            .build();
    AnalysisService analysisService = createAnalysisService(index, settings);

    Analyzer analyzer = analysisService.analyzer("myAnalyzer");

    Map<String, List<String>> phrases = buildPhraseList();

    for (String phrase : phrases.keySet()) {
        List<String> outputWords = phrases.get(phrase);

        TokenStream ts = analyzer.tokenStream("test", phrase);
        try {
            CharTermAttribute term1 = ts.addAttribute(CharTermAttribute.class);
            ts.reset();
            for (String expected : outputWords) {
                assertThat(ts.incrementToken(), equalTo(true));
                assertThat(term1.toString(), equalTo(expected));
            }
        } finally {
            // Previously close() was skipped when an assertion failed,
            // leaking the stream; always close it.
            ts.close();
        }
    }
}
From source file:org.elasticsearch.index.analysis.SimpleIcuCollationTokenFilterTests.java
License:Apache License
private void assertCollation(TokenStream stream1, TokenStream stream2, int comparison) throws IOException { CharTermAttribute term1 = stream1.addAttribute(CharTermAttribute.class); CharTermAttribute term2 = stream2.addAttribute(CharTermAttribute.class); stream1.reset();//from w w w. j av a2s .co m stream2.reset(); assertThat(stream1.incrementToken(), equalTo(true)); assertThat(stream2.incrementToken(), equalTo(true)); assertThat(Integer.signum(term1.toString().compareTo(term2.toString())), equalTo(Integer.signum(comparison))); assertThat(stream1.incrementToken(), equalTo(false)); assertThat(stream2.incrementToken(), equalTo(false)); stream1.end(); stream2.end(); stream1.close(); stream2.close(); }
From source file:org.elasticsearch.index.mapper.core.TokenCountFieldMapper.java
License:Apache License
/**
 * Count position increments in a token stream. Package private for testing.
 *
 * @param tokenStream token stream to count; always closed before returning
 * @return number of position increments in a token stream
 * @throws IOException if tokenStream throws it
 */
static int countPositions(TokenStream tokenStream) throws IOException {
    try {
        PositionIncrementAttribute position = tokenStream.addAttribute(PositionIncrementAttribute.class);
        int total = 0;
        tokenStream.reset();
        while (tokenStream.incrementToken()) {
            total += position.getPositionIncrement();
        }
        // end() sets the final position increment (e.g. trailing holes),
        // which must be folded into the count as well.
        tokenStream.end();
        total += position.getPositionIncrement();
        return total;
    } finally {
        tokenStream.close();
    }
}
From source file:org.elasticsearch.index.mapper.hashsplitter.HashSplitterFieldMapper.java
License:Apache License
@Override public Query fieldQuery(String value, @Nullable QueryParseContext context) { // Use HashSplitterSearch* analysis and post-process it to create the real query TokenStream tok = null; try {//from w w w. j av a 2s. c o m tok = indexAnalyzer.reusableTokenStream(names().indexNameClean(), new FastStringReader(value)); tok.reset(); } catch (IOException e) { return null; } CharTermAttribute termAtt = tok.getAttribute(CharTermAttribute.class); BooleanQuery q = new BooleanQuery(); try { while (tok.incrementToken()) { Term term = names().createIndexNameTerm(termAtt.toString()); q.add(new TermQuery(term), BooleanClause.Occur.MUST); } tok.end(); tok.close(); } catch (IOException e) { e.printStackTrace(); q = null; } return q; }
From source file:org.elasticsearch.index.mapper.hashsplitter.HashSplitterFieldMapper.java
License:Apache License
@Override public Filter fieldFilter(String value, @Nullable QueryParseContext context) { // Use HashSplitterSearch* analysis and post-process it to create the real query TokenStream tok = null; try {//from w w w . ja v a 2 s . c om tok = indexAnalyzer.reusableTokenStream(names().indexNameClean(), new FastStringReader(value)); tok.reset(); } catch (IOException e) { return null; } CharTermAttribute termAtt = tok.getAttribute(CharTermAttribute.class); BooleanFilter f = new BooleanFilter(); try { while (tok.incrementToken()) { Term term = names().createIndexNameTerm(termAtt.toString()); f.add(new TermFilter(term), BooleanClause.Occur.MUST); } tok.end(); tok.close(); } catch (IOException e) { e.printStackTrace(); f = null; } return f; }
From source file:org.elasticsearch.index.mapper.hashsplitter.HashSplitterFieldMapper.java
License:Apache License
@Override public Query prefixQuery(String value, @Nullable MultiTermQuery.RewriteMethod method, @Nullable QueryParseContext context) { // Use HashSplitterSearch* analysis and post-process it to create the real query TokenStream tok = null; try {/*from w ww. j a v a2 s. c o m*/ tok = indexAnalyzer.reusableTokenStream(names().indexNameClean(), new FastStringReader(value)); tok.reset(); } catch (IOException e) { return null; } CharTermAttribute termAtt = tok.getAttribute(CharTermAttribute.class); BooleanQuery q = new BooleanQuery(); try { int remainingSize = sizeIsVariable ? 0 : sizeValue; // note: prefixes are not included while (tok.incrementToken()) { Term term = names().createIndexNameTerm(termAtt.toString()); if (termAtt.length() < 1 + chunkLength) { if (remainingSize > 0) { // implies size is fixed if (remainingSize < chunkLength) q.add(new PrefixLengthQuery(term, 1 + remainingSize, 1 + remainingSize), BooleanClause.Occur.MUST); else q.add(new PrefixLengthQuery(term, 1 + chunkLength, 1 + chunkLength), BooleanClause.Occur.MUST); } else { // varying size: only limit to the chunkLength q.add(new PrefixLengthQuery(term, 0, 1 + chunkLength), BooleanClause.Occur.MUST); } } else { q.add(new TermQuery(term), BooleanClause.Occur.MUST); } remainingSize -= termAtt.length() - 1; // termAtt contains the prefix, remainingSize doesn't take it into account } tok.end(); tok.close(); } catch (IOException e) { e.printStackTrace(); q = null; } return q; }